* [PATCH 1 of 6] dm-userspace xen kernel patch
[not found] <patchbomb.1156540578@venkman-64>
@ 2006-08-25 21:23 ` Ryan Grimm
2006-08-25 21:23 ` [PATCH 2 of 6] dm-userspace userspace tool base patch Ryan Grimm
` (4 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Ryan Grimm @ 2006-08-25 21:23 UTC (permalink / raw)
To: Xen Devel; +Cc: Dan Smith
Signed-off-by: Ryan Grimm <grimm@us.ibm.com>
Signed-off-by: Dan Smith <danms@us.ibm.com>
# HG changeset patch
# User Ryan Grimm <grimm@us.ibm.com>
# Date 1156521490 18000
# Node ID 2cb702dcea0e44dcfb9c243943d3e523245ad495
# Parent d5eb5205ff3588cc151fb15d6201c26c67e87506
dm-userspace xen kernel patch
diff -r d5eb5205ff35 -r 2cb702dcea0e buildconfigs/linux-defconfig_xen0_ia64
--- a/buildconfigs/linux-defconfig_xen0_ia64 Thu Aug 24 16:25:49 2006 +0100
+++ b/buildconfigs/linux-defconfig_xen0_ia64 Fri Aug 25 10:58:10 2006 -0500
@@ -485,6 +485,7 @@ CONFIG_MD=y
CONFIG_MD=y
# CONFIG_BLK_DEV_MD is not set
# CONFIG_BLK_DEV_DM is not set
+CONFIG_DM_USERSPACE=n
#
# Fusion MPT device support
diff -r d5eb5205ff35 -r 2cb702dcea0e buildconfigs/linux-defconfig_xen0_x86_32
--- a/buildconfigs/linux-defconfig_xen0_x86_32 Thu Aug 24 16:25:49 2006 +0100
+++ b/buildconfigs/linux-defconfig_xen0_x86_32 Fri Aug 25 10:58:10 2006 -0500
@@ -600,6 +600,7 @@ CONFIG_DM_MIRROR=y
CONFIG_DM_MIRROR=y
# CONFIG_DM_ZERO is not set
# CONFIG_DM_MULTIPATH is not set
+CONFIG_DM_USERSPACE=m
#
# Fusion MPT device support
diff -r d5eb5205ff35 -r 2cb702dcea0e buildconfigs/linux-defconfig_xen0_x86_64
--- a/buildconfigs/linux-defconfig_xen0_x86_64 Thu Aug 24 16:25:49 2006 +0100
+++ b/buildconfigs/linux-defconfig_xen0_x86_64 Fri Aug 25 10:58:10 2006 -0500
@@ -552,6 +552,7 @@ CONFIG_DM_MIRROR=y
# CONFIG_DM_ZERO is not set
CONFIG_DM_MULTIPATH=y
CONFIG_DM_MULTIPATH_EMC=y
+CONFIG_DM_USERSPACE=m
#
# Fusion MPT device support
diff -r d5eb5205ff35 -r 2cb702dcea0e buildconfigs/linux-defconfig_xen_x86_32
--- a/buildconfigs/linux-defconfig_xen_x86_32 Thu Aug 24 16:25:49 2006 +0100
+++ b/buildconfigs/linux-defconfig_xen_x86_32 Fri Aug 25 10:58:10 2006 -0500
@@ -1187,6 +1187,7 @@ CONFIG_DM_ZERO=m
CONFIG_DM_ZERO=m
CONFIG_DM_MULTIPATH=m
CONFIG_DM_MULTIPATH_EMC=m
+CONFIG_DM_USERSPACE=m
#
# Fusion MPT device support
diff -r d5eb5205ff35 -r 2cb702dcea0e buildconfigs/linux-defconfig_xen_x86_64
--- a/buildconfigs/linux-defconfig_xen_x86_64 Thu Aug 24 16:25:49 2006 +0100
+++ b/buildconfigs/linux-defconfig_xen_x86_64 Fri Aug 25 10:58:10 2006 -0500
@@ -1130,6 +1130,7 @@ CONFIG_DM_ZERO=m
CONFIG_DM_ZERO=m
CONFIG_DM_MULTIPATH=m
CONFIG_DM_MULTIPATH_EMC=m
+CONFIG_DM_USERSPACE=m
#
# Fusion MPT device support
diff -r d5eb5205ff35 -r 2cb702dcea0e patches/linux-2.6.16.13/series
--- a/patches/linux-2.6.16.13/series Thu Aug 24 16:25:49 2006 +0100
+++ b/patches/linux-2.6.16.13/series Fri Aug 25 10:58:10 2006 -0500
@@ -1,5 +1,6 @@ blktap-aio-16_03_06.patch
blktap-aio-16_03_06.patch
device_bind.patch
+dmu.patch
fix-hz-suspend.patch
fix-ide-cd-pio-mode.patch
i386-mach-io-check-nmi.patch
diff -r d5eb5205ff35 -r 2cb702dcea0e patches/linux-2.6.16.13/dmu.patch
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/patches/linux-2.6.16.13/dmu.patch Fri Aug 25 10:58:10 2006 -0500
@@ -0,0 +1,2591 @@
+diff -purN ../pristine-linux-2.6.16.13/drivers/md/dm-user.h ./drivers/md/dm-user.h
+--- ../pristine-linux-2.6.16.13/drivers/md/dm-user.h 1969-12-31 18:00:00.000000000 -0600
++++ ./drivers/md/dm-user.h 2006-08-16 18:48:18.000000000 -0500
+@@ -0,0 +1,209 @@
++/*
++ * Copyright (C) International Business Machines Corp., 2006
++ * Author: Dan Smith <danms@us.ibm.com>
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; under version 2 of the License.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
++ *
++ */
++
++#ifndef __DM_USER_H
++#define __DM_USER_H
++
++#include <linux/hardirq.h>
++
++#define DMU_KEY_LEN 256
++
++extern struct target_type userspace_target;
++extern mempool_t *request_pool;
++extern dev_t dmu_dev;
++extern spinlock_t devices_lock;
++extern struct list_head devices;
++
++/*
++ * A hash table of remaps
++ */
++struct hash_table {
++ struct list_head *table; /* Array of lists (buckets) */
++ uint64_t size; /* Number of buckets */
++ uint32_t mask; /* Mask used to determine bucket */
++ uint64_t count; /* Number of remaps in entire table */
++};
++
++/*
++ * A block device that we can send bios to
++ */
++struct target_device {
++ struct list_head list; /* Our place in the targets list */
++ struct block_device *bdev; /* The target block_device */
++ struct kref users; /* Self-destructing reference count */
++};
++
++/*
++ * A dm-userspace device, which consists of multiple targets sharing a
++ * common key
++ */
++struct dmu_device {
++ struct list_head list; /* Our place in the devices list */
++
++ spinlock_t lock; /* Protects all the fields below */
++
++ struct list_head requests; /* List of pending requests */
++ struct list_head target_devs; /* List of devices we can target */
++ struct hash_table remaps; /* Hash table of all our maps */
++
++ void *transport_private; /* Private data for userspace comms */
++
++ char key[DMU_KEY_LEN]; /* Unique name string for device */
++ struct kref users; /* Self-destructing reference count */
++
++ wait_queue_head_t wqueue; /* To block while waiting for reqs */
++
++ uint64_t block_size; /* Block size for this device */
++ uint64_t block_mask; /* Mask for offset in block */
++ unsigned int block_shift; /* Shift to convert to/from block */
++
++ struct kcopyd_client *kcopy; /* Interface to kcopyd */
++
++ uint32_t id_counter; /* Used to generate request IDs */
++};
++
++struct userspace_request {
++ struct list_head list; /* Our place on the request queue */
++
++ spinlock_t lock; /* Protects all the fields below */
++
++ struct dmu_device *dev; /* The DMU device that owns us */
++
++ int type; /* Type of request */
++ int sent; /* Non-zero if we've been sent */
++ uint32_t flags; /* Attribute flags */
++ uint32_t id; /* Unique ID for sync with userspace */
++ union {
++ uint64_t block; /* The block in question */
++ } u;
++ atomic_t refcnt; /* Reference count */
++
++ struct dmu_map *remap; /* The remap we represent */
++};
++
++struct dmu_map {
++ struct list_head list; /* Our place in a remap bucket chain */
++ struct list_head mru_list; /* Our place on the MRU list */
++
++ spinlock_t lock; /* Protects all the fields below */
++
++ uint64_t org_block; /* Original block */
++ uint64_t new_block; /* Destination block */
++ int64_t offset; /* Sectors to offset remapped block */
++ uint32_t flags; /* Attribute flags */
++ uint32_t id; /* Unique ID for sync with userspace */
++
++ struct target_device *src; /* Source blkdev for COPY_FIRST */
++ struct target_device *dest; /* Where the remapped block is */
++
++ struct bio_list bios; /* Bios queued for remapping */
++ struct bio_list bios_waiting; /* Bios waiting for endio sync */
++
++ struct dmu_device *dev; /* The DMU device that owns us */
++ struct dmu_map *next; /* Next remap that depends on us */
++
++ struct work_struct endio_task;/* Work to be done on bio endios */
++};
++
++/* Look up the target device matching @devno (no reference is taken) */
++struct target_device *find_target(struct dmu_device *dev,
++ dev_t devno);
++
++/* Object allocation, destruction, and initialization routines */
++void init_remap(struct dmu_device *dev, struct dmu_map *remap);
++void init_request(struct dmu_device *dev,
++ int type,
++ struct userspace_request *req);
++void free_remap(struct dmu_map *remap);
++void __free_remap(struct dmu_map *remap);
++struct dmu_map *alloc_remap_atomic(struct dmu_device *dev);
++
++/* Hash table manipulation */
++struct dmu_map *ht_find_map(struct hash_table *ht, uint64_t block);
++void ht_insert_map(struct hash_table *ht, struct dmu_map *map);
++struct dmu_map *ht_find_map_dev(struct dmu_device *dev, uint64_t block);
++void ht_delete_map(struct hash_table *ht, struct dmu_map *map);
++
++/* Character device transport functions */
++int register_chardev_transport(struct dmu_device *dev);
++void unregister_chardev_transport(struct dmu_device *dev);
++int init_chardev_transport(void);
++void cleanup_chardev_transport(void);
++void write_chardev_transport_info(struct dmu_device *dev,
++ char *buf, unsigned int maxlen);
++
++/* Return the block number for @sector */
++static inline u64 dmu_block(struct dmu_device *dev,
++ sector_t sector)
++{
++ return sector >> dev->block_shift;
++}
++
++/* Return the sector offset in a block for @sector */
++static inline u64 dmu_sector_offset(struct dmu_device *dev,
++ sector_t sector)
++{
++ return sector & dev->block_mask;
++}
++
++/* Return the starting sector for @block */
++static inline u64 dmu_sector(struct dmu_device *dev,
++ uint64_t block)
++{
++ return block << dev->block_shift;
++}
++
++/* Queue @req on @dev's request list, then wake waiters on dev->wqueue */
++static inline void add_request(struct dmu_device *dev,
++			       struct userspace_request *req)
++{
++	spin_lock(&dev->lock);
++	list_add_tail(&req->list, &dev->requests);
++	spin_unlock(&dev->lock);
++
++	wake_up(&dev->wqueue);	/* issued after dev->lock is dropped */
++}
++
++/* Redirect @bio to the destination device and sector described by @remap */
++static inline void __bio_remap(struct bio *bio,
++			       struct dmu_map *remap)
++{
++	BUG_ON(remap->dest == NULL);
++
++	bio->bi_sector = dmu_sector(remap->dev, remap->new_block) +
++		dmu_sector_offset(remap->dev, bio->bi_sector) +
++		remap->offset;
++
++	bio->bi_bdev = remap->dest->bdev;
++}
++
++/* Increase the usage count for @dev */
++static inline void get_dev(struct dmu_device *dev)
++{
++ kref_get(&dev->users);
++}
++
++/* Decrease the usage count for @dev */
++void destroy_dmu_device(struct kref *ref);
++static inline void put_dev(struct dmu_device *dev)
++{
++ kref_put(&dev->users, destroy_dmu_device);
++}
++
++#endif
+diff -purN ../pristine-linux-2.6.16.13/drivers/md/dm-userspace.c ./drivers/md/dm-userspace.c
+--- ../pristine-linux-2.6.16.13/drivers/md/dm-userspace.c 1969-12-31 18:00:00.000000000 -0600
++++ ./drivers/md/dm-userspace.c 2006-08-16 18:48:18.000000000 -0500
+@@ -0,0 +1,1132 @@
++/*
++ * Copyright (C) International Business Machines Corp., 2006
++ * Author: Dan Smith <danms@us.ibm.com>
++ * Author: Ryan Grimm <grimm@us.ibm.com>
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; under version 2 of the License.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
++ *
++ */
++
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/blkdev.h>
++#include <linux/bio.h>
++#include <linux/slab.h>
++#include <linux/spinlock.h>
++#include <linux/list.h>
++#include <linux/fs.h>
++#include <linux/cdev.h>
++#include <linux/types.h>
++#include <linux/poll.h>
++
++#include <linux/dm-userspace.h>
++
++#include "dm.h"
++#include "dm-bio-list.h"
++#include "kcopyd.h"
++#include "dm-user.h"
++
++#define DMU_COPY_PAGES 256
++#define DMU_REMAP_RESERVE 128
++
++#define DM_MSG_PREFIX "dm-userspace"
++
++static kmem_cache_t *request_cache;
++static kmem_cache_t *remap_cache;
++
++mempool_t *request_pool;
++
++static int enable_watchdog = 0;
++static struct work_struct wd;
++
++spinlock_t devices_lock;
++LIST_HEAD(devices);
++
++static spinlock_t mru_list_lock;
++static LIST_HEAD(mru_list);
++
++/* Device number for the control device */
++dev_t dmu_dev;
++
++static int error_bios(struct bio_list *bios)
++{
++ struct bio *bio;
++ int count = 0;
++
++ while ((bio = bio_list_pop(bios)) != NULL) {
++ bio_io_error(bio, bio->bi_size);
++ count++;
++ }
++
++ if (count)
++ DMERR("*** Failed %i requests", count);
++
++ return count;
++}
++
++static void remap_hit(struct dmu_map *remap)
++{
++ spin_lock(&mru_list_lock);
++
++ list_del_init(&remap->mru_list);
++ list_add(&remap->mru_list, &mru_list);
++
++ spin_unlock(&mru_list_lock);
++}
++
++struct dmu_map *alloc_remap_atomic(struct dmu_device *dev)
++{
++	struct dmu_map *remap;
++	struct dmu_device *owner;
++	unsigned long flags;
++
++	/* Try to allocate one from the cache */
++	remap = kmem_cache_alloc(remap_cache, GFP_NOIO);
++	if (remap) {
++		INIT_LIST_HEAD(&remap->mru_list);
++
++		spin_lock(&mru_list_lock);
++		list_add_tail(&remap->mru_list, &mru_list);
++		spin_unlock(&mru_list_lock);
++
++		return remap;
++	}
++
++	/*
++	 * Unable to alloc one, so take the LRU item from the tail of the
++	 * list.  list_entry() never yields NULL, so an empty list has to
++	 * be detected explicitly; remap is still NULL at this point.
++	 */
++	spin_lock(&mru_list_lock);
++	if (!list_empty(&mru_list))
++		remap = list_entry(mru_list.prev, struct dmu_map, mru_list);
++	spin_unlock(&mru_list_lock);
++
++	if (!remap) {
++		DMERR("Failed to alloc or steal a remap!");
++		return NULL;
++	}
++	DMINFO("Memory is low. Stealing the LRU remap...");
++
++	/* The victim may belong to a different device than @dev */
++	owner = remap->dev;
++
++	spin_lock(&owner->lock);
++	spin_lock_irqsave(&remap->lock, flags);
++	if (dmu_get_flag(&remap->flags, DMU_FLAG_INUSE)) {
++		if (!dmu_get_flag(&remap->flags, DMU_FLAG_VALID)) {
++			/* In use but not valid: leave it alone and fail */
++			spin_unlock_irqrestore(&remap->lock, flags);
++			spin_unlock(&owner->lock);
++			printk(KERN_EMERG
++			       "dm-userspace: Unable to allocate "
++			       "or steal a remap!\n");
++			return NULL;
++		}
++		/* Remove it from whatever device owns it */
++		if (!list_empty(&remap->list))
++			list_del_init(&remap->list);
++		dmu_clr_flag(&remap->flags, DMU_FLAG_INUSE);
++	}
++	spin_unlock_irqrestore(&remap->lock, flags);
++	spin_unlock(&owner->lock);
++
++	remap_hit(remap);
++	return remap;
++}
++
++void free_remap(struct dmu_map *remap)
++{
++ unsigned long flags;
++
++ if (error_bios(&remap->bios)) {
++ DMERR("Freed a map with in-flight data!");
++ BUG();
++ }
++
++ spin_lock_irqsave(&remap->lock, flags);
++ dmu_clr_flag(&remap->flags, DMU_FLAG_INUSE);
++ spin_unlock_irqrestore(&remap->lock, flags);
++
++ spin_lock(&remap->dev->lock);
++ list_del(&remap->list);
++ spin_unlock(&remap->dev->lock);
++
++ spin_lock(&mru_list_lock);
++ list_del_init(&remap->mru_list);
++ list_add_tail(&remap->mru_list, &mru_list);
++ spin_unlock(&mru_list_lock);
++}
++
++void __free_remap(struct dmu_map *remap)
++{
++ if (error_bios(&remap->bios)) {
++ DMERR("Freed a map with in-flight data!");
++ BUG();
++ }
++
++ dmu_clr_flag(&remap->flags, DMU_FLAG_INUSE);
++
++ list_del(&remap->list);
++
++ spin_lock(&mru_list_lock);
++ list_del_init(&remap->mru_list);
++ list_add_tail(&remap->mru_list, &mru_list);
++ spin_unlock(&mru_list_lock);
++}
++
++static struct userspace_request *make_sync_req(struct dmu_device *dev,
++ struct dmu_map *remap)
++{
++ struct userspace_request *req;
++ unsigned long flags;
++
++ req = mempool_alloc(request_pool, GFP_NOIO);
++ if (!req) {
++ DMERR("Failed to allocate copy response");
++ return NULL;
++ }
++ init_request(dev, DM_USERSPACE_SYNC_COMPLETE, req);
++
++ spin_lock_irqsave(&remap->lock, flags);
++ req->id = remap->id;
++ spin_unlock_irqrestore(&remap->lock, flags);
++
++ req->remap = remap;
++
++ return req;
++}
++
++static void endio_worker(void *data)
++{
++ struct dmu_map *remap = data;
++ struct userspace_request *req = NULL;
++
++ req = make_sync_req(remap->dev, remap);
++
++ if (req)
++ add_request(req->dev, req);
++}
++
++void init_remap(struct dmu_device *dev, struct dmu_map *remap)
++{
++ spin_lock_init(&remap->lock);
++ remap->org_block = remap->new_block = 0;
++ remap->offset = 0;
++ remap->flags = 0;
++ dmu_set_flag(&remap->flags, DMU_FLAG_INUSE);
++ remap->src = remap->dest = NULL;
++ bio_list_init(&remap->bios);
++ bio_list_init(&remap->bios_waiting);
++ INIT_LIST_HEAD(&remap->list);
++ remap->dev = dev;
++ remap->next = NULL;
++
++ INIT_WORK(&remap->endio_task, endio_worker, remap);
++}
++
++void init_request(struct dmu_device *dev,
++ int type,
++ struct userspace_request *req)
++{
++ spin_lock_init(&req->lock);
++ INIT_LIST_HEAD(&req->list);
++ req->dev = dev;
++ req->type = type;
++ req->sent = 0;
++ req->flags = 0;
++ if (type == DM_USERSPACE_SYNC_COMPLETE) {
++ req->u.block = 0;
++ req->id = 0;
++ } else {
++ spin_lock(&dev->lock);
++ dev->id_counter++;
++ if (dev->id_counter == 0)
++ dev->id_counter = 1;
++ req->id = dev->id_counter;
++ spin_unlock(&dev->lock);
++ }
++ atomic_set(&req->refcnt, 0);
++}
++
++/*
++ * For an even block distribution, this is not too bad, but it could
++ * probably be better
++ */
++static uint32_t ht_hash(struct hash_table *ht, uint64_t block)
++{
++ return (uint32_t)block & ht->mask;
++}
++
++static int ht_init(struct hash_table *ht, unsigned long size)
++{
++ uint64_t i;
++ unsigned long pages;
++ unsigned int order = ffs((size * sizeof(struct list_head *)) /
++ PAGE_SIZE);
++
++ if (order > 9)
++ return 0;
++
++ pages = __get_free_pages(GFP_ATOMIC, order);
++ if (!pages)
++ return 0;
++
++ ht->table = (void *)pages;
++ ht->size = size;
++ ht->count = 0;
++ ht->mask = size - 1;
++
++ for (i = 0; i < size; i++)
++ INIT_LIST_HEAD(&ht->table[i]);
++
++ return 1;
++}
++
++static void ht_insert_bucket(struct dmu_map *map, struct list_head *list)
++{
++ list_add_tail(&map->list, list);
++}
++
++/*
++ * Non-zero once the remap count exceeds 2 * (size / 4) buckets
++ */
++static int ht_should_grow(struct hash_table *ht)
++{
++ return ht->count > (2 * (ht->size / 4));
++}
++
++static void ht_grow_table(struct hash_table *ht);
++void ht_insert_map(struct hash_table *ht, struct dmu_map *map)
++{
++ uint32_t addr;
++
++ addr = ht_hash(ht, map->org_block) & ht->mask;
++
++ BUG_ON(addr >= ht->size);
++
++ ht_insert_bucket(map, &ht->table[addr]);
++ ht->count++;
++
++ if (ht_should_grow(ht))
++ ht_grow_table(ht);
++}
++
++void ht_delete_map(struct hash_table *ht, struct dmu_map *map)
++{
++ list_del_init(&map->list);
++ BUG_ON(ht->count == 0);
++ ht->count--;
++}
++
++struct dmu_map *ht_find_map(struct hash_table *ht, uint64_t block)
++{
++ uint32_t addr;
++ struct dmu_map *m;
++
++ addr = ht_hash(ht, block) & ht->mask;
++
++ BUG_ON(addr >= ht->size);
++
++ list_for_each_entry(m, &ht->table[addr], list) {
++ if (m->org_block == block) {
++ remap_hit(m);
++ return m;
++ }
++ }
++
++ return NULL;
++}
++
++struct dmu_map *ht_find_map_dev(struct dmu_device *dev, uint64_t block)
++{
++ struct dmu_map *remap;
++
++ spin_lock(&dev->lock);
++
++ remap = ht_find_map(&dev->remaps, block);
++
++ spin_unlock(&dev->lock);
++
++ return remap;
++}
++
++static void ht_grow_table(struct hash_table *ht)
++{
++ struct hash_table old_table;
++ uint64_t i;
++
++ old_table = *ht;
++
++ if (!ht_init(ht, old_table.size * 2))
++ return;
++
++ for (i = 0; i < old_table.size; i++ ) {
++ struct dmu_map *m, *n;
++ list_for_each_entry_safe(m, n, &old_table.table[i],
++ list) {
++ list_del_init(&m->list);
++ ht_insert_map(ht, m);
++ }
++ }
++
++ free_pages((unsigned long)old_table.table,
++ ffs((old_table.size * sizeof(struct list_head *))
++ / PAGE_SIZE));
++}
++
++static uint64_t ht_destroy_table(struct hash_table *ht)
++{
++ uint64_t i, count = 0;
++ struct dmu_map *m, *n;
++
++ for (i = 0; i < ht->size; i++) {
++ list_for_each_entry_safe(m, n, &ht->table[i], list) {
++ ht_delete_map(ht, m);
++ free_remap(m);
++ count++;
++ }
++ }
++
++ free_pages((unsigned long)ht->table,
++ ffs((ht->size * sizeof(struct list_head *))
++ / PAGE_SIZE));
++
++ return count;
++}
++
++struct target_device *find_target(struct dmu_device *dev,
++ dev_t devno)
++{
++ struct target_device *target, *match = NULL;
++
++ spin_lock(&dev->lock);
++ list_for_each_entry(target, &dev->target_devs, list) {
++ if (target->bdev->bd_dev == devno) {
++ match = target;
++ break;
++ }
++ }
++ spin_unlock(&dev->lock);
++
++ return match;
++}
++
++/* Return the target for @devno, opening and registering it on first use */
++static struct target_device *get_target(struct dmu_device *dev,
++					dev_t devno)
++{
++	struct target_device *target;
++	struct block_device *bdev;
++
++	target = find_target(dev, devno);
++	if (target)
++		return target;
++
++	bdev = open_by_devnum(devno, FMODE_READ | FMODE_WRITE);
++	if (IS_ERR(bdev)) {
++		DMERR("Unable to lookup device %x", devno);
++		return NULL;
++	}
++
++	target = kmalloc(sizeof(*target), GFP_KERNEL);
++	if (!target) {
++		DMERR("Unable to alloc new target device");
++		blkdev_put(bdev);	/* don't leak the device opened above */
++		return NULL;
++	}
++
++	target->bdev = bdev;
++	INIT_LIST_HEAD(&target->list);
++	spin_lock(&dev->lock);
++	list_add_tail(&target->list, &dev->target_devs);
++	spin_unlock(&dev->lock);
++
++	return target;
++}
++
++/* Caller must hold dev->lock */
++static void put_target(struct dmu_device *dev,
++ struct target_device *target)
++{
++ list_del(&target->list);
++
++ bd_release(target->bdev);
++ blkdev_put(target->bdev);
++
++ kfree(target);
++}
++
++/*
++ * This periodically dumps out some debug information. It's really
++ * only useful while developing.
++ */
++static void watchdog(void *data)
++{
++ unsigned int v_remaps, i_remaps, reqs, s_reqs, devs = 0;
++ struct dmu_device *dev;
++ struct dmu_map *map;
++ struct userspace_request *req;
++ uint64_t i;
++
++ spin_lock(&devices_lock);
++
++ list_for_each_entry(dev, &devices, list) {
++ spin_lock(&dev->lock);
++
++ v_remaps = i_remaps = reqs = s_reqs = 0;
++
++ for (i = 0; i < dev->remaps.size; i++) {
++ list_for_each_entry(map, &dev->remaps.table[i], list)
++ if (dmu_get_flag(&map->flags, DMU_FLAG_VALID))
++ v_remaps++;
++ else
++ i_remaps++;
++ }
++
++ list_for_each_entry(req, &dev->requests, list)
++ if (req->sent)
++ s_reqs++;
++ else
++ reqs++;
++
++ printk("Device "
++ " reqs: %u/%u "
++ " inv maps: %u "
++ " val maps: %u\n",
++ reqs, s_reqs, i_remaps, v_remaps);
++ devs++;
++
++ spin_unlock(&dev->lock);
++ }
++
++ spin_unlock(&devices_lock);
++
++ schedule_delayed_work(&wd, HZ);
++}
++
++void destroy_dmu_device(struct kref *ref)
++{
++ struct dmu_device *dev;
++ struct list_head *cursor, *next;
++ uint64_t remaps;
++
++ dev = container_of(ref, struct dmu_device, users);
++
++ spin_lock(&devices_lock);
++ list_del(&dev->list);
++ spin_unlock(&devices_lock);
++
++ list_for_each_safe(cursor, next, &dev->target_devs) {
++ struct target_device *target;
++
++ target = list_entry(cursor,
++ struct target_device,
++ list);
++
++ put_target(dev, target);
++ }
++
++ remaps = ht_destroy_table(&dev->remaps);
++
++ list_for_each_safe(cursor, next, &dev->requests) {
++ struct userspace_request *req;
++
++ req = list_entry(cursor,
++ struct userspace_request,
++ list);
++
++ list_del(&req->list);
++
++ mempool_free(req, request_pool);
++ }
++
++ kcopyd_client_destroy(dev->kcopy);
++ unregister_chardev_transport(dev);
++
++ kfree(dev);
++}
++
++/* Set up a freshly allocated device; returns 1 on success, 0 on failure */
++static int init_dmu_device(struct dmu_device *dev, u32 block_size)
++{
++	init_waitqueue_head(&dev->wqueue);
++	INIT_LIST_HEAD(&dev->list);
++	INIT_LIST_HEAD(&dev->requests);
++	INIT_LIST_HEAD(&dev->target_devs);
++	kref_init(&dev->users);
++	spin_lock_init(&dev->lock);
++
++	dev->id_counter = 1; /* reserve 0 for unsolicited maps */
++
++	if (!ht_init(&dev->remaps, 2048)) {
++		DMERR("Unable to allocate hash table");
++		return 0;
++	}
++
++	dev->block_size = block_size;
++	dev->block_mask = block_size - 1;
++	dev->block_shift = ffs(block_size) - 1;
++
++	if (kcopyd_client_create(DMU_COPY_PAGES, &dev->kcopy)) {
++		DMERR("Failed to initialize kcopyd client");
++		/* The table is still empty; this only releases its pages */
++		ht_destroy_table(&dev->remaps);
++		return 0;
++	}
++
++	return 1;
++}
++
++static struct dmu_device *new_dmu_device(char *key,
++					 struct dm_target *ti,
++					 u32 block_size)
++{
++	struct dmu_device *dev;
++
++	dev = kmalloc(sizeof(*dev), GFP_KERNEL);
++	if (dev == NULL) {
++		DMERR("Failed to allocate new userspace device");
++		return NULL;
++	}
++
++	if (!init_dmu_device(dev, block_size))
++		goto bad1;
++
++	snprintf(dev->key, DMU_KEY_LEN, "%s", key);
++
++	if (!register_chardev_transport(dev))
++		goto bad2;
++
++	spin_lock(&devices_lock);
++	list_add(&dev->list, &devices);
++	spin_unlock(&devices_lock);
++
++	return dev;
++
++ bad2:
++	/* put_dev() drops the last ref; destroy_dmu_device() kfree()s dev */
++	put_dev(dev);
++	dev = NULL;
++ bad1:
++	kfree(dev);
++	DMERR("Failed to create device");
++	return NULL;
++}
++
++static struct dmu_device *find_dmu_device(const char *key)
++{
++ struct dmu_device *dev;
++ struct dmu_device *match = NULL;
++
++ spin_lock(&devices_lock);
++
++ list_for_each_entry(dev, &devices, list) {
++ spin_lock(&dev->lock);
++ if (strncmp(dev->key, key, DMU_KEY_LEN) == 0) {
++ match = dev;
++ spin_unlock(&dev->lock);
++ break;
++ }
++ spin_unlock(&dev->lock);
++ }
++
++ spin_unlock(&devices_lock);
++
++ return match;
++}
++
++static int dmu_ctr(struct dm_target *ti, unsigned int argc, char **argv)
++{
++	uint64_t block_size;
++	struct dmu_device *dev;
++	char *device_key;
++	unsigned int target_idx = 2;
++
++	if (argc < 3) {
++		ti->error = "Invalid argument count";
++		return -EINVAL;
++	}
++
++	device_key = argv[0];
++
++	/* argv[1] is divided by 512; block_size is then applied to sectors */
++	block_size = simple_strtoul(argv[1], NULL, 10) / 512;
++
++	/* block_mask/block_shift only work for a non-zero power of two */
++	if (!block_size || (block_size & (block_size - 1))) {
++		ti->error = "Invalid block size";
++		return -EINVAL;
++	}
++
++	dev = find_dmu_device(device_key);
++	if (dev == NULL) {
++		dev = new_dmu_device(device_key, ti, block_size);
++		if (dev == NULL) {
++			ti->error = "Failed to create device";
++			return -EINVAL;
++		}
++	} else {
++		get_dev(dev);
++	}
++
++	spin_lock(&dev->lock);
++	if (dev->block_size != block_size) {
++		spin_unlock(&dev->lock);
++		ti->error = "Invalid block size";
++		goto out;
++	}
++	spin_unlock(&dev->lock);
++
++	/* Resolve target devices */
++	do {
++		int maj = 0, min = 0;
++
++		if (sscanf(argv[target_idx], "%i:%i", &maj, &min) != 2 ||
++		    !get_target(dev, MKDEV(maj, min))) {
++			DMERR("Failed to find target device %i:%i (%s)",
++			      maj, min, argv[target_idx]);
++			ti->error = "Failed to find target device";
++			goto out;
++		}
++	} while (++target_idx < argc);
++
++	ti->private = dev;
++	ti->split_io = block_size;
++
++	return 0;
++
++ out:
++	put_dev(dev);
++
++	return -EINVAL;
++}
++
++static void dmu_dtr(struct dm_target *ti)
++{
++ struct dmu_device *dev = (struct dmu_device *) ti->private;
++
++ put_dev(dev);
++}
++
++/* Search @dev for an outstanding request for remapping @block */
++static struct userspace_request *find_existing_req(struct dmu_device *dev,
++ uint64_t block)
++{
++ struct userspace_request *req;
++ struct userspace_request *match = NULL;
++
++ spin_lock(&dev->lock);
++
++ list_for_each_entry_reverse(req, &dev->requests, list) {
++ if ((req->type == DM_USERSPACE_MAP_BLOCK_REQ) &&
++ (req->remap->org_block == block)) {
++ match = req;
++ atomic_inc(&match->refcnt);
++ break;
++ }
++ }
++
++ spin_unlock(&dev->lock);
++
++ return match;
++}
++
++/* Queue a new map request for @bio; returns 0 if queued, -1 on failure */
++static int make_new_request(struct dmu_device *dev,
++			    struct bio *bio,
++			    void **ctxptr)
++{
++	struct userspace_request *req;
++
++	req = mempool_alloc(request_pool, GFP_NOIO);
++	if (req == NULL)
++		goto bad;
++
++	init_request(dev, DM_USERSPACE_MAP_BLOCK_REQ, req);
++
++	dmu_set_flag(&req->flags, DMU_FLAG_RD);
++	if (bio_rw(bio))
++		dmu_set_flag(&req->flags, DMU_FLAG_WR);
++	else
++		dmu_clr_flag(&req->flags, DMU_FLAG_WR);
++
++	req->remap = alloc_remap_atomic(dev);
++	if (!req->remap) {
++		DMERR("Failed to alloc remap!");
++		mempool_free(req, request_pool);	/* never queued */
++		goto bad;
++	}
++	init_remap(dev, req->remap);
++
++	bio_list_add(&req->remap->bios, bio);
++	req->remap->org_block = dmu_block(dev, bio->bi_sector);
++	*ctxptr = req->remap;
++	add_request(dev, req);
++
++	return 0;
++
++ bad:
++	DMERR("Failed to queue bio!");
++	return -1;
++}
++
++static int dmu_map_remap_case(struct dmu_device *dev,
++ struct dmu_map *remap,
++ struct bio *bio)
++{
++ int ret = 0;
++ int rw;
++ unsigned long flags;
++
++ spin_lock_irqsave(&remap->lock, flags);
++
++ /*
++ * We've got it locked, so make sure the info is still valid
++ * before we use it
++ */
++ if (!dmu_get_flag(&remap->flags, DMU_FLAG_INUSE)) {
++ ret = -1;
++ DMERR("Got an invalid remap from hashtable");
++ goto unlock;
++ } else if (remap->org_block != dmu_block(dev, bio->bi_sector)) {
++ ret = -1;
++ DMERR("Aiee, org block changed underneath us!");
++ goto unlock;
++ }
++
++ rw = dmu_get_flag(&remap->flags, DMU_FLAG_WR);
++
++
++ if (rw || (bio_rw(bio) == rw)) {
++ if (dmu_get_flag(&remap->flags, DMU_FLAG_VALID)) {
++ __bio_remap(bio, remap);
++ ret = 1;
++ } else {
++ bio_list_add(&remap->bios, bio);
++ }
++ } else {
++ ret = -1;
++// printk("Remap doesn't match perms: %llu (%c!=%c)\n",
++// remap->org_block,
++// rw ? 'W':'R',
++// bio_rw(bio) ? 'W':'R');
++ }
++
++ unlock:
++ spin_unlock_irqrestore(&remap->lock, flags);
++
++ return ret;
++}
++
++static int dmu_map_request_case(struct dmu_device *dev,
++ struct userspace_request *req,
++ struct bio *bio)
++{
++ int ret = 0;
++ int req_rw = dmu_get_flag(&req->flags, DMU_FLAG_WR);
++ unsigned long flags;
++
++ spin_lock(&req->lock);
++ spin_lock_irqsave(&req->remap->lock, flags);
++
++ if (!req_rw && bio_rw(bio) && !req->sent) {
++ /* Convert to R/W and Queue */
++ dmu_set_flag(&req->flags, DMU_FLAG_WR);
++ bio_list_add(&req->remap->bios, bio);
++ } else if (!req_rw && bio_rw(bio) && req->sent) {
++ /* Can't convert, must re-request */
++ ret = -1;
++ } else {
++ /* Queue */
++ bio_list_add(&req->remap->bios, bio);
++ }
++
++ spin_unlock_irqrestore(&req->remap->lock, flags);
++ spin_unlock(&req->lock);
++
++ return ret;
++}
++
++DECLARE_MUTEX(map_mutex);
++
++static int dmu_map(struct dm_target *ti, struct bio *bio,
++ union map_info *map_context)
++{
++ struct dmu_device *dev = (struct dmu_device *) ti->private;
++ struct dmu_map *remap;
++ struct userspace_request *req;
++ int ret = 0;
++ u64 block;
++
++ down(&map_mutex);
++
++ map_context->ptr = NULL;
++
++ block = dmu_block(dev, bio->bi_sector);
++
++ remap = ht_find_map_dev(dev, block);
++ if (remap) {
++ ret = dmu_map_remap_case(dev, remap, bio);
++ if (ret >= 0) {
++ map_context->ptr = remap;
++ goto done;
++ }
++
++ }
++
++ req = find_existing_req(dev, block);
++ if (req) {
++ ret = dmu_map_request_case(dev, req, bio);
++ atomic_dec(&req->refcnt);
++ if (ret >= 0) {
++ map_context->ptr = req->remap;
++ goto done;
++ }
++ }
++
++ ret = make_new_request(dev, bio, &map_context->ptr);
++
++ done:
++ up(&map_mutex);
++
++ return ret;
++}
++
++/* Report transport info (INFO) or "<key> <block_size * 512>" (TABLE) */
++static int dmu_status(struct dm_target *ti, status_type_t type,
++		      char *result, unsigned int maxlen)
++{
++	struct dmu_device *dev = (struct dmu_device *) ti->private;
++
++	switch (type) {
++	case STATUSTYPE_INFO:
++		write_chardev_transport_info(dev, result, maxlen);
++		break;
++	case STATUSTYPE_TABLE:
++		/* uint64_t is not always unsigned long long; cast for %llu */
++		snprintf(result, maxlen, "%s %llu", dev->key,
++			 (unsigned long long)dev->block_size * 512);
++		break;
++	}
++
++	return 0;
++}
++
++/*
++ * Decide, under remap->lock, whether a completing bio may finish now or
++ * has to be parked on remap->bios_waiting.
++ *
++ * Returns 1 when the bio was added to remap->bios_waiting and 0 otherwise.
++ * The req argument is not used by this function.
++ *
++ * NOTE(review): dmu_end_io() hands this value straight back to
++ * device-mapper; presumably a positive value means "not finished yet" --
++ * confirm against dm's end_io contract.
++ */
++static int __handle_bio_endio(struct dmu_map *remap,
++ struct bio *bio,
++ struct userspace_request *req)
++{
++ int ret = 0;
++ unsigned long flags;
++
++ spin_lock_irqsave(&remap->lock, flags);
++ if (dmu_get_flag(&remap->flags, DMU_FLAG_WAITING) &&
++ remap->bios_waiting.head == NULL) {
++ /* First endio and waiting for resp from userspace */
++ bio_list_add(&remap->bios_waiting, bio);
++
++ /* Schedule request worker */
++ INIT_WORK(&remap->endio_task, endio_worker, remap);
++ schedule_work(&remap->endio_task);
++
++ ret = 1;
++ } else if (dmu_get_flag(&remap->flags, DMU_FLAG_WAITING)) {
++ /* Still waiting for resp from userspace */
++ bio_list_add(&remap->bios_waiting, bio);
++ ret = 1;
++ } else if (remap->bios_waiting.head != NULL) {
++ /* Got resp from userspace but bios waiting list nonempty */
++ if (bio == remap->bios_waiting.head) {
++ /* The oldest parked bio is completing: release it */
++ bio_list_pop(&remap->bios_waiting);
++ ret = 0;
++ } else {
++ /* Keep completion order: queue behind the parked bios */
++ bio_list_add(&remap->bios_waiting, bio);
++ ret = 1;
++ }
++ }
++ /* Not waiting and nothing parked: ret stays 0 */
++ spin_unlock_irqrestore(&remap->lock, flags);
++
++ return ret;
++}
++
++/*
++ * Device-mapper end_io hook.
++ *
++ * @error:       completion status of the bio (0 on success)
++ * @map_context: ->ptr holds the remap recorded by dmu_map(), or NULL
++ *
++ * Returns a negative errno when the bio failed, 0 when there is no remap
++ * attached, and otherwise whatever __handle_bio_endio() decides.
++ */
++static int dmu_end_io(struct dm_target *ti, struct bio *bio,
++		      int error, union map_info *map_context)
++{
++	struct dmu_map *remap = map_context->ptr;
++	struct userspace_request *req = NULL;
++
++	if (error) {
++		DMERR("Error in dmu_end_io");
++		/*
++		 * Hand the real error code back instead of a bare -1
++		 * (which reads as -EPERM).  Guard against a positive
++		 * value so it cannot be mistaken for a non-error return.
++		 */
++		return error < 0 ? error : -EIO;
++	}
++
++	if (!remap)
++		return 0;
++
++	return __handle_bio_endio(remap, bio, &req);
++}
++
++/* Registration record for the "userspace" device-mapper target. */
++struct target_type userspace_target = {
++	.name    = "userspace",
++	.module  = THIS_MODULE,
++	.version = {0, 1, 0},
++	.ctr     = dmu_ctr,
++	.dtr     = dmu_dtr,
++	.map     = dmu_map,
++	.end_io  = dmu_end_io,
++	.status  = dmu_status,
++};
++
++/*
++ * Unlink every remap on the global MRU list and hand it back to
++ * remap_cache.  Returns the number of remaps released.
++ */
++static int destroy_mru_list(void)
++{
++	struct dmu_map *entry;
++	int freed = 0;
++
++	spin_lock(&mru_list_lock);
++
++	while (!list_empty(&mru_list)) {
++		entry = list_entry(mru_list.next, struct dmu_map, mru_list);
++		list_del(&entry->mru_list);
++		kmem_cache_free(remap_cache, entry);
++		freed++;
++	}
++
++	spin_unlock(&mru_list_lock);
++
++	return freed;
++}
++
++/*
++ * Module entry point: registers the target, creates the request/remap
++ * caches and the request mempool, reserves the chardev region and
++ * pre-allocates DMU_REMAP_RESERVE remaps.
++ *
++ * Returns 0 on success or a negative errno on failure.  Everything
++ * acquired before the failing step is released again, so a failed load
++ * leaves nothing behind.
++ */
++int __init dm_userspace_init(void)
++{
++	int i;
++	int r;
++
++	r = dm_register_target(&userspace_target);
++	if (r < 0) {
++		DMERR("Register failed %d", r);
++		return r;
++	}
++
++	spin_lock_init(&devices_lock);
++	spin_lock_init(&mru_list_lock);
++
++	/* Every failure below except the chardev region is an allocation */
++	r = -ENOMEM;
++
++	request_cache =
++		kmem_cache_create("dm-userspace-requests",
++				  sizeof(struct userspace_request),
++				  __alignof__ (struct userspace_request),
++				  0, NULL, NULL);
++	if (!request_cache) {
++		DMERR("Failed to allocate request cache");
++		goto bad;
++	}
++
++	remap_cache =
++		kmem_cache_create("dm-userspace-remaps",
++				  sizeof(struct dmu_map),
++				  __alignof__ (struct dmu_map),
++				  0, NULL, NULL);
++	if (!remap_cache) {
++		DMERR("Failed to allocate remap cache");
++		goto bad2;
++	}
++
++	request_pool = mempool_create(64,
++				      mempool_alloc_slab, mempool_free_slab,
++				      request_cache);
++	if (!request_pool) {
++		DMERR("Failed to allocate request pool");
++		goto bad3;
++	}
++
++	/* init_chardev_transport() reports success as non-zero */
++	if (!init_chardev_transport()) {
++		r = -EBUSY;
++		goto bad4;
++	}
++
++	for (i = 0; i < DMU_REMAP_RESERVE; i++) {
++		struct dmu_map *remap;
++
++		remap = alloc_remap_atomic(NULL);
++		if (!remap) {
++			DMERR("Failed to allocate %i/%i reserve remap",
++			      i, DMU_REMAP_RESERVE);
++			goto bad5;
++		}
++		init_remap(NULL, remap);
++		remap_hit(remap);
++	}
++
++	/*
++	 * Start the watchdog only once everything it could look at exists;
++	 * this also means no error path has to cancel it.
++	 */
++	if (enable_watchdog) {
++		INIT_WORK(&wd, watchdog, NULL);
++		schedule_delayed_work(&wd, HZ);
++	}
++
++	return 0;
++
++ bad5:
++	destroy_mru_list();
++	cleanup_chardev_transport();
++ bad4:
++	mempool_destroy(request_pool);
++ bad3:
++	kmem_cache_destroy(remap_cache);
++ bad2:
++	kmem_cache_destroy(request_cache);
++ bad:
++	dm_unregister_target(&userspace_target);
++
++	return r;
++}
++
++/*
++ * Module exit: stops the watchdog, tears down any devices still on the
++ * global list, releases the chardev region, the MRU remaps, the mempool
++ * and both caches, and finally unregisters the target.
++ */
++void __exit dm_userspace_exit(void)
++{
++	int r;
++	struct list_head *cursor, *next;
++	struct dmu_device *dev;
++
++	/* If the work could not be cancelled it is queued or running: wait */
++	if (enable_watchdog)
++		if (!cancel_delayed_work(&wd))
++			flush_scheduled_work();
++
++	spin_lock(&devices_lock);
++
++	list_for_each_safe(cursor, next, &devices) {
++		dev = list_entry(cursor, struct dmu_device, list);
++		list_del(cursor);
++		/*
++		 * Log before destroying: destroy_dmu_device() may release
++		 * dev, after which dev->key must not be touched.
++		 */
++		DMERR("Destroying hanging device %s", dev->key);
++		destroy_dmu_device(&dev->users);
++	}
++
++	spin_unlock(&devices_lock);
++
++	cleanup_chardev_transport();
++
++	destroy_mru_list();
++
++	mempool_destroy(request_pool);
++	kmem_cache_destroy(request_cache);
++	kmem_cache_destroy(remap_cache);
++
++	r = dm_unregister_target(&userspace_target);
++	if (r < 0)
++		DMERR("unregister failed %d", r);
++}
++
++/* Module load/unload entry points */
++module_init(dm_userspace_init);
++module_exit(dm_userspace_exit);
++
++/* enable_watchdog is exposed as a read-only (S_IRUGO) integer parameter */
++module_param(enable_watchdog, int, S_IRUGO);
++
++MODULE_DESCRIPTION(DM_NAME " userspace target");
++MODULE_AUTHOR("Dan Smith");
++MODULE_LICENSE("GPL");
+diff -purN ../pristine-linux-2.6.16.13/drivers/md/dm-userspace-chardev.c ./drivers/md/dm-userspace-chardev.c
+--- ../pristine-linux-2.6.16.13/drivers/md/dm-userspace-chardev.c 1969-12-31 18:00:00.000000000 -0600
++++ ./drivers/md/dm-userspace-chardev.c 2006-08-16 18:48:18.000000000 -0500
+@@ -0,0 +1,900 @@
++/*
++ * Copyright (C) International Business Machines Corp., 2006
++ * Author: Dan Smith <danms@us.ibm.com>
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; under version 2 of the License.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
++ *
++ */
++
++#include <linux/spinlock.h>
++#include <linux/blkdev.h>
++#include <linux/mempool.h>
++#include <linux/dm-userspace.h>
++#include <linux/list.h>
++#include <linux/sched.h>
++#include <linux/wait.h>
++#include <linux/poll.h>
++#include <linux/fs.h>
++#include <linux/cdev.h>
++#include <asm/uaccess.h>
++
++#include "dm.h"
++#include "dm-bio-list.h"
++#include "kcopyd.h"
++#include "dm-user.h"
++
++#define DM_MSG_PREFIX "dm-userspace"
++
++/* This allows for a cleaner separation between the dm-userspace
++ * device-mapper target, and the userspace transport used. Right now,
++ * only a chardev transport exists, but it's possible that there could
++ * be more in the future
++ *
++ * One chardev_transport is allocated per dmu_device by
++ * register_chardev_transport() and stored in dev->transport_private.
++ */
++struct chardev_transport {
++ /* Character device registered with cdev_add() */
++ struct cdev cdev;
++ /* Device number (module major, per-device minor) of the control node */
++ dev_t ctl_dev;
++ /* Device this transport serves; returned to open() via container_of */
++ struct dmu_device *parent;
++};
++
++static void remap_flusher(struct dmu_map *remap);
++
++/*
++ * Report whether dev has at least one queued request that has not yet
++ * been handed to userspace.  Returns 1 if so, 0 otherwise.
++ *
++ * FIXME: a counter of unsent requests would avoid walking the list and
++ * possibly the locking as well.
++ */
++static int have_pending_requests(struct dmu_device *dev)
++{
++	struct userspace_request *cur;
++	int pending = 0;
++
++	spin_lock(&dev->lock);
++
++	list_for_each_entry(cur, &dev->requests, list) {
++		if (cur->sent)
++			continue;
++
++		pending = 1;
++		break;
++	}
++
++	spin_unlock(&dev->lock);
++
++	return pending;
++}
++
++/*
++ * kcopyd completion callback for copy_block().
++ *
++ * @read_err:  non-zero if reading the source failed
++ * @write_err: non-zero if writing the destination failed
++ * @data:      the struct dmu_map the copy was issued for
++ *
++ * A failed copy is reported rather than silently ignored; the remap is
++ * flushed either way so the bios queued on it are not stranded.
++ */
++static void copy_callback(int read_err,
++			  unsigned int write_err,
++			  void *data)
++{
++	if (read_err || write_err)
++		DMERR("Block copy failed (read_err=%d write_err=%u)",
++		      read_err, write_err);
++
++	remap_flusher((struct dmu_map *)data);
++}
++
++/*
++ * Ask kcopyd to copy one block from the remap's source device to its
++ * destination.  The regions are built under remap->lock and the copy is
++ * issued after the lock is dropped; copy_callback() runs on completion.
++ */
++static void copy_block(struct dmu_map *remap)
++{
++	struct io_region from, to;
++	struct kcopyd_client *kc;
++	struct dmu_device *dev;
++	unsigned long irq_state;
++
++	spin_lock_irqsave(&remap->lock, irq_state);
++
++	dev = remap->dev;
++	kc = dev->kcopy;
++
++	from.bdev = remap->src->bdev;
++	from.sector = remap->org_block << dev->block_shift;
++	from.count = dev->block_size;
++
++	to.bdev = remap->dest->bdev;
++	to.sector = (remap->new_block << dev->block_shift);
++	to.sector += remap->offset;
++	to.count = dev->block_size;
++
++	spin_unlock_irqrestore(&remap->lock, irq_state);
++
++	kcopyd_copy(kc, &from, 1, &to, 0, copy_callback, remap);
++}
++
++/*
++ * Activate a remap: when DMU_FLAG_COPY_FIRST is set the block is copied
++ * first (the flush then happens from the copy callback), otherwise the
++ * queued bios are flushed right away.
++ */
++static void copy_or_flush(struct dmu_map *remap)
++{
++	unsigned long irq_state;
++	int need_copy;
++
++	spin_lock_irqsave(&remap->lock, irq_state);
++	need_copy = dmu_get_flag(&remap->flags, DMU_FLAG_COPY_FIRST);
++	spin_unlock_irqrestore(&remap->lock, irq_state);
++
++	if (!need_copy) {
++		remap_flusher(remap);
++		return;
++	}
++
++	copy_block(remap);
++}
++
++/*
++ * Take the next bio queued on the remap and redirect it with
++ * __bio_remap().  When the queue is empty the remap is marked
++ * DMU_FLAG_VALID instead; that has to happen before remap->lock is
++ * released.  Returns the remapped bio, or NULL once the queue is empty.
++ */
++static struct bio *pop_and_remap(struct dmu_map *remap)
++{
++	struct bio *next;
++	unsigned long irq_state;
++
++	spin_lock_irqsave(&remap->lock, irq_state);
++
++	next = bio_list_pop(&remap->bios);
++	if (!next)
++		dmu_set_flag(&remap->flags, DMU_FLAG_VALID);
++	else
++		__bio_remap(next, remap);
++
++	spin_unlock_irqrestore(&remap->lock, irq_state);
++
++	return next;
++}
++
++/*
++ * Under remap->lock, detach the chained successor (returned through
++ * *next, with remap->next reset to NULL) and report through *temporary
++ * whether DMU_FLAG_TEMPORARY is set.
++ */
++static void get_remap_attrs(struct dmu_map *remap,
++			    int *temporary,
++			    struct dmu_map **next)
++{
++	unsigned long irq_state;
++
++	spin_lock_irqsave(&remap->lock, irq_state);
++
++	*next = remap->next;
++	remap->next = NULL;
++	*temporary = dmu_get_flag(&remap->flags, DMU_FLAG_TEMPORARY);
++
++	spin_unlock_irqrestore(&remap->lock, irq_state);
++}
++
++/*
++ * Submit every bio queued on the remap, then activate the chained
++ * successor (if any) and free the remap if it was temporary.
++ */
++static void remap_flusher(struct dmu_map *remap)
++{
++	struct dmu_map *chained;
++	struct bio *bio;
++	int is_temp = 0;
++
++	while ((bio = pop_and_remap(remap)) != NULL)
++		generic_make_request(bio);
++
++	get_remap_attrs(remap, &is_temp, &chained);
++
++	if (chained)
++		copy_or_flush(chained);
++
++	if (is_temp)
++		free_remap(remap);
++}
++
++/*
++ * Serialise one request into the user buffer as header + payload.
++ *
++ * @buffer: user-space destination; the caller guarantees enough room
++ * @req:    request to send (already marked as sent by the caller)
++ *
++ * Returns the number of bytes written, 0 when the request has an unknown
++ * type (nothing is written and the request is dropped), or -EFAULT when
++ * copying to userspace fails.
++ *
++ * Requests that never receive a response (everything except
++ * MAP_BLOCK_REQ and SYNC_COMPLETE) are removed from the device queue and
++ * freed here.
++ */
++static int send_userspace_message(uint8_t __user *buffer,
++				  struct userspace_request *req)
++{
++	int ret = 0;
++	int known = 1;
++	struct dmu_msg_header hdr;
++	union {
++		struct dmu_msg_map_request map_req;
++		struct dmu_msg_status status_req;
++		struct dmu_msg_version ver_req;
++	} msgs;
++
++	/* Zero both so no uninitialised stack bytes can reach userspace */
++	memset(&hdr, 0, sizeof(hdr));
++	memset(&msgs, 0, sizeof(msgs));
++	spin_lock(&req->lock);
++
++	hdr.id = req->id;
++
++	switch (req->type) {
++	case DM_USERSPACE_GET_VERSION:
++		hdr.msg_type = req->type;
++		hdr.payload_len = sizeof(msgs.ver_req);
++		/* Pack the target version as major.minor.patch bytes */
++		msgs.ver_req.kernel_ver =
++			userspace_target.version[0] << 16 |
++			userspace_target.version[1] << 8 |
++			userspace_target.version[2];
++
++		break;
++
++	case DM_USERSPACE_MAP_BLOCK_REQ:
++		hdr.msg_type = req->type;
++		hdr.payload_len = sizeof(msgs.map_req);
++		msgs.map_req.org_block =
++			dmu_block(req->dev, req->remap->bios.head->bi_sector);
++		dmu_cpy_flag(&msgs.map_req.flags, req->flags, DMU_FLAG_RD);
++		dmu_cpy_flag(&msgs.map_req.flags, req->flags, DMU_FLAG_WR);
++
++		break;
++
++	case DM_USERSPACE_SYNC_COMPLETE:
++	case DM_USERSPACE_INVAL_COMPLETE:
++	case DM_USERSPACE_INVAL_FAILED:
++		/* Status codes travel inside a generic STATUS message */
++		hdr.msg_type = DM_USERSPACE_STATUS;
++		hdr.payload_len = sizeof(msgs.status_req);
++		msgs.status_req.status = req->type;
++		msgs.status_req.id_of_op = req->id;
++
++		break;
++
++	default:
++		DMWARN("Unknown message type %i", req->type);
++		known = 0;
++	}
++
++	spin_unlock(&req->lock);
++
++	if (known) {
++		if (copy_to_user(buffer, &hdr, sizeof(hdr)))
++			return -EFAULT;
++		if (copy_to_user(buffer + sizeof(hdr), &msgs, hdr.payload_len))
++			return -EFAULT;
++
++		ret = sizeof(hdr) + hdr.payload_len;
++	}
++
++	if ((req->type != DM_USERSPACE_MAP_BLOCK_REQ) &&
++	    (req->type != DM_USERSPACE_SYNC_COMPLETE)) {
++		/* Only some requests get responses, so we take others
++		 * off the request queue here
++		 */
++		spin_lock(&req->dev->lock);
++		list_del(&req->list);
++		spin_unlock(&req->dev->lock);
++		mempool_free(req, request_pool);
++	}
++
++	return ret;
++}
++
++/*
++ * Pick the oldest unsent request, provided its whole message (header
++ * plus payload) fits into size_available bytes, and mark it as sent.
++ *
++ * Returns the request, or NULL when nothing is unsent or the oldest
++ * unsent request does not fit.  Later requests are never sent ahead of
++ * one that does not fit, so ordering is preserved.
++ */
++struct userspace_request *pluck_next_request(struct dmu_device *dev,
++					     int size_available)
++{
++	struct userspace_request *req, *match = NULL;
++
++	spin_lock(&dev->lock);
++
++	list_for_each_entry(req, &dev->requests, list) {
++		int unsent;
++
++		spin_lock(&req->lock);
++		unsent = !req->sent;
++		if (unsent) {
++			int payload = dmu_get_msg_len(req->type);
++			int needed;
++
++			/*
++			 * Types the length table does not know are
++			 * budgeted at the largest payload the sender
++			 * emits.
++			 */
++			if (payload < 0)
++				payload = sizeof(struct dmu_msg_map_request);
++
++			/* The header is written too, so it must be counted */
++			needed = (int)sizeof(struct dmu_msg_header) + payload;
++
++			if (needed <= size_available) {
++				req->sent = 1;
++				match = req;
++			}
++		}
++		spin_unlock(&req->lock);
++
++		/* Either we found our match or we must stop to keep order */
++		if (unsent)
++			break;
++	}
++
++	spin_unlock(&dev->lock);
++
++	return match;
++}
++
++/*
++ * read() on the control device: copies as many pending requests as fit
++ * into the user buffer.
++ *
++ * Blocks until a request is pending unless O_NONBLOCK is set, in which
++ * case 0 is returned immediately.  Returns the number of bytes written,
++ * -EACCES without CAP_SYS_ADMIN, -ERESTARTSYS when interrupted, or
++ * -EFAULT when nothing could be copied.
++ *
++ * NOTE(review): returning 0 rather than -EAGAIN for O_NONBLOCK is kept
++ * because existing userspace may rely on it.
++ */
++ssize_t dmu_ctl_read(struct file *file, char __user *buffer,
++		     size_t size, loff_t *offset)
++{
++
++	struct dmu_device *dev = (struct dmu_device *)file->private_data;
++	struct userspace_request *req = NULL;
++	int ret = 0, r;
++
++	if (!capable(CAP_SYS_ADMIN))
++		return -EACCES;
++
++	while (!have_pending_requests(dev)) {
++		if (file->f_flags & O_NONBLOCK) {
++			return 0;
++		}
++
++		if (wait_event_interruptible(dev->wqueue,
++					     have_pending_requests(dev)))
++			return -ERESTARTSYS;
++	}
++
++	while (ret < size) {
++		/* pluck_next_request() takes an int: clamp huge buffers */
++		req = pluck_next_request(dev,
++					 min_t(size_t, size - ret, INT_MAX));
++		if (!req)
++			/* One or more of the following conditions is true:
++			 * 1. No more requests available for sending
++			 * 2. No more room in the outgoing buffer
++			 */
++			break;
++
++		r = send_userspace_message((uint8_t __user *)(buffer + ret),
++					   req);
++		if (r == 0)
++			continue;
++		else if (r < 0)
++			/*
++			 * Messages already copied have been consumed from
++			 * the queue; report them instead of losing them
++			 * behind an error code.
++			 */
++			return ret ? ret : r;
++
++		ret += r;
++	}
++
++	return ret;
++}
++
++/*
++ * Reconcile a new remap with an existing ("parent") remap for the same
++ * block.  Runs under parent->lock.
++ *
++ * Returns:
++ * 1 if we're chained to our parent
++ * 0 if parent is valid and was removed, or if parent is not in use
++ *   (being destroyed) and was left alone
++ * -1 if we gave our bios to the invalid parent
++ */
++static int handle_parent_remap(struct dmu_map *parent,
++ struct dmu_map *remap,
++ struct dmu_msg_map_response *msg)
++{
++ int ret = 0;
++ int free_parent = 0;
++ unsigned long flags;
++
++ spin_lock_irqsave(&parent->lock, flags);
++
++ if (!dmu_get_flag(&parent->flags, DMU_FLAG_INUSE)) {
++ /* This is in the process of being destroyed,
++ * so we can't use it
++ */
++ goto end_parent;
++ }
++
++ if (!dmu_get_flag(&parent->flags, DMU_FLAG_VALID)) {
++ if (dmu_get_flag(&parent->flags, DMU_FLAG_WR) ==
++ dmu_get_flag(&msg->flags, DMU_FLAG_WR) &&
++ (parent->new_block == msg->new_block)) {
++ /* Perms match for this not-yet-valid remap,
++ so tag our bios on to it and bail */
++ bio_list_merge(&parent->bios, &remap->bios);
++ bio_list_init(&remap->bios);
++ ret = -1;
++ } else {
++ /* Remove parent from remap table, and
++ * chain our new remap to this one so
++ * it will fire when parent goes
++ * valid
++ */
++ list_del_init(&parent->list);
++ if (parent->next) {
++ DMERR("Parent already chained!");
++ BUG();
++ }
++ parent->next = remap;
++ /* TEMPORARY makes remap_flusher() free the parent once flushed */
++ dmu_set_flag(&parent->flags, DMU_FLAG_TEMPORARY);
++ ret = 1;
++ }
++ } else {
++ /* Remove existing valid remap */
++ free_parent = 1;
++ }
++
++ end_parent:
++ /* NOTE(review): parent->lock is released after __free_remap(parent);
++ * confirm __free_remap() keeps the structure (and its lock) alive.
++ */
++ if (free_parent)
++ __free_remap(parent);
++
++ spin_unlock_irqrestore(&parent->lock, flags);
++
++ return ret;
++}
++
++/*
++ * Handle a DM_USERSPACE_MAP_BLOCK_RESP from userspace: install the
++ * mapping described by msg for dev.
++ *
++ * If a pending MAP_BLOCK_REQ with id msg->id_of_req exists, its remap
++ * (carrying the queued bios) is reused; otherwise a fresh remap is
++ * allocated.  An existing remap for the same block is reconciled through
++ * handle_parent_remap().
++ *
++ * Returns 1 on success and 0 on failure.
++ *
++ * NOTE(review): the "bad" path is reached after a matching request may
++ * already have been unlinked from dev->requests; in that case neither
++ * the request nor its remap is released here -- looks like a leak,
++ * confirm.
++ */
++static int remap_request(struct dmu_msg_map_response *msg,
++ struct dmu_device *dev, uint32_t id)
++{
++ struct dmu_map *remap = NULL, *parent = NULL;
++ struct target_device *s_dev = NULL, *d_dev = NULL;
++ int is_chained = 0;
++ struct userspace_request *cursor, *next, *req = NULL;
++
++ /* See if we have a pending request that matches */
++ spin_lock(&dev->lock);
++ list_for_each_entry_safe(cursor, next, &dev->requests, list) {
++ if ((cursor->type == DM_USERSPACE_MAP_BLOCK_REQ) &&
++ (cursor->id == msg->id_of_req)) {
++ req = cursor;
++ list_del(&req->list);
++ break;
++ }
++ }
++ spin_unlock(&dev->lock);
++
++ /* A source device is only needed when a copy is requested */
++ if (dmu_get_flag(&msg->flags, DMU_FLAG_COPY_FIRST)) {
++ s_dev = find_target(dev, MKDEV(msg->src_maj, msg->src_min));
++ if (!s_dev) {
++ DMERR("Failed to find src device %i:%i",
++ msg->src_maj, msg->src_min);
++ goto bad;
++ }
++ }
++
++ d_dev = find_target(dev, MKDEV(msg->dst_maj, msg->dst_min));
++ if (!d_dev) {
++ DMERR("Failed to find dest device %i:%i",
++ msg->dst_maj, msg->dst_min);
++ goto bad;
++ }
++
++ if (req) {
++ while (atomic_read(&req->refcnt) != 0)
++ /* Wait for exclusive use of request. Even
++ * though we have removed it from the list,
++ * someone still has a pointer to it, which
++ * means we must wait for them to finish with
++ * it before continuing.
++ */
++ schedule();
++ remap = req->remap;
++ mempool_free(req, request_pool);
++ } else {
++ /* Allocate a new remap early (before grabbing locks),
++ * since we will most likely need it, and we didn't
++ * get one with the request
++ */
++ /* FIXME */
++ remap = alloc_remap_atomic(dev);
++ if (!remap) {
++ DMERR("Failed to alloc remap!");
++ goto bad;
++ }
++ init_remap(dev, remap);
++ }
++
++ /* Lock order: dev->lock first, then remap->lock */
++ spin_lock(&dev->lock);
++
++ /* FIXME: Now that we pass the remap with the req, do we need
++ IRQs disabled here? */
++ spin_lock(&remap->lock);
++ remap->org_block = msg->org_block;
++
++ /* Now, we insert the new remap into the table, and remove the
++ * existing map, if present, all while the device is locked
++ */
++
++ parent = ht_find_map(&dev->remaps, msg->org_block);
++ if (parent) {
++ is_chained = handle_parent_remap(parent, remap, msg);
++ if (is_chained < 0) {
++ /* Our bios now belong to the parent; this remap is not needed */
++ __free_remap(remap);
++ spin_unlock(&remap->lock);
++ spin_unlock(&dev->lock);
++ return 1;
++ }
++ }
++
++ if (dmu_get_flag(&msg->flags, DMU_FLAG_SYNC))
++ dmu_set_flag(&remap->flags, DMU_FLAG_WAITING);
++
++ remap->new_block = msg->new_block;
++ remap->offset = msg->offset;
++ remap->src = s_dev;
++ remap->dest = d_dev;
++ remap->dev = dev;
++ remap->id = id;
++
++ dmu_cpy_flag(&remap->flags, msg->flags, DMU_FLAG_COPY_FIRST);
++ dmu_cpy_flag(&remap->flags, msg->flags, DMU_FLAG_TEMPORARY);
++ dmu_cpy_flag(&remap->flags, msg->flags, DMU_FLAG_SYNC);
++ dmu_cpy_flag(&remap->flags, msg->flags, DMU_FLAG_WR);
++ dmu_cpy_flag(&remap->flags, msg->flags, DMU_FLAG_RD);
++ /* Not valid until remap_flusher() has drained the queued bios */
++ dmu_clr_flag(&remap->flags, DMU_FLAG_VALID);
++
++ spin_unlock(&remap->lock);
++
++ ht_insert_map(&dev->remaps, remap);
++
++ spin_unlock(&dev->lock);
++
++ /* A chained remap is started later, when its parent is flushed */
++ if (! is_chained)
++ copy_or_flush(remap);
++
++ return 1;
++
++ bad:
++ DMERR("Remap error: chaos may ensue");
++
++ return 0;
++}
++
++/*
++ * Adds the request to the front of the queue so it's picked up first,
++ * then wakes any reader sleeping on dev->wqueue.
++ */
++static void add_urgent_request(struct dmu_device *dev,
++ struct userspace_request *req)
++{
++ spin_lock(&dev->lock);
++ /* list_add() inserts right after the head, i.e. at the front */
++ list_add(&req->list, &dev->requests);
++ spin_unlock(&dev->lock);
++
++ wake_up(&dev->wqueue);
++}
++
++/*
++ * Userspace asked for the kernel version: queue a GET_VERSION request at
++ * the front of the device queue so the answer goes out with the next
++ * read.  msg and id are currently unused.
++ *
++ * Returns 1 on success, 0 if no request could be allocated.
++ */
++static int version_request(struct dmu_msg_version *msg,
++			   struct dmu_device *dev, uint32_t id)
++{
++	struct userspace_request *reply;
++
++	reply = mempool_alloc(request_pool, GFP_NOIO);
++	if (reply) {
++		init_request(dev, DM_USERSPACE_GET_VERSION, reply);
++		add_urgent_request(dev, reply);
++		return 1;
++	}
++
++	DMERR("Failed to alloc version response");
++	return 0;
++}
++
++/*
++ * Userspace asked to invalidate the mapping for msg->org_block.  A valid
++ * remap is deleted from the table; a missing or not-yet-valid one makes
++ * the invalidation fail.  Either way a status request
++ * (INVAL_COMPLETE / INVAL_FAILED) carrying the caller's id is queued for
++ * userspace.
++ *
++ * Returns 1 if the mapping was removed, 0 otherwise (including when the
++ * status request could not be allocated).
++ */
++static int invalidate_request(struct dmu_msg_invalidate_map *msg,
++			      struct dmu_device *dev, uint32_t id)
++{
++	struct userspace_request *status;
++	struct dmu_map *remap;
++	unsigned long irq_state;
++	int removed = 0;
++
++	remap = ht_find_map_dev(dev, msg->org_block);
++	if (remap) {
++		spin_lock(&dev->lock);
++		spin_lock_irqsave(&remap->lock, irq_state);
++		if (dmu_get_flag(&remap->flags, DMU_FLAG_VALID)) {
++			ht_delete_map(&dev->remaps, remap);
++			removed = 1;
++		}
++		spin_unlock_irqrestore(&remap->lock, irq_state);
++		spin_unlock(&dev->lock);
++	}
++
++	status = mempool_alloc(request_pool, GFP_NOIO);
++	if (!status) {
++		DMERR("Failed to allocate request");
++		return 0;
++	}
++
++	init_request(dev,
++		     removed ? DM_USERSPACE_INVAL_COMPLETE
++			     : DM_USERSPACE_INVAL_FAILED,
++		     status);
++
++	status->u.block = msg->org_block;
++	status->id = id;
++
++	add_request(dev, status);
++
++	return removed;
++}
++
++/*
++ * Userspace reported SYNC_COMPLETE for operation id_of_op: retire the
++ * matching request, clear DMU_FLAG_WAITING on its remap and complete
++ * every bio that was parked on remap->bios_waiting.
++ */
++static void sync_complete(struct dmu_device *dev, uint32_t id_of_op)
++{
++	struct dmu_map *remap = NULL;
++	struct bio *bio;
++	struct userspace_request *cursor, *req = NULL;
++	unsigned long flags;
++
++	/*
++	 * Track the match separately: after a full walk the iteration
++	 * cursor is never NULL, it points at the list head's container.
++	 */
++	spin_lock(&dev->lock);
++	list_for_each_entry(cursor, &dev->requests, list) {
++		if (cursor->id == id_of_op) {
++			req = cursor;
++			list_del(&req->list);
++			break;
++		}
++	}
++	spin_unlock(&dev->lock);
++
++	if (!req) {
++		DMERR("Unable to complete unknown request: %u\n",
++		      id_of_op);
++		return;
++	}
++
++	while (atomic_read(&req->refcnt) != 0)
++		/* Wait for exclusive use of request. Even
++		 * though we have removed it from the list,
++		 * someone still has a pointer to it, which
++		 * means we must wait for them to finish with
++		 * it before continuing.
++		 */
++		schedule();
++
++	remap = req->remap;
++	mempool_free(req, request_pool);
++
++	if (remap) {
++		spin_lock_irqsave(&remap->lock, flags);
++		dmu_clr_flag(&remap->flags, DMU_FLAG_WAITING);
++		spin_unlock_irqrestore(&remap->lock, flags);
++		while (1) {
++			spin_lock_irqsave(&remap->lock, flags);
++			bio = remap->bios_waiting.head;
++			spin_unlock_irqrestore(&remap->lock, flags);
++			if (!bio)
++				break;
++			/*
++			 * The end_io path is expected to pop the bio from
++			 * bios_waiting, which is what makes this loop
++			 * advance.
++			 */
++			bio->bi_end_io(bio, 0, 0);
++		}
++	} else {
++		DMERR("Unable to complete empty request: %u\n",
++		      id_of_op);
++	}
++}
++
++/*
++ * write() on the control device: parses a sequence of
++ * header + payload messages from userspace and dispatches each one.
++ *
++ * Every payload is checked against the bytes that are actually left in
++ * the buffer before it is read.  A message whose payload length does not
++ * match its type is logged and skipped.
++ *
++ * Returns the number of bytes consumed, -EACCES without CAP_SYS_ADMIN,
++ * -EFAULT if the buffer cannot be read, or -EINVAL if the very first
++ * message overruns the buffer.
++ */
++ssize_t dmu_ctl_write(struct file *file, const char __user *buffer,
++		      size_t size, loff_t *offset)
++{
++	struct dmu_device *dev = (struct dmu_device *)file->private_data;
++	ssize_t ret = 0;
++	struct dmu_msg_header hdr;
++	union {
++		struct dmu_msg_map_response map_rsp;
++		struct dmu_msg_invalidate_map inval_rsp;
++		struct dmu_msg_version ver_req;
++		struct dmu_msg_status status_rsp;
++	} msgs;
++
++	if (!capable(CAP_SYS_ADMIN))
++		return -EACCES;
++
++	while ((ret + sizeof(hdr)) < size) {
++		if (copy_from_user(&hdr, buffer+ret, sizeof(hdr))) {
++			DMERR("%s copy_from_user failed!", __FUNCTION__);
++			ret = -EFAULT;
++			goto out;
++		}
++
++		/*
++		 * payload_len comes from userspace: never read or skip
++		 * past the end of what was actually written.
++		 */
++		if (hdr.payload_len > size - ret - sizeof(hdr)) {
++			DMERR("Message payload overruns write buffer");
++			if (!ret)
++				ret = -EINVAL;
++			goto out;
++		}
++
++		ret += sizeof(hdr);
++
++		switch (hdr.msg_type) {
++
++		case DM_USERSPACE_GET_VERSION:
++			if (hdr.payload_len != sizeof(msgs.ver_req)) {
++				DMERR("Malformed version request");
++				break;
++			}
++
++			if (copy_from_user(&msgs.ver_req, buffer+ret,
++					   sizeof(msgs.ver_req))) {
++				DMERR("%s copy_from_user failed!",
++				      __FUNCTION__);
++				ret = -EFAULT;
++				goto out;
++			}
++
++			version_request(&msgs.ver_req, dev, hdr.id);
++			break;
++
++		case DM_USERSPACE_MAP_BLOCK_RESP:
++			if (hdr.payload_len != sizeof(msgs.map_rsp)) {
++				DMERR("Malformed block response");
++				break;
++			}
++
++			if (copy_from_user(&msgs.map_rsp, buffer+ret,
++					   sizeof(msgs.map_rsp))) {
++				DMERR("%s copy_from_user failed!",
++				      __FUNCTION__);
++				ret = -EFAULT;
++				goto out;
++			}
++
++			remap_request(&msgs.map_rsp, dev,
++				      msgs.map_rsp.id_of_req);
++			break;
++
++		case DM_USERSPACE_MAP_FAILED:
++			if (hdr.payload_len != sizeof(msgs.map_rsp)) {
++				DMERR("Malformed block failed response");
++				break;
++			}
++
++			if (copy_from_user(&msgs.map_rsp, buffer+ret,
++					   sizeof(msgs.map_rsp))) {
++				DMERR("%s copy_from_user failed",
++				      __FUNCTION__);
++				ret = -EFAULT;
++				goto out;
++			}
++
++			DMERR("Userspace map failed");
++			break;
++
++		case DM_USERSPACE_MAP_INVALIDATE:
++			if (hdr.payload_len != sizeof(msgs.inval_rsp)) {
++				DMERR("Malformed invalidate request");
++				break;
++			}
++
++			if (copy_from_user(&msgs.inval_rsp, buffer+ret,
++					   sizeof(msgs.inval_rsp))) {
++				DMERR("%s copy_from_user failed",
++				      __FUNCTION__);
++				ret = -EFAULT;
++				goto out;
++			}
++
++			invalidate_request(&msgs.inval_rsp, dev, hdr.id);
++			break;
++
++		case DM_USERSPACE_STATUS:
++			if (hdr.payload_len != sizeof(msgs.status_rsp)) {
++				DMERR("Malformed status response");
++				break;
++			}
++
++			if (copy_from_user(&msgs.status_rsp, buffer+ret,
++					   sizeof(msgs.status_rsp))) {
++				DMERR("%s copy_from_user failed",
++				      __FUNCTION__);
++				ret = -EFAULT;
++				goto out;
++			}
++
++			if (msgs.status_rsp.status ==
++			    DM_USERSPACE_SYNC_COMPLETE) {
++				/* FIXME: check req */
++				sync_complete(dev, msgs.status_rsp.id_of_op);
++			}
++			break;
++
++		default:
++			DMWARN("Unknown request type: %i", hdr.msg_type);
++		}
++
++		/* Skip the payload whether or not it was understood */
++		ret += hdr.payload_len;
++	}
++ out:
++	return ret;
++}
++
++/*
++ * open() on the control device: requires CAP_SYS_ADMIN, takes a
++ * reference on the owning dmu_device and stores it in
++ * file->private_data.  Returns 0, or -EACCES without the capability.
++ */
++int dmu_ctl_open(struct inode *inode, struct file *file)
++{
++	struct chardev_transport *transport;
++	struct dmu_device *owner;
++
++	if (!capable(CAP_SYS_ADMIN))
++		return -EACCES;
++
++	/* The cdev is embedded in the transport, which knows its device */
++	transport = container_of(inode->i_cdev, struct chardev_transport,
++				 cdev);
++	owner = transport->parent;
++
++	get_dev(owner);
++	file->private_data = owner;
++
++	return 0;
++}
++
++/*
++ * release() on the control device: drops the device reference taken in
++ * dmu_ctl_open().  Always returns 0.
++ */
++int dmu_ctl_release(struct inode *inode, struct file *file)
++{
++	struct dmu_device *owner = (struct dmu_device *)file->private_data;
++
++	put_dev(owner);
++
++	return 0;
++}
++
++/*
++ * poll() on the control device: readable (POLLIN | POLLRDNORM) whenever
++ * a request is waiting to be sent to userspace.
++ */
++unsigned dmu_ctl_poll(struct file *file, poll_table *wait)
++{
++	struct dmu_device *dev = (struct dmu_device *)file->private_data;
++
++	poll_wait(file, &dev->wqueue, wait);
++
++	if (!have_pending_requests(dev))
++		return 0;
++
++	return POLLIN | POLLRDNORM;
++}
++
++/* File operations of the per-device control character device. */
++static struct file_operations ctl_fops = {
++	.owner   = THIS_MODULE,
++	.open    = dmu_ctl_open,
++	.release = dmu_ctl_release,
++	.read    = dmu_ctl_read,
++	.write   = dmu_ctl_write,
++	.poll    = dmu_ctl_poll,
++};
++
++/*
++ * Find the lowest minor number not used by the control device of any
++ * registered dmu_device.  The search runs under devices_lock; the lock
++ * is dropped before the result is returned.
++ */
++static int get_free_minor(void)
++{
++	struct dmu_device *dev;
++	int candidate = 0;
++	int taken;
++
++	spin_lock(&devices_lock);
++
++	do {
++		taken = 0;
++
++		list_for_each_entry(dev, &devices, list) {
++			struct chardev_transport *t = dev->transport_private;
++
++			if (MINOR(t->ctl_dev) == candidate) {
++				taken = 1;
++				break;
++			}
++		}
++
++		if (taken)
++			candidate++;
++	} while (taken);
++
++	spin_unlock(&devices_lock);
++
++	return candidate;
++}
++
++/*
++ * Allocate the chardev transport for dev, pick a free minor under the
++ * module's major and register the control character device.
++ *
++ * Returns 1 on success and 0 on failure; on failure the transport
++ * memory is freed again.
++ */
++int register_chardev_transport(struct dmu_device *dev)
++{
++	struct chardev_transport *t;
++
++	t = kmalloc(sizeof(struct chardev_transport), GFP_KERNEL);
++	dev->transport_private = t;
++
++	if (!t) {
++		DMERR("Failed to allocate chardev transport");
++		return 0;
++	}
++
++	t->ctl_dev = MKDEV(MAJOR(dmu_dev), get_free_minor());
++	t->parent = dev;
++
++	cdev_init(&t->cdev, &ctl_fops);
++	t->cdev.owner = THIS_MODULE;
++	t->cdev.ops = &ctl_fops;
++
++	if (cdev_add(&t->cdev, t->ctl_dev, 1) >= 0)
++		return 1;
++
++	DMERR("Failed to register control device %d:%d",
++	      MAJOR(t->ctl_dev), MINOR(t->ctl_dev));
++	kfree(t);
++
++	return 0;
++}
++
++/*
++ * Remove the control character device of dev and free its transport.
++ *
++ * NOTE(review): dev->transport_private still points at the freed memory
++ * afterwards; confirm no caller dereferences it after this call.
++ */
++void unregister_chardev_transport(struct dmu_device *dev)
++{
++ struct chardev_transport *t = dev->transport_private;
++
++ cdev_del(&t->cdev);
++ kfree(t);
++}
++
++/*
++ * Reserve a region of 10 character device numbers named "dm-userspace"
++ * (first minor 0) and store its base in dmu_dev.
++ *
++ * Returns 1 on success and 0 on failure.
++ */
++int init_chardev_transport(void)
++{
++	if (alloc_chrdev_region(&dmu_dev, 0, 10, "dm-userspace") == 0)
++		return 1;
++
++	DMERR("Failed to allocate chardev region");
++	return 0;
++}
++
++/*
++ * Give back the character device region reserved by
++ * init_chardev_transport(); the count (10) must match the one used there.
++ */
++void cleanup_chardev_transport(void)
++{
++ unregister_chrdev_region(dmu_dev, 10);
++}
++
++/*
++ * Write the control device number of dev into buf as "major:minor", both
++ * in hexadecimal, truncated to maxlen bytes.
++ */
++void write_chardev_transport_info(struct dmu_device *dev,
++				  char *buf, unsigned int maxlen)
++{
++	struct chardev_transport *transport = dev->transport_private;
++	dev_t ctl = transport->ctl_dev;
++
++	snprintf(buf, maxlen, "%x:%x", MAJOR(ctl), MINOR(ctl));
++}
+diff -purN ../pristine-linux-2.6.16.13/drivers/md/dm-userspace.h ./drivers/md/dm-userspace.h
+--- ../pristine-linux-2.6.16.13/drivers/md/dm-userspace.h 1969-12-31 18:00:00.000000000 -0600
++++ ./drivers/md/dm-userspace.h 2006-08-16 18:48:18.000000000 -0500
+@@ -0,0 +1,147 @@
++/*
++ * Copyright (C) International Business Machines Corp., 2006
++ * Author: Dan Smith <danms@us.ibm.com>
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; under version 2 of the License.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
++ *
++ */
++
++#ifndef __DM_USERSPACE_H
++#define __DM_USERSPACE_H
++
++#include <linux/types.h>
++
++/*
++ * Message Types
++ */
++#define DM_USERSPACE_GET_VERSION 1
++#define DM_USERSPACE_MAP_BLOCK_REQ 2
++#define DM_USERSPACE_MAP_BLOCK_RESP 3
++#define DM_USERSPACE_MAP_FAILED 4
++#define DM_USERSPACE_MAP_INVALIDATE 5
++#define DM_USERSPACE_STATUS 6
++
++/*
++ * Status codes
++ */
++#define DM_USERSPACE_INVAL_COMPLETE 101
++#define DM_USERSPACE_INVAL_FAILED 102
++#define DM_USERSPACE_SYNC_COMPLETE 103
++
++/*
++ * Flags and associated macros
++ */
++#define DMU_FLAG_VALID 1
++#define DMU_FLAG_RD 2
++#define DMU_FLAG_WR 4
++#define DMU_FLAG_COPY_FIRST 8
++#define DMU_FLAG_TEMPORARY 16
++#define DMU_FLAG_INUSE 32
++#define DMU_FLAG_SYNC 64
++#define DMU_FLAG_WAITING 128
++
++/*
++ * Flag helpers.  They are static inline because this header is included
++ * from several translation units; a plain static function would be
++ * emitted (and warned about when unused) in each of them.
++ */
++
++/* Returns 1 if any bit of flag is set in *flags, 0 otherwise */
++static inline int dmu_get_flag(uint32_t *flags, uint32_t flag)
++{
++	return (*flags & flag) != 0;
++}
++
++/* Sets the bits of flag in *flags */
++static inline void dmu_set_flag(uint32_t *flags, uint32_t flag)
++{
++	*flags |= flag;
++}
++
++/* Clears the bits of flag in *flags */
++static inline void dmu_clr_flag(uint32_t *flags, uint32_t flag)
++{
++	*flags &= (~flag);
++}
++
++/* Copies the state of the flag bits from src into *flags */
++static inline void dmu_cpy_flag(uint32_t *flags, uint32_t src, uint32_t flag)
++{
++	*flags = (*flags & ~flag) | (src & flag);
++}
++
++/*
++ * This message header is sent in front of every message, in both
++ * directions
++ */
++struct dmu_msg_header {
++ /* One of the DM_USERSPACE_* message types */
++ uint32_t msg_type;
++ /* Size in bytes of the payload that follows this header */
++ uint32_t payload_len;
++ /* Identifier of the request this message belongs to */
++ uint32_t id;
++};
++
++/* DM_USERSPACE_GET_VERSION */
++struct dmu_msg_version {
++ uint32_t userspace_ver;
++ /* Filled in by the kernel as (major << 16) | (minor << 8) | patch */
++ uint32_t kernel_ver;
++};
++
++/* For status codes */
++struct dmu_msg_status {
++ /* Identifier of the operation the status refers to */
++ uint32_t id_of_op;
++ /* One of the status codes (DM_USERSPACE_INVAL_COMPLETE, ...) */
++ uint32_t status;
++};
++
++/* DM_USERSPACE_MAP_BLOCK_REQ
++ *
++ * NOTE(review): a 64-bit member followed by a single 32-bit member means
++ * sizeof() may differ between 32-bit and 64-bit ABIs -- confirm if
++ * mixed-bitness kernel/userspace must be supported.
++ */
++struct dmu_msg_map_request {
++ uint64_t org_block;
++
++ /* DMU_FLAG_RD / DMU_FLAG_WR describe the access being mapped */
++ uint32_t flags;
++};
++
++/* DM_USERSPACE_MAP_BLOCK_RESP
++ * DM_USERSPACE_MAP_FAILED
++ */
++struct dmu_msg_map_response {
++ uint64_t org_block;
++ uint64_t new_block;
++ int64_t offset;
++
++ /* id of the DM_USERSPACE_MAP_BLOCK_REQ being answered */
++ uint32_t id_of_req;
++ uint32_t flags;
++
++ /* Source device; looked up only when DMU_FLAG_COPY_FIRST is set */
++ uint32_t src_maj;
++ uint32_t src_min;
++
++ /* Destination device */
++ uint32_t dst_maj;
++ uint32_t dst_min;
++};
++
++/* DM_USERSPACE_MAP_INVALIDATE */
++struct dmu_msg_invalidate_map {
++ uint64_t org_block;
++};
++
++/*
++ * Returns the payload size in bytes for a message or status type, or -1
++ * if the type is unknown.  All three status codes, including
++ * DM_USERSPACE_SYNC_COMPLETE, travel as a struct dmu_msg_status.
++ */
++static inline int dmu_get_msg_len(int type)
++{
++	switch (type) {
++	case DM_USERSPACE_GET_VERSION:
++		return sizeof(struct dmu_msg_version);
++	case DM_USERSPACE_INVAL_COMPLETE:
++	case DM_USERSPACE_INVAL_FAILED:
++	case DM_USERSPACE_SYNC_COMPLETE:
++	case DM_USERSPACE_STATUS:
++		return sizeof(struct dmu_msg_status);
++	case DM_USERSPACE_MAP_BLOCK_REQ:
++		return sizeof(struct dmu_msg_map_request);
++	case DM_USERSPACE_MAP_BLOCK_RESP:
++	case DM_USERSPACE_MAP_FAILED:
++		return sizeof(struct dmu_msg_map_response);
++	case DM_USERSPACE_MAP_INVALIDATE:
++		return sizeof(struct dmu_msg_invalidate_map);
++	default:
++		return -1;
++	}
++}
++
++#endif
+diff -purN ../pristine-linux-2.6.16.13/drivers/md/Kconfig ./drivers/md/Kconfig
+--- ../pristine-linux-2.6.16.13/drivers/md/Kconfig 2006-05-02 16:38:44.000000000 -0500
++++ ./drivers/md/Kconfig 2006-08-16 18:48:18.000000000 -0500
+@@ -210,6 +210,12 @@ config DM_SNAPSHOT
+ ---help---
+ Allow volume managers to take writeable snapshots of a device.
+
++config DM_USERSPACE
++ tristate "Userspace target (EXPERIMENTAL)"
++ depends on BLK_DEV_DM && EXPERIMENTAL
++ ---help---
++ A target that provides a userspace interface to device-mapper
++
+ config DM_MIRROR
+ tristate "Mirror target (EXPERIMENTAL)"
+ depends on BLK_DEV_DM && EXPERIMENTAL
+diff -purN ../pristine-linux-2.6.16.13/drivers/md/Makefile ./drivers/md/Makefile
+--- ../pristine-linux-2.6.16.13/drivers/md/Makefile 2006-05-02 16:38:44.000000000 -0500
++++ ./drivers/md/Makefile 2006-08-16 18:48:18.000000000 -0500
+@@ -14,6 +14,7 @@ raid6-objs := raid6main.o raid6algos.o r
+ raid6altivec1.o raid6altivec2.o raid6altivec4.o \
+ raid6altivec8.o \
+ raid6mmx.o raid6sse1.o raid6sse2.o
++dm-user-objs := dm-userspace.o dm-userspace-chardev.o
+ hostprogs-y := mktables
+
+ # Note: link order is important. All raid personalities
+@@ -37,6 +38,7 @@ obj-$(CONFIG_DM_MULTIPATH_EMC) += dm-emc
+ obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o
+ obj-$(CONFIG_DM_MIRROR) += dm-mirror.o
+ obj-$(CONFIG_DM_ZERO) += dm-zero.o
++obj-$(CONFIG_DM_USERSPACE) += dm-user.o
+
+ quiet_cmd_unroll = UNROLL $@
+ cmd_unroll = $(PERL) $(srctree)/$(src)/unroll.pl $(UNROLL) \
+diff -purN ../pristine-linux-2.6.16.13/include/linux/dm-userspace.h ./include/linux/dm-userspace.h
+--- ../pristine-linux-2.6.16.13/include/linux/dm-userspace.h 1969-12-31 18:00:00.000000000 -0600
++++ ./include/linux/dm-userspace.h 2006-08-16 18:48:28.000000000 -0500
+@@ -0,0 +1,147 @@
++/*
++ * Copyright (C) International Business Machines Corp., 2006
++ * Author: Dan Smith <danms@us.ibm.com>
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; under version 2 of the License.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
++ *
++ */
++
++#ifndef __DM_USERSPACE_H
++#define __DM_USERSPACE_H
++
++#include <linux/types.h>
++
++/*
++ * Message Types
++ */
++#define DM_USERSPACE_GET_VERSION 1
++#define DM_USERSPACE_MAP_BLOCK_REQ 2
++#define DM_USERSPACE_MAP_BLOCK_RESP 3
++#define DM_USERSPACE_MAP_FAILED 4
++#define DM_USERSPACE_MAP_INVALIDATE 5
++#define DM_USERSPACE_STATUS 6
++
++/*
++ * Status codes
++ */
++#define DM_USERSPACE_INVAL_COMPLETE 101
++#define DM_USERSPACE_INVAL_FAILED 102
++#define DM_USERSPACE_SYNC_COMPLETE 103
++
++/*
++ * Flags and associated macros
++ */
++#define DMU_FLAG_VALID 1
++#define DMU_FLAG_RD 2
++#define DMU_FLAG_WR 4
++#define DMU_FLAG_COPY_FIRST 8
++#define DMU_FLAG_TEMPORARY 16
++#define DMU_FLAG_INUSE 32
++#define DMU_FLAG_SYNC 64
++#define DMU_FLAG_WAITING 128
++
++/*
++ * Flag helpers.  They are static inline because this header is included
++ * from several translation units; a plain static function would be
++ * emitted (and warned about when unused) in each of them.
++ */
++
++/* Returns 1 if any bit of flag is set in *flags, 0 otherwise */
++static inline int dmu_get_flag(uint32_t *flags, uint32_t flag)
++{
++	return (*flags & flag) != 0;
++}
++
++/* Sets the bits of flag in *flags */
++static inline void dmu_set_flag(uint32_t *flags, uint32_t flag)
++{
++	*flags |= flag;
++}
++
++/* Clears the bits of flag in *flags */
++static inline void dmu_clr_flag(uint32_t *flags, uint32_t flag)
++{
++	*flags &= (~flag);
++}
++
++/* Copies the state of the flag bits from src into *flags */
++static inline void dmu_cpy_flag(uint32_t *flags, uint32_t src, uint32_t flag)
++{
++	*flags = (*flags & ~flag) | (src & flag);
++}
++
++/*
++ * This message header is sent in front of every message, in both
++ * directions
++ */
++struct dmu_msg_header {
++ /* One of the DM_USERSPACE_* message types */
++ uint32_t msg_type;
++ /* Size in bytes of the payload that follows this header */
++ uint32_t payload_len;
++ /* Identifier of the request this message belongs to */
++ uint32_t id;
++};
++
++/* DM_USERSPACE_GET_VERSION */
++struct dmu_msg_version {
++ uint32_t userspace_ver;
++ /* NOTE(review): presumably packed as (major << 16) | (minor << 8) | patch
++ * by the kernel sender -- confirm against the chardev transport.
++ */
++ uint32_t kernel_ver;
++};
++
++/* For status codes */
++struct dmu_msg_status {
++ /* Identifier of the operation the status refers to */
++ uint32_t id_of_op;
++ /* One of the status codes (DM_USERSPACE_INVAL_COMPLETE, ...) */
++ uint32_t status;
++};
++
++/* DM_USERSPACE_MAP_BLOCK_REQ
++ *
++ * NOTE(review): a 64-bit member followed by a single 32-bit member means
++ * sizeof() may differ between 32-bit and 64-bit ABIs -- confirm if
++ * mixed-bitness kernel/userspace must be supported.
++ */
++struct dmu_msg_map_request {
++ uint64_t org_block;
++
++ /* Flag bits (DMU_FLAG_*) describing the access being mapped */
++ uint32_t flags;
++};
++
++/* DM_USERSPACE_MAP_BLOCK_RESP
++ * DM_USERSPACE_MAP_FAILED
++ */
++struct dmu_msg_map_response {
++ uint64_t org_block;
++ uint64_t new_block;
++ int64_t offset;
++
++ /* id of the DM_USERSPACE_MAP_BLOCK_REQ being answered */
++ uint32_t id_of_req;
++ uint32_t flags;
++
++ /* Source device major/minor */
++ uint32_t src_maj;
++ uint32_t src_min;
++
++ /* Destination device major/minor */
++ uint32_t dst_maj;
++ uint32_t dst_min;
++};
++
++/* DM_USERSPACE_MAP_INVALIDATE */
++struct dmu_msg_invalidate_map {
++ uint64_t org_block;
++};
++
++/*
++ * Returns the payload size in bytes for a message or status type, or -1
++ * if the type is unknown.  All three status codes, including
++ * DM_USERSPACE_SYNC_COMPLETE, travel as a struct dmu_msg_status.
++ */
++static inline int dmu_get_msg_len(int type)
++{
++	switch (type) {
++	case DM_USERSPACE_GET_VERSION:
++		return sizeof(struct dmu_msg_version);
++	case DM_USERSPACE_INVAL_COMPLETE:
++	case DM_USERSPACE_INVAL_FAILED:
++	case DM_USERSPACE_SYNC_COMPLETE:
++	case DM_USERSPACE_STATUS:
++		return sizeof(struct dmu_msg_status);
++	case DM_USERSPACE_MAP_BLOCK_REQ:
++		return sizeof(struct dmu_msg_map_request);
++	case DM_USERSPACE_MAP_BLOCK_RESP:
++	case DM_USERSPACE_MAP_FAILED:
++		return sizeof(struct dmu_msg_map_response);
++	case DM_USERSPACE_MAP_INVALIDATE:
++		return sizeof(struct dmu_msg_invalidate_map);
++	default:
++		return -1;
++	}
++}
++
++#endif
^ permalink raw reply [flat|nested] 7+ messages in thread* [PATCH 2 of 6] dm-userspace userspace tool base patch
[not found] <patchbomb.1156540578@venkman-64>
2006-08-25 21:23 ` [PATCH 1 of 6] dm-userspace xen kernel patch Ryan Grimm
@ 2006-08-25 21:23 ` Ryan Grimm
2006-08-25 21:24 ` [PATCH 3 of 6] dm-userspace internal libdmu support for userspace tool Ryan Grimm
` (3 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Ryan Grimm @ 2006-08-25 21:23 UTC (permalink / raw)
To: Xen Devel; +Cc: Dan Smith
Signed-off-by: Ryan Grimm <grimm@us.ibm.com>
Signed-off-by: Dan Smith <danms@us.ibm.com>
# HG changeset patch
# User Ryan Grimm <grimm@us.ibm.com>
# Date 1156536093 18000
# Node ID 7ca9885684d9eaeef4422d52f9ae9efd033650d0
# Parent 2cb702dcea0e44dcfb9c243943d3e523245ad495
dm-userspace userspace tool base patch
diff -r 2cb702dcea0e -r 7ca9885684d9 tools/Makefile
--- a/tools/Makefile Fri Aug 25 10:58:10 2006 -0500
+++ b/tools/Makefile Fri Aug 25 15:01:33 2006 -0500
@@ -31,6 +31,7 @@ all: check
$(MAKE) -C $$subdir $@; \
done
$(MAKE) ioemu
+ $(MAKE) cowd
.PHONY: install
install: check
@@ -38,6 +39,7 @@ install: check
$(MAKE) -C $$subdir $@; \
done
$(MAKE) ioemuinstall
+ $(MAKE) cowdinstall
$(INSTALL_DIR) -p $(DESTDIR)/var/xen/dump
.PHONY: clean
@@ -46,6 +48,7 @@ clean: check_clean
$(MAKE) -C $$subdir $@; \
done
$(MAKE) ioemuclean
+ $(MAKE) cowdclean
.PHONY: distclean
distclean: clean
@@ -71,3 +74,11 @@ ioemu ioemuinstall ioemuclean:
ioemu ioemuinstall ioemuclean:
endif
+# cowd is built with autotools.  All three targets are phony; the last one
+# must be spelled "cowdclean" to match the rule below.
+.PHONY: cowd cowdinstall cowdclean
+# Generate cowd/Makefile only when the autotools are available; the leading
+# '-' keeps a missing toolchain from failing the whole tools build.
+cowd/Makefile:
+	-which libtoolize && which aclocal && which automake && \
+	cd cowd && sh autogen && sh configure
+# "cowd" runs the default target, "cowdinstall" runs "install".
+cowd cowdinstall: cowd/Makefile
+	-$(MAKE) -C cowd $(patsubst cowd%,%,$@)
+# Only clean if cowd was ever configured.
+cowdclean:
+	[ -f ./cowd/Makefile ] && $(MAKE) -C cowd clean || true
diff -r 2cb702dcea0e -r 7ca9885684d9 tools/cowd/Makefile.am
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/cowd/Makefile.am Fri Aug 25 15:01:33 2006 -0500
@@ -0,0 +1,11 @@
+bin_PROGRAMS = cowd
+
+cowd_SOURCES = cowd.c util.c cowd_loader.c cowd_control_loop.c \
+ cowd_plugin.h cowd.h cowd_loader.h cowd_ll.c cowd_ll.h
+cowd_CFLAGS = -I/lib/modules/`uname -r`/build/include \
+ -DDEFAULT_PLUGIN_DIR=\"@PLUGIN_DIR@\" @GLOBAL_CFLAGS@
+cowd_LDADD = -ldevmapper -lltdl
+cowd_LDFLAGS = -rdynamic -L./lib
+
+clean-local:
+ rm -f *~
diff -r 2cb702dcea0e -r 7ca9885684d9 tools/cowd/README
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/cowd/README Fri Aug 25 15:01:33 2006 -0500
@@ -0,0 +1,68 @@
+***
+*** dm-userspace cow daemon
+***
+
+The tools in this directory are the userspace-side of a functioning
+dm-userspace system. The 'cowd' daemon is responsible for communicating
+with the kernel module and passing requests to a loadable plugin.
+
+##############
+## Building ##
+##############
+
+Make sure you have the following packages on your system:
+ A patched device-mapper (for libdevmapper.so)
+ ltdl-devel (for ltdl.h and ltdl.so)
+
+A patch against the device-mapper package is available here:
+
+ http://static.danplanet.com/dm-userspace/
+
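+Once you have an appropriately-patched device-mapper library, simply
+run the following (the "sh autogen" step generates ./configure from
+configure.in and is only needed if ./configure does not exist yet):
+
+ % sh autogen
+ % ./configure
+ % make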
+
+And then as root:
+
+ # make install
+
+#############
+## Running ##
+#############
+
+First, you must load the kernel module. If you have a patched kernel,
+then run:
+
+ # modprobe dm-user
+
+if not, build the module and insert it manually:
+
+ # insmod ./dm-user.ko
+
+The following will create a /dev/mapper/mycow device, using the
+image.qcow file:
+
+ # ./cowd -p qcow mycow image.qcow
+
+Note that qcow support is a little shaky at the moment. It's probably
+a better idea to use the dscow plugin. This will create a foo.dscow
+file with a 64k block size:
+
+ # ./plugins/dscow_tool foo.dscow /path/to/base.img 64
+
+Then load it into cowd as such:
+
+ # ./cowd -p dscow mycow foo.dscow
+
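+You might also want to enable verbose output with "-v" or even
+debugging output with "-d", so you can watch the magic. Adding "-n"
+(do not daemonize) in either situation would be a good idea, because
+cowd only echoes its log messages to the terminal when it stays in the
+foreground.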
+
+After starting the daemon, you can then use /dev/mapper/mycow as a
+normal block device. Reads to unmodified blocks will go directly to
+the base device (specified when image.qcow was created). Writes will
+trigger a block copy from the base image to image.qcow, followed by a
+write of the changes to image.qcow. Subsequent reads will go directly
+to the remapped block in image.qcow.
diff -r 2cb702dcea0e -r 7ca9885684d9 tools/cowd/autogen
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/cowd/autogen Fri Aug 25 15:01:33 2006 -0500
@@ -0,0 +1,5 @@
+#!/bin/sh
+libtoolize --force
+aclocal
+automake --add-missing --copy --foreign
+autoconf
diff -r 2cb702dcea0e -r 7ca9885684d9 tools/cowd/configure.in
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/cowd/configure.in Fri Aug 25 15:01:33 2006 -0500
@@ -0,0 +1,77 @@
+AC_PREREQ(2.59)
+AC_INIT(cowd, 0.4.0)
+AC_CONFIG_AUX_DIR(.)
+AM_INIT_AUTOMAKE
+
+GLOBAL_CFLAGS="-Werror"
+
+libdevmapper_error() {
+ echo "*************************************************************"
+ echo "* ERROR: You need a newer version of libdevmapper for cowd. *"
+ echo "* The version of libdevmapper on this system does *"
+ echo "* not contain dm-userspace support *"
+ echo "* *"
+ echo "*************************************************************"
+
+ exit 1
+}
+
+AC_CONFIG_SRCDIR([cowd_plugin.h])
+# AC_CONFIG_HEADER([config.h])
+
+AC_ARG_WITH(plugindir,
+ [AC_HELP_STRING([--with-plugindir=<dir>],[Location of plugins])],
+ PLUGIN_DIR=$withval,
+ PLUGIN_DIR=$libdir)
+
+AC_ARG_ENABLE(gcov,
+ [AC_HELP_STRING([--enable-gcov],
+ [Enable coverage analysis])],
+ COVERAGE="-fprofile-arcs -ftest-coverage",
+ COVERAGE="")
+
+# Checks for programs.
+AC_PROG_CC
+AC_PROG_LIBTOOL
+
+# Checks for libraries.
+# Abort with an explicit message (and a non-zero status) when a required
+# library is missing, instead of a bare "exit" that ends configure silently.
+AC_CHECK_LIB([devmapper], [dm_task_create],,
+    [AC_MSG_ERROR([libdevmapper is required to build cowd])])
+AC_CHECK_LIB([ltdl], [lt_dlsym],,
+    [AC_MSG_ERROR([libltdl is required to build cowd])])
+AC_CHECK_LIB([devmapper], [dmu_ctl_open],, libdevmapper_error)
+
+if test -z "$COVERAGE"; then
+ GLOBAL_CFLAGS="$GLOBAL_CFLAGS"
+else
+ GLOBAL_CFLAGS="$COVERAGE $GLOBAL_CFLAGS"
+ AC_CHECK_LIB([gcov], [__gcov_init])
+fi
+
+# Checks for header files.
+AC_HEADER_STDC
+AC_CHECK_HEADERS([fcntl.h inttypes.h netinet/in.h stdint.h stdlib.h \
+ string.h sys/ioctl.h unistd.h ltdl.h])
+
+# Checks for typedefs, structures, and compiler characteristics.
+AC_C_INLINE
+AC_TYPE_PID_T
+AC_CHECK_MEMBERS([struct stat.st_rdev])
+
+# Checks for library functions.
+AC_FUNC_FORK
+AC_PROG_GCC_TRADITIONAL
+AC_FUNC_MALLOC
+AC_TYPE_SIGNAL
+AC_FUNC_STAT
+AC_CHECK_FUNCS([memset strtol strtoull])
+
+AC_SUBST(PLUGIN_DIR)
+AC_SUBST(GLOBAL_CFLAGS)
+
+AC_CONFIG_FILES([Makefile])
+
+# This just makes it easier to run cowd from the source directory
+# for testing
+mkdir -p lib
+
+AC_OUTPUT
+
diff -r 2cb702dcea0e -r 7ca9885684d9 tools/cowd/cowd.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/cowd/cowd.c Fri Aug 25 15:01:33 2006 -0500
@@ -0,0 +1,453 @@
+/*
+ * Copyright (C) International Business Machines Corp., 2006
+ * Author: Dan Smith <danms@us.ibm.com>
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser
+ * General Public License. See the file COPYING in the main directory
+ * of this archive for more details.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sched.h>
+#include <errno.h>
+#include <signal.h>
+#include <wait.h>
+#include <getopt.h>
+#include <syslog.h>
+
+#include <libdevmapper.h>
+
+#include "cowd.h"
+#include "cowd_plugin.h"
+#include "cowd_loader.h"
+
+#define ERR_LEN 1024
+
+/* global control variables */
+int running;
+struct config_struct config;
+
+/*
+ * Load the plugin called name into dev->plugin and run its init hook.
+ *
+ * Returns 1 on success, or -1 if the plugin could not be loaded or its
+ * init_plugin() hook did not return PLUGIN_OK.
+ */
+int initialize_plugin(struct cow_device *dev, char *name)
+{
+    /*
+     * The plugin table has not been filled in yet, so make sure errmsg is
+     * not an indeterminate pointer if loading fails before the plugin had
+     * a chance to set it.
+     */
+    dev->plugin.errmsg = NULL;
+
+    if (!load_plugin(&dev->plugin, name)) {
+        printf("Loading %s failed: %s\n",
+               name,
+               dev->plugin.errmsg ? dev->plugin.errmsg : "unknown error");
+        return -1;
+    }
+
+    if (dev->plugin.init_plugin(dev, config.debug) != PLUGIN_OK) {
+        printf("Initializing %s failed: %s\n",
+               name,
+               dev->plugin.errmsg ? dev->plugin.errmsg : "unknown error");
+        return -1;
+    }
+
+    if (config.verbose) {
+        /* blocks and block_size are uint64_t: use the matching format */
+        printf("Device %s: %" PRIu64 " blocks @ %" PRIu64 " KB\n",
+               dev->name,
+               dev->blocks,
+               dev->block_size >> 10);
+    }
+
+    return 1;
+}
+
+/*
+ * Create the /dev/mapper/<name> block device node for dev.
+ *
+ * Asks device-mapper for the device's info (stored in dev->info), builds
+ * a device number from the reported major/minor with MKDEV() and calls
+ * mknod().  Failures are reported on stderr; nothing is returned.
+ */
+void make_dm_node(struct cow_device *dev)
+{
+    struct dm_task *task;
+    dev_t devno;
+    char filename[256];
+
+    snprintf(filename, sizeof(filename), "/dev/mapper/%s", dev->name);
+
+    task = dm_task_create(DM_DEVICE_INFO);
+    if (!task) {
+        fprintf(stderr,
+                "Failed to get info for device %s\n", dev->name);
+        return;
+    }
+
+    dm_task_set_name(task, dev->name);
+    if (!dm_task_run(task) || !dm_task_get_info(task, &dev->info)) {
+        fprintf(stderr,
+                "Failed to get info for device %s\n", dev->name);
+        dm_task_destroy(task);
+        return;
+    }
+
+    /* dev->info has been filled in, so the task can be released */
+    dm_task_destroy(task);
+
+    devno = MKDEV(dev->info.major, dev->info.minor);
+
+    if (config.debug)
+        printf("Creating /dev/mapper/%s with 0x%llx (%i %i)\n",
+               dev->name, (unsigned long long)devno,
+               dev->info.major, dev->info.minor);
+
+    /* A node that already exists is not treated as an error */
+    if (mknod(filename, S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP, devno) < 0 &&
+        errno != EEXIST)
+        fprintf(stderr, "Failed to create %s: %s\n",
+                filename, strerror(errno));
+}
+
+/*
+ * Remove the /dev/mapper/<name> node for dev.  The result of unlink() is
+ * not checked.
+ */
+void remove_dm_node(struct cow_device *dev)
+{
+ char filename[256];
+
+ snprintf(filename, 256, "/dev/mapper/%s", dev->name);
+ unlink(filename);
+}
+
+/*
+ * Remove the device-mapper device named dev->name and its /dev/mapper
+ * node.
+ *
+ * The node is removed even if the device-mapper request fails.
+ * Returns 1 on success and 0 if the DM_DEVICE_REMOVE task could not be
+ * created or run.
+ */
+int destroy_dm_device(struct cow_device *dev)
+{
+    struct dm_task *task;
+    int ok = 0;
+
+    task = dm_task_create(DM_DEVICE_REMOVE);
+    if (task) {
+        dm_task_set_name(task, dev->name);
+        ok = dm_task_run(task) ? 1 : 0;
+        dm_task_destroy(task);
+    }
+
+    if (!ok)
+        fprintf(stderr, "Failed to remove device %s\n", dev->name);
+
+    remove_dm_node(dev);
+
+    return ok;
+}
+
+/*
+ * Signal handler for SIGINT, SIGTERM and SIGCHLD.
+ *
+ * SIGINT/SIGTERM clear the global 'running' flag so the control loop
+ * winds down.  SIGCHLD reaps every child that has already exited; several
+ * exits may be reported by a single signal, so waitpid() is called until
+ * nothing is left.  errno is preserved because the handler may interrupt
+ * code that is about to inspect it.
+ */
+void sighandler(int signal)
+{
+    int saved_errno = errno;
+    int status;
+
+    switch (signal) {
+    case SIGINT:
+    case SIGTERM:
+        running = 0;
+        break;
+    case SIGCHLD:
+        while (waitpid(0, &status, WNOHANG) > 0)
+            ;
+        break;
+    default:
+        /* Unknown Signal */
+        break;
+    }
+
+    errno = saved_errno;
+}
+
+/*
+ * Print the program version to stdout.
+ * NOTE(review): this prints 0.0.1 while configure.in declares the package
+ * as AC_INIT(cowd, 0.4.0) -- confirm which one is intended.
+ */
+void version()
+{
+ printf("cowd v%i.%i.%i\n", 0, 0, 1);
+}
+
+/*
+ * Print the command-line synopsis to stdout.  name is printed as the
+ * program name (callers pass argv[0]).
+ */
+void usage(char *name)
+{
+ printf("%s [OPTS] <name> <plugin args ...>\n"
+ "\n"
+ "name: The name to register for this device\n"
+ "plugin args: Arguments to be passed to the plugin\n"
+ "\n"
+ "Options:\n"
+ " -p,--plugin=name : Use plugin <name>\n"
+ " -b,--bsize=kb : Set blocks size to <bsize> KB\n"
+ " -I,--init : Force plugin to initialize CoW space\n"
+ " -n,--nodaemon : Do not daemonize\n"
+ " -r,--resume : Do not initialize device\n"
+ " -s,--sync : Operate block in sync-alloc mode\n"
+ " -V,--version : Display version and exit\n"
+ " -d,--debug : Enable debugging output\n"
+ " -v,--verbose : Enable verbose output\n"
+ " -i,--pidfile=path : Write pid to path\n"
+ "\n", name);
+}
+
+/*
+ * Parse the command line into the global 'config' and into dev.
+ *
+ * On success dev->plugin_name, dev->name, dev->plugin_args and
+ * dev->plugin_num_args are filled in (the plugin argument vector starts
+ * with the device name itself) and syslog is opened.
+ *
+ * Returns 0 on success, 1 if only the version was requested (the caller
+ * should exit successfully) and -1 on any error.
+ */
+int parse_arguments(int argc, char **argv, struct cow_device *dev)
+{
+    int c;
+    int optidx = 0;
+    int logmask = 0;
+    unsigned long kb;
+    char *end;
+    static struct option lopts[] = {
+        {"plugin",   1, 0, 'p'},
+        {"verbose",  0, 0, 'v'},
+        {"nodaemon", 0, 0, 'n'},
+        {"version",  0, 0, 'V'},
+        {"bsize",    1, 0, 'b'},
+        {"sync",     0, 0, 's'},
+        {"debug",    0, 0, 'd'},
+        {"resume",   0, 0, 'r'},
+        {"init",     0, 0, 'I'},
+        {"pidfile",  1, 0, 'i'},
+        {0,          0, 0, 0  }
+    };
+
+    /* Defaults */
+    snprintf(dev->plugin_name, MAX_PLUGIN_LEN, "%s", "dscow");
+    config.verbose = 0;
+    config.debug = 0;
+    config.daemonize = 1;
+    config.init_device = 1;
+    config.init = 0;
+    config.block_size = 0;
+    config.sync_mode = 0;
+    config.pidfile = NULL;
+
+    while (1) {
+        /*
+         * "i:" makes the short form of --pidfile work as advertised by
+         * usage(); every letter listed here has a case below.
+         */
+        c = getopt_long(argc, argv, "+p:vnVb:drIsi:", lopts, &optidx);
+        if (c == -1)
+            break;
+
+        switch (c) {
+
+        case 'p':
+            /* snprintf always NUL-terminates, unlike strncpy */
+            snprintf(dev->plugin_name, MAX_PLUGIN_LEN, "%s", optarg);
+            break;
+
+        case 'v':
+            config.verbose = 1;
+            break;
+
+        case 'n':
+            config.daemonize = 0;
+            break;
+
+        case 'V':
+            version();
+            return(1);
+
+        case 'r':
+            config.init_device = 0;
+            break;
+
+        case 'b':
+            errno = 0;
+            kb = strtoul(optarg, &end, 0);
+            /* reject garbage and values that overflow once scaled to bytes */
+            if (errno || end == optarg || *end != '\0' ||
+                kb > (((unsigned long)-1) >> 10)) {
+                fprintf(stderr, "Invalid block size: `%s'\n", optarg);
+                return -1;
+            }
+            config.block_size = kb << 10;
+            if (config.block_size & (config.block_size - 1)) {
+                fprintf(stderr,
+                        "Block size must be a power of 2!\n");
+                return -1;
+            }
+            break;
+
+        case 'd':
+            config.debug = 1;
+            break;
+
+        case 'I':
+            config.init = 1;
+            break;
+
+        case 's':
+            config.sync_mode = 1;
+            break;
+
+        case 'i':
+            config.pidfile = strdup(optarg);
+            if (!config.pidfile) {
+                fprintf(stderr, "Out of memory\n");
+                return -1;
+            }
+            break;
+
+        default:
+            /* getopt_long() has already printed a diagnostic */
+            usage(argv[0]);
+            return -1;
+        };
+    }
+
+    if ((argc - optind) == 0) {
+        fprintf(stderr, "Error: `name' is required\n");
+        usage(argv[0]);
+        return -1;
+    }
+
+    dev->name = strdup(argv[optind]);
+    if (!dev->name) {
+        fprintf(stderr, "Out of memory\n");
+        return -1;
+    }
+
+    logmask = LOG_CONS | LOG_PID | LOG_NDELAY;
+    if (!config.daemonize)
+        logmask |= LOG_PERROR;
+    openlog("cowd", logmask, LOG_USER);
+
+    logmask = LOG_UPTO(LOG_NOTICE);
+    if (config.verbose)
+        logmask |= LOG_MASK(LOG_INFO);
+    if (config.debug)
+        logmask |= LOG_MASK(LOG_DEBUG);
+    setlogmask(logmask);
+
+    if (config.verbose) {
+        fprintf(stderr, "Daemon Configuration:\n");
+        fprintf(stderr,
+                "Plugin: %s\n"
+                "Daemon: %s\n"
+                "Init CoW: %s\n"
+                "Verbose: %s\n"
+                "Block Size: %lu KB\n"
+                "Init device:%s\n",
+                dev->plugin_name,
+                config.daemonize ? "yes" : "no",
+                config.init ? "yes" : "no",
+                "yes",
+                config.block_size >> 10,
+                config.init_device ? "yes" : "no");
+    }
+
+    /*
+     * Everything from the device name onwards is handed to the plugin.
+     * At least the name is present here, so the vector is never empty;
+     * the two extra zeroed slots are kept from the original allocation.
+     */
+    dev->plugin_num_args = argc - optind;
+    dev->plugin_args = calloc((size_t)dev->plugin_num_args + 2,
+                              sizeof(char *));
+    if (!dev->plugin_args) {
+        fprintf(stderr, "Out of memory\n");
+        return -1;
+    }
+
+    for (c = 0; c < dev->plugin_num_args; c++) {
+        dev->plugin_args[c] = strdup(argv[optind + c]);
+        if (!dev->plugin_args[c]) {
+            fprintf(stderr, "Out of memory\n");
+            return -1;
+        }
+        if (config.debug)
+            fprintf(stderr,
+                    "Adding plugin arg %i/%i: %s\n",
+                    c, dev->plugin_num_args,
+                    dev->plugin_args[c]);
+    }
+
+    return 0;
+}
+
+/*
+ * Create the device-mapper device for dev with a single "userspace"
+ * target covering the whole device.
+ *
+ * The target parameters are "<name> <block_size>" followed by one
+ * " major:minor" entry for every device reported by the plugin's
+ * get_devs() hook; the returned array is freed here.
+ *
+ * Returns 1 on success and 0 on failure.
+ */
+int make_dm_table(struct cow_device *dev)
+{
+    struct dm_task *task;
+    char params[256];
+    size_t used;
+    int r, i, n;
+    int ret = 0;
+    uint64_t sectors;
+    dev_t *devs;
+    int dev_count = 0;
+
+    devs = dev->plugin.get_devs(dev, &dev_count);
+    if (!devs && dev_count > 0) {
+        fprintf(stderr, "Plugin did not provide its device list\n");
+        return 0;
+    }
+
+    sectors = (dev->blocks * dev->block_size) / ((uint64_t)512);
+
+    n = snprintf(params, sizeof(params), "%s %" PRIu64,
+                 dev->name, dev->block_size);
+    if (n < 0 || (size_t)n >= sizeof(params))
+        goto too_long;
+    used = (size_t)n;
+
+    /*
+     * Append each device directly into params with an explicit bound.
+     * The former 7-byte scratch buffer truncated entries such as
+     * " 253:10", and strcat() did not check the remaining space.
+     */
+    for (i = 0; i < dev_count; i++) {
+        n = snprintf(params + used, sizeof(params) - used, " %u:%u",
+                     (unsigned)(devs[i] & 0xFF00) >> 8,
+                     (unsigned)(devs[i] & 0x00FF));
+        if (n < 0 || (size_t)n >= sizeof(params) - used)
+            goto too_long;
+        used += (size_t)n;
+    }
+
+    free(devs);
+    devs = NULL;
+
+    if (config.debug)
+        fprintf(stderr, "Creating dm device: %s\n", params);
+
+    task = dm_task_create(DM_DEVICE_CREATE);
+    if (!task) {
+        fprintf(stderr, "Failed to run device-mapper command!\n");
+        return 0;
+    }
+
+    dm_task_set_name(task, dev->name);
+
+    r = dm_task_add_target(task,
+                           0, sectors,
+                           "userspace", params);
+    if (!r) {
+        /* report the start/length that was actually requested */
+        fprintf(stderr, "Failed to add target: %u %" PRIu64 " %s %s\n",
+                0U, sectors,
+                "userspace", params);
+        goto out;
+    }
+
+    r = dm_task_run(task);
+    if (!r) {
+        fprintf(stderr, "Failed to run device-mapper command!\n");
+        goto out;
+    }
+
+    ret = 1;
+out:
+    dm_task_destroy(task);
+    return ret;
+
+too_long:
+    fprintf(stderr, "Device-mapper table for %s is too long\n", dev->name);
+    free(devs);
+    return 0;
+}
+
+/*
+ * cowd entry point: parse arguments, load the plugin, create the
+ * device-mapper device and its node, open the dm-userspace control
+ * context, optionally daemonize and write a pid file, then run the
+ * control loop until a termination signal clears 'running'.
+ *
+ * Once the dm device exists, every failure path tears it down again so
+ * that no userspace-backed device is left behind without a daemon.
+ */
+int main(int argc, char **argv)
+{
+    struct cow_device *dev;
+    int r;
+    pid_t pid;
+
+    /* zeroed so that no pointer in *dev starts out indeterminate */
+    dev = calloc(1, sizeof(*dev));
+    if (!dev) {
+        fprintf(stderr, "Failed to allocate device: out of memory\n");
+        exit(1);
+    }
+
+    r = parse_arguments(argc, argv, dev);
+    if (r > 0)
+        exit(0);
+    else if (r < 0)
+        exit(1);
+
+    syslog(LOG_INFO, "Starting");
+
+    /* Load the plugin */
+    if (initialize_plugin(dev, dev->plugin_name) < 0) {
+        fprintf(stderr, "Failed to initialize plugin: %s\n",
+                dev->plugin_name);
+        exit(1);
+    }
+
+    /* Build initial device */
+    r = make_dm_table(dev);
+    if (!r) {
+        fprintf(stderr, "Failed to create DM device\n");
+        dev->plugin.cleanup_plugin(dev);
+        exit(1);
+    }
+
+    /* Create /dev/mapper/foo */
+    make_dm_node(dev);
+
+    dev->ctx = dmu_ctl_open(dev->name, O_NONBLOCK);
+    if (!dev->ctx) {
+        fprintf(stderr, "Unable to open control device\n");
+        destroy_dm_device(dev);
+        dev->plugin.cleanup_plugin(dev);
+        exit(1);
+    }
+
+    /* initialize link list of sync'd maps */
+    if (ll_init(&sync_list) < 0) {
+        fprintf(stderr, "Unable to allocate sync list\n");
+        dmu_ctl_close(dev->ctx);
+        destroy_dm_device(dev);
+        dev->plugin.cleanup_plugin(dev);
+        exit(1);
+    }
+
+    running = 1;
+
+    if (config.daemonize) {
+        int ret = daemon(0, 1);
+        if (ret) {
+            fprintf(stderr, "Unable to daemonize\n");
+            dmu_ctl_close(dev->ctx);
+            destroy_dm_device(dev);
+            dev->plugin.cleanup_plugin(dev);
+            exit(1);
+        }
+    }
+
+    /* taken after daemon() so the pid file names the surviving process */
+    pid = getpid();
+    if (config.pidfile) {
+        FILE *fpid = fopen(config.pidfile, "w");
+        if (fpid) {
+            fprintf(fpid, "%d\n", (int)pid);
+            fclose(fpid);
+        } else {
+            syslog(LOG_ERR, "Unable to write pid file %s: %m",
+                   config.pidfile);
+        }
+    }
+
+    signal(SIGTERM, sighandler);
+    signal(SIGCHLD, sighandler);
+    signal(SIGINT, sighandler);
+
+    cow_ctl_loop(dev);
+
+    dmu_ctl_close(dev->ctx);
+
+    destroy_dm_device(dev);
+
+    dev->plugin.cleanup_plugin(dev);
+
+    if (!config.daemonize)
+        fprintf(stderr, "Exiting...\n");
+
+    if (config.pidfile)
+        unlink(config.pidfile);
+
+    return 0;
+}
+
diff -r 2cb702dcea0e -r 7ca9885684d9 tools/cowd/cowd.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/cowd/cowd.h Fri Aug 25 15:01:33 2006 -0500
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) International Business Machines Corp., 2006
+ * Author: Dan Smith <danms@us.ibm.com>
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser
+ * General Public License. See the file COPYING in the main directory
+ * of this archive for more details.
+ *
+ */
+
+#ifndef __COWD_H
+#define __COWD_H
+
+#include "cowd_ll.h"
+#include <stdint.h>
+
+/* Daemon-wide settings, filled in from the command line. */
+struct config_struct {
+ int verbose; /* -v: enable verbose output */
+ int debug; /* -d: enable debugging output */
+ int daemonize; /* cleared by -n */
+ int init_device; /* cleared by -r */
+ int init; /* -I: force plugin to initialize CoW space */
+ unsigned long block_size; /* -b value converted from KB to bytes; 0 if not given */
+ int sync_mode; /* -s: sync-alloc mode */
+ char *pidfile; /* --pidfile path, or NULL */
+};
+
+extern struct config_struct config;
+
+/*
+ * One entry of sync_list: a write mapping made in sync mode that is
+ * waiting for its SYNC_COMPLETE status message.
+ */
+struct sync_blocks {
+ uint32_t id; /* id of the map request (from dmu_map_get_id()) */
+ uint64_t block; /* original block number of the request */
+ struct ll_member *member; /* list node that links this entry into sync_list */
+};
+
+/*
+ * List of outstanding sync_blocks entries.
+ * NOTE(review): this defines the object in a header, so every file that
+ * includes cowd.h gets its own tentative definition; consider declaring
+ * it 'extern' here and defining it once in a .c file.
+ */
+struct ll *sync_list;
+
+#endif
diff -r 2cb702dcea0e -r 7ca9885684d9 tools/cowd/cowd_control_loop.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/cowd/cowd_control_loop.c Fri Aug 25 15:01:33 2006 -0500
@@ -0,0 +1,264 @@
+/*
+ * Copyright (C) International Business Machines Corp., 2006
+ * Author: Dan Smith <danms@us.ibm.com>
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser
+ * General Public License. See the file COPYING in the main directory
+ * of this archive for more details.
+ *
+ */
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/select.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include <syslog.h>
+
+#ifdef INTERNAL_DMU
+# include <dmu.h>
+#endif
+
+#include "cowd.h"
+#include "cowd_plugin.h"
+
+/* Permit the signal handler to tell us that we should wrap things up
+ * soon */
+extern int running;
+
+/* Global cowd configuration */
+extern struct config_struct config;
+
+int need_hup = 0;
+
+/*
+ * Return the first sync_list member whose sync_blocks entry has
+ * block == org, or NULL if there is none.
+ */
+static struct ll_member *find_sync_block_by_org(uint64_t org)
+{
+ struct ll_member *p;
+ struct sync_blocks *sb;
+
+ for (p = sync_list->head; p != NULL; p = p->next) {
+ sb = p->member_of;
+
+ if (sb->block == org)
+ return p;
+ }
+
+ return NULL;
+}
+
+/*
+ * Return the first sync_list member whose sync_blocks entry has
+ * id == id, or NULL if there is none.
+ */
+static struct ll_member *find_sync_block_by_id(uint32_t id)
+{
+ struct ll_member *p;
+ struct sync_blocks *sb;
+
+ for (p = sync_list->head; p != NULL; p = p->next) {
+ sb = p->member_of;
+
+ if (sb->id == id)
+ return p;
+ }
+
+ return NULL;
+}
+
+
+/*
+ * Map-request callback registered with dmu_register_map_handler().
+ *
+ * Lets the plugin prepare the mapping and, for writes that were actually
+ * remapped, either completes the mapping immediately (async mode) or
+ * records it on sync_list and flags the request for sync (sync mode).
+ *
+ * Returns 1 on success, 0 if the plugin failed to map the block and -1
+ * if memory for the sync record could not be allocated.
+ */
+static int map_handler(void *data, struct dmu_map_data *map_data)
+{
+    struct cow_device *dev = (struct cow_device *)data;
+    int ret;
+    uint64_t org, new;
+
+    /* the plugin rewrites the block in map_data, so sample it twice */
+    org = dmu_map_get_block(map_data);
+    ret = dev->plugin.map_prepare(dev, map_data);
+    new = dmu_map_get_block(map_data);
+
+    if (ret != PLUGIN_OK) {
+        syslog(LOG_ERR, "Plugin failed to map %llu",
+               (unsigned long long)org);
+        return 0;
+    }
+
+    if (config.verbose)
+        syslog(LOG_INFO, "Plugin mapped %llu->%llu [%c]",
+               (unsigned long long)org,
+               (unsigned long long)new,
+               dmu_map_is_write(map_data) ? 'W' : 'R');
+
+    if (dmu_map_is_write(map_data) && (org != new)) {
+        /* A mapping was made */
+        if (config.sync_mode) {
+            /* Request to sync metadata with mapping */
+            struct sync_blocks *sb;
+
+            syslog(LOG_DEBUG,
+                   "setting sync flag for %llu",
+                   (unsigned long long)org);
+
+            sb = malloc(sizeof(*sb));
+            if (!sb) {
+                syslog(LOG_CRIT, "malloc failed");
+                return -1;
+            }
+
+            sb->id = dmu_map_get_id(map_data);
+            sb->block = org;
+            /* ll_member_init() logs its own failure */
+            if (ll_member_init(&sb->member, sb) < 0) {
+                free(sb);
+                return -1;
+            }
+            ll_add_tail(sync_list, sb->member);
+            dmu_map_set_sync(map_data);
+        } else {
+            /* No sync needed, Complete mapping immediately */
+            dev->plugin.map_complete(dev, org);
+        }
+    }
+
+    if ((org != new) && (org != (new-1))) {
+        printf("**** ERROR: Mapping %llu -> %llu\n",
+               (unsigned long long)org, (unsigned long long)new);
+    }
+
+    return 1;
+}
+
+/*
+ * Status callback registered with dmu_register_status_handler().
+ *
+ * Logs invalidation and flush notifications.  For SYNC_COMPLETE it looks
+ * up the pending sync record with the same id, acknowledges it with
+ * dmu_sync_complete(), lets the plugin complete the mapping and write
+ * its metadata, and then releases the record.
+ *
+ * Always returns 0.
+ */
+static int status_msg_handler(void *data, uint32_t id, uint32_t status)
+{
+    struct cow_device *dev = (struct cow_device *)data;
+
+    switch (status) {
+
+    case DMU_STATUS_INVAL_COMPLETE:
+        syslog(LOG_INFO, "Invalidation %u complete", id);
+        break;
+
+    case DMU_STATUS_INVAL_FAILED:
+        syslog(LOG_INFO, "Invalidation %u FAILED", id);
+        break;
+
+    case DMU_STATUS_BLOCK_FLUSHED:
+        /* the format has a %u, so the id must be passed */
+        syslog(LOG_INFO, "Request %u has flushed", id);
+        break;
+
+    case DMU_STATUS_SYNC_COMPLETE:
+    {
+        struct ll_member *p;
+        struct sync_blocks *sb;
+
+        if (!config.sync_mode) {
+            syslog(LOG_ERR,
+                   "Aiee! Got a SYNC_COMPLETE in aync mode!");
+            break;
+        }
+
+        p = find_sync_block_by_id(id);
+        if (p == NULL) {
+            syslog(LOG_ERR,
+                   "Got a SYNC_COMPLETE for %u "
+                   "that has no match\n",
+                   id);
+            break;
+        }
+
+        sb = p->member_of;
+
+        syslog(LOG_INFO,
+               "Writing metadata for id:%u, block:%llu", sb->id,
+               (unsigned long long)sb->block);
+        dmu_sync_complete(dev->ctx, id);
+        dev->plugin.map_complete(dev, sb->block);
+        dev->plugin.write_metadata(dev);
+
+        /* ll_remove() frees the list node; the record is ours to free */
+        ll_remove(p);
+        free(sb);
+
+        break;
+    }
+
+    case DMU_STATUS_UNKNOWN:
+    default:
+        syslog(LOG_ERR, "Unknown status received (%u) for id %u",
+               status, id);
+        break;
+    };
+
+    return 0;
+}
+
+/*
+ * SIGHUP handler: only sets need_hup; the control loop checks the flag
+ * and calls invalidate_all() from normal context.
+ */
+void hup_handler(int signal)
+{
+ if (signal == SIGHUP)
+ need_hup = 1;
+}
+
+/*
+ * Invalidate all possible remaps for the entire device. This happens
+ * all at once, which is kinda atomic. In other words, we invalidate
+ * all of these blocks before we process any new map requests, which
+ * should give some checkpoint-like behavior.
+ *
+ * A zero return from dmu_invalidate_block() is taken to mean that the
+ * outgoing queue is full (per the original comment here -- TODO confirm
+ * against libdmu).  In that case the queue is flushed and the same block
+ * is retried once, so that it is not silently skipped.
+ */
+void invalidate_all(struct cow_device *dev)
+{
+    uint64_t i;
+    int r;
+
+    syslog(LOG_INFO, "Invalidating blocks...");
+
+    for (i = 0; i < dev->blocks; i++) {
+        r = dmu_invalidate_block(dev->ctx, i);
+        if (!r) {
+            /* No more buffer space */
+            dmu_ctl_send_queue(dev->ctx);
+            sleep(1);
+
+            r = dmu_invalidate_block(dev->ctx, i);
+            if (!r)
+                syslog(LOG_ERR,
+                       "Failed to queue invalidation of block %llu",
+                       (unsigned long long)i);
+        }
+    }
+
+    dmu_ctl_send_queue(dev->ctx);
+
+    /* with no blocks there is no range to report (blocks - 1 would wrap) */
+    if (dev->blocks > 0)
+        syslog(LOG_DEBUG, "Invalidated blocks %llu - %llu",
+               0ULL, (unsigned long long)(dev->blocks - 1));
+}
+
+/*
+ * Main loop of the daemon.  Until 'running' is cleared it:
+ *   1. Invalidates every block when a SIGHUP has been received
+ *   2. Waits for events on the control context, dispatches them to the
+ *      registered map/status handlers and sends the queued replies
+ * On exit, every mapping still waiting on sync_list is completed.
+ */
+void cow_ctl_loop(struct cow_device *dev)
+{
+    struct ll_member *p;
+    struct sync_blocks *sb;
+    int dangle_count = 0;
+
+    /* Register SIGHUP handler */
+    signal(SIGHUP, hup_handler);
+
+    dmu_register_map_handler(dev->ctx, map_handler, dev);
+    dmu_register_status_handler(dev->ctx, status_msg_handler, dev);
+
+    while (running) {
+        if (need_hup) {
+            invalidate_all(dev);
+            need_hup = 0;
+            continue;
+        }
+
+        if (dmu_events_pending(dev->ctx, 1000)) {
+            dmu_process_events(dev->ctx);
+            /* Read-ahead */
+            dmu_ctl_send_queue(dev->ctx);
+        }
+    }
+
+    for (p = sync_list->head; p != NULL; p = p->next) {
+        sb = p->member_of;
+
+        syslog(LOG_ERR, "Completing dangling block %llu (%i)",
+               (unsigned long long)sb->block, ++dangle_count);
+        dev->plugin.map_complete(dev, sb->block);
+    }
+
+    syslog(LOG_INFO, "%i dangling blocks (%p)",
+           dangle_count, (void *)sync_list->head);
+
+    syslog(LOG_INFO, "Exiting...");
+}
diff -r 2cb702dcea0e -r 7ca9885684d9 tools/cowd/cowd_ll.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/cowd/cowd_ll.c Fri Aug 25 15:01:33 2006 -0500
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) International Business Machines Corp., 2006
+ * Author: Ryan Grimm <grimm@us.ibm.com>
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser
+ * General Public License. See the file COPYING in the main directory
+ * of this archive for more details.
+ *
+ */
+
+#include <stdio.h>
+#include <malloc.h>
+#include <syslog.h>
+#include "cowd_ll.h"
+
+#define COWD_LL_MAIN 0
+
+/*
+ * Allocate an empty list and store it in *ll.
+ * Returns 0 on success, or -1 (after logging) if malloc() fails; *ll is
+ * NULL in that case.
+ */
+int ll_init(struct ll **ll)
+{
+ *ll = malloc(sizeof(**ll));
+ if (!*ll) {
+ syslog(LOG_CRIT, "%s: malloc() failed", __FUNCTION__);
+ return -1;
+ }
+ (*ll)->head = (*ll)->tail = NULL;
+ return 0;
+}
+
+/*
+ * Allocate an unlinked list node in *member whose member_of pointer
+ * refers back to the object that owns it.
+ * Returns 0 on success, or -1 (after logging) if malloc() fails.
+ */
+int ll_member_init(struct ll_member **member, void *member_of)
+{
+ *member = malloc(sizeof(**member));
+ if (!*member) {
+ syslog(LOG_CRIT, "%s: malloc() failed", __FUNCTION__);
+ return -1;
+ }
+ (*member)->next = (*member)->prev = NULL;
+ (*member)->member_of = member_of;
+ (*member)->ll = NULL;
+ return 0;
+}
+
+/*
+ * Append member to the end of ll and record ll as its owning list.
+ * Always returns 0.
+ */
+int ll_add_tail(struct ll *ll, struct ll_member *member)
+{
+ if (ll->head == NULL) {
+ /* empty list: member becomes both head and tail */
+ ll->head = ll->tail = member;
+ member->next = NULL;
+ member->prev = NULL;
+ member->ll = ll;
+ } else {
+ ll->tail->next = member;
+ member->next = NULL;
+ member->prev = ll->tail;
+ ll->tail = member;
+ member->ll = ll;
+ }
+ return 0;
+}
+
+/*
+ * Unlink member from the list it belongs to and free the node itself.
+ * The object referenced by member->member_of is NOT freed; it remains
+ * the caller's responsibility.
+ *
+ * Returns 0 on success and -1 if member is NULL.
+ */
+int ll_remove(struct ll_member *member)
+{
+    if (!member)
+        return -1;
+
+    /* member->ll is NULL for a node that was never added to a list */
+    if (member->prev)
+        member->prev->next = member->next;
+    else if (member->ll)
+        member->ll->head = member->next;
+
+    if (member->next)
+        member->next->prev = member->prev;
+    else if (member->ll)
+        member->ll->tail = member->prev;
+
+    free(member);
+
+    return 0;
+}
diff -r 2cb702dcea0e -r 7ca9885684d9 tools/cowd/cowd_ll.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/cowd/cowd_ll.h Fri Aug 25 15:01:33 2006 -0500
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) International Business Machines Corp., 2006
+ * Author: Ryan Grimm <grimm@us.ibm.com>
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser
+ * General Public License. See the file COPYING in the main directory
+ * of this archive for more details.
+ *
+ */
+
+#ifndef __COWD_LL_H__
+#define __COWD_LL_H__
+
+/* Node of a doubly-linked list. */
+struct ll_member {
+ void *member_of; /* object this node stands for (set by ll_member_init) */
+ struct ll *ll; /* list the node is linked into; NULL until ll_add_tail() */
+ struct ll_member *next;
+ struct ll_member *prev;
+};
+
+/* Doubly-linked list; head and tail are both NULL while the list is empty. */
+struct ll {
+ struct ll_member *head;
+ struct ll_member *tail;
+};
+
+
+int ll_init(struct ll **ll);
+int ll_member_init(struct ll_member **member, void *member_of);
+int ll_add_tail(struct ll *ll, struct ll_member *member);
+int ll_remove(struct ll_member *member);
+#endif
diff -r 2cb702dcea0e -r 7ca9885684d9 tools/cowd/cowd_loader.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/cowd/cowd_loader.c Fri Aug 25 15:01:33 2006 -0500
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) International Business Machines Corp., 2006
+ * Author: Dan Smith <danms@us.ibm.com>
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser
+ * General Public License. See the file COPYING in the main directory
+ * of this archive for more details.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <dlfcn.h>
+#include <string.h>
+#include <syslog.h>
+
+#include "cowd_plugin.h"
+
+/* Calls a specially-named function in the plugin to initialize the
+ jump table.
+ Looks up the symbol "load_plugin" in the library referred to by handle
+ and returns whatever it returns; returns 0 if the symbol is missing. */
+static int poke_plugin(struct cowd_plugin *plugin, void *handle)
+{
+ int (*loader)(struct cowd_plugin *plugin);
+
+ loader = dlsym(handle, "load_plugin");
+
+ if (!loader) {
+ fprintf(stderr, "Failed to find LOAD_PLUGIN\n");
+ return 0;
+ }
+
+ return loader(plugin);
+}
+
+/*
+ * Load the dynamic library plugin.
+ *
+ * The library libcowd_<name>.so is looked for first in $COWD_PLUGIN_DIR
+ * (or DEFAULT_PLUGIN_DIR when the variable is unset) and then on the
+ * dynamic linker's normal search path.  On success the plugin's
+ * "load_plugin" entry point is called to fill in *plugin.
+ *
+ * Returns the entry point's result, or 0 if the library could not be
+ * loaded or memory ran out.
+ */
+int load_plugin(struct cowd_plugin *plugin, char *name)
+{
+    void *handle;
+    char *filename;
+    char *dir;
+    int len;
+
+    dir = getenv("COWD_PLUGIN_DIR");
+    if (!dir) {
+        dir = DEFAULT_PLUGIN_DIR;
+    }
+
+    /* "/libcowd_" (9) + ".so" (3) + terminating NUL (1) */
+    len = strlen(dir) + strlen(name) + 13;
+
+    filename = (char *)malloc(len);
+    if (!filename) {
+        fprintf(stderr, "Failed to load plugin %s: out of memory\n",
+                name);
+        return 0;
+    }
+
+    snprintf(filename, len, "%s/libcowd_%s.so", dir, name);
+
+    handle = dlopen(filename, RTLD_NOW | RTLD_GLOBAL);
+    if (handle == NULL) {
+        /* fall back to the system library search path */
+        snprintf(filename, len, "libcowd_%s.so", name);
+        handle = dlopen(filename, RTLD_NOW | RTLD_GLOBAL);
+        if (handle == NULL) {
+            fprintf(stderr, "Failed to load %s: %s\n",
+                    filename, dlerror());
+            free(filename);
+            return 0;
+        }
+
+        syslog(LOG_INFO,
+               "Loaded libcowd_%s.so from system path",
+               name);
+    } else {
+        syslog(LOG_INFO, "Loaded %s", filename);
+    }
+
+    /* the path was only needed for dlopen() and the log messages */
+    free(filename);
+
+    return poke_plugin(plugin, handle);
+}
diff -r 2cb702dcea0e -r 7ca9885684d9 tools/cowd/cowd_loader.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/cowd/cowd_loader.h Fri Aug 25 15:01:33 2006 -0500
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) International Business Machines Corp., 2006
+ * Author: Dan Smith <danms@us.ibm.com>
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser
+ * General Public License. See the file COPYING in the main directory
+ * of this archive for more details.
+ *
+ */
+
+#ifndef __COWD_LOADER_H
+#define __COWD_LOADER_H
+
+#include "cowd_plugin.h"
+
+#ifndef DEFAULT_PLUGIN_DIR
+# define DEFAULT_PLUGIN_DIR "./lib"
+#endif
+
+int load_plugin(struct cowd_plugin *plugin, char *name);
+
+#endif
diff -r 2cb702dcea0e -r 7ca9885684d9 tools/cowd/cowd_plugin.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/cowd/cowd_plugin.h Fri Aug 25 15:01:33 2006 -0500
@@ -0,0 +1,81 @@
+/*
+ * Copyright (C) International Business Machines Corp., 2006
+ * Author: Dan Smith <danms@us.ibm.com>
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser
+ * General Public License. See the file COPYING in the main directory
+ * of this archive for more details.
+ *
+ */
+
+#ifndef __COWD_PLUGIN_H
+#define __COWD_PLUGIN_H
+
+#include <stdbool.h>
+
+#include <stdint.h>
+
+#include <libdevmapper.h>
+
+#ifdef INTERNAL_DMU
+#include <dmu.h>
+#endif
+
+/*
+ * Build a device number from an 8-bit major (x) and an 8-bit minor (y).
+ * The arguments are parenthesised so that expressions such as
+ * MKDEV(a | b, c + 1) expand as intended.
+ */
+#define MKDEV(x,y) ((((x) << 8) & 0xFF00) | ((y) & 0xFF))
+
+#define MAX_PLUGIN_LEN 256
+
+unsigned long get_device_blocks(char *dev);
+uint64_t get_file_size(char *path);
+unsigned long long get_device_size(char *dev, uint64_t *size);
+char *make_dev_str(char *dev);
+loff_t dio_lseek(int fd, loff_t offset, int whence);
+int is_file(char *path);
+
+typedef enum plugin_status {
+ PLUGIN_OK=0,
+ PLUGIN_FAIL=-1,
+} p_status;
+
+enum dev_types {
+ COW,
+ BASE,
+};
+
+struct cow_device;
+
+/*
+ * Jump table filled in by a plugin's "load_plugin" entry point.
+ * Hooks that return int are compared against PLUGIN_OK by the daemon
+ * where their result is checked.
+ */
+struct cowd_plugin {
+ /* called once after loading, with the daemon's debug setting */
+ int (*init_plugin)(struct cow_device *, int debug);
+ /* called after a synced mapping has been completed */
+ int (*write_metadata)(struct cow_device *);
+ /* NOTE(review): no caller visible in this part of the patch */
+ bool (*need_flush)(struct cow_device *);
+ /* called for every map request; may change the block in the map data */
+ int (*map_prepare)(struct cow_device *, struct dmu_map_data *);
+ /* called with the original block once a write mapping is final */
+ int (*map_complete)(struct cow_device *, uint64_t org_block);
+ /* called before the daemon exits and on start-up failures */
+ void (*cleanup_plugin)(struct cow_device *);
+ /* returns an array of *count devices; the caller free()s the array */
+ dev_t *(*get_devs)(struct cow_device *, int *count);
+ /* message printed by the daemon when loading or initializing fails */
+ char *errmsg;
+};
+
+struct cow_device {
+ /* User-supplied attributes */
+ char *name;
+
+ uint64_t block_size;
+ uint64_t blocks;
+
+ /* The assigned control device */
+ struct dmu_context *ctx;
+
+ /* Device mapper info */
+ struct dm_info info;
+
+ /* Plugin information */
+ char plugin_name[MAX_PLUGIN_LEN];
+ int plugin_num_args;
+ char **plugin_args;
+ struct cowd_plugin plugin;
+ void *plugin_private;
+
+};
+
+
+#endif
diff -r 2cb702dcea0e -r 7ca9885684d9 tools/cowd/util.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/cowd/util.c Fri Aug 25 15:01:33 2006 -0500
@@ -0,0 +1,236 @@
+/*
+ * Copyright (C) International Business Machines Corp., 2006
+ * Author: Dan Smith <danms@us.ibm.com>
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser
+ * General Public License. See the file COPYING in the main directory
+ * of this archive for more details.
+ *
+ */
+
+#define __USE_LARGEFILE64
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <asm/fcntl.h>
+#include <sys/ioctl.h>
+#include <linux/fs.h>
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+#include <syslog.h>
+
+#include "cowd_plugin.h"
+
+/*
+ * Round @value up to the next multiple of 512 (one sector).
+ * Values that are already a multiple of 512 are returned unchanged.
+ */
+size_t round_up_to_sector(size_t value)
+{
+	const size_t sector_mask = 511;
+
+	return (value + sector_mask) & ~sector_mask;
+}
+
+/*
+ * Direct I/O helper functions
+ *
+ * These allow us to do simple reads and writes of any size (and to
+ * any location) without having to worry about sector alignment. Note
+ * that if the underlying format is anything other than sector chunks,
+ * data loss may occur.
+ */
+
+loff_t ppos; /* Only one dio file open at a time right now */
+loff_t vpos;
+/* FIXME: Do we want to update {v,p}pos after read/write? */
+
+/*
+ * Open @path with O_DIRECT and O_LARGEFILE added to the caller's @flags,
+ * and reset the cached virtual/physical positions to zero.
+ * Returns whatever open() returns.
+ */
+int dio_open(char *path, int flags)
+{
+	int dio_flags = O_DIRECT | O_LARGEFILE | flags;
+
+	vpos = 0;
+	ppos = 0;
+
+	return open(path, dio_flags);
+}
+
+/*
+ * Position a direct-I/O file at byte @offset.
+ *
+ * Only SEEK_SET is supported.  The descriptor itself is moved to the start
+ * of the sector that contains @offset (ppos), while the position the caller
+ * asked for is remembered in vpos; dio_read() and dio_write() use the
+ * difference between the two.
+ *
+ * Returns @offset on success and -1 on failure.  The cached positions are
+ * only updated once the underlying lseek() has succeeded, so they always
+ * describe where the descriptor really is.
+ */
+loff_t dio_lseek(int fd, loff_t offset, int whence)
+{
+	loff_t aligned;
+
+	if (whence != SEEK_SET || offset < 0)
+		return -1;
+
+	/* Round down to the sector that contains the requested byte */
+	aligned = offset - (offset % 512);
+
+	if (lseek(fd, aligned, SEEK_SET) != aligned)
+		return -1;
+
+	vpos = offset;
+	ppos = aligned;
+
+	return offset;
+}
+
+/*
+ * Read @count bytes into @buffer from the position set by dio_lseek().
+ *
+ * The transfer goes through a 512-byte aligned bounce buffer.  The buffer
+ * is sized to cover the gap between the sector-aligned descriptor position
+ * (ppos) and the requested position (vpos) as well as @count, so the copy
+ * out of it can never run past its end.
+ *
+ * Returns the number of bytes stored in @buffer (@count unless the read
+ * was short), -1 if read() failed (errno is preserved), or a negative
+ * errno value if the bounce buffer could not be allocated.
+ */
+int dio_read(int fd, void *buffer, size_t count)
+{
+	void *aligned_buf;
+	size_t skip = (size_t)(vpos - ppos);
+	size_t aligned_size = round_up_to_sector(skip + count);
+	size_t avail;
+	ssize_t got;
+	int ret;
+
+	ret = posix_memalign(&aligned_buf, 512, aligned_size);
+	if (ret != 0)
+		return -ret;
+
+	got = read(fd, aligned_buf, aligned_size);
+	if (got < 0) {
+		int saved_errno = errno;
+
+		syslog(LOG_CRIT, "dio_read(%i) failed: %m",
+		       (int)aligned_size);
+		free(aligned_buf);
+		errno = saved_errno;
+		return -1;
+	}
+
+	/* Only hand back bytes that were really read past the skip gap */
+	avail = ((size_t)got > skip) ? (size_t)got - skip : 0;
+	if (avail > count)
+		avail = count;
+
+	memcpy(buffer, (char *)aligned_buf + skip, avail);
+	free(aligned_buf);
+
+	return (int)avail;
+}
+
+/*
+ * Write @count bytes from @buffer at the position set by dio_lseek().
+ *
+ * Direct I/O must be done in whole sectors, so the affected sectors are
+ * first read into an aligned bounce buffer ("primed"), the caller's data
+ * is merged in, and the sectors are written back.  The bounce buffer
+ * covers the gap between ppos and vpos as well as @count, so the merge
+ * can never run past its end.
+ *
+ * Returns @count when every sector was written, the raw write() result
+ * for a short write, -1 if write() failed, -EIO if priming or seeking
+ * back failed, or a negative errno value if the bounce buffer could not
+ * be allocated.  The bounce buffer is released on every path.
+ */
+int dio_write(int fd, void *buffer, size_t count)
+{
+	void *aligned_buf;
+	size_t skip = (size_t)(vpos - ppos);
+	size_t aligned_size = round_up_to_sector(skip + count);
+	loff_t prev_ppos = ppos;
+	ssize_t done;
+	int ret;
+
+	if (vpos != ppos) {
+		syslog(LOG_ERR, "dio_write(): vpos: %llu ppos: %llu",
+		       (unsigned long long)vpos, (unsigned long long)ppos);
+	}
+
+	ret = posix_memalign(&aligned_buf, 512, aligned_size);
+	if (ret != 0)
+		return -ret;
+
+	/* Prime the buffer */
+	done = read(fd, aligned_buf, aligned_size);
+	if (done < 0) {
+		syslog(LOG_ERR, "dio_write() failed to prime: %m");
+		ret = -EIO;
+		goto out;
+	}
+	if ((size_t)done != aligned_size) {
+		/* Short read: errno is not meaningful here */
+		syslog(LOG_ERR, "dio_write() failed to prime");
+		ret = -EIO;
+		goto out;
+	}
+
+	/* The priming read moved the descriptor; go back to the sector */
+	if (lseek(fd, prev_ppos, SEEK_SET) != prev_ppos) {
+		syslog(LOG_ERR, "dio_write() failed to re-lseek: %m");
+		ret = -EIO;
+		goto out;
+	}
+
+	memcpy((char *)aligned_buf + skip, buffer, count);
+
+	done = write(fd, aligned_buf, aligned_size);
+	if (done < 0) {
+		syslog(LOG_ERR, "dio_write(%i) failed:%m",
+		       (int)aligned_size);
+		ret = -1;
+	} else if ((size_t)done == aligned_size) {
+		ret = (int)count;
+	} else {
+		ret = (int)done;
+	}
+
+out:
+	free(aligned_buf);
+
+	return ret;
+}
+
+/*
+ * Return the BLKGETSIZE value for the block device @dev.
+ *
+ * Returns 0 (after logging to syslog) if the device cannot be opened or
+ * the ioctl fails.  Descriptor 0 is a valid result of open(), so only a
+ * negative value is treated as failure.
+ *
+ * The function is deliberately not marked inline: it is exported through
+ * cowd_plugin.h and needs an ordinary external definition.
+ */
+unsigned long get_device_blocks(char *dev)
+{
+	unsigned long size = 0;
+	int fd;
+
+	fd = open(dev, O_RDONLY);
+	if (fd < 0) {
+		syslog(LOG_ERR, "Error trying to open %s: %m", dev);
+		return 0;
+	}
+
+	if (ioctl(fd, BLKGETSIZE, &size) < 0) {
+		syslog(LOG_ERR, "BLKGETSIZE failed on %s: %m", dev);
+		size = 0;
+	}
+
+	close(fd);
+
+	return size;
+}
+
+/*
+ * Return the size of block device @dev in bytes (BLKGETSIZE value times
+ * 512) and, when @size is not NULL, store the same value there.
+ *
+ * The device is queried once, so the returned and the stored value
+ * always agree.  The result is 0 when get_device_blocks() fails.
+ */
+unsigned long long get_device_size(char *dev, uint64_t *size)
+{
+	unsigned long long bytes;
+
+	bytes = (unsigned long long)get_device_blocks(dev) * 512;
+
+	if (size)
+		*size = bytes;
+
+	return bytes;
+}
+
+/*
+ * Return the st_size reported by stat() for @path.
+ * If stat() fails the error is printed with perror() and 0 is returned.
+ */
+uint64_t get_file_size(char *path)
+{
+	struct stat info;
+
+	if (stat(path, &info) != 0) {
+		perror(path);
+		return 0;
+	}
+
+	return info.st_size;
+}
+
+/*
+ * Format the device number of the node @dev as "major:minor".
+ *
+ * Only the low 8 bits of major and minor are used.  If @dev cannot be
+ * stat()ed the failure is logged and "0:0" is returned instead of
+ * formatting an uninitialized structure.
+ *
+ * The result lives in a static buffer: it is overwritten by the next
+ * call and must not be freed.
+ */
+char *make_dev_str(char *dev)
+{
+	static char str[10];
+	struct stat s;
+	unsigned int maj = 0, min = 0;
+
+	if (stat(dev, &s) == 0) {
+		maj = (s.st_rdev & 0xFF00) >> 8;
+		min = (s.st_rdev & 0x00FF);
+	} else {
+		syslog(LOG_ERR, "Error trying to stat %s: %m", dev);
+	}
+
+	snprintf(str, sizeof(str), "%u:%u", maj, min);
+
+	return str;
+}
+
+/*
+ * Loop setup functions. These need to be replaced by an ioctl()
+ * implementation, but this is good enough for now.
+ */
+
+/*
+ * Attach @path to loop device @dev by running "losetup".
+ *
+ * Returns 1 if the command exited successfully and 0 otherwise.  A
+ * command line that does not fit the buffer is refused rather than run
+ * in truncated form.
+ *
+ * NOTE(review): @dev and @path are pasted into a shell command without
+ * quoting, so shell metacharacters in either are interpreted by the
+ * shell.  Callers must only pass trusted names until this is replaced
+ * by the ioctl() implementation.
+ */
+int loop_setup(char *dev, char *path)
+{
+	char cmd[256];
+	int len;
+
+	len = snprintf(cmd, sizeof(cmd), "losetup %s %s 2>&1", dev, path);
+	if (len < 0 || (size_t)len >= sizeof(cmd))
+		return 0;
+
+	return system(cmd) == 0;
+}
+
+/*
+ * Detach loop device @dev by running "losetup -d".
+ *
+ * Returns 1 if the command exited successfully and 0 otherwise.  A
+ * command line that does not fit the buffer is refused rather than run
+ * in truncated form.
+ *
+ * NOTE(review): @dev is pasted into a shell command without quoting;
+ * callers must only pass trusted device names.
+ */
+int loop_destroy(char *dev)
+{
+	char cmd[256];
+	int len;
+
+	len = snprintf(cmd, sizeof(cmd), "losetup -d %s", dev);
+	if (len < 0 || (size_t)len >= sizeof(cmd))
+		return 0;
+
+	return system(cmd) == 0;
+}
+
+/*
+ * Return non-zero if @path names a regular file, 0 otherwise.
+ *
+ * A path that cannot be stat()ed is reported as "not a file".  The type
+ * is tested with S_ISREG(): masking st_mode with S_IFREG alone would
+ * also accept other file types whose type code shares that bit, such as
+ * sockets.
+ */
+int is_file(char *path)
+{
+	struct stat s;
+
+	if (stat(path, &s) != 0)
+		return 0;
+
+	return S_ISREG(s.st_mode) ? 1 : 0;
+}
+
^ permalink raw reply [flat|nested] 7+ messages in thread* [PATCH 3 of 6] dm-userspace internal libdmu support for userspace tool
[not found] <patchbomb.1156540578@venkman-64>
2006-08-25 21:23 ` [PATCH 1 of 6] dm-userspace xen kernel patch Ryan Grimm
2006-08-25 21:23 ` [PATCH 2 of 6] dm-userspace userspace tool base patch Ryan Grimm
@ 2006-08-25 21:24 ` Ryan Grimm
2006-08-25 21:24 ` [PATCH 4 of 6] dscow plugin for dm-userspace " Ryan Grimm
` (2 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Ryan Grimm @ 2006-08-25 21:24 UTC (permalink / raw)
To: Xen Devel; +Cc: Dan Smith
Signed-off-by: Ryan Grimm <grimm@us.ibm.com>
Signed-off-by: Dan Smith <danms@us.ibm.com>
# HG changeset patch
# User Ryan Grimm <grimm@us.ibm.com>
# Date 1156536094 18000
# Node ID 8c8d5dc4eaf4f0044f7fdd5adb282359eff7263a
# Parent 7ca9885684d9eaeef4422d52f9ae9efd033650d0
dm-userspace internal libdmu support for userspace tool
diff -r 7ca9885684d9 -r 8c8d5dc4eaf4 tools/Makefile
--- a/tools/Makefile Fri Aug 25 15:01:33 2006 -0500
+++ b/tools/Makefile Fri Aug 25 15:01:34 2006 -0500
@@ -77,7 +77,7 @@ endif
.PHONY: cowd cowdinstall cowclean
cowd/Makefile:
-which libtoolize && which aclocal && which automake && \
- cd cowd && sh autogen && sh configure
+ cd cowd && sh autogen && sh configure --enable-internal-dmu
cowd cowdinstall: cowd/Makefile
-$(MAKE) -C cowd $(patsubst cowd%,%,$@)
cowdclean:
diff -r 7ca9885684d9 -r 8c8d5dc4eaf4 tools/cowd/Makefile.am
--- a/tools/cowd/Makefile.am Fri Aug 25 15:01:33 2006 -0500
+++ b/tools/cowd/Makefile.am Fri Aug 25 15:01:34 2006 -0500
@@ -1,3 +1,5 @@ bin_PROGRAMS = cowd
+EXTRA_DIST = libdmu/dmu.c libdmu/dmu.h libdmu/internal_renames
+
bin_PROGRAMS = cowd
cowd_SOURCES = cowd.c util.c cowd_loader.c cowd_control_loop.c \
@@ -7,5 +9,13 @@ cowd_LDADD = -ldevmapper -lltdl
cowd_LDADD = -ldevmapper -lltdl
cowd_LDFLAGS = -rdynamic -L./lib
+if INTERNAL_DMU
+cowd_SOURCES += libdmu/dmu.c
+all-local:
+ for obj in cowd lib/*.so; do \
+ objcopy --redefine-syms libdmu/internal_renames $$obj; \
+ done
+endif
+
clean-local:
rm -f *~
diff -r 7ca9885684d9 -r 8c8d5dc4eaf4 tools/cowd/configure.in
--- a/tools/cowd/configure.in Fri Aug 25 15:01:33 2006 -0500
+++ b/tools/cowd/configure.in Fri Aug 25 15:01:34 2006 -0500
@@ -11,7 +11,19 @@ libdevmapper_error() {
echo "* The version of libdevmapper on this system does *"
echo "* not contain dm-userspace support *"
echo "* *"
+ echo "* If you cannot reinstall libdevmapper, you can include *"
+ echo "* rough internal support with --enable-internal-dmu *"
echo "*************************************************************"
+
+ # Exit non-zero: a bare "exit" would return the status of the echo above (0)
+ exit 1
+}
+
+libdevmapper_conflict() {
+ echo "*************************************************************"
+ echo "* ERROR: The system libdevmapper library has dm-userspace *"
+ echo "* support, which cannot be used in combination with *"
+ echo "* internal support. *"
+ echo "*************************************************************"
exit 1
}
@@ -30,6 +42,13 @@ AC_ARG_ENABLE(gcov,
COVERAGE="-fprofile-arcs -ftest-coverage",
COVERAGE="")
+AC_ARG_ENABLE(internal-dmu,
+ [AC_HELP_STRING([--enable-internal-dmu],
+ [Enable internal dm-user library support])],
+ need_internal_dmu="yes",
+ need_internal_dmu="")
+AM_CONDITIONAL(INTERNAL_DMU, test x$need_internal_dmu = xyes)
+
# Checks for programs.
AC_PROG_CC
AC_PROG_LIBTOOL
@@ -37,7 +56,14 @@ AC_PROG_LIBTOOL
# Checks for libraries.
AC_CHECK_LIB([devmapper], [dm_task_create],, exit)
AC_CHECK_LIB([ltdl], [lt_dlsym],, exit)
-AC_CHECK_LIB([devmapper], [dmu_ctl_open],, libdevmapper_error)
+
+if test x$need_internal_dmu = xyes; then
+ AC_CHECK_LIB([devmapper], [dmu_ctl_open], libdevmapper_conflict)
+ abs_libdmu_dir=$(readlink -f .)/libdmu
+ GLOBAL_CFLAGS="$GLOBAL_CFLAGS -DINTERNAL_DMU -I$abs_libdmu_dir"
+else
+ AC_CHECK_LIB([devmapper], [dmu_ctl_open],, libdevmapper_error)
+fi
if test -z "$COVERAGE"; then
GLOBAL_CFLAGS="$GLOBAL_CFLAGS"
diff -r 7ca9885684d9 -r 8c8d5dc4eaf4 tools/cowd/cowd.c
--- a/tools/cowd/cowd.c Fri Aug 25 15:01:33 2006 -0500
+++ b/tools/cowd/cowd.c Fri Aug 25 15:01:34 2006 -0500
@@ -24,6 +24,10 @@
#include <syslog.h>
#include <libdevmapper.h>
+
+#ifdef INTERNAL_DMU
+# include <dmu.h>
+#endif
#include "cowd.h"
#include "cowd_plugin.h"
diff -r 7ca9885684d9 -r 8c8d5dc4eaf4 tools/cowd/libdmu/dmu.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/cowd/libdmu/dmu.c Fri Aug 25 15:01:34 2006 -0500
@@ -0,0 +1,554 @@
+/*
+ * Copyright (C) International Business Machines Corp., 2006
+ * Author: Dan Smith <danms@us.ibm.com>
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser
+ * General Public License. See the file COPYING in the main directory
+ * of this archive for more details.
+ *
+ */
+
+#include <stdio.h>
+#include <fcntl.h>
+#include <linux/fs.h>
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+#include <errno.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <libdevmapper.h>
+#include <linux/dm-userspace.h>
+
+#include <dmu.h>
+
+#define DMU_MSG_DEBUG 0
+
+#define QUEUE_SIZE_KB 4096
+
+#if DMU_MSG_DEBUG
+#define DPRINTF( s, arg... ) fprintf(stderr, s, ##arg)
+#else
+#define DPRINTF( s, arg... )
+#endif
+
+/* Callbacks registered by the library user; NULL when not registered */
+struct dmu_events {
+ status_handler status_fn; /* called for DM_USERSPACE_STATUS messages */
+ map_req_handler map_fn; /* called for DM_USERSPACE_MAP_BLOCK_REQ messages */
+};
+
+/* Opaque user pointers handed back as the first argument of each callback */
+struct dmu_event_data {
+ void *status_user_data;
+ void *map_user_data;
+};
+
+/* State of one open dm-userspace control channel */
+struct dmu_context {
+ int fd; /* descriptor of the control device */
+ unsigned int buf_size; /* capacity in bytes used for in_buf and out_buf */
+ unsigned int in_ptr; /* number of bytes held in in_buf by the last read */
+ unsigned int out_ptr; /* number of bytes queued in out_buf */
+ uint8_t *in_buf; /* messages received from the kernel */
+ uint8_t *out_buf; /* messages waiting to be written to the kernel */
+ uint32_t id_ctr; /* id placed in the next outgoing message header */
+ struct dmu_events events;
+ struct dmu_event_data event_data;
+};
+
+/*
+ * One map request as seen by the map handler.  It is filled in from the
+ * kernel's request, modified by the handler through the dmu_map_*()
+ * functions, and then copied into the response.
+ */
+struct dmu_map_data {
+ uint64_t block; /* starts as the requested block; sent back as new_block */
+ int64_t offset; /* starts at 0; sent back as the response offset */
+ uint32_t id; /* id from the request's message header */
+ uint32_t flags; /* DMU_FLAG_* bits */
+ dev_t dest_dev; /* starts at 0; sent back as dst_maj/dst_min */
+ dev_t copy_src_dev; /* starts at 0; sent back as src_maj/src_min */
+};
+
+/* Set the block number stored in @data */
+void dmu_map_set_block(struct dmu_map_data *data, uint64_t block)
+{
+ data->block = block;
+}
+
+/* Return the block number stored in @data */
+uint64_t dmu_map_get_block(struct dmu_map_data *data)
+{
+ return data->block;
+}
+
+/* Set the offset stored in @data */
+void dmu_map_set_offset(struct dmu_map_data *data, int64_t offset)
+{
+ data->offset = offset;
+}
+
+/* Return the request id stored in @data */
+uint32_t dmu_map_get_id(struct dmu_map_data *data)
+{
+ return data->id;
+}
+
+/* Set the destination device stored in @data */
+void dmu_map_set_dest_dev(struct dmu_map_data *data, dev_t dev)
+{
+ data->dest_dev = dev;
+}
+
+/* Set the copy source device and raise DMU_FLAG_COPY_FIRST */
+void dmu_map_set_copy_src_dev(struct dmu_map_data *data, dev_t dev)
+{
+ data->copy_src_dev = dev;
+ dmu_set_flag(&data->flags, DMU_FLAG_COPY_FIRST);
+}
+
+/* Raise DMU_FLAG_WR when @writable is non-zero, clear it otherwise */
+void dmu_map_set_writable(struct dmu_map_data *data, int writable)
+{
+ if (writable)
+ dmu_set_flag(&data->flags, DMU_FLAG_WR);
+ else
+ dmu_clr_flag(&data->flags, DMU_FLAG_WR);
+}
+
+/* Return non-zero when DMU_FLAG_WR is set in @data */
+int dmu_map_is_write(struct dmu_map_data *data)
+{
+ return dmu_get_flag(&data->flags, DMU_FLAG_WR);
+}
+
+/* Raise DMU_FLAG_SYNC in @data */
+void dmu_map_set_sync(struct dmu_map_data *data)
+{
+ dmu_set_flag(&data->flags, DMU_FLAG_SYNC);
+}
+
+/*
+ * Get the major/minor of the character control device that @dm_device
+ * has exported for us.  We do this by looking at the device status
+ * string: the parameters of the first "userspace" target that parse as
+ * "maj:min" (hexadecimal) are used.
+ *
+ * @maj and @min are always written; both are 0 when no usable target was
+ * found.  Returns 0 when the status could be queried and -1 when the
+ * device-mapper task could not be created, named or run.  The task is
+ * destroyed on every path.
+ */
+static int get_dm_control_dev(char *dm_device,
+			      unsigned *maj, unsigned *min)
+{
+	struct dm_task *task;
+	void *next = NULL;
+	uint64_t start, length;
+	char *ttype = NULL, *params = NULL;
+	unsigned found_maj, found_min;
+	int ret = -1;
+
+	*maj = 0;
+	*min = 0;
+
+	task = dm_task_create(DM_DEVICE_STATUS);
+	if (!task) {
+		DPRINTF("Failed to create device-mapper task\n");
+		return -1;
+	}
+
+	if (!dm_task_set_name(task, dm_device)) {
+		DPRINTF("Failed to set device-mapper target name\n");
+		goto out;
+	}
+
+	if (!dm_task_run(task)) {
+		DPRINTF("Failed to run device-mapper task\n");
+		goto out;
+	}
+
+	do {
+		next = dm_get_next_target(task, next, &start, &length,
+					  &ttype, &params);
+
+		/* A device without targets leaves both strings NULL */
+		if (ttype && params && strcmp(ttype, "userspace") == 0) {
+			if (sscanf(params, "%x:%x",
+				   &found_maj, &found_min) == 2) {
+				*maj = found_maj;
+				*min = found_min;
+				break;
+			}
+		}
+
+	} while (next);
+
+	ret = 0;
+
+out:
+	/* ttype and params point into the task, so destroy it last */
+	dm_task_destroy(task);
+
+	return ret;
+}
+
+/*
+ * Create the character device node for our control channel as
+ * /dev/dmu<minor>, readable and writable by its owner.
+ *
+ * Returns the result of mknod(): 0 on success, -1 with errno set on
+ * failure.
+ */
+static int make_device_node(unsigned major, unsigned minor)
+{
+	char path[256];
+
+	snprintf(path, sizeof(path), "/dev/dmu%u", minor);
+
+	/* Without permission bits the node would be created with mode 000 */
+	return mknod(path, S_IFCHR | 0600, makedev(major, minor));
+}
+
+/*
+ * Return the path of the control device node for @dm_device, creating
+ * the node when it is not accessible for reading and writing.
+ *
+ * Returns NULL when the control device number cannot be determined or
+ * the node cannot be created.  The path lives in a static buffer that
+ * is overwritten by the next call.
+ */
+static char *dmu_get_ctl_device(char *dm_device)
+{
+	/* Start from 0 so the check below is defined even if nothing was found */
+	unsigned ctl_major = 0, ctl_minor = 0;
+	static char path[256];
+
+	if (get_dm_control_dev(dm_device, &ctl_major, &ctl_minor) < 0)
+		return NULL;
+
+	if (ctl_major == 0) {
+		DPRINTF("Unable to get device number\n");
+		return NULL;
+	}
+
+	snprintf(path, sizeof(path), "/dev/dmu%u", ctl_minor);
+
+	if (access(path, R_OK | W_OK)) {
+		if (make_device_node(ctl_major, ctl_minor)) {
+			DPRINTF("Failed to create device node: %s",
+				strerror(errno));
+			return NULL;
+		}
+	}
+
+	return path;
+}
+
+/* Pack a version triple as (maj << 16) | (min << 8) | patch */
+static uint32_t make_version(int maj, int min, int patch)
+{
+	int packed = (maj << 16) | (min << 8) | patch;
+
+	return packed;
+}
+
+/*
+ * Split @dev into its major and minor numbers.
+ *
+ * The major()/minor() macros are used rather than fixed 8-bit masks so
+ * that minor numbers above 255 are not truncated.  Device numbers whose
+ * major and minor both fit in 8 bits yield the same result as before.
+ */
+static void dmu_split_dev(dev_t dev, uint32_t *maj, uint32_t *min)
+{
+	*maj = major(dev);
+	*min = minor(dev);
+}
+
+/*
+ * Queue a message for sending.
+ *
+ * A header carrying @type, the payload length for that type and a fresh
+ * id is appended to the outgoing buffer, followed by the payload @msg.
+ *
+ * Returns 1 when the message was queued and 0 when @type is not a known
+ * message type or the buffer has no room.  Nothing is modified in the
+ * failure case.
+ */
+static int dmu_ctl_queue_msg(struct dmu_context *ctx, int type, void *msg)
+{
+	struct dmu_msg_header hdr;
+	size_t needed;
+	int len;
+
+	/* -1 here would otherwise turn into a ~4GB payload_len */
+	len = dmu_get_msg_len(type);
+	if (len < 0)
+		return 0;
+
+	needed = sizeof(hdr) + (size_t)len;
+	if (ctx->out_ptr > ctx->buf_size ||
+	    needed > ctx->buf_size - ctx->out_ptr)
+		return 0; /* No room for this */
+
+	hdr.msg_type = type;
+	hdr.payload_len = len;
+	hdr.id = ctx->id_ctr++;
+
+	memcpy(ctx->out_buf + ctx->out_ptr, &hdr, sizeof(hdr));
+	ctx->out_ptr += sizeof(hdr);
+
+	memcpy(ctx->out_buf + ctx->out_ptr, msg, hdr.payload_len);
+	ctx->out_ptr += hdr.payload_len;
+
+	return 1;
+}
+
+/*
+ * Queue a DM_USERSPACE_MAP_INVALIDATE message for @block.
+ * The message is only queued here; it is written to the kernel when the
+ * outgoing queue is flushed.  Returns 1 if the message was queued and 0
+ * if the outgoing buffer had no room.
+ */
+int dmu_invalidate_block(struct dmu_context *ctx, uint64_t block)
+{
+ struct dmu_msg_invalidate_map inv_msg;
+
+ inv_msg.org_block = block;
+
+ DPRINTF("Queuing invalidation for block %llu\n", block);
+
+ return dmu_ctl_queue_msg(ctx, DM_USERSPACE_MAP_INVALIDATE,
+ &inv_msg);
+}
+
+/*
+ * Queue a DM_USERSPACE_STATUS message reporting
+ * DM_USERSPACE_SYNC_COMPLETE for the operation @id.
+ *
+ * The message is only queued here; it is written to the kernel when the
+ * outgoing queue is flushed.  Returns 1 if the message was queued and 0
+ * if the outgoing buffer had no room.
+ */
+int dmu_sync_complete(struct dmu_context *ctx, uint32_t id)
+{
+	struct dmu_msg_status status_msg;
+
+	status_msg.id_of_op = id;
+	status_msg.status = DM_USERSPACE_SYNC_COMPLETE;
+
+	DPRINTF("Queuing metadata written for request %u\n", id);
+
+	return dmu_ctl_queue_msg(ctx, DM_USERSPACE_STATUS,
+				 &status_msg);
+}
+
+/*
+ * Look at the first message in the incoming buffer without consuming it.
+ *
+ * On success *@type receives the message type, *@msg points at the
+ * payload inside the incoming buffer, and 1 is returned.  Returns 0,
+ * leaving both outputs untouched, when the buffer does not hold a
+ * complete header followed by its complete payload.
+ */
+static int dmu_ctl_peek_queue(struct dmu_context *ctx,
+			      int *type, void **msg)
+{
+	struct dmu_msg_header *hdr;
+
+	if (ctx->in_ptr < sizeof(*hdr))
+		return 0;
+
+	hdr = (struct dmu_msg_header *)ctx->in_buf;
+
+	/* Do not hand out a payload that was not actually received */
+	if (hdr->payload_len > ctx->in_ptr - sizeof(*hdr))
+		return 0;
+
+	*type = hdr->msg_type;
+	*msg = ctx->in_buf + sizeof(*hdr);
+
+	return 1;
+}
+
+/*
+ * Flush queue of messages to the kernel.
+ *
+ * Everything queued so far is handed to a single write() call and the
+ * queue is emptied regardless of the outcome.  Returns 1 when write()
+ * reported exactly the queued number of bytes and 0 otherwise.
+ */
+int dmu_ctl_send_queue(struct dmu_context *ctx)
+{
+	int written;
+	int ok;
+
+	DPRINTF("Flushing outgoing queue\n");
+
+	written = write(ctx->fd, ctx->out_buf, ctx->out_ptr);
+	ok = (written == ctx->out_ptr) ? 1 : 0;
+
+	ctx->out_ptr = 0;
+
+	DPRINTF("Finished flushing queue\n");
+
+	return ok;
+}
+
+/*
+ * Fill the queue with requests from the kernel.
+ *
+ * One read() of up to buf_size bytes replaces the contents of the
+ * incoming buffer.  Returns 1 when read() succeeded (possibly with zero
+ * bytes) and 0 when it failed.  After a failure the incoming buffer is
+ * marked empty rather than recording the -1 result as a byte count.
+ */
+static int dmu_ctl_recv_queue(struct dmu_context *ctx)
+{
+	ssize_t got;
+
+	got = read(ctx->fd, ctx->in_buf, ctx->buf_size);
+	if (got < 0) {
+		ctx->in_ptr = 0;
+		return 0;
+	}
+
+	ctx->in_ptr = (unsigned int)got;
+
+	return 1;
+}
+
+/*
+ * Open the dm-userspace control channel of device-mapper device @dev.
+ *
+ * The control device node is located (and created if necessary), opened
+ * with O_RDWR | @flags, and a version handshake is performed: a
+ * DM_USERSPACE_GET_VERSION message is sent and the kernel's reply must
+ * be of the same type and carry the same version.
+ *
+ * Returns a new context, or NULL on any failure.  On failure the
+ * descriptor and all memory allocated here are released.
+ */
+struct dmu_context *dmu_ctl_open(char *dev, int flags)
+{
+	int fd, type = 0;
+	struct dmu_msg_version msg;
+	struct dmu_msg_version *response;
+	struct dmu_context *ctx = NULL;
+	void *payload = NULL;
+	char *ctl_dev;
+
+	ctl_dev = dmu_get_ctl_device(dev);
+	if (ctl_dev == NULL)
+		return NULL;
+	else if (access(ctl_dev, R_OK | W_OK))
+		return NULL;
+
+	fd = open(ctl_dev, O_RDWR | flags);
+	if (fd < 0)
+		return NULL;
+
+	/* calloc() leaves counters, handlers and user data zeroed */
+	ctx = calloc(1, sizeof(*ctx));
+	if (!ctx)
+		goto out;
+
+	ctx->fd = fd;
+	ctx->buf_size = QUEUE_SIZE_KB << 10;
+
+	ctx->in_buf = malloc(ctx->buf_size);
+	if (!ctx->in_buf)
+		goto out;
+	ctx->out_buf = malloc(ctx->buf_size);
+	if (!ctx->out_buf)
+		goto out;
+
+	/* Do not send uninitialized stack contents as kernel_ver */
+	memset(&msg, 0, sizeof(msg));
+	msg.userspace_ver = make_version(0, 1, 0);
+
+	/* The queue helpers report failure as 0, not as a negative value */
+	if (!dmu_ctl_queue_msg(ctx, DM_USERSPACE_GET_VERSION, &msg))
+		goto out;
+
+	if (!dmu_ctl_send_queue(ctx))
+		goto out;
+
+	if (!dmu_ctl_recv_queue(ctx))
+		goto out;
+
+	if (!dmu_ctl_peek_queue(ctx, &type, &payload))
+		goto out;
+	response = payload;
+
+	if (type != DM_USERSPACE_GET_VERSION) {
+		DPRINTF("Got non-version ping back: %i\n", type);
+		goto out;
+	}
+
+	if (response->kernel_ver != msg.userspace_ver) {
+		DPRINTF("Version mismatch: %x != %x\n",
+			msg.userspace_ver, response->kernel_ver);
+		goto out;
+	} else {
+		DPRINTF("Version match: %x == %x\n",
+			msg.userspace_ver, response->kernel_ver);
+	}
+
+	return ctx;
+
+ out:
+	if (ctx) {
+		free(ctx->in_buf);
+		free(ctx->out_buf);
+		free(ctx);
+	}
+
+	close(fd);
+
+	return NULL;
+}
+
+/*
+ * Close the control channel and release the context.
+ *
+ * The context type is private to this file, so this is the only place
+ * that can free the context and its two buffers.  @ctx must not be used
+ * after this call.
+ *
+ * Returns the result of close(), or -1 when @ctx is NULL.
+ */
+int dmu_ctl_close(struct dmu_context *ctx)
+{
+	int ret;
+
+	if (!ctx)
+		return -1;
+
+	ret = close(ctx->fd);
+
+	free(ctx->in_buf);
+	free(ctx->out_buf);
+	free(ctx);
+
+	return ret;
+}
+
+/*
+ * Register @handler as the callback for status messages on @ctx.
+ * @data is passed back to the handler as its first argument.  Any
+ * previously registered handler and data are replaced.
+ */
+void dmu_register_status_handler(struct dmu_context *ctx,
+ status_handler handler,
+ void *data)
+{
+ ctx->events.status_fn = handler;
+ ctx->event_data.status_user_data = data;
+}
+
+/*
+ * Register @handler as the callback for map requests on @ctx.
+ * @data is passed back to the handler as its first argument.  Any
+ * previously registered handler and data are replaced.
+ */
+void dmu_register_map_handler(struct dmu_context *ctx,
+ map_req_handler handler,
+ void *data)
+{
+ ctx->events.map_fn = handler;
+ ctx->event_data.map_user_data = data;
+}
+
+/*
+ * Wait up to @msec milliseconds for the control device to become
+ * readable.
+ *
+ * Returns 1 when the descriptor is readable, and 0 on timeout or when
+ * select() fails.
+ */
+int dmu_events_pending(struct dmu_context *ctx, unsigned int msec)
+{
+	struct timeval timeout;
+	fd_set readable;
+
+	timeout.tv_sec = msec / 1000;
+	timeout.tv_usec = (msec % 1000) * 1000;
+
+	FD_ZERO(&readable);
+	FD_SET(ctx->fd, &readable);
+
+	if (select(ctx->fd + 1, &readable, NULL, NULL, &timeout) < 0)
+		return 0;
+
+	return FD_ISSET(ctx->fd, &readable) ? 1 : 0;
+}
+
+/*
+ * Hand a map request from the kernel to the registered map handler and
+ * queue the response.
+ *
+ * The handler sees the request as a dmu_map_data with the copy-first
+ * and sync flags cleared and no devices set, and edits it in place.  A
+ * negative handler result queues DM_USERSPACE_MAP_FAILED, anything else
+ * queues DM_USERSPACE_MAP_BLOCK_RESP.  If the outgoing buffer is full,
+ * it is flushed and the response is queued again, so that a request is
+ * not left unanswered.
+ *
+ * Returns the handler's result, or 1 when no handler is registered.
+ */
+static int fire_map_req_event(struct dmu_context *ctx,
+			      struct dmu_msg_map_request *req,
+			      uint32_t id)
+{
+	struct dmu_msg_map_response resp;
+	struct dmu_map_data data;
+	int resp_type;
+	int ret;
+
+	if (!ctx->events.map_fn)
+		return 1;
+
+	DPRINTF("Map event for %llu %c\n",
+		(unsigned long long)req->org_block,
+		dmu_get_flag(&req->flags, DMU_FLAG_WR) ? 'W':'R');
+
+	data.block = req->org_block;
+	data.offset = 0;
+	data.id = id;
+	data.flags = req->flags;
+	data.dest_dev = data.copy_src_dev = 0;
+
+	dmu_clr_flag(&data.flags, DMU_FLAG_COPY_FIRST);
+	dmu_clr_flag(&data.flags, DMU_FLAG_SYNC);
+
+	ret = ctx->events.map_fn(ctx->event_data.map_user_data, &data);
+
+	memset(&resp, 0, sizeof(resp));
+	resp.org_block = req->org_block;
+	resp.new_block = data.block;
+	resp.offset = data.offset;
+	resp.flags = data.flags;
+	resp.id_of_req = data.id;
+
+	dmu_split_dev(data.copy_src_dev, &resp.src_maj, &resp.src_min);
+	dmu_split_dev(data.dest_dev, &resp.dst_maj, &resp.dst_min);
+
+	DPRINTF("Mapped %llu -> %llu\n",
+		(unsigned long long)resp.org_block,
+		(unsigned long long)resp.new_block);
+
+	if (ret < 0)
+		resp_type = DM_USERSPACE_MAP_FAILED;
+	else
+		resp_type = DM_USERSPACE_MAP_BLOCK_RESP;
+
+	if (!dmu_ctl_queue_msg(ctx, resp_type, &resp)) {
+		/* Buffer full: make room and try once more */
+		dmu_ctl_send_queue(ctx);
+		if (!dmu_ctl_queue_msg(ctx, resp_type, &resp)) {
+			DPRINTF("Unable to queue response for %u\n", id);
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Translate a kernel status message into a DMU_STATUS_* code and pass
+ * it, together with the id of the operation it refers to, to the
+ * registered status handler.  Unrecognized kernel codes are reported as
+ * DMU_STATUS_UNKNOWN.  Always returns 0.
+ */
+static int fire_status_event(struct dmu_context *ctx,
+			     struct dmu_msg_status *status,
+			     uint32_t id)
+{
+	uint32_t user_code = DMU_STATUS_UNKNOWN;
+
+	if (status->status == DM_USERSPACE_INVAL_COMPLETE)
+		user_code = DMU_STATUS_INVAL_COMPLETE;
+	else if (status->status == DM_USERSPACE_INVAL_FAILED)
+		user_code = DMU_STATUS_INVAL_FAILED;
+	else if (status->status == DM_USERSPACE_SYNC_COMPLETE)
+		user_code = DMU_STATUS_SYNC_COMPLETE;
+
+	if (!ctx->events.status_fn)
+		return 0;
+
+	ctx->events.status_fn(ctx->event_data.status_user_data,
+			      status->id_of_op, user_code);
+
+	return 0;
+}
+
+/*
+ * Dispatch one received message to the matching event function.
+ *
+ * Map requests and status messages are forwarded and the result of the
+ * event function is returned.  Any other message type yields -1.
+ */
+static int decode_message(struct dmu_context *ctx, int type, uint32_t id,
+			  uint8_t *msg)
+{
+	if (type == DM_USERSPACE_MAP_BLOCK_REQ) {
+		DPRINTF("Request event: %u\n", id);
+		return fire_map_req_event(ctx,
+					  (struct dmu_msg_map_request *)msg,
+					  id);
+	}
+
+	if (type == DM_USERSPACE_STATUS) {
+		DPRINTF("Status event\n");
+		return fire_status_event(ctx,
+					 (struct dmu_msg_status *)msg,
+					 id);
+	}
+
+	DPRINTF("Unknown message type: %i\n", type);
+	return -1; /* Unknown message type */
+}
+
+/*
+ * Read one batch of messages from the kernel and dispatch each of them.
+ *
+ * Every message consists of a header followed by payload_len bytes.
+ * Parsing stops at the first message whose header or payload is not
+ * completely contained in the received data, so a truncated or corrupt
+ * batch cannot cause reads beyond what was received.  If any handler
+ * returned a positive value, the outgoing queue is flushed before
+ * returning.
+ *
+ * Returns 1 after processing a batch and -1 if receiving failed.
+ */
+int dmu_process_events(struct dmu_context *ctx)
+{
+	struct dmu_msg_header *hdr;
+	unsigned int ptr = 0;
+	int ret, do_flush = 0;
+
+	if (!dmu_ctl_recv_queue(ctx))
+		return -1; /* Receive failed */
+
+	DPRINTF("Got %u bytes\n", ctx->in_ptr);
+
+	while (ptr < ctx->in_ptr) {
+		if (ctx->in_ptr - ptr < sizeof(*hdr)) {
+			DPRINTF("Truncated message header\n");
+			break;
+		}
+
+		hdr = (struct dmu_msg_header *)&ctx->in_buf[ptr];
+		ptr += sizeof(*hdr);
+
+		if (hdr->payload_len > ctx->in_ptr - ptr) {
+			DPRINTF("Truncated message payload\n");
+			break;
+		}
+
+		ret = decode_message(ctx, hdr->msg_type, hdr->id,
+				     &ctx->in_buf[ptr]);
+		if (ret > 0)
+			do_flush = 1;
+
+		ptr += hdr->payload_len;
+	}
+
+	ctx->in_ptr = 0;
+
+	if (do_flush) {
+		DPRINTF("Flushing outgoing message queue as requested\n");
+		dmu_ctl_send_queue(ctx);
+	}
+
+	return 1;
+}
+
diff -r 7ca9885684d9 -r 8c8d5dc4eaf4 tools/cowd/libdmu/dmu.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/cowd/libdmu/dmu.h Fri Aug 25 15:01:34 2006 -0500
@@ -0,0 +1,50 @@
+#ifndef __DMU_H
+#define __DMU_H
+
+#include <stdint.h>
+
+/**************
+ * dm-userspace
+ **************/
+
+/* Status codes passed as the last argument of a status_handler */
+enum {
+ DMU_STATUS_UNKNOWN = 0, /* kernel sent a code the library does not know */
+ DMU_STATUS_BLOCK_FLUSHED,
+ DMU_STATUS_INVAL_COMPLETE,
+ DMU_STATUS_INVAL_FAILED,
+ DMU_STATUS_SYNC_COMPLETE
+};
+
+struct dmu_context;
+struct dmu_map_data;
+
+typedef int (*status_handler)(void *data, uint32_t id, uint32_t status);
+typedef int (*map_req_handler)(void *data, struct dmu_map_data *map_data);
+
+/* High-level control operations */
+struct dmu_context *dmu_ctl_open(char *dev, int flags);
+int dmu_ctl_close(struct dmu_context *ctx);
+int dmu_ctl_send_queue(struct dmu_context *ctx);
+void dmu_register_status_handler(struct dmu_context *ctx,
+ status_handler handler,
+ void *data);
+void dmu_register_map_handler(struct dmu_context *ctx,
+ map_req_handler handler,
+ void *data);
+int dmu_invalidate_block(struct dmu_context *ctx, uint64_t block);
+int dmu_sync_complete(struct dmu_context *ctx, uint32_t id);
+int dmu_events_pending(struct dmu_context *ctx, unsigned int msec);
+int dmu_process_events(struct dmu_context *ctx);
+
+/* Map manipulation functions */
+void dmu_map_set_block(struct dmu_map_data *data, uint64_t block);
+uint64_t dmu_map_get_block(struct dmu_map_data *data);
+void dmu_map_set_offset(struct dmu_map_data *data, int64_t offset);
+uint32_t dmu_map_get_id(struct dmu_map_data *data);
+void dmu_map_set_dest_dev(struct dmu_map_data *data, dev_t dev);
+void dmu_map_set_copy_src_dev(struct dmu_map_data *data, dev_t dev);
+void dmu_map_set_writable(struct dmu_map_data *data, int writable);
+int dmu_map_is_write(struct dmu_map_data *data);
+void dmu_map_set_sync(struct dmu_map_data *data);
+
+#endif
diff -r 7ca9885684d9 -r 8c8d5dc4eaf4 tools/cowd/libdmu/internal_renames
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/cowd/libdmu/internal_renames Fri Aug 25 15:01:34 2006 -0500
@@ -0,0 +1,23 @@
+dmu_ctl_open int__dmu_ctl_open
+dmu_clr_flag int__dmu_clr_flag
+dmu_ctl_close int__dmu_ctl_close
+dmu_ctl_queue_msg int__dmu_ctl_queue_msg
+dmu_ctl_recv_queue int__dmu_ctl_recv_queue
+dmu_ctl_send_queue int__dmu_ctl_send_queue
+dmu_events_pending int__dmu_events_pending
+dmu_invalidate_block int__dmu_invalidate_block
+dmu_map_get_block int__dmu_map_get_block
+dmu_map_get_id int__dmu_map_get_id
+dmu_map_is_write int__dmu_map_is_write
+dmu_map_set_block int__dmu_map_set_block
+dmu_map_set_copy_src_dev int__dmu_map_set_copy_src_dev
+dmu_map_set_dest_dev int__dmu_map_set_dest_dev
+dmu_map_set_offset int__dmu_map_set_offset
+dmu_map_set_sync int__dmu_map_set_sync
+dmu_map_set_writable int__dmu_map_set_writable
+dmu_process_events int__dmu_process_events
+dmu_register_map_handler int__dmu_register_map_handler
+dmu_register_status_handler int__dmu_register_status_handler
+dmu_set_flag int__dmu_set_flag
+dmu_split_dev int__dmu_split_dev
+dmu_sync_complete int__dmu_sync_complete
diff -r 7ca9885684d9 -r 8c8d5dc4eaf4 tools/cowd/libdmu/linux/dm-userspace.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/cowd/libdmu/linux/dm-userspace.h Fri Aug 25 15:01:34 2006 -0500
@@ -0,0 +1,147 @@
+/*
+ * Copyright (C) International Business Machines Corp., 2006
+ * Author: Dan Smith <danms@us.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; under version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#ifndef __DM_USERSPACE_H
+#define __DM_USERSPACE_H
+
+#include <linux/types.h>
+
+/*
+ * Message Types
+ */
+#define DM_USERSPACE_GET_VERSION 1
+#define DM_USERSPACE_MAP_BLOCK_REQ 2
+#define DM_USERSPACE_MAP_BLOCK_RESP 3
+#define DM_USERSPACE_MAP_FAILED 4
+#define DM_USERSPACE_MAP_INVALIDATE 5
+#define DM_USERSPACE_STATUS 6
+
+/*
+ * Status codes
+ */
+#define DM_USERSPACE_INVAL_COMPLETE 101
+#define DM_USERSPACE_INVAL_FAILED 102
+#define DM_USERSPACE_SYNC_COMPLETE 103
+
+/*
+ * Flags and associated macros
+ */
+#define DMU_FLAG_VALID 1
+#define DMU_FLAG_RD 2
+#define DMU_FLAG_WR 4
+#define DMU_FLAG_COPY_FIRST 8
+#define DMU_FLAG_TEMPORARY 16
+#define DMU_FLAG_INUSE 32
+#define DMU_FLAG_SYNC 64
+#define DMU_FLAG_WAITING 128
+
+/*
+ * Return 1 if any bit of @flag is set in *@flags, 0 otherwise.
+ * Marked inline so that including this header without calling the
+ * function does not produce an unused-function warning.
+ */
+static inline int dmu_get_flag(uint32_t *flags, uint32_t flag)
+{
+	return (*flags & flag) != 0;
+}
+
+/*
+ * Set the bits of @flag in *@flags.
+ * Marked inline so that including this header without calling the
+ * function does not produce an unused-function warning.
+ */
+static inline void dmu_set_flag(uint32_t *flags, uint32_t flag)
+{
+	*flags |= flag;
+}
+
+/*
+ * Clear the bits of @flag in *@flags.
+ * Marked inline so that including this header without calling the
+ * function does not produce an unused-function warning.
+ */
+static inline void dmu_clr_flag(uint32_t *flags, uint32_t flag)
+{
+	*flags &= (~flag);
+}
+
+/*
+ * Copy the bits selected by @flag from @src into *@flags, leaving all
+ * other bits of *@flags unchanged.
+ * Marked inline so that including this header without calling the
+ * function does not produce an unused-function warning.
+ */
+static inline void dmu_cpy_flag(uint32_t *flags, uint32_t src, uint32_t flag)
+{
+	*flags = (*flags & ~flag) | (src & flag);
+}
+
+/*
+ * This message header is sent in front of every message, in both
+ * directions
+ */
+struct dmu_msg_header {
+ uint32_t msg_type; /* one of the DM_USERSPACE_* message types */
+ uint32_t payload_len; /* size in bytes of the payload that follows */
+ uint32_t id; /* identifier assigned by the sender */
+};
+
+/* DM_USERSPACE_GET_VERSION: payload of the version handshake */
+struct dmu_msg_version {
+ uint32_t userspace_ver; /* version announced by the userspace side */
+ uint32_t kernel_ver; /* version reported back by the kernel side */
+};
+
+/* For status codes (payload of DM_USERSPACE_STATUS) */
+struct dmu_msg_status {
+ uint32_t id_of_op; /* id of the operation the status refers to */
+ uint32_t status; /* one of the DM_USERSPACE_* status codes */
+};
+
+/* DM_USERSPACE_MAP_BLOCK_REQ: request to map one block */
+struct dmu_msg_map_request {
+ uint64_t org_block; /* block for which a mapping is requested */
+
+ uint32_t flags; /* DMU_FLAG_* bits, e.g. DMU_FLAG_WR */
+};
+
+/* DM_USERSPACE_MAP_BLOCK_RESP
+ * DM_USERSPACE_MAP_FAILED
+ */
+struct dmu_msg_map_response {
+ uint64_t org_block; /* block from the request being answered */
+ uint64_t new_block; /* block the request is mapped to */
+ int64_t offset;
+
+ uint32_t id_of_req; /* id of the request being answered */
+ uint32_t flags; /* DMU_FLAG_* bits */
+
+ uint32_t src_maj; /* major/minor of the copy source device */
+ uint32_t src_min;
+
+ uint32_t dst_maj; /* major/minor of the destination device */
+ uint32_t dst_min;
+};
+
+/* DM_USERSPACE_MAP_INVALIDATE: request to invalidate the mapping of a block */
+struct dmu_msg_invalidate_map {
+ uint64_t org_block; /* block whose mapping is to be invalidated */
+};
+
+/*
+ * Return the payload size in bytes that belongs to message @type, or -1
+ * when @type is not recognized.
+ */
+static inline int dmu_get_msg_len(int type)
+{
+	int len = -1;
+
+	switch (type) {
+	case DM_USERSPACE_GET_VERSION:
+		len = sizeof(struct dmu_msg_version);
+		break;
+	case DM_USERSPACE_INVAL_COMPLETE:
+	case DM_USERSPACE_INVAL_FAILED:
+	case DM_USERSPACE_STATUS:
+		len = sizeof(struct dmu_msg_status);
+		break;
+	case DM_USERSPACE_MAP_BLOCK_REQ:
+		len = sizeof(struct dmu_msg_map_request);
+		break;
+	case DM_USERSPACE_MAP_BLOCK_RESP:
+	case DM_USERSPACE_MAP_FAILED:
+		len = sizeof(struct dmu_msg_map_response);
+		break;
+	case DM_USERSPACE_MAP_INVALIDATE:
+		len = sizeof(struct dmu_msg_invalidate_map);
+		break;
+	default:
+		break;
+	}
+
+	return len;
+}
+
+#endif
^ permalink raw reply [flat|nested] 7+ messages in thread* [PATCH 4 of 6] dscow plugin for dm-userspace userspace tool
[not found] <patchbomb.1156540578@venkman-64>
` (2 preceding siblings ...)
2006-08-25 21:24 ` [PATCH 3 of 6] dm-userspace internal libdmu support for userspace tool Ryan Grimm
@ 2006-08-25 21:24 ` Ryan Grimm
2006-08-25 21:24 ` [PATCH 5 of 6] qcow " Ryan Grimm
2006-08-25 21:24 ` [PATCH 6 of 6] dm-userspace backend script and xmexample change Ryan Grimm
5 siblings, 0 replies; 7+ messages in thread
From: Ryan Grimm @ 2006-08-25 21:24 UTC (permalink / raw)
To: Xen Devel; +Cc: Dan Smith
Signed-off-by: Ryan Grimm <grimm@us.ibm.com>
Signed-off-by: Dan Smith <danms@us.ibm.com>
# HG changeset patch
# User Ryan Grimm <grimm@us.ibm.com>
# Date 1156536095 18000
# Node ID a3656acd770b4f21ad54fa961032ff39562058eb
# Parent 8c8d5dc4eaf4f0044f7fdd5adb282359eff7263a
dscow plugin for dm-userspace userspace tool
diff -r 8c8d5dc4eaf4 -r a3656acd770b tools/cowd/Makefile.am
--- a/tools/cowd/Makefile.am Fri Aug 25 15:01:34 2006 -0500
+++ b/tools/cowd/Makefile.am Fri Aug 25 15:01:35 2006 -0500
@@ -1,3 +1,5 @@ EXTRA_DIST = libdmu/dmu.c libdmu/dmu.h l
+SUBDIRS = plugins
+
EXTRA_DIST = libdmu/dmu.c libdmu/dmu.h libdmu/internal_renames
bin_PROGRAMS = cowd
diff -r 8c8d5dc4eaf4 -r a3656acd770b tools/cowd/configure.in
--- a/tools/cowd/configure.in Fri Aug 25 15:01:34 2006 -0500
+++ b/tools/cowd/configure.in Fri Aug 25 15:01:35 2006 -0500
@@ -93,11 +93,15 @@ AC_SUBST(PLUGIN_DIR)
AC_SUBST(PLUGIN_DIR)
AC_SUBST(GLOBAL_CFLAGS)
-AC_CONFIG_FILES([Makefile])
+AC_CONFIG_FILES([Makefile
+ plugins/Makefile
+ plugins/dscow/Makefile])
# This just makes it easier to run cowd from the source directory
# for testing
mkdir -p lib
+ln -sf ../plugins/dscow/.libs/libcowd_dscow.so.0 lib/libcowd_dscow.so
+ln -sf ../plugins/dscow/.libs/libcowd_dscow.la lib/libcowd_dscow.la
AC_OUTPUT
diff -r 8c8d5dc4eaf4 -r a3656acd770b tools/cowd/cowd_control_loop.c
--- a/tools/cowd/cowd_control_loop.c Fri Aug 25 15:01:34 2006 -0500
+++ b/tools/cowd/cowd_control_loop.c Fri Aug 25 15:01:35 2006 -0500
@@ -1,6 +1,7 @@
/*
* Copyright (C) International Business Machines Corp., 2006
* Author: Dan Smith <danms@us.ibm.com>
+ * Author: Ryan Grimm <grimm@us.ibm.com>
*
* This file is subject to the terms and conditions of the GNU Lesser
* General Public License. See the file COPYING in the main directory
diff -r 8c8d5dc4eaf4 -r a3656acd770b tools/cowd/plugins/Makefile.am
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/cowd/plugins/Makefile.am Fri Aug 25 15:01:35 2006 -0500
@@ -0,0 +1,1 @@
+SUBDIRS = dscow
diff -r 8c8d5dc4eaf4 -r a3656acd770b tools/cowd/plugins/dscow/Makefile.am
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/cowd/plugins/dscow/Makefile.am Fri Aug 25 15:01:35 2006 -0500
@@ -0,0 +1,10 @@
+EXTRA_DIST = README
+
+lib_LTLIBRARIES = libcowd_dscow.la
+libcowd_dscow_la_CFLAGS = -I../.. -I../../../../module @GLOBAL_CFLAGS@
+libcowd_dscow_la_SOURCES = dscow_ops.c dscow_plugin.c dscow_ops.h
+libcowd_dscow_la_LIBADD =
+
+bin_PROGRAMS = dscow_tool
+dscow_tool_SOURCES = dscow_tool.c dscow_ops.c ../../util.c
+dscow_tool_CFLAGS = @GLOBAL_CFLAGS@
diff -r 8c8d5dc4eaf4 -r a3656acd770b tools/cowd/plugins/dscow/README
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/cowd/plugins/dscow/README Fri Aug 25 15:01:35 2006 -0500
@@ -0,0 +1,13 @@
+To create a dscow image, run "dscow_tool" as follows:
+
+ # dscow_tool -c foo.dscow /path/to/base.img
+
+This creates a "foo.dscow" file that uses '/path/to/base.img' as its
+base and has 64 KB blocks.
+
+Then, start a cowd instance like this:
+
+ # cowd -p dscow mydev /path/to/foo.dscow
+
+The base image will be loaded automatically.
+
diff -r 8c8d5dc4eaf4 -r a3656acd770b tools/cowd/plugins/dscow/dscow_ops.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/cowd/plugins/dscow/dscow_ops.c Fri Aug 25 15:01:35 2006 -0500
@@ -0,0 +1,428 @@
+/*
+ * Copyright (C) International Business Machines Corp., 2006
+ * Author: Dan Smith <danms@us.ibm.com>
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser
+ * General Public License. See the file COPYING in the main directory
+ * of this archive for more details.
+ *
+ */
+
+#define _LARGEFILE64_SOURCE
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "dscow_ops.h"
+
+#define MAX_VERSION 1
+
+#if 0
+#define DPRINTF( s, arg... ) fprintf(stderr, s, ##arg)
+#else
+#define DPRINTF( s, arg... )
+#endif
+
+int dscow_errno = 0;
+
+/*
+ * Print a summary of an open dscow image to stdout: the header fields,
+ * the derived block and bitmap counts and, when a bitmap is loaded, how
+ * many blocks are currently marked as mapped.
+ */
+void dscow_print_info(struct dscow *handle)
+{
+	uint32_t i;
+	uint64_t size_mb = handle->header.size >> 20;
+	uint32_t bsize_kb = handle->header.block_size >> 10;
+	uint64_t mapped_blocks = 0;
+	double usage = 0.0;
+
+	/* The casts keep the arguments in step with the %llu/%lu
+	   conversions however the fixed-width types are defined */
+	printf("Base Image: %s\n", handle->base_filename);
+	printf("Size: %llu (%llu MB)\n",
+	       (unsigned long long)handle->header.size,
+	       (unsigned long long)size_mb);
+	printf("Block size: %lu (%lu KB)\n",
+	       (unsigned long)handle->header.block_size,
+	       (unsigned long)bsize_kb);
+	printf("First block: %llu\n",
+	       (unsigned long long)handle->header.first_block);
+	printf("Blocks: %llu\n", (unsigned long long)handle->blocks);
+	printf("Bitmap count: %u\n", handle->bitmap_count);
+	printf("Format Version: %u (we support up to version %u)\n",
+	       handle->header.version, MAX_VERSION);
+
+	if (!handle->bitmap)
+		return;
+
+	for (i = 0; i < handle->bitmap_count; i++) {
+		uint32_t field;
+
+		/* Count the set bits; shifting until the word is empty
+		   without testing bit 0 would only give the position of
+		   the highest set bit */
+		for (field = handle->bitmap[i]; field; field >>= 1)
+			mapped_blocks += field & 1;
+		DPRINTF(" Field %u: %08x\n", i, handle->bitmap[i]);
+	}
+
+	/* An image with no blocks has no meaningful percentage */
+	if (handle->blocks)
+		usage = ((double)mapped_blocks / handle->blocks) * 100.0;
+
+	printf("Usage: %2.1f%% %lluMB (%llu blocks)\n",
+	       usage,
+	       (unsigned long long)
+	       ((mapped_blocks * handle->header.block_size) >> 20),
+	       (unsigned long long)mapped_blocks);
+}
+
+/*
+ * Allocate dscow->bitmap (bitmap_count words) and fill it from the image
+ * file, starting at header.bitmap_offset.
+ *
+ * Returns 0 on success and -1 on failure with dscow_errno set.  The
+ * bitmap buffer stays attached to @dscow on failure so that the owner of
+ * the handle releases it.
+ */
+static int _dscow_read_bitmap(struct dscow *dscow)
+{
+	uint32_t i;
+	ssize_t ret;
+
+	dscow_errno = 0;
+
+	dscow->bitmap = calloc(dscow->bitmap_count, sizeof(uint32_t));
+	if (!dscow->bitmap) {
+		dscow_errno = ENOMEM;
+		return -1;
+	}
+
+	DPRINTF("Seeking to bitmap @ %llu for read\n",
+		dscow->header.bitmap_offset);
+
+	if (lseek64(dscow->fd, dscow->header.bitmap_offset, SEEK_SET) ==
+	    (off64_t)-1) {
+		dscow_errno = errno;
+		return -1;
+	}
+
+	for (i = 0; i < dscow->bitmap_count; i++) {
+		ret = read(dscow->fd,
+			   &dscow->bitmap[i],
+			   sizeof(dscow->bitmap[i]));
+		if (ret < 0) {
+			dscow_errno = errno;
+			return -1;
+		}
+
+		/* A partial word means the file ends inside the bitmap */
+		if (ret != (ssize_t)sizeof(dscow->bitmap[i])) {
+			dscow_errno = EIO;
+			return -1;
+		}
+
+		if (dscow->bitmap[i] != 0)
+			DPRINTF(" Bitmap:%08x %08x\n", i, dscow->bitmap[i]);
+	}
+
+	return 0;
+}
+
+
+/*
+ * Write every bitmap word flagged in dscow->dirty_bitmaps back to its
+ * slot in the image file and clear the flag once the word is on disk.
+ *
+ * Returns 0 on success and -1 on failure with dscow_errno set.
+ */
+static int _dscow_write_bitmap(struct dscow *dscow)
+{
+	uint32_t i;
+	ssize_t ret;
+
+	DPRINTF("Seeking to bitmap @ %llu for write \n",
+		dscow->header.bitmap_offset);
+
+	for (i = 0; i < dscow->bitmap_count; i++) {
+		if (dscow->dirty_bitmaps[i]) {
+			/* Seek to the absolute position of word i.  A
+			   relative seek by i words is only right for the
+			   first dirty word, because every write moves the
+			   file position */
+			off64_t pos = dscow->header.bitmap_offset +
+				(uint64_t)i * sizeof(uint32_t);
+
+			if (lseek64(dscow->fd, pos, SEEK_SET) != pos) {
+				dscow_errno = errno;
+				return -1;
+			}
+
+			ret = write(dscow->fd,
+				    &dscow->bitmap[i],
+				    sizeof(dscow->bitmap[i]));
+			if (ret < 0) {
+				dscow_errno = errno;
+				return -1;
+			}
+			if (ret != (ssize_t)sizeof(dscow->bitmap[i])) {
+				dscow_errno = EIO;
+				return -1;
+			}
+			dscow->dirty_bitmaps[i] = 0;
+		}
+
+		if (dscow->bitmap[i] != 0)
+			DPRINTF(" Bitmap:%08x %08x\n", i, dscow->bitmap[i]);
+	}
+
+	return 0;
+}
+
+/*
+ * Read the base image filename (header.base_file_size bytes stored at
+ * header.base_file_offset) into a freshly allocated, NUL-terminated
+ * string in dscow->base_filename.
+ *
+ * Returns 0 on success and -1 on failure with dscow_errno set.
+ */
+static int _dscow_read_base_file(struct dscow *dscow)
+{
+	size_t len = dscow->header.base_file_size;
+	ssize_t ret;
+
+	/* calloc() zero-fills, so the terminator is already in place */
+	dscow->base_filename = calloc(len + 1, 1);
+	if (!dscow->base_filename) {
+		dscow_errno = ENOMEM;
+		return -1;
+	}
+
+	if (lseek64(dscow->fd, dscow->header.base_file_offset, SEEK_SET) ==
+	    (off64_t)-1) {
+		dscow_errno = errno;
+		return -1;
+	}
+
+	ret = read(dscow->fd, dscow->base_filename, len);
+	if (ret < 0) {
+		dscow_errno = errno;
+		return -1;
+	}
+
+	/* A truncated name would point at the wrong base image */
+	if ((size_t)ret != len) {
+		dscow_errno = EIO;
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Write dscow->base_filename (header.base_file_size bytes, without the
+ * terminating NUL) to the image file at header.base_file_offset.
+ *
+ * Returns 0 on success and -1 on failure with dscow_errno set.
+ */
+static int _dscow_write_base_file(struct dscow *dscow)
+{
+	ssize_t ret;
+
+	if (lseek64(dscow->fd, dscow->header.base_file_offset, SEEK_SET) ==
+	    (off64_t)-1) {
+		dscow_errno = errno;
+		return -1;
+	}
+
+	ret = write(dscow->fd,
+		    dscow->base_filename,
+		    dscow->header.base_file_size);
+	if (ret < 0) {
+		dscow_errno = errno;
+		return -1;
+	}
+
+	if ((size_t)ret != dscow->header.base_file_size) {
+		dscow_errno = EIO;
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Load the on-disk header from offset 0 of the image file, validate it
+ * and derive the block and bitmap counts, then load the bitmap and the
+ * base filename.
+ *
+ * Returns 0 on success, 1 when the header could only be read partially,
+ * and -1 on any other failure with dscow_errno set.
+ */
+static int _dscow_read_disk_header(struct dscow *dscow)
+{
+	int ret = 0;
+
+	if (lseek64(dscow->fd, 0, SEEK_SET) == (off64_t)-1) {
+		dscow_errno = errno;
+		return -1;
+	}
+
+	ret = read(dscow->fd, &dscow->header, sizeof(dscow->header));
+	if (ret < 0) {
+		dscow_errno = errno;
+		return ret;
+	} else if (ret != sizeof(dscow->header)) {
+		return 1;
+	}
+
+	/* Validate before using any field: the contents come from a file
+	   that may not be a dscow image at all */
+	if (dscow->header.magic != DSCOW_MAGIC) {
+		dscow_errno = EINVAL;
+		return -1;
+	}
+
+	/* block_size is a divisor below */
+	if (dscow->header.block_size == 0) {
+		dscow_errno = EINVAL;
+		return -1;
+	}
+
+	dscow->blocks = dscow->header.size / dscow->header.block_size;
+	dscow->bitmap_count = dscow->blocks / (sizeof(uint32_t) * 8) + 1;
+
+	if (_dscow_read_bitmap(dscow) < 0)
+		return -1;
+
+	if (_dscow_read_base_file(dscow) < 0)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Write the in-memory header to offset 0 of the image file, followed by
+ * the dirty bitmap words and the base filename.
+ *
+ * Returns 0 on success, 1 when the header was only written partially,
+ * and -1 on any other failure.
+ */
+static int _dscow_write_disk_header(struct dscow *dscow)
+{
+	int ret = 0;
+
+	if (lseek64(dscow->fd, 0, SEEK_SET) == (off64_t)-1) {
+		dscow_errno = errno;
+		return -1;
+	}
+
+	ret = write(dscow->fd, &dscow->header, sizeof(dscow->header));
+	if (ret < 0) {
+		dscow_errno = errno;
+		return ret;
+	} else if (ret != sizeof(dscow->header)) {
+		return 1;
+	}
+
+	/* Compare the helper's result directly: "!f() < 0" negates first
+	   and then compares 0 or 1 against 0, which is never true */
+	if (_dscow_write_bitmap(dscow) < 0)
+		return -1;
+
+	if (_dscow_write_base_file(dscow) < 0)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Release a handle and every buffer it owns.  The file descriptor is not
+ * closed here.  Pointer members that were never allocated must be NULL.
+ * A NULL @dscow is ignored.
+ */
+void dscow_cleanup(struct dscow *dscow)
+{
+	if (!dscow)
+		return;
+
+	/* free(NULL) is a no-op, so no per-pointer checks are needed */
+	free(dscow->bitmap);
+	free(dscow->dirty_bitmaps);
+	free(dscow->base_filename);
+
+	free(dscow);
+}
+
+/*
+ * Open an existing dscow image at @path and load its header, bitmap and
+ * base filename.
+ *
+ * Returns a handle to be released with dscow_close(), or NULL with
+ * dscow_errno set: EINVAL when the header is rejected or a metadata read
+ * fails, EIO when the header is truncated, ENOTSUP when the format
+ * version is newer than MAX_VERSION, ENOMEM on allocation failure, or
+ * the errno from open().
+ */
+struct dscow *dscow_open(char *path)
+{
+	struct dscow *handle;
+	int ret;
+
+	/* Zeroed so that dscow_cleanup() only ever sees NULL or valid
+	   pointers, whichever step fails */
+	handle = calloc(1, sizeof(*handle));
+	if (!handle) {
+		dscow_errno = ENOMEM;
+		return NULL;
+	}
+
+	handle->fd = open(path, O_RDWR | O_SYNC | O_LARGEFILE);
+	if (handle->fd < 0) {
+		DPRINTF("Failed to open %s\n", path);
+		dscow_errno = errno;
+		goto fail;
+	}
+
+	ret = _dscow_read_disk_header(handle);
+	if (ret < 0) {
+		dscow_errno = EINVAL;
+		goto fail;
+	} else if (ret != 0) {
+		dscow_errno = EIO;
+		goto fail;
+	}
+
+	if (handle->header.version > MAX_VERSION) {
+		DPRINTF("Unknown format version %u\n",
+			handle->header.version);
+		dscow_errno = ENOTSUP;
+		goto fail;
+	}
+
+	handle->dirty_bitmaps = calloc(handle->bitmap_count,
+				       sizeof(uint32_t));
+	if (!handle->dirty_bitmaps) {
+		dscow_errno = ENOMEM;
+		goto fail;
+	}
+	handle->dirty = 0;
+
+	return handle;
+
+ fail:
+	if (handle->fd >= 0)
+		close(handle->fd);
+	dscow_cleanup(handle);
+	return NULL;
+}
+
+/*
+ * Create a new dscow image in @filename on top of the base image @base,
+ * using blocks of @block_size bytes.
+ *
+ * @base must be a regular file or a block device; its size becomes the
+ * virtual size of the image.  @block_size must be a non-zero power of
+ * two, because the first data block is aligned with a bit mask.  The
+ * file is extended to its full size by writing a single byte at the
+ * last offset.
+ *
+ * Returns a handle to be released with dscow_close(), or NULL with
+ * dscow_errno set.
+ */
+struct dscow *dscow_create(char *filename, char *base, uint32_t block_size)
+{
+	struct dscow *handle;
+	struct stat st;
+	uint64_t bitmap_end;
+	uint64_t last_byte;
+	uint64_t s2 = 0;
+	size_t base_len;
+	const char zero = 0;
+	int ret;
+
+	if (block_size == 0 || (block_size & (block_size - 1)) != 0) {
+		dscow_errno = EINVAL;
+		return NULL;
+	}
+
+	/* Zeroed so that the failure path never frees garbage pointers */
+	handle = calloc(1, sizeof(*handle));
+	if (!handle) {
+		dscow_errno = ENOMEM;
+		return NULL;
+	}
+	handle->fd = -1;
+
+	handle->header.magic = DSCOW_MAGIC;
+	handle->header.version = 1;
+	handle->header.block_size = block_size;
+
+	/* Virtual Size.  Checked before the image file is created so that
+	   an unusable base does not leave a stray file behind */
+	if (stat(base, &st) < 0) {
+		dscow_errno = errno;
+		goto fail;
+	}
+	if (S_ISREG(st.st_mode)) {
+		handle->header.size = st.st_size;
+	} else if (S_ISBLK(st.st_mode)) {
+		/* NOTE(review): the return convention of
+		   get_device_size() is not visible here; a size left at
+		   zero is treated as a failure - confirm */
+		get_device_size(base, &s2);
+		if (s2 == 0) {
+			dscow_errno = EINVAL;
+			goto fail;
+		}
+		handle->header.size = s2;
+	} else {
+		dscow_errno = EINVAL; /* Unknown type */
+		goto fail;
+	}
+
+	handle->fd = open(filename,
+			  O_RDWR | O_CREAT | O_LARGEFILE,
+			  S_IRUSR | S_IWUSR);
+	if (handle->fd < 0) {
+		dscow_errno = errno;
+		perror("open");
+		goto fail;
+	}
+
+	base_len = strlen(base);
+	handle->base_filename = malloc(base_len + 1);
+	if (!handle->base_filename) {
+		dscow_errno = ENOMEM;
+		goto fail;
+	}
+	memcpy(handle->base_filename, base, base_len + 1);
+
+	handle->blocks = handle->header.size / handle->header.block_size;
+	handle->bitmap_count = (handle->blocks / (sizeof(uint32_t) * 8)) + 1;
+
+	/* Base Filename Location */
+	handle->header.base_file_offset = sizeof(handle->header);
+	handle->header.base_file_size = base_len;
+
+	/* Bitmap Location */
+	handle->header.bitmap_offset = handle->header.base_file_offset +
+		handle->header.base_file_size;
+
+	/* First Block Location: end of the bitmap rounded up to a
+	   multiple of the block size */
+	bitmap_end = handle->header.bitmap_offset +
+		(handle->bitmap_count * sizeof(uint32_t));
+	handle->header.first_block =
+		(bitmap_end + (handle->header.block_size - 1)) &
+		~((uint64_t)handle->header.block_size - 1);
+
+	handle->bitmap = calloc(handle->bitmap_count, sizeof(uint32_t));
+	handle->dirty_bitmaps = calloc(handle->bitmap_count, sizeof(uint32_t));
+	if (!handle->bitmap || !handle->dirty_bitmaps) {
+		dscow_errno = ENOMEM;
+		goto fail;
+	}
+
+	ret = _dscow_write_disk_header(handle);
+	if (ret < 0) {
+		/* dscow_errno was set where the failure happened */
+		goto fail;
+	} else if (ret > 0) {
+		dscow_errno = EIO;
+		goto fail;
+	}
+
+	/* Inflate by seeking to the last byte in the file and then
+	   writing to it */
+	last_byte = (handle->header.size + handle->header.first_block) - 1;
+	if (lseek64(handle->fd, last_byte, SEEK_SET) != (off64_t)last_byte) {
+		DPRINTF("Failed to lseek to %llu\n", last_byte);
+		dscow_errno = EIO;
+		goto fail;
+	}
+	if (write(handle->fd, &zero, 1) != 1) {
+		DPRINTF("Failed to write inflation byte\n");
+		dscow_errno = EIO;
+		goto fail;
+	}
+
+	handle->dirty = 0;
+
+	return handle;
+
+ fail:
+	if (handle->fd >= 0)
+		close(handle->fd);
+	free(handle->dirty_bitmaps);
+	handle->dirty_bitmaps = NULL;
+	dscow_cleanup(handle);
+	return NULL;
+}
+
+/*
+ * Flush the header and metadata of @handle to the image file.  Returns
+ * the result of the header write: 0 on success, in which case the handle
+ * is marked clean; any other value means failure and dscow_errno is set
+ * to EIO.
+ */
+int dscow_sync(struct dscow *handle)
+{
+	int status = _dscow_write_disk_header(handle);
+
+	if (status == 0) {
+		handle->dirty = 0;
+		return status;
+	}
+
+	dscow_errno = EIO;
+	return status;
+}
+
+/*
+ * Flush pending metadata changes, close the image file and release the
+ * handle.  A NULL @handle is ignored, as it is by dscow_cleanup().
+ */
+void dscow_close(struct dscow *handle)
+{
+	if (!handle)
+		return;
+
+	if (handle->dirty)
+		dscow_sync(handle);
+
+	close(handle->fd);
+
+	dscow_cleanup(handle);
+}
+
+/*
+ * Mark @block as mapped in the in-memory bitmap and flag both the bitmap
+ * word and the handle as dirty so that the next sync writes the change.
+ * A block that lies outside the bitmap is ignored.
+ */
+void dscow_map_block(struct dscow *handle, uint64_t block)
+{
+	uint64_t field = block / (sizeof(uint32_t) * 8);
+	uint32_t bit = block % (sizeof(uint32_t) * 8);
+
+	/* Never index past the bitmap_count words that were allocated */
+	if (field >= handle->bitmap_count)
+		return;
+
+	handle->dirty_bitmaps[field] = 1;
+	/* Unsigned constant: shifting a signed 1 into bit 31 is undefined */
+	handle->bitmap[field] |= ((uint32_t)1 << bit);
+
+	handle->dirty = 1;
+}
+
+/*
+ * Return 1 when @block is marked as mapped in the in-memory bitmap and 0
+ * otherwise.  A block that lies outside the bitmap is reported as not
+ * mapped.
+ */
+int dscow_is_block_mapped(struct dscow *handle, uint64_t block)
+{
+	uint64_t field = block / (sizeof(uint32_t) * 8);
+	uint32_t bit = block % (sizeof(uint32_t) * 8);
+
+	/* Never index past the bitmap_count words that were allocated */
+	if (field >= handle->bitmap_count)
+		return 0;
+
+	/* Unsigned constant: shifting a signed 1 into bit 31 is undefined */
+	return (handle->bitmap[field] & ((uint32_t)1 << bit)) != 0;
+}
+
diff -r 8c8d5dc4eaf4 -r a3656acd770b tools/cowd/plugins/dscow/dscow_ops.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/cowd/plugins/dscow/dscow_ops.h Fri Aug 25 15:01:35 2006 -0500
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) International Business Machines Corp., 2006
+ * Author: Dan Smith <danms@us.ibm.com>
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser
+ * General Public License. See the file COPYING in the main directory
+ * of this archive for more details.
+ *
+ */
+
+#ifndef __DSCOW_OPS_H
+#define __DSCOW_OPS_H
+
+#include <stdint.h>
+
+#define DSCOW_MAGIC (('D'<<24) | ('a'<<16) | ('n'<<8) | ('S'))
+
+struct dscow_disk_header { /* header stored at offset 0 of a dscow image; read and written as a raw struct */
+ uint32_t magic; /* must equal DSCOW_MAGIC */
+ uint32_t version; /* format version; dscow_create() writes 1 */
+ uint64_t size; /* virtual size of the image, taken from the base image */
+ uint64_t base_file_offset; /* file offset of the base image filename */
+ uint32_t base_file_size; /* length of that filename; no NUL is stored */
+ uint64_t bitmap_offset; /* file offset of the block bitmap */
+ uint64_t first_block; /* file offset of the first data block */
+ uint32_t block_size; /* size of one block in bytes */
+};
+
+struct dscow { /* in-memory handle for an open dscow image */
+ struct dscow_disk_header header; /* copy of the on-disk header */
+
+ uint64_t blocks; /* header.size / header.block_size */
+ uint32_t *bitmap; /* one bit per block; a set bit means the block is mapped */
+ uint32_t bitmap_count; /* number of 32-bit words in bitmap */
+ uint32_t *dirty_bitmaps; /* per-word flag: non-zero when the word must be written back */
+ char *base_filename; /* heap copy of the base image filename */
+
+ int fd; /* descriptor of the image file */
+ int dirty; /* non-zero when there are changes not yet synced */
+};
+
+extern int dscow_errno;
+
+struct dscow *dscow_create(char *filename, char *base, uint32_t block_size);
+struct dscow *dscow_open(char *path);
+void dscow_close(struct dscow *handle);
+int dscow_sync(struct dscow *handle);
+int dscow_is_block_mapped(struct dscow *handle, uint64_t block);
+void dscow_map_block(struct dscow *handle, uint64_t block);
+void dscow_print_info(struct dscow *handle);
+#endif
diff -r 8c8d5dc4eaf4 -r a3656acd770b tools/cowd/plugins/dscow/dscow_plugin.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/cowd/plugins/dscow/dscow_plugin.c Fri Aug 25 15:01:35 2006 -0500
@@ -0,0 +1,206 @@
+/*
+ * Copyright (C) International Business Machines Corp., 2006
+ * Author: Dan Smith <danms@us.ibm.com>
+ * Author: Ryan Grimm <grimm@us.ibm.com>
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser
+ * General Public License. See the file COPYING in the main directory
+ * of this archive for more details.
+ *
+ */
+
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <stdint.h>
+
+#include <cowd_plugin.h>
+
+#include "dscow_ops.h"
+
+#define ERR_LEN 256
+#define PATH_LEN 256
+
+static char errmsg[ERR_LEN];
+
+struct dscow_prv { /* per-device state kept in cow_device.plugin_private */
+ char base_dev[PATH_LEN]; /* base image path, or the loop device attached to it */
+ char cow_dev[PATH_LEN]; /* dscow image path, or the loop device attached to it */
+ dev_t base; /* st_rdev of base_dev */
+ dev_t cow; /* st_rdev of cow_dev */
+ struct dscow *dscow; /* handle returned by dscow_open() */
+};
+
+/*
+ * Return a newly allocated two-element array holding the base and COW
+ * device numbers, in that order, and store the element count in @count.
+ * On allocation failure NULL is returned and @count is set to 0.
+ */
+static dev_t *dscow_get_devs(struct cow_device *dev, int *count)
+{
+	struct dscow_prv *prv = dev->plugin_private;
+	dev_t *list = malloc(2 * sizeof(*list));
+
+	if (list == NULL) {
+		*count = 0;
+		return NULL;
+	}
+
+	list[0] = prv->base;
+	list[1] = prv->cow;
+	*count = 2;
+
+	return list;
+}
+
+/*
+ * Attach the regular file named by @path to the first loop device, from
+ * /dev/loop0 to /dev/loop255, for which loop_setup() returns non-zero,
+ * and replace @path (a PATH_LEN buffer) with that loop device's name.
+ * Returns 1 when a loop device was attached and 0 when none could be.
+ */
+static int dscow_attach_loop(char *path)
+{
+	char loop_dev[128];
+	int i;
+
+	for (i = 0; i < 256; i++) {
+		snprintf(loop_dev, sizeof(loop_dev), "/dev/loop%d", i);
+		/* NOTE(review): a non-zero return is taken to mean the
+		   device was attached, as the original loop did - confirm
+		   against loop_setup() */
+		if (loop_setup(loop_dev, path)) {
+			/* "%s" keeps the name from being parsed as a
+			   format string */
+			snprintf(path, PATH_LEN, "%s", loop_dev);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Plugin entry point: open the dscow image named by the second plugin
+ * argument, attach loop devices to the base and COW images when they are
+ * regular files, and record both device numbers and the block geometry
+ * in @dev.
+ *
+ * Returns PLUGIN_OK on success.  On failure PLUGIN_FAIL is returned,
+ * errmsg describes the problem and everything acquired so far is
+ * released.  @debug is not used.
+ */
+static int dscow_init(struct cow_device *dev, int debug)
+{
+	struct dscow_prv *prv;
+	struct stat s;
+	int base_looped = 0;
+	int cow_looped = 0;
+
+	(void)debug;
+
+	if (dev->plugin_num_args != 2) {
+		snprintf(errmsg, ERR_LEN, "Need two arguments");
+		return PLUGIN_FAIL;
+	}
+
+	prv = malloc(sizeof(*prv));
+	if (!prv) {
+		snprintf(errmsg, ERR_LEN, "Failed to allocate memory");
+		return PLUGIN_FAIL;
+	}
+
+	/* Need some grace here */
+	prv->dscow = dscow_open(dev->plugin_args[1]);
+	if (!prv->dscow) {
+		free(prv);
+		snprintf(errmsg, ERR_LEN, "Failed to open dscow: %s",
+			 strerror(dscow_errno));
+		return PLUGIN_FAIL;
+	}
+
+	snprintf(prv->base_dev, PATH_LEN, "%s", prv->dscow->base_filename);
+	snprintf(prv->cow_dev, PATH_LEN, "%s", dev->plugin_args[1]);
+
+	if (is_file(prv->base_dev)) {
+		if (!dscow_attach_loop(prv->base_dev)) {
+			snprintf(errmsg, ERR_LEN,
+				 "No loop device available for %s",
+				 prv->base_dev);
+			goto fail;
+		}
+		base_looped = 1;
+	}
+
+	if (is_file(prv->cow_dev)) {
+		if (!dscow_attach_loop(prv->cow_dev)) {
+			snprintf(errmsg, ERR_LEN,
+				 "No loop device available for %s",
+				 prv->cow_dev);
+			goto fail;
+		}
+		cow_looped = 1;
+	}
+
+	/* FIXME: replace this with a call */
+	dev->block_size = (uint64_t)prv->dscow->header.block_size;
+	dev->blocks = prv->dscow->blocks;
+
+	fflush(stdout);
+
+	/* st_rdev is only meaningful when stat() succeeded */
+	if (stat(prv->base_dev, &s) < 0) {
+		snprintf(errmsg, ERR_LEN, "Failed to stat %s", prv->base_dev);
+		goto fail;
+	}
+	prv->base = s.st_rdev;
+
+	if (stat(prv->cow_dev, &s) < 0) {
+		snprintf(errmsg, ERR_LEN, "Failed to stat %s", prv->cow_dev);
+		goto fail;
+	}
+	prv->cow = s.st_rdev;
+
+	dev->plugin_private = prv;
+
+	return PLUGIN_OK;
+
+ fail:
+	if (cow_looped)
+		loop_destroy(prv->cow_dev);
+	if (base_looped)
+		loop_destroy(prv->base_dev);
+	dscow_close(prv->dscow);
+	free(prv);
+	return PLUGIN_FAIL;
+}
+
+/*
+ * Sync the dscow metadata of @dev to disk.  Returns PLUGIN_OK on
+ * success; on failure errmsg is filled in and PLUGIN_FAIL is returned.
+ */
+static int dscow_write_metadata(struct cow_device *dev)
+{
+	struct dscow_prv *prv = dev->plugin_private;
+	int failed = dscow_sync(prv->dscow);
+
+	if (!failed)
+		return PLUGIN_OK;
+
+	snprintf(errmsg, ERR_LEN, "Failed to sync: %s",
+		 strerror(dscow_errno));
+	return PLUGIN_FAIL;
+}
+
+/* Report whether the dscow handle of @dev has changes not yet synced. */
+static bool dscow_need_flush(struct cow_device *dev)
+{
+	struct dscow_prv *prv = dev->plugin_private;
+	struct dscow *image = prv->dscow;
+
+	return image->dirty;
+}
+
+/*
+ * Decide where a request is sent.  Blocks that are already mapped, and
+ * writes to blocks that are not, go to the COW device at the block number
+ * shifted past the image metadata; a write to an unmapped block also
+ * names the base device as the copy source.  Reads of unmapped blocks go
+ * to the base device with the block number unchanged.
+ */
+static int dscow_map(struct cow_device *dev, struct dmu_map_data *data)
+{
+	struct dscow_prv *prv = dev->plugin_private;
+	struct dscow *image = prv->dscow;
+	uint64_t block = dmu_map_get_block(data);
+	int mapped = dscow_is_block_mapped(image, block);
+
+	if (!mapped && !dmu_map_is_write(data)) {
+		/* Don't change block */
+		dmu_map_set_dest_dev(data, prv->base);
+		return PLUGIN_OK;
+	}
+
+	dmu_map_set_block(data,
+			  block + (image->header.first_block /
+				   image->header.block_size));
+
+	/* FIXME: Might as well mark as write here when already mapped! */
+	if (!mapped)
+		dmu_map_set_copy_src_dev(data, prv->base);
+
+	dmu_map_set_dest_dev(data, prv->cow);
+
+	return PLUGIN_OK;
+}
+
+/* Record @org_block as mapped in the dscow bitmap of @dev. */
+static int dscow_map_complete(struct cow_device *dev, uint64_t org_block)
+{
+	struct dscow_prv *prv = dev->plugin_private;
+	struct dscow *image = prv->dscow;
+
+	dscow_map_block(image, org_block);
+
+	return PLUGIN_OK;
+}
+
+/*
+ * Tear down the plugin state of @dev: close the dscow handle, destroy
+ * the loop devices for both paths and free the private data.  Does
+ * nothing when no private data is attached.
+ */
+static void dscow_cleanup(struct cow_device *dev)
+{
+	struct dscow_prv *prv = dev->plugin_private;
+
+	if (prv != NULL) {
+		dscow_close(prv->dscow);
+
+		loop_destroy(prv->base_dev);
+		loop_destroy(prv->cow_dev);
+
+		free(prv);
+	}
+}
+
+/*
+ * Fill in the plugin operations table @p with the dscow callbacks and
+ * the shared error message buffer.  Always returns 1.
+ */
+int load_plugin(struct cowd_plugin *p)
+{
+	/* Lifecycle */
+	p->init_plugin = dscow_init;
+	p->cleanup_plugin = dscow_cleanup;
+
+	/* Mapping */
+	p->map_prepare = dscow_map;
+	p->map_complete = dscow_map_complete;
+
+	/* Metadata */
+	p->need_flush = dscow_need_flush;
+	p->write_metadata = dscow_write_metadata;
+
+	/* Reporting */
+	p->get_devs = dscow_get_devs;
+	p->errmsg = errmsg;
+
+	return 1;
+}
diff -r 8c8d5dc4eaf4 -r a3656acd770b tools/cowd/plugins/dscow/dscow_tool.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/cowd/plugins/dscow/dscow_tool.c Fri Aug 25 15:01:35 2006 -0500
@@ -0,0 +1,190 @@
+/*
+ * Copyright (C) International Business Machines Corp., 2006
+ * Author: Dan Smith <danms@us.ibm.com>
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser
+ * General Public License. See the file COPYING in the main directory
+ * of this archive for more details.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <getopt.h>
+#include <string.h>
+#include <errno.h>
+
+#include "dscow_ops.h"
+
+enum verbosity {NORMAL, LOUD, QUIET};
+
+/*
+ * Print the command line help to stdout.  @name is the program name
+ * shown in the usage line.
+ */
+void usage(char *name)
+{
+	printf("Usage: %s [OPTIONS] [IMAGE] [BASE]\n"
+	       "Create a dscow image\n"
+	       "\n"
+	       "Options:\n"
+	       " -c,--create Create a new dscow image\n"
+	       " -i,--info Display information about a dscow image\n"
+	       " -V,--version Display version information\n"
+	       " -v,--verbose Be verbose during image creation\n"
+	       " -b,--block-size=KB Set the block size in KB (default 64)\n"
+	       " -q,--quiet Be quiet\n"
+	       "\n"
+	       " IMAGE is the file to contain the changes\n"
+	       " BASE is the base read-only image\n",
+	       name);
+}
+
+/*
+ * Create the dscow image @image on top of @base with a block size of
+ * @bs KB.  LOUD verbosity prints the image details, NORMAL prints a short
+ * confirmation.  Returns 0 on success and 1 on failure.
+ */
+int do_create(char *image, char *base, unsigned long bs, int verbose)
+{
+	struct dscow *handle;
+
+	if (access(base, R_OK) != 0) {
+		perror(base);
+		return 1;
+	}
+
+	handle = dscow_create(image, base, bs << 10);
+	if (handle == NULL) {
+		fprintf(stderr, "Failed: %s\n", strerror(dscow_errno));
+		return 1;
+	}
+
+	switch (verbose) {
+	case LOUD:
+		dscow_print_info(handle);
+		break;
+	case NORMAL:
+		fprintf(stderr, "Created %s\n", image);
+		break;
+	default:
+		break;
+	}
+
+	dscow_close(handle);
+
+	return 0;
+}
+
+/*
+ * Print information about the dscow image @image unless @verbose is
+ * QUIET.  Returns 0 when the image could be opened and 1 otherwise.
+ */
+int do_info(char *image, int verbose)
+{
+	struct dscow *handle;
+	const char *reason;
+
+	if (access(image, R_OK)) {
+		perror(image);
+		return 1;
+	}
+
+	handle = dscow_open(image);
+	if (!handle) {
+		if (dscow_errno == EINVAL)
+			reason = "Not a dscow file";
+		else if (dscow_errno == ENOTSUP)
+			reason = "File created with newer tools";
+		else
+			/* strerror() instead of strerror_r(): the GNU
+			   variant of strerror_r() may return its own
+			   string and leave the caller's buffer empty */
+			reason = strerror(dscow_errno);
+
+		if (verbose != QUIET)
+			printf("Unable to read %s: %s\n", image, reason);
+		return 1;
+	}
+
+	if (verbose != QUIET)
+		dscow_print_info(handle);
+
+	dscow_close(handle);
+
+	return 0;
+}
+
+/* Print the tool version and copyright notice to stdout; returns 0. */
+int do_version()
+{
+	static const char banner[] =
+		"dscow_tool v0.1.0\n"
+		"\n"
+		"Copyright (C) International Business Machines Corp., 2006\n"
+		"Dan Smith <danms@us.ibm.com>\n";
+
+	fputs(banner, stdout);
+
+	return 0;
+}
+
+/*
+ * Parse the command line and run the selected mode: create (-c), info
+ * (-i) or version (-V).  When several mode options are given the last
+ * one wins.  The usage text is printed and 1 is returned when no mode is
+ * selected, an unknown option is seen, or the number of positional
+ * arguments does not fit the mode.  Otherwise the result of the mode
+ * handler is returned.
+ */
+int process_arguments(int argc, char **argv)
+{
+	int c;
+	/* Starts as Error so that a command line without a mode option
+	   prints the usage text instead of reading an unset variable */
+	enum {Create, Info, Version, Error} mode = Error;
+	int verbose = NORMAL;
+	unsigned long block_size = 64;
+	char *end;
+	static struct option long_opts[] = {
+		{"create", 0, 0, 'c'},
+		{"info", 0, 0, 'i'},
+		{"version", 0, 0, 'V'},
+		{"verbose", 0, 0, 'v'},
+		{"block-size", 1, 0, 'b'},
+		{"quiet", 0, 0, 'q'},
+		{0, 0, 0, 0}
+	};
+
+	while ((c = getopt_long(argc, argv, "ciVvb:q",
+				long_opts, NULL)) != -1) {
+		switch (c) {
+		case 'c':
+			mode = Create;
+			break;
+
+		case 'V':
+			mode = Version;
+			break;
+
+		case 'i':
+			mode = Info;
+			break;
+
+		case 'v':
+			verbose = LOUD;
+			break;
+
+		case 'b':
+			/* The size in KB must be a non-zero power of two,
+			   because the image layout is aligned with a bit
+			   mask, and must still fit in 32 bits once it is
+			   converted to bytes */
+			block_size = strtoul(optarg, &end, 0);
+			if (end == optarg || *end != '\0' ||
+			    block_size == 0 ||
+			    (block_size & (block_size - 1)) != 0 ||
+			    block_size > (UINT32_MAX >> 10)) {
+				fprintf(stderr, "Invalid block size: %s\n",
+					optarg);
+				return 1;
+			}
+			break;
+
+		case 'q':
+			verbose = QUIET;
+			break;
+
+		default:
+			/* getopt_long() has already reported the problem */
+			usage(argv[0]);
+			return 1;
+		}
+	}
+
+	if (mode == Create) {
+		if ((argc - optind) != 2) {
+			usage(argv[0]);
+			return 1;
+		}
+		return do_create(argv[optind], argv[optind+1],
+				 block_size, verbose);
+	} else if (mode == Info) {
+		if ((argc - optind) != 1) {
+			usage(argv[0]);
+			return 1;
+		}
+		return do_info(argv[optind], verbose);
+	} else if (mode == Version) {
+		return do_version();
+	}
+
+	usage(argv[0]);
+	return 1;
+}
+
+/* Program entry point: the exit status is whatever argument processing returns. */
+int main(int argc, char **argv)
+{
+	int status = process_arguments(argc, argv);
+
+	return status;
+}
^ permalink raw reply [flat|nested] 7+ messages in thread* [PATCH 5 of 6] qcow plugin for dm-userspace userspace tool
[not found] <patchbomb.1156540578@venkman-64>
` (3 preceding siblings ...)
2006-08-25 21:24 ` [PATCH 4 of 6] dscow plugin for dm-userspace " Ryan Grimm
@ 2006-08-25 21:24 ` Ryan Grimm
2006-08-25 21:24 ` [PATCH 6 of 6] dm-userspace backend script and xmexample change Ryan Grimm
5 siblings, 0 replies; 7+ messages in thread
From: Ryan Grimm @ 2006-08-25 21:24 UTC (permalink / raw)
To: Xen Devel; +Cc: Dan Smith
Signed-off-by: Ryan Grimm <grimm@us.ibm.com>
Signed-off-by: Dan Smith <danms@us.ibm.com>
# HG changeset patch
# User Ryan Grimm <grimm@us.ibm.com>
# Date 1156536096 18000
# Node ID be4574d288030b64d4623dd33505d0990185a6b9
# Parent a3656acd770b4f21ad54fa961032ff39562058eb
qcow plugin for dm-userspace userspace tool
diff -r a3656acd770b -r be4574d28803 tools/cowd/configure.in
--- a/tools/cowd/configure.in Fri Aug 25 15:01:35 2006 -0500
+++ b/tools/cowd/configure.in Fri Aug 25 15:01:36 2006 -0500
@@ -95,11 +95,13 @@ AC_SUBST(GLOBAL_CFLAGS)
AC_CONFIG_FILES([Makefile
plugins/Makefile
+ plugins/qcow/Makefile
plugins/dscow/Makefile])
# This just makes it easier to run cowd from the source directory
# for testing
mkdir -p lib
+ln -sf ../plugins/qcow/.libs/libcowd_qcow.so.0 lib/libcowd_qcow.so
ln -sf ../plugins/dscow/.libs/libcowd_dscow.so.0 lib/libcowd_dscow.so
ln -sf ../plugins/dscow/.libs/libcowd_dscow.la lib/libcowd_dscow.la
diff -r a3656acd770b -r be4574d28803 tools/cowd/plugins/Makefile.am
--- a/tools/cowd/plugins/Makefile.am Fri Aug 25 15:01:35 2006 -0500
+++ b/tools/cowd/plugins/Makefile.am Fri Aug 25 15:01:36 2006 -0500
@@ -1,1 +1,1 @@ SUBDIRS = dscow
-SUBDIRS = dscow
+SUBDIRS = dscow qcow
diff -r a3656acd770b -r be4574d28803 tools/cowd/plugins/qcow/Makefile.am
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/cowd/plugins/qcow/Makefile.am Fri Aug 25 15:01:36 2006 -0500
@@ -0,0 +1,3 @@
+lib_LTLIBRARIES = libcowd_qcow.la
+libcowd_qcow_la_CFLAGS = -I../.. -I../../../../module @GLOBAL_CFLAGS@
+libcowd_qcow_la_SOURCES = qcow_plugin.c qcow_ops.c qcow.h qcow_ops.h
diff -r a3656acd770b -r be4574d28803 tools/cowd/plugins/qcow/qcow.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/cowd/plugins/qcow/qcow.h Fri Aug 25 15:01:36 2006 -0500
@@ -0,0 +1,167 @@
+#ifndef __QCOW_H
+#define __QCOW_H
+
+#include <sys/types.h>
+#include <stdint.h>
+
+#define NAME "qcow: "
+#define L2_CACHE_SIZE 16
+
+#define QCOW_MAGIC (('Q' << 24) | ('F' << 16) | ('I' << 8) | 0xFB)
+
+typedef uint64_t qcow_te ;
+
+struct qcow_header { /* qcow image header; qcow_rw_header() reads it as a raw struct and byte-swaps the multi-byte fields */
+ uint32_t magic; /* NOTE(review): presumably compared against QCOW_MAGIC by a caller - not checked in the code shown here */
+ uint32_t version;
+ uint64_t backing_filename_offset; /* file offset of the backing filename */
+ uint32_t backing_filename_size; /* length of the backing filename */
+ uint32_t mtime;
+ uint64_t size; /* virtual size; qcow_num_l1() divides it by the span of one L1 entry */
+ uint8_t cluster_bits; /* log2 of the block size, see qcow_block_size() */
+ uint8_t l2_bits; /* log2 of the number of entries in an L2 table, see qcow_num_l2() */
+ uint32_t crypto_method;
+ uint64_t l1_table_offset; /* file offset of the L1 table */
+};
+
+struct l1_entry { /* in-memory state of one L1 slot and the L2 table it refers to */
+ int dirty; /* non-zero when the L2 table still has to be written to disk */
+ uint64_t loc_on_disk; /* file offset of the L2 table; 0 means none allocated */
+ qcow_te *table; /* L2 table contents in memory, NULL until loaded or allocated */
+};
+
+struct l2_cache { /* NOTE(review): not used by the code visible here; field meanings below are inferred from names - confirm */
+ uint64_t l1_index; /* presumably the L1 slot this cache entry belongs to */
+ struct l1_entry *l2; /* presumably the cached L1 entry / L2 table */
+ uint32_t hits; /* presumably a use counter for replacement decisions */
+};
+
+struct qcow { /* in-memory state of an open qcow image */
+ struct qcow_header header; /* header in host byte order */
+ struct l1_entry *table; /* L1 table, qcow_num_l1() entries */
+ int l1_dirty; /* non-zero when the L1 table has to be written to disk */
+
+ char *filename; /* backing filename read by qcow_read_backing_info() */
+
+ struct l2_cache *l2_cache[L2_CACHE_SIZE];
+ int l2_cache_counter;
+
+ uint64_t next_avail_block; /* end of the highest chunk seen so far; new tables are placed here */
+
+ /* This is the offset we use for the file. The problem is,
+ the kernel module expects to remap whole aligned blocks.
+ Since we can interleave L2 blocks with differently-sized
+ (NOTE(review): the original comment stops mid-sentence here) */
+ uint64_t offset;
+
+ int fd; /* descriptor of the image file */
+
+};
+
+/*
+ * Number of L1 entries needed to cover the whole image: the image size
+ * divided by the span of one L1 entry (block size times the number of L2
+ * entries), rounded up.
+ */
+static inline uint64_t qcow_num_l1(struct qcow_header *h)
+{
+	unsigned int bits = (unsigned int)h->cluster_bits + h->l2_bits;
+	uint64_t nonl1;
+
+	/* A span of 2^64 or more covers any image with a single entry,
+	   and a shift that wide would be undefined */
+	if (bits >= 64)
+		return h->size ? 1 : 0;
+
+	/* Shift a 64-bit one: the product of two int shifts overflows
+	   once the combined bit count reaches 31 */
+	nonl1 = (uint64_t)1 << bits;
+
+	if (h->size % nonl1)
+		return (h->size / nonl1) + 1;
+	else
+		return (h->size / nonl1);
+}
+
+/* Number of entries in one L2 table (2^l2_bits), computed in 64 bits. */
+static inline uint64_t qcow_num_l2(struct qcow_header *h)
+{
+	return (uint64_t)1 << h->l2_bits;
+}
+
+/* Size of one block (2^cluster_bits), computed in 64 bits. */
+static inline uint64_t qcow_block_size(struct qcow_header *h)
+{
+	return (uint64_t)1 << h->cluster_bits;
+}
+
+/* Mask selecting the offset-within-block bits (the low cluster_bits bits). */
+static inline uint64_t qcow_cmask(struct qcow_header *h)
+{
+	return ((uint64_t)1 << h->cluster_bits) - 1;
+}
+
+/*
+ * Mask selecting the L2 index bits: l2_bits bits located directly above
+ * the offset-within-block bits.  Computed in 64 bits so that the mask is
+ * not truncated or sign-extended from an int.
+ */
+static inline uint64_t qcow_l2mask(struct qcow_header *h)
+{
+	return (((uint64_t)1 << h->l2_bits) - 1) << h->cluster_bits;
+}
+
+/* Mask selecting the L1 index bits: everything above the L2 index and block offset bits. */
+static inline uint64_t qcow_l1mask(struct qcow_header *h)
+{
+	const uint64_t low_bits = qcow_cmask(h) | qcow_l2mask(h);
+
+	return ~low_bits;
+}
+
+/*
+ * Convert a 64-bit value from network (big-endian) to host byte order.
+ * On a big-endian host the value is returned unchanged; otherwise the
+ * two 32-bit halves are byte-swapped and exchanged.
+ */
+static inline uint64_t ntohll(uint64_t value)
+{
+	uint32_t a, b;
+
+	/* ntohl() is the identity on big-endian hosts; exchanging the
+	   halves there would corrupt the value */
+	if (ntohl(1) == 1)
+		return value;
+
+	a = value >> 32;
+	b = value & 0xFFFFFFFF;
+
+	return (((uint64_t)ntohl(b)) << 32) | ntohl(a);
+}
+
+/*
+ * Convert a 64-bit value from host to network (big-endian) byte order.
+ * On a big-endian host the value is returned unchanged; otherwise the
+ * two 32-bit halves are byte-swapped and exchanged.
+ */
+static inline uint64_t htonll(uint64_t value)
+{
+	uint32_t a, b;
+
+	/* htonl() is the identity on big-endian hosts; exchanging the
+	   halves there would corrupt the value */
+	if (htonl(1) == 1)
+		return value;
+
+	a = value >> 32;
+	b = value & 0xFFFFFFFF;
+
+	return (((uint64_t)htonl(b)) << 32) | htonl(a);
+}
+
+/* Extract the L1 table index from @sector: mask off the lower bits, then shift them out. */
+static inline uint64_t qcow_get_l1_entry(struct qcow *qcow, uint64_t sector)
+{
+	const uint64_t l1_bits = sector & qcow_l1mask(&qcow->header);
+
+	return l1_bits >>
+		(qcow->header.l2_bits + qcow->header.cluster_bits);
+}
+
+/* Extract the L2 table index from @sector. */
+static inline uint64_t qcow_get_l2_entry(struct qcow *qcow, uint64_t sector)
+{
+	const uint64_t l2_bits = sector & qcow_l2mask(&qcow->header);
+
+	return l2_bits >> (qcow->header.cluster_bits);
+}
+
+/*
+ * Combine an L1 index, an L2 index and an in-block offset into a single
+ * value.  Only the offset is masked; the indexes are shifted into place
+ * as given.
+ */
+static inline uint64_t qcow_make_te(struct qcow *qcow,
+				    uint64_t l1_entry,
+				    uint64_t l2_entry,
+				    uint64_t index)
+{
+	const uint64_t l1_part =
+		l1_entry << (qcow->header.l2_bits + qcow->header.cluster_bits);
+	const uint64_t l2_part = l2_entry << (qcow->header.cluster_bits);
+	const uint64_t offset_part = index & qcow_cmask(&qcow->header);
+
+	return l1_part | l2_part | offset_part;
+}
+
+/* Remainder of @sector within a block of the image's block size. */
+static inline uint64_t qcow_calc_offset(struct qcow *qcow,
+					uint64_t sector)
+{
+	const uint64_t block_size = qcow_block_size(&qcow->header);
+
+	return sector % block_size;
+}
+
+/* Index of the block containing @sector: @sector divided by the block size. */
+static inline uint64_t qcow_align_to_block(struct qcow *qcow,
+					   uint64_t sector)
+{
+	const uint64_t block_size = qcow_block_size(&qcow->header);
+
+	return sector / block_size;
+}
+
+
+
+#endif
diff -r a3656acd770b -r be4574d28803 tools/cowd/plugins/qcow/qcow_ops.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/cowd/plugins/qcow/qcow_ops.c Fri Aug 25 15:01:36 2006 -0500
@@ -0,0 +1,603 @@
+/*
+ * Copyright (C) International Business Machines Corp., 2006
+ * Author: Dan Smith <danms@us.ibm.com>
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser
+ * General Public License. See the file COPYING in the main directory
+ * of this archive for more details.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <netinet/in.h> /* for endian ops */
+
+#include "qcow.h"
+#include "qcow_ops.h"
+
+#define OPS_DEBUG 0
+
+#define LOG_ABOVE (128 << 20)
+
+/*
+ * Note that a chunk of @block_size bytes starting at @sector exists and,
+ * if it ends beyond everything seen so far, move next_avail_block to its
+ * end.  New blocks and tables are allocated from that point.
+ *
+ * Returns 1 when the pointer moved, 0 when it did not, and -1 when the
+ * chunk ends beyond header.size.
+ */
+static int qcow_seen_block(struct qcow *qcow,
+			   qcow_te sector,
+			   qcow_te block_size)
+{
+	const uint64_t chunk_end = sector + block_size;
+
+	if (chunk_end > qcow->header.size) {
+		fprintf(stderr,
+			"*** ERROR: Saw block %llu beyond end of %llu\n",
+			chunk_end, qcow->header.size);
+		return -1;
+	}
+
+	if (chunk_end <= qcow->next_avail_block)
+		return 0;
+
+	qcow->next_avail_block = chunk_end;
+	return 1;
+}
+
+/*
+ * Reset the table pointer, backing filename, L1 dirty flag and
+ * allocation pointer of @qcow.  Other members are left untouched.
+ */
+void qcow_init_qcow(struct qcow *qcow)
+{
+	/* Nothing allocated yet */
+	qcow->filename = NULL;
+	qcow->table = NULL;
+
+	/* Nothing seen or modified yet */
+	qcow->next_avail_block = 0;
+	qcow->l1_dirty = 0;
+}
+
+/*
+ * Byte-swap every multi-byte field of @header between host and network
+ * (big-endian) order.  The conversion is its own inverse, so the same
+ * routine serves both directions.
+ */
+static void qcow_swap_header(struct qcow_header *header)
+{
+	header->magic = ntohl(header->magic);
+	header->version = ntohl(header->version);
+	header->backing_filename_offset =
+		ntohll(header->backing_filename_offset);
+	header->backing_filename_size = ntohl(header->backing_filename_size);
+	header->mtime = ntohl(header->mtime);
+	header->size = ntohll(header->size);
+	header->crypto_method = ntohl(header->crypto_method);
+	header->l1_table_offset = ntohll(header->l1_table_offset);
+}
+
+/*
+ * Read the qcow header from the start of @fd into @header, converting it
+ * to host byte order.  If @w!=0, write @header to @fd instead; it is
+ * converted to big-endian for the write and is back in host byte order
+ * when the function returns.
+ *
+ * Returns 1 on success and 0 on failure.
+ */
+int qcow_rw_header(int fd, struct qcow_header *header, int w)
+{
+	int ret;
+	loff_t offset = 0;
+
+	if (dio_lseek(fd, offset, SEEK_SET) != offset) {
+		fprintf(stderr, "Offset was %lli instead of 0\n", offset);
+		perror("dio_lseek");
+		return 0;
+	}
+
+	if (w) {
+		/* Swap in place around the write so that the I/O uses
+		   the caller's buffer, as the read does */
+		qcow_swap_header(header);
+		ret = dio_write(fd, header, sizeof(*header));
+		qcow_swap_header(header);
+	} else {
+		ret = dio_read(fd, header, sizeof(*header));
+	}
+
+	if (ret < 0) {
+		perror("qcow-header");
+		return 0;
+	} else if (ret != (int)sizeof(*header)) {
+		fprintf(stderr, "Short %s of header: %i of %i bytes\n",
+			w ? "write" : "read", ret, (int)sizeof(*header));
+		return 0;
+	}
+
+	/* Convert endianness of what was just read */
+	if (!w)
+		qcow_swap_header(header);
+
+	return 1;
+}
+
+/*
+ * Read the backing filename described by the header into a newly
+ * allocated, NUL-terminated string and store it in qcow->filename.
+ *
+ * Returns 1 on success.  On failure 0 is returned and qcow->filename is
+ * left unchanged.
+ */
+int qcow_read_backing_info(struct qcow *qcow)
+{
+	uint32_t len = qcow->header.backing_filename_size;
+	uint64_t pos = qcow->header.backing_filename_offset;
+	char *name;
+	int ret;
+
+	name = malloc((size_t)len + 1);
+	if (name == NULL) {
+		fprintf(stderr,
+			NAME "failed to allocate backing filename\n");
+		return 0;
+	}
+
+	/* Compare the full-width result: storing it in an int would
+	   truncate offsets of 2GB and more */
+	if (dio_lseek(qcow->fd, pos, SEEK_SET) != pos) {
+		fprintf(stderr, "lseek to %llu\n",
+			(unsigned long long)pos);
+		perror("qcow-lseek");
+		free(name);
+		return 0;
+	}
+
+	ret = dio_read(qcow->fd, name, len);
+	if (ret < 0 || (uint32_t)ret != len) {
+		fprintf(stderr, "qcow-read: EOF while reading filename\n");
+		free(name);
+		return 0;
+	}
+	name[len] = '\0';
+
+	qcow->filename = name;
+
+	return 1;
+}
+
+/*
+ * Allocate zeroed memory for the L1 table, one struct l1_entry per L1
+ * slot computed from the header.  Returns 1 on success and 0 on
+ * allocation failure.
+ */
+int qcow_init_from_header(struct qcow *qcow)
+{
+	uint64_t entries = qcow_num_l1(&qcow->header);
+
+	qcow->table = calloc(entries, sizeof(*qcow->table));
+
+	if (qcow->table == NULL) {
+		/* Report the element size that was actually requested */
+		fprintf(stderr,
+			NAME "*** failed to calloc %llu x %lu for table\n",
+			(unsigned long long)entries,
+			(unsigned long)sizeof(*qcow->table));
+		return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Write @table to @fd:@pos, @length entries.  The entries are converted
+ * to big-endian in a temporary buffer, so @table itself is not modified.
+ * Returns 1 on success and 0 on failure.
+ */
+static int qcow_write_table(int fd, uint64_t pos,
+			    uint64_t length, qcow_te *table)
+{
+	qcow_te *disk_table;
+	uint64_t bytes = length * sizeof(qcow_te);
+	uint64_t n;
+	int64_t written;
+	int ok = 0;
+
+	disk_table = calloc(length, sizeof(qcow_te));
+	if (!disk_table)
+		return 0;
+
+	for (n = 0; n < length; n++)
+		disk_table[n] = htonll(table[n]);
+
+	if (dio_lseek(fd, pos, SEEK_SET) != pos) {
+		perror("write_table lseek");
+		goto out;
+	}
+
+	if (OPS_DEBUG)
+		fprintf(stderr, "Writing %llu bytes @ %llu\n",
+			(unsigned long long)bytes,
+			(unsigned long long)pos);
+
+	written = dio_write(fd, disk_table, bytes);
+
+	/* Test for an error first: -1 is also "not the full length",
+	   which would hide the errno report */
+	if (written < 0) {
+		perror("write");
+		goto out;
+	}
+	if ((uint64_t)written != bytes) {
+		fprintf(stderr,
+			"Short write: %lli/%llu\n",
+			(long long)written,
+			(unsigned long long)bytes);
+		goto out;
+	}
+
+	ok = 1;
+
+ out:
+	/* The temporary buffer is released on every path */
+	free(disk_table);
+	return ok;
+}
+
+/*
+ * Read @table from @fd:@pos, @length entries, converting each entry from
+ * big-endian to host byte order.  Returns 1 on success and 0 on failure;
+ * @table is only written when the whole read succeeded.
+ */
+static int qcow_read_table(int fd, uint64_t pos,
+			   uint64_t length, qcow_te *table)
+{
+	qcow_te *disk_table;
+	uint64_t bytes = length * sizeof(qcow_te);
+	uint64_t n;
+	int64_t got;
+	int ok = 0;
+
+	disk_table = calloc(length, sizeof(qcow_te));
+	if (!disk_table)
+		return 0;
+
+	if (dio_lseek(fd, pos, SEEK_SET) != pos) {
+		perror("lseek");
+		goto out;
+	}
+
+	got = dio_read(fd, disk_table, bytes);
+
+	/* Test for an error first: -1 is also "not the full length",
+	   which would hide the errno report */
+	if (got < 0) {
+		perror("read");
+		goto out;
+	}
+	if ((uint64_t)got != bytes) {
+		fprintf(stderr,
+			"Short read: %lli/%llu\n",
+			(long long)got,
+			(unsigned long long)bytes);
+		goto out;
+	}
+
+	for (n = 0; n < length; n++)
+		table[n] = ntohll(disk_table[n]);
+
+	ok = 1;
+
+ out:
+	/* The temporary buffer is released on every path */
+	free(disk_table);
+	return ok;
+}
+
+/*
+ * Load the L2 table of L1 slot @index from disk into memory.
+ *
+ * Returns 1 on success.  Returns 0 when the slot has no table on disk,
+ * memory cannot be allocated or the read fails; in those cases the
+ * entry keeps whatever table pointer it had before the call.
+ */
+static int qcow_load_l2(struct qcow *qcow, qcow_te index)
+{
+	struct l1_entry *entry;
+	qcow_te *fresh;
+
+	entry = &qcow->table[index];
+
+	if (entry->loc_on_disk == 0) {
+		fprintf(stderr,
+			"*** ERROR: Trying to load non-existent L2 %llu\n",
+			(unsigned long long)index);
+		return 0;
+	}
+
+	if (entry->table != NULL)
+		fprintf(stderr,
+			"*** WARNING: Reloading L2 table %llu\n",
+			(unsigned long long)index);
+
+	fresh = calloc(qcow_num_l2(&qcow->header),
+		       sizeof(qcow_te));
+
+	if (fresh == NULL)
+		return 0;
+
+	/* Read into a private buffer first, so that a failed read cannot
+	   leave a zeroed table installed as if it had been loaded */
+	if (!qcow_read_table(qcow->fd,
+			     entry->loc_on_disk,
+			     qcow_num_l2(&qcow->header),
+			     fresh)) {
+		free(fresh);
+		return 0;
+	}
+
+	/* NOTE(review): on a reload the previous table is dropped
+	   without being freed, as before; who else may still reference
+	   it is not visible here */
+	entry->table = fresh;
+
+	return 1;
+}
+
+/*
+ * Allocate a new L2 table for L1 slot @l1_entry: reserve room for it at
+ * the current end of the allocated area, attach a zeroed in-memory
+ * table, and mark both the slot and the L1 table dirty.
+ *
+ * Returns 1 on success.  Returns 0 when memory cannot be allocated or
+ * the location cannot be reserved; the slot is left untouched then.
+ */
+static int alloc_new_l2_on_disk(struct qcow *qcow, uint64_t l1_entry)
+{
+	struct l1_entry *entry = &qcow->table[l1_entry];
+	uint64_t entries = qcow_num_l2(&qcow->header);
+	uint64_t location = qcow->next_avail_block;
+	qcow_te *l2;
+
+	l2 = calloc(entries, sizeof(qcow_te));
+	if (l2 == NULL) {
+		fprintf(stderr, NAME "failed to allocate L2 table %llu\n",
+			(unsigned long long)l1_entry);
+		return 0;
+	}
+
+	/* Reserve the space before touching the slot.  If the
+	   reservation is refused the allocation pointer has not moved,
+	   and using the location anyway would hand the same space to the
+	   next allocation as well */
+	if (qcow_seen_block(qcow, location,
+			    entries * sizeof(qcow_te)) < 0) {
+		free(l2);
+		return 0;
+	}
+
+	entry->table = l2;
+	entry->loc_on_disk = location;
+	entry->dirty = 1;
+	qcow->l1_dirty = 1;
+
+	printf("New L2 entry %llu @ %llu\n",
+	       (unsigned long long)l1_entry,
+	       (unsigned long long)entry->loc_on_disk);
+
+	/* FIXME: when an L2 table is smaller than a block, the rest of
+	   the block is not handed to other unallocated L2 tables, so
+	   block alignment can drift.  Right now, we ignore the case that
+	   we might've allocated too much space and didn't use it all.
+	   In the normal case of 512-byte blocks, this isn't an issue.
+	   The earlier sketch of that redistribution sat after an
+	   unconditional return and never ran */
+	return 1;
+}
+
+/*
+ * Write out any dirty L2 tables, and the L1 table if needed.
+ *
+ * A dirty flag is only cleared after the corresponding table has been
+ * written successfully, so a failed flush can be retried later.
+ *
+ * Returns 1 on success, 0 on the first failure.
+ */
+int qcow_write_tables(struct qcow *qcow)
+{
+	uint64_t l1_size;
+	uint64_t l2_size;
+	qcow_te *l1_table;
+	uint64_t i;
+	int ret;
+
+	l1_size = qcow_num_l1(&qcow->header);
+	l2_size = qcow_num_l2(&qcow->header);
+
+	for (i = 0; i < l1_size; i++) {
+
+		if (!qcow->table[i].dirty || !qcow->table[i].loc_on_disk)
+			continue;
+
+		if (OPS_DEBUG)
+			fprintf(stderr, "writing L2 %llu @ %llu\n",
+				i, qcow->table[i].loc_on_disk);
+
+		ret = qcow_write_table(qcow->fd,
+				       qcow->table[i].loc_on_disk,
+				       l2_size,
+				       qcow->table[i].table);
+		if (ret != 1) {
+			fprintf(stderr,
+				NAME "failed to write L2 %llu\n",
+				(unsigned long long)i);
+			return 0;
+		}
+
+		qcow->table[i].dirty = 0;
+	}
+
+	if (qcow->l1_dirty) {
+		l1_table = calloc(l1_size, sizeof(qcow_te));
+		if (l1_table == NULL) {
+			fprintf(stderr, NAME "failed to allocate L1\n");
+			return 0;
+		}
+
+		/* The on-disk L1 holds just the location of each L2 */
+		for (i = 0; i < l1_size; i++) {
+			l1_table[i] = qcow->table[i].loc_on_disk;
+			if (l1_table[i])
+				printf("Writing L1 entry %llu: %llu\n",
+				       i, l1_table[i]);
+		}
+
+		ret = qcow_write_table(qcow->fd, qcow->header.l1_table_offset,
+				       l1_size, l1_table);
+
+		free(l1_table);
+
+		if (ret != 1) {
+			fprintf(stderr, NAME "failed to write L1\n");
+			return 0;
+		}
+
+		qcow->l1_dirty = 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Read in the L1 table.  We read L2 tables as needed based on the
+ * information in the l1.
+ *
+ * Every in-memory L1 entry is reset (no L2 loaded, not dirty) and given
+ * the L2 location stored on disk.
+ *
+ * Returns 1 on success, 0 if the temporary buffer cannot be allocated
+ * or the table cannot be read completely.
+ */
+int qcow_read_tables(struct qcow *qcow)
+{
+	uint64_t i;
+	uint64_t numL1;
+	uint64_t bytes;
+	qcow_te *disk_table;
+	int ret = 0;
+
+	numL1 = qcow_num_l1(&qcow->header);
+	bytes = numL1 * sizeof(qcow_te);
+
+	disk_table = calloc(numL1, sizeof(qcow_te));
+	if (disk_table == NULL)
+		return 0;
+
+	if (dio_lseek(qcow->fd, qcow->header.l1_table_offset, SEEK_SET) !=
+	    qcow->header.l1_table_offset) {
+		perror("lseek");
+		goto out;
+	}
+
+	/* Same completeness check as qcow_read_table(): anything other
+	   than the full length is a failure */
+	if (dio_read(qcow->fd, disk_table, bytes) != bytes) {
+		fprintf(stderr, "Failed to read L1 table\n");
+		goto out;
+	}
+
+	for (i = 0; i < numL1; i++) {
+		qcow->table[i].table = NULL;
+		qcow->table[i].dirty = 0;
+		qcow->table[i].loc_on_disk = ntohll(disk_table[i]);
+	}
+
+	if (OPS_DEBUG)
+		qcow_print_header(qcow);
+
+	ret = 1;
+
+ out:
+	free(disk_table);
+	return ret;
+}
+
+
+/*
+ * Map the block of logical to physical.
+ *
+ * If the L2 table covering @logical is not in memory it is loaded from
+ * disk, or newly allocated when the L1 entry has no L2 yet.  The L2
+ * table is marked dirty afterwards.
+ *
+ * Returns 1 on success, 0 if @logical is out of range or the L2 table
+ * could not be obtained.
+ */
+int qcow_make_mapping(struct qcow *qcow, uint64_t logical, uint64_t physical)
+{
+	uint64_t l1_entry;
+	uint64_t l2_entry;
+
+	l1_entry = qcow_get_l1_entry(qcow, logical);
+	l2_entry = qcow_get_l2_entry(qcow, logical);
+
+	if (l1_entry >= qcow_num_l1(&qcow->header)) {
+		fprintf(stderr, "*** ERROR: L1 of %llu >= %llu, max\n",
+			l1_entry,
+			qcow_num_l1(&qcow->header));
+		fprintf(stderr, " Offending new map: %llu -> %llu\n",
+			logical, physical);
+		return 0;
+	}
+
+	if (l2_entry >= qcow_num_l2(&qcow->header)) {
+		fprintf(stderr, "*** ERROR: L2 of %llu >= %llu, max\n",
+			l2_entry,
+			qcow_num_l2(&qcow->header));
+		return 0;
+	}
+
+	if (logical >= LOG_ABOVE) {
+		printf("Making mapping for %llu -> %llu\n"
+		       " l1e: %llu l2e: %llu\n",
+		       logical, physical, l1_entry, l2_entry);
+	}
+
+	if (qcow->table[l1_entry].table == NULL) {
+		if (qcow->table[l1_entry].loc_on_disk == 0) {
+
+			if (!alloc_new_l2_on_disk(qcow, l1_entry)) {
+				fprintf(stderr,
+					"Failed to alloc table for %llu\n",
+					l1_entry);
+				return 0;
+			}
+
+			qcow->l1_dirty = 1;
+		} else {
+			if (!qcow_load_l2(qcow, l1_entry)) {
+				fprintf(stderr,
+					"Failed to load L2 %llu\n",
+					l1_entry);
+				return 0;
+			}
+		}
+
+		/* Never write through a table pointer that is still NULL,
+		   whatever the helpers above reported */
+		if (qcow->table[l1_entry].table == NULL) {
+			fprintf(stderr,
+				"Failed to alloc table for %llu\n",
+				l1_entry);
+			return 0;
+		}
+	}
+
+	qcow->table[l1_entry].table[l2_entry] = physical;
+	qcow->table[l1_entry].dirty = 1;
+
+	return 1;
+}
+
+/*
+ * Map @logical to a freshly chosen position: the next available
+ * position in the qcow file, rounded up to a 512-byte boundary.
+ *
+ * Returns the chosen position, or 0 if the mapping could not be made.
+ * 0 doubles as the error value because it contains the header and
+ * L1, etc.
+ */
+uint64_t qcow_make_new_mapping(struct qcow *qcow, uint64_t logical)
+{
+	uint64_t target = qcow->next_avail_block;
+	uint64_t remainder = target % 512;
+
+	/* Round up to the next 512-byte boundary */
+	/* FIXME: Do we want to leak space like this? */
+	if (remainder != 0)
+		target += 512 - remainder;
+
+	/* Must be recorded before the mapping is made: making the
+	   mapping may itself take space for a new L2 table */
+	qcow_seen_block(qcow, target, qcow_block_size(&qcow->header));
+
+	if (!qcow_make_mapping(qcow, logical, target))
+		return 0;
+
+	return target;
+}
+
+/*
+ * Look up where @logical is stored in the qcow file.
+ *
+ * Returns the mapped location, or 0 if @logical is unmapped, out of
+ * range, or its L2 table could not be loaded.
+ */
+uint64_t qcow_get_mapping(struct qcow *qcow, uint64_t logical)
+{
+	uint64_t l1_entry = qcow_get_l1_entry(qcow, logical);
+	uint64_t l2_entry = qcow_get_l2_entry(qcow, logical);
+	struct l1_entry *slot;
+	uint64_t physical;
+
+	if (l1_entry >= qcow_num_l1(&qcow->header)) {
+		fprintf(stderr, "*** ERROR: L1 of %llu >= %llu, max\n",
+			l1_entry,
+			qcow_num_l1(&qcow->header));
+		fprintf(stderr, " Offending map request: %llu (%llx)\n",
+			logical, logical);
+		fprintf(stderr, " l1_entry: %llu\n", l1_entry);
+		fprintf(stderr, " l2_entry: %llu\n", l2_entry);
+
+		return 0;
+	}
+
+	if (l2_entry >= qcow_num_l2(&qcow->header)) {
+		fprintf(stderr, "*** ERROR: L2 of %llu >= %llu, max\n",
+			l2_entry,
+			qcow_num_l2(&qcow->header));
+		return 0;
+	}
+
+	slot = &qcow->table[l1_entry];
+
+	if (slot->table == NULL) {
+		/* No L2 table at all means nothing is mapped here */
+		if (slot->loc_on_disk == 0)
+			return 0;
+
+		/* The L2 exists on disk but is not in memory yet */
+		if (!qcow_load_l2(qcow, l1_entry))
+			return 0;
+	}
+
+	physical = slot->table[l2_entry];
+
+	if (OPS_DEBUG && 0) {
+		printf("L1 entry: %llu\n", l1_entry);
+		printf("L2 entry: %llu\n", l2_entry);
+		printf("Physical Block: %llu (%llx)\n", physical, physical);
+		printf("\n");
+	}
+
+	return physical;
+}
+
+/*
+ * Report whether a flush is needed: returns 1 if the L1 table or any
+ * L1 entry is marked dirty, 0 otherwise.
+ */
+int qcow_is_anything_dirty(struct qcow *qcow)
+{
+	uint64_t idx = 0;
+
+	if (qcow->l1_dirty)
+		return 1;
+
+	while (idx < qcow_num_l1(&qcow->header)) {
+		if (qcow->table[idx].dirty)
+			return 1;
+		idx++;
+	}
+
+	return 0;
+}
+
+/*
+ * Dump the qcow header fields, followed by the values calculated from
+ * them, to stdout in human-readable form.
+ */
+void qcow_print_header(struct qcow *qcow)
+{
+	struct qcow_header *hdr = &qcow->header;
+
+	/* Fields stored in the header itself */
+	printf("=== QCOW HEADER ===\n");
+	printf("Magic: %x (should be %x)\n", hdr->magic,
+	       QCOW_MAGIC);
+	printf("Version: %x\n", hdr->version);
+	printf("File Offset: %llu\n", hdr->backing_filename_offset);
+	printf("File Size: %u\n", hdr->backing_filename_size);
+	printf("Mod Time: %u\n", hdr->mtime);
+	printf("Size: %llu\n", hdr->size);
+	printf("Cluster Bits: %hhu\n", hdr->cluster_bits);
+	printf("L2 Bits: %hhu\n", hdr->l2_bits);
+	printf("Crypto Method: %x\n", hdr->crypto_method);
+	printf("L1 Table @: %llu\n", hdr->l1_table_offset);
+	printf("Backing File: %s\n", qcow->filename);
+
+	/* Values worked out from the header */
+	printf("\nCalculated Information:\n");
+
+	printf("Next Avail: %llu\n", qcow->next_avail_block);
+	printf("Cluster Mask: %016llx\n", qcow_cmask(hdr));
+	printf("L2 Mask: %016llx\n", qcow_l2mask(hdr));
+	printf("L1 Mask: %016llx\n", qcow_l1mask(hdr));
+	printf("Num L1: %llu\n", qcow_num_l1(hdr));
+	printf("Num L2: %llu\n", qcow_num_l2(hdr));
+}
diff -r a3656acd770b -r be4574d28803 tools/cowd/plugins/qcow/qcow_ops.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/cowd/plugins/qcow/qcow_ops.h Fri Aug 25 15:01:36 2006 -0500
@@ -0,0 +1,15 @@
+#ifndef __QCOW_OPS_H
+#define __QCOW_OPS_H
+
+#include <stdint.h>
+
+#include "qcow.h"
+
+/* Header and backing-file information */
+int qcow_rw_header(int fd, struct qcow_header *header, int write);
+int qcow_read_backing_info(struct qcow *qcow);
+int qcow_read_l1_table(struct qcow *qcow);
+
+/* L1/L2 table I/O: both return 1 on success, 0 on failure */
+int qcow_read_tables(struct qcow *qcow);
+int qcow_write_tables(struct qcow *qcow);
+
+/* Returns 1 if any table needs flushing, 0 otherwise */
+int qcow_is_anything_dirty(struct qcow *qcow);
+
+/*
+ * Logical -> physical mappings.  The two uint64_t-returning functions
+ * must be declared before use: without a prototype the caller would
+ * assume an int return and lose the upper half of the value.
+ */
+uint64_t qcow_get_mapping(struct qcow *qcow, uint64_t logical);
+int qcow_make_mapping(struct qcow *qcow, uint64_t logical, uint64_t physical);
+uint64_t qcow_make_new_mapping(struct qcow *qcow, uint64_t logical);
+
+void qcow_print_header(struct qcow *qcow);
+
+#endif
diff -r a3656acd770b -r be4574d28803 tools/cowd/plugins/qcow/qcow_plugin.c
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/cowd/plugins/qcow/qcow_plugin.c Fri Aug 25 15:01:36 2006 -0500
@@ -0,0 +1,519 @@
+/*
+ * Copyright (C) International Business Machines Corp., 2006
+ * Author: Dan Smith <danms@us.ibm.com>
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser
+ * General Public License. See the file COPYING in the main directory
+ * of this archive for more details.
+ *
+ */
+
+#define _LARGEFILE64_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <stdint.h>
+
+#include <cowd_plugin.h>
+
+#include <libdevmapper.h>
+
+#include "qcow.h"
+#include "qcow_ops.h"
+
+#define MAX_PATH 256
+#define ERR_LEN 256
+
+#define LOOP_SET_FD 0x4C00
+#define LOOP_CLR_FD 0x4C01
+
+char errmsg[ERR_LEN];
+
+/*
+ * Per-device state of the qcow plugin.  qcow_init() allocates one and
+ * stores it in the cow_device's plugin_private pointer.
+ */
+struct qcow_private {
+ /* State of the qcow image itself (fd, header, tables, ...) */
+ struct qcow qcow;
+ /* Set to 1 once qcow_read_metadata() has read the tables */
+ int init;
+
+ /* Paths of the base image and of the qcow file */
+ char base_path[MAX_PATH];
+ char qcow_path[MAX_PATH];
+
+ /* Device nodes (currently loop devices) standing in for the paths */
+ char base_dev[MAX_PATH];
+ char qcow_dev[MAX_PATH];
+
+ /* st_rdev of base_dev and qcow_dev, as obtained with stat() */
+ dev_t base_dev_t;
+ dev_t qcow_dev_t;
+
+ /* Size of the qcow file when the plugin was initialised */
+ unsigned long qcow_size;
+
+ /* Non-zero enables diagnostic output */
+ int debug;
+};
+
+/*
+ * Report the devices this cow device is built on.
+ *
+ * Returns a malloc()ed array holding the base device followed by the
+ * qcow device, and stores the number of entries in @count.  On
+ * allocation failure returns NULL and stores 0 in @count.
+ */
+static dev_t *qcow_get_devs(struct cow_device *dev, int *count)
+{
+	struct qcow_private *prv = dev->plugin_private;
+	dev_t *devs;
+
+	/* Size the array by its own element type, not by *dev */
+	devs = malloc(sizeof(*devs) * 2);
+	if (!devs) {
+		*count = 0;
+		return NULL;
+	}
+
+	devs[0] = prv->base_dev_t;
+	devs[1] = prv->qcow_dev_t;
+
+	*count = 2;
+	return devs;
+}
+
+/*
+ * We use the loop driver to make the base and cow files available to
+ * the kernel.  The problem is, we can't grow the cow file underneath
+ * the loop device.  This function "inflates" the cow file to the
+ * maximum size it could be before we hook it up to the loop driver.
+ * This isn't *too* bad because it's just a sparse file.
+ *
+ * The file is extended by writing a single zero byte at the offset
+ * given by the size field of the qcow header.
+ *
+ * Returns PLUGIN_OK on success, PLUGIN_FAIL otherwise.  The file
+ * descriptor is closed on every path.
+ */
+static int qcow_inflate(struct qcow_private *prv)
+{
+	struct qcow_header header;
+	const char pad = 0;
+	int ret = PLUGIN_FAIL;
+	int fd;
+	uint64_t offset;
+
+	fd = open(prv->qcow_path, O_RDWR | O_LARGEFILE);
+	if (fd < 0) {
+		perror("open");
+		return PLUGIN_FAIL;
+	}
+
+	if (read(fd, &header, sizeof(header)) != sizeof(header)) {
+		perror("read");
+		goto out;
+	}
+
+	header.size = ntohll(header.size);
+
+	offset = lseek64(fd, header.size, SEEK_SET);
+
+	if (prv->debug)
+		printf("## Inflating %s to %llu (%llu)\n",
+		       prv->qcow_path, offset, header.size);
+
+	if (offset != header.size) {
+		fprintf(stderr, "Failed to lseek to %llu\n", header.size);
+		goto out;
+	}
+
+	if (write(fd, &pad, 1) != 1) {
+		perror("write");
+		goto out;
+	}
+
+	ret = PLUGIN_OK;
+
+ out:
+	close(fd);
+	return ret;
+}
+
+/*
+ * This "deflates" the sparse cow file back to the appropriate size,
+ * i.e. truncates it to next_avail_block.
+ *
+ * truncate64() is used so that the 64-bit length is not narrowed where
+ * off_t is 32 bits wide, in line with lseek64()/O_LARGEFILE elsewhere
+ * in this file.  A failure is reported but not otherwise acted upon.
+ */
+static void qcow_deflate(struct qcow_private *prv)
+{
+	printf("### Truncating %s to %llu\n",
+	       prv->qcow_path, prv->qcow.next_avail_block);
+
+	if (truncate64(prv->qcow_path, prv->qcow.next_avail_block) != 0)
+		perror("truncate");
+}
+
+/*
+ * Process the subset of arguments we were given
+ */
+static void qcow_process_args(int argc, char **argv, struct qcow_private *prv)
+{
+ int c, optidx = 0;
+ int i;
+ static struct option lopts[] = {
+ {"qcow", 1, 0, 'q'},
+ {"base", 1, 0, 'b'},
+ {"debug", 0, 0, 'd'},
+ {0, 0, 0, 0 }
+ };
+
+ strcpy(prv->qcow_path, argv[1]);
+ return;
+
+ /* getopt doesn't like the way I've arranged the strings for
+ some reason, so we just use the first arg as the qcow file
+ for now */
+
+ for (i=0; i<argc; i++)
+ fprintf(stderr, "Arg %i: %s\n", i, argv[i]);
+
+ while (1) {
+ fprintf(stderr, "Going... %s\n", argv[0]);
+ c = getopt_long(argc, argv, "db:q:", lopts, &optidx);
+ if (c == -1)
+ break;
+
+ fprintf(stderr, "Got: %c\n", c);
+
+ switch (c) {
+
+ case 'b':
+ strncpy(prv->base_path, optarg, MAX_PATH);
+ break;
+ case 'q':
+ strncpy(prv->qcow_path, optarg, MAX_PATH);
+ break;
+ case 'd':
+ prv->debug = 1;
+ break;
+ };
+
+ }
+}
+
+#if 0
+/*
+ * Attach the file at @path to the first of /dev/loop0 .. /dev/loop7
+ * that accepts it, and copy the name of that loop device into @dev.
+ *
+ * Returns 1 on success, 0 if @path cannot be opened or no loop device
+ * took it.
+ *
+ * NB: This doesn't work at the moment!
+ */
+static int qcow_loop_setup(const char *path, char *dev)
+{
+	char loopdev[256];
+	int file_fd, loop_fd;
+	int n, rc;
+
+	file_fd = open(path, O_RDWR);
+	if (file_fd < 0)
+		return 0;
+
+	for (n = 0; n < 8; n++) {
+		snprintf(loopdev, 256, "/dev/loop%i", n);
+
+		loop_fd = open(loopdev, O_RDWR);
+		if (loop_fd < 0) {
+			fprintf(stderr, "Failed to open %s\n", loopdev);
+			continue;
+		}
+
+		rc = ioctl(loop_fd, LOOP_SET_FD, file_fd);
+		close(loop_fd);
+
+		if (rc != 0) {
+			/* Probably busy; move on to the next one */
+			fprintf(stderr, "ioctl() failed:\n");
+			perror(loopdev);
+			continue;
+		}
+
+		strcpy(dev, loopdev);
+		close(file_fd);
+		return 1;
+	}
+
+	close(file_fd);
+
+	printf("No free loops for file: %s\n", path);
+
+	return 0;
+}
+
+/*
+ * Detach @dev from its backing file
+ *
+ * Returns 1 if the loop device was cleared, 0 if it could not be
+ * opened or the ioctl failed.  The descriptor opened on @dev is always
+ * closed again.
+ *
+ * NB: This isn't used at the moment!
+ */
+static int qcow_loop_destroy(const char *dev)
+{
+	int lfd;
+	int ret;
+
+	lfd = open(dev, O_RDWR);
+	if (lfd < 0)
+		return 0;
+
+	ret = ioctl(lfd, LOOP_CLR_FD, 0);
+
+	close(lfd);
+
+	return (ret == 0) ? 1 : 0;
+}
+#endif
+
+/*
+ * Call the appropriate qcow functions to initialize all our metadata
+ * and accounting information.
+ *
+ * On the first call this also derives dev->block_size from the
+ * header's cluster_bits and dev->blocks from the size of the base
+ * device.  The L1 table is (re)read on every call.
+ *
+ * Returns PLUGIN_OK on success, PLUGIN_FAIL otherwise.  @force_init is
+ * not supported and always fails.
+ */
+static int qcow_read_metadata(struct cow_device *dev, int force_init)
+{
+	struct qcow_private *prv = dev->plugin_private;
+	int ret;
+
+	if (force_init) {
+		/* At some point, we want to be able to format a
+		   qcow file ourselves */
+		fprintf(stderr, "The QCOW plugin doesn't support init yet\n");
+		return PLUGIN_FAIL;
+	}
+
+	if (! prv->init) {
+		if (prv->debug)
+			printf("Init qcow from header\n");
+		ret = qcow_init_from_header(&prv->qcow);
+		if (! ret)
+			return PLUGIN_FAIL;
+
+		/* Below 9 bits the block is smaller than a 512-byte
+		   sector and the division further down would be by
+		   zero; above 30 the shift no longer fits in an int */
+		if (prv->qcow.header.cluster_bits < 9 ||
+		    prv->qcow.header.cluster_bits > 30) {
+			snprintf(errmsg, ERR_LEN,
+				 "Unsupported cluster_bits in qcow header: %u\n",
+				 (unsigned int)prv->qcow.header.cluster_bits);
+			return PLUGIN_FAIL;
+		}
+
+		dev->block_size = 1 << prv->qcow.header.cluster_bits;
+		if (prv->debug)
+			fprintf(stderr, "Block size: %u\n", dev->block_size);
+
+		dev->blocks = get_device_blocks(prv->base_dev) /
+			(dev->block_size / 512);
+
+		if (prv->debug)
+			fprintf(stderr, "Blocks: %lu\n", dev->blocks);
+	}
+
+	if (prv->debug)
+		printf("Reading tables\n");
+
+	ret = qcow_read_tables(&prv->qcow);
+	if (! ret)
+		return PLUGIN_FAIL;
+
+	prv->init = 1;
+
+	return PLUGIN_OK;
+}
+
+/*
+ * Plugin entry point: set up the private state for @dev.
+ *
+ * Takes the qcow file from the plugin arguments, opens it, reads its
+ * header and backing-file information, makes the base and qcow files
+ * available as devices, and finally reads the metadata.
+ *
+ * Returns PLUGIN_OK on success.  On failure returns PLUGIN_FAIL, where
+ * possible with a description in errmsg; anything allocated or opened
+ * before dev->plugin_private was set is released again.
+ */
+static int qcow_init(struct cow_device *dev, int debug)
+{
+	struct qcow_private *prv;
+	struct stat s;
+
+	if (debug)
+		printf(NAME "init\n");
+
+	memset(errmsg, 0, ERR_LEN);
+
+	prv = malloc(sizeof(*prv));
+	if (prv == NULL) {
+		snprintf(errmsg, ERR_LEN, "Out of memory\n");
+		return PLUGIN_FAIL;
+	}
+
+	prv->qcow.fd = -1;
+	prv->init = 0;
+	prv->debug = debug;
+	prv->base_path[0] = '\0';
+	prv->qcow_path[0] = '\0';
+	prv->base_dev[0] = '\0';
+	prv->qcow_dev[0] = '\0';
+	qcow_init_qcow(&prv->qcow);
+
+	qcow_process_args(dev->plugin_num_args, dev->plugin_args, prv);
+
+	if (prv->qcow_path[0] == '\0') {
+		snprintf(errmsg, ERR_LEN, "Path to qcow file is required!\n");
+		goto fail;
+	}
+
+	prv->qcow.next_avail_block = get_file_size(prv->qcow_path);
+	prv->qcow_size = get_file_size(prv->qcow_path);
+
+	/* Dirty hack to get around the fact that loop-backed files
+	   can't grow.  Best effort: the result is deliberately not
+	   checked */
+	qcow_inflate(prv);
+
+	if (prv->debug)
+		printf("Opening %s\n", prv->qcow_path);
+
+	prv->qcow.fd = dio_open(prv->qcow_path, O_RDWR);
+	if (prv->qcow.fd < 0) {
+		/* NOTE(review): strerror() is handed dio_open()'s return
+		   value rather than errno -- confirm how dio_open()
+		   reports errors */
+		snprintf(errmsg, ERR_LEN,
+			 "Failed to open %s with O_DIRECT: %s\n",
+			 prv->qcow_path, strerror(prv->qcow.fd));
+		goto fail;
+	}
+
+	if (prv->debug)
+		printf("### Preset next_avail_block to %llu\n",
+		       prv->qcow.next_avail_block);
+
+	/* Init the qcow header information */
+	qcow_rw_header(prv->qcow.fd, &prv->qcow.header, 0);
+	qcow_read_backing_info(&prv->qcow);
+	qcow_print_header(&prv->qcow);
+
+	/* Get the files/devices setup for device-mapper */
+	if (prv->base_path[0] == '\0')
+		snprintf(prv->base_path, MAX_PATH, "%s", prv->qcow.filename);
+
+	/* FIXME: Need real loop support!! */
+
+	/* Regular files go through a loop device; anything else is
+	   used directly as the device */
+	if (is_file(prv->base_path)) {
+		snprintf(prv->base_dev, MAX_PATH, "/dev/loop0");
+		loop_destroy(prv->base_dev);
+		loop_setup(prv->base_dev, prv->base_path);
+	} else {
+		snprintf(prv->base_dev, MAX_PATH, "%s", prv->base_path);
+	}
+
+	if (is_file(prv->qcow_path)) {
+		snprintf(prv->qcow_dev, MAX_PATH, "/dev/loop1");
+		loop_destroy(prv->qcow_dev);
+		loop_setup(prv->qcow_dev, prv->qcow_path);
+	} else {
+		snprintf(prv->qcow_dev, MAX_PATH, "%s", prv->qcow_path);
+	}
+
+	if (stat(prv->base_dev, &s) != 0) {
+		snprintf(errmsg, ERR_LEN,
+			 "Failed to stat base device %s\n", prv->base_dev);
+		goto fail;
+	}
+	prv->base_dev_t = s.st_rdev;
+
+	if (stat(prv->qcow_dev, &s) != 0) {
+		snprintf(errmsg, ERR_LEN,
+			 "Failed to stat cow device %s\n", prv->qcow_dev);
+		goto fail;
+	}
+	prv->qcow_dev_t = s.st_rdev;
+
+	if (prv->debug)
+		printf("Base Device: %s Cow Device: %s\n",
+		       prv->base_dev, prv->qcow_dev);
+
+	dev->plugin_private = prv;
+
+	return qcow_read_metadata(dev, 0);
+
+ fail:
+	if (prv->qcow.fd >= 0)
+		close(prv->qcow.fd);
+	free(prv);
+	return PLUGIN_FAIL;
+}
+
+/*
+ * Plugin teardown: close the qcow file, destroy the base and qcow
+ * loop devices, then truncate the qcow file back down with
+ * qcow_deflate().  Nothing is freed here yet.
+ */
+static void qcow_cleanup(struct cow_device *dev)
+{
+ /* FIXME: Do something more useful here */
+ struct qcow_private *prv = dev->plugin_private;
+
+ close(prv->qcow.fd);
+
+ /* FIXME: Need to check if these are actually loops */
+ loop_destroy(prv->base_dev);
+ loop_destroy(prv->qcow_dev);
+
+ /* Done after the loop devices are destroyed; qcow_inflate() had
+ grown the file because it can't grow underneath the loop device */
+ qcow_deflate(prv);
+
+ /* Free some stuff */
+}
+
+
+/*
+ * Flush the qcow tables through qcow_write_tables() and translate its
+ * result into PLUGIN_OK / PLUGIN_FAIL.
+ */
+static int qcow_write_metadata(struct cow_device *dev)
+{
+	struct qcow_private *priv = dev->plugin_private;
+
+	if (priv->debug)
+		printf("Writing metadata!\n");
+
+	return qcow_write_tables(&priv->qcow) ? PLUGIN_OK : PLUGIN_FAIL;
+}
+
+/*
+ * Fill in @map with the destination for the block it describes.
+ *
+ *  - Already mapped in the qcow file: directed to the cow device at
+ *    the mapped location, whatever the access type.
+ *  - Unmapped, write access: a new mapping is made in the qcow file;
+ *    the cow device is the destination and the base device is set as
+ *    the copy source.
+ *  - Unmapped, read access: directed to the same block of the base
+ *    device.
+ *
+ * Returns PLUGIN_OK on success.  Returns PLUGIN_FAIL, with errmsg set,
+ * if a new mapping cannot be made or the mapped location lies beyond
+ * the size recorded in the qcow header.
+ */
+static int qcow_map_block(struct cow_device *dev,
+			  struct dmu_map_data *map)
+{
+
+	struct qcow_private *prv = dev->plugin_private;
+	uint64_t tmp;
+	uint64_t org, org_block;
+
+	/*
+	 * FIXME: This is super ugly
+	 */
+
+	/* Convert to start byte position of cluster */
+	org_block = dmu_map_get_block(map);
+	org = org_block << prv->qcow.header.cluster_bits;
+
+	if (prv->debug && 0)
+		fprintf(stderr, "Looking for existing map for %llu\n",
+			org_block);
+
+	tmp = qcow_get_mapping(&prv->qcow, org);
+	if ((tmp == 0) && dmu_map_is_write(map)) {
+		/* NEW mapping for WRITE access:
+		   Remap to somewhere in the cow device */
+		if (prv->debug && 0)
+			fprintf(stderr, "Not found, mapping...\n");
+
+		tmp = qcow_make_new_mapping(&prv->qcow, (uint64_t)org);
+		if (tmp == 0) {
+			/* 0 means failure; going on would direct the
+			   write at the very start of the cow device */
+			snprintf(errmsg, ERR_LEN,
+				 "Failed to make a new mapping for %llu\n",
+				 org);
+			return PLUGIN_FAIL;
+		}
+
+		dmu_map_set_block(map, qcow_align_to_block(&prv->qcow, tmp));
+		dmu_map_set_offset(map, qcow_calc_offset(&prv->qcow, tmp));
+
+		dmu_map_set_copy_src_dev(map, prv->base_dev_t);
+		dmu_map_set_dest_dev(map, prv->qcow_dev_t);
+
+	} else if (tmp == 0) {
+		/* NEW mapping for READ access:
+		   Remap to the same place in the base device */
+		dmu_map_set_block(map, org_block);
+
+		dmu_map_set_dest_dev(map, prv->base_dev_t);
+
+	} else {
+		/* OLD mapping (access doesn't matter):
+		   Remap to the correct location in the cow device */
+		dmu_map_set_block(map,
+				  qcow_align_to_block(&prv->qcow, tmp));
+		dmu_map_set_offset(map,
+				   qcow_calc_offset(&prv->qcow, tmp));
+		if (prv->debug)
+			fprintf(stderr,
+				"Found existing map for %llu: %llx (%llu)\n",
+				org, tmp, tmp);
+
+		dmu_map_set_dest_dev(map, prv->qcow_dev_t);
+	}
+
+	if (tmp > prv->qcow.header.size) {
+		snprintf(errmsg, ERR_LEN,
+			 "Tried to map a block beyond end of device: "
+			 "%llu > %llu\n",
+			 tmp, prv->qcow.header.size);
+		return PLUGIN_FAIL;
+	}
+
+	return PLUGIN_OK;
+}
+
+/* Tell the caller whether any qcow table is waiting to be written */
+static bool qcow_need_flush(struct cow_device *dev)
+{
+	struct qcow_private *priv = dev->plugin_private;
+	int dirty = qcow_is_anything_dirty(&priv->qcow);
+
+	return dirty;
+}
+
+/*
+ * Fill in the plugin descriptor @p with this plugin's callbacks and
+ * error buffer.  Always returns 1.
+ */
+int load_plugin(struct cowd_plugin *p)
+{
+	/* Lifetime */
+	p->init_plugin = qcow_init;
+	p->cleanup_plugin = qcow_cleanup;
+
+	/* Mapping and metadata */
+	p->map_prepare = qcow_map_block;
+	p->need_flush = qcow_need_flush;
+	p->write_metadata = qcow_write_metadata;
+
+	/* Information for the caller */
+	p->get_devs = qcow_get_devs;
+	p->errmsg = errmsg;
+
+	return 1;
+}
^ permalink raw reply [flat|nested] 7+ messages in thread* [PATCH 6 of 6] dm-userspace backend script and xmexample change
[not found] <patchbomb.1156540578@venkman-64>
` (4 preceding siblings ...)
2006-08-25 21:24 ` [PATCH 5 of 6] qcow " Ryan Grimm
@ 2006-08-25 21:24 ` Ryan Grimm
5 siblings, 0 replies; 7+ messages in thread
From: Ryan Grimm @ 2006-08-25 21:24 UTC (permalink / raw)
To: Xen Devel; +Cc: Dan Smith
Signed-off-by: Ryan Grimm <grimm@us.ibm.com>
Signed-off-by: Dan Smith <danms@us.ibm.com>
# HG changeset patch
# User Ryan Grimm <grimm@us.ibm.com>
# Date 1156536098 18000
# Node ID 55e3795e5bc46ea6a205e60ebf27ed9faf306616
# Parent be4574d288030b64d4623dd33505d0990185a6b9
dm-userspace backend script and xmexample change
diff -r be4574d28803 -r 55e3795e5bc4 tools/examples/Makefile
--- a/tools/examples/Makefile Fri Aug 25 15:01:36 2006 -0500
+++ b/tools/examples/Makefile Fri Aug 25 15:01:38 2006 -0500
@@ -32,6 +32,7 @@ XEN_SCRIPTS += vtpm vtpm-delete
XEN_SCRIPTS += vtpm vtpm-delete
XEN_SCRIPTS += xen-hotplug-cleanup
XEN_SCRIPTS += external-device-migrate
+XEN_SCRIPTS += block-dmu
XEN_SCRIPT_DATA = xen-script-common.sh locking.sh logging.sh
XEN_SCRIPT_DATA += xen-hotplug-common.sh xen-network-common.sh vif-common.sh
XEN_SCRIPT_DATA += block-common.sh vtpm-common.sh vtpm-hotplug-common.sh
diff -r be4574d28803 -r 55e3795e5bc4 tools/examples/README
--- a/tools/examples/README Fri Aug 25 15:01:36 2006 -0500
+++ b/tools/examples/README Fri Aug 25 15:01:38 2006 -0500
@@ -13,6 +13,7 @@ block-common.sh - sourced by block,
block-common.sh - sourced by block, block-*
block-enbd - binds/unbinds network block devices
block-nbd - binds/unbinds network block devices
+block-dmu - binds/unbinds dm-userspace devices
external-device-migrate - called by xend for migrating external devices
locking.sh - locking functions to prevent concurrent access to
critical sections inside script files
diff -r be4574d28803 -r 55e3795e5bc4 tools/examples/xmexample1
--- a/tools/examples/xmexample1 Fri Aug 25 15:01:36 2006 -0500
+++ b/tools/examples/xmexample1 Fri Aug 25 15:01:38 2006 -0500
@@ -64,6 +64,13 @@ vif = [ '' ]
# and MODE is r for read-only, w for read-write.
disk = [ 'phy:hda1,hda1,w' ]
+
+#----------------------------------------------------------------------------
+# Using a dm-userspace backed device
+# for dmu, the syntax is dmu:<plugin>:<cow file>:<base file>
+# if <cow file> does not exist, it is created and <base file> is used
+# as a base. if <cow file> does exist, <base file> is ignored.
+# disk = [ 'dmu:dscow:/path/to/domain.dscow:/path/to/domain.img,hda1,w']
#----------------------------------------------------------------------------
# Define to which TPM instance the user domain should communicate.
diff -r be4574d28803 -r 55e3795e5bc4 tools/examples/block-dmu
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/examples/block-dmu Fri Aug 25 15:01:38 2006 -0500
@@ -0,0 +1,57 @@
+#!/bin/sh
+#
+# Copyright (C) International Business Machines Corp., 2006
+# Author: Ryan Grimm <grimm@us.ibm.com>
+
+dir=$(dirname "$0")
+. "$dir/block-common.sh"
+
+p=$(xenstore_read "$XENBUS_PATH/params")
+mode=$(xenstore_read "$XENBUS_PATH/mode")
+
+# Block until no cowd process with "$1" on its command line is left.
+# "$1" is matched as a fixed string, not as a regular expression.
+# Returns immediately when "$1" is empty, since an empty pattern would
+# match every cowd process.
+wait_for_cowd_exit() {
+    [ -n "$1" ] || return 0
+    while ps ax | grep -v grep | grep cowd | grep -q -F -- "$1"; do
+        sleep 1
+    done
+}
+
+case "$command" in
+ add)
+ plugin=$(echo $p | cut -d: -f1)
+ dmu_file=$(echo $p | cut -d: -f2)
+ base_file=$(echo $p | cut -d: -f3)
+
+ domain_name=$(xenstore_read "$XENBUS_PATH/domain")
+ domain_dev=$(xenstore_read "$XENBUS_PATH/dev")
+ target="$domain_name""_""$domain_dev"
+ md_dev="/dev/mapper/$target"
+
+ lsmod | grep -q dm_user || modprobe dm-user || fatal \
+ 'cannot load module dm-user'
+
+ wait_for_cowd_exit $target
+
+ if [ ! -e $dmu_file ]
+ then
+ dscow_tool -c $dmu_file $base_file || fatal \
+ 'creation of $dmu_file failed'
+ fi
+
+ cowd --sync --pidfile=/var/run/cowd.$target.pid -p $plugin \
+ $target $dmu_file || fatal 'cowd failed'
+
+ xenstore_write "$XENBUS_PATH/node" "$target"
+
+ claim_lock "block"
+ write_dev $md_dev
+ release_lock "block"
+ exit 0
+ ;;
+
+ remove)
+ node=$(xenstore_read "$XENBUS_PATH/node")
+ cowd_pid=$(cat "/var/run/cowd.$node.pid")
+ kill $cowd_pid
+ exit 0
+ ;;
+esac
^ permalink raw reply [flat|nested] 7+ messages in thread