* [PATCH 0/2 version 4] osdblk: a Linux block device for OSD objects
@ 2009-05-21 14:06 Boaz Harrosh
2009-05-21 14:09 ` [PATCH 1/2 " Boaz Harrosh
` (2 more replies)
0 siblings, 3 replies; 9+ messages in thread
From: Boaz Harrosh @ 2009-05-21 14:06 UTC (permalink / raw)
To: Jeff Garzik, linux-scsi, open-osd mailing-list; +Cc: Jens Axboe
I'm posting for review a new version of the osdblk driver. What's new?
* Once block/for-2.6.31 and all pending osd patches hit mainline, this new version
will be ready for submission.
- The relevant osd patches have been posted on the mailing list, but I'll send an orderly
set for scsi-misc and scsi-post-merge on Sunday.
- All the prerequisite block patches are already in Jens's tree.
* Below is the diff from Jeff's last version of the patch. These things have changed:
{SQUASHME: osdblk} Block and OSD Api fixups and bug fixes
- Block API changes from Tejun's revamps
- OSD Api changes for supporting bio-chaining
- do_flush requests do not need bio cloning
(and might not have any bio at all, so skip cloning to prevent a crash)
- osdblk_make_credential is here to stay
- Use bio_kmalloc and avoid the bio_alloc dead/live locks.
TODO: Split request into smaller chunks if allocations fail.
- Only use __GFP_WAIT on first bio allocation. (Not relevant since
__GFP_WAIT is not used)
* Added an extra patch:
- [PATCH 2/2] osdblk: Adjust queue limits to lower device's limits
This is on top of the post-merge tree. Jeff, will you push this driver
through your tree?
What is left is to bang some serious testing on this driver. I'll do
that next.
Thanks
Boaz
---
drivers/block/osdblk.c | 48 +++++++++++++++++++++---------------------------
1 files changed, 21 insertions(+), 27 deletions(-)
---
diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c
index c7a1bb7..531d234 100644
--- a/drivers/block/osdblk.c
+++ b/drivers/block/osdblk.c
@@ -118,13 +118,13 @@ static struct block_device_operations osdblk_bd_ops = {
static const struct osd_attr g_attr_logical_length = ATTR_DEF(
OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8);
-/* copied from exofs; move to libosd? */
-static void osd_make_credential(u8 cred_a[OSD_CAP_LEN],
- const struct osd_obj_id *obj)
+static void osdblk_make_credential(u8 cred_a[OSD_CAP_LEN],
+ const struct osd_obj_id *obj)
{
osd_sec_init_nosec_doall_caps(cred_a, obj, false, true);
}
+/* copied from exofs; move to libosd? */
/*
* Perform a synchronous OSD operation. copied from exofs; move to libosd?
*/
@@ -216,17 +216,6 @@ out:
}
-static void osdblk_end_request(struct osdblk_device *osdev,
- struct osdblk_request *orq,
- int error)
-{
- struct request *rq = orq->rq;
- int rc;
-
- /* complete request, at block layer */
- rc = __blk_end_request(rq, error, blk_rq_bytes(rq));
-}
-
static void osdblk_osd_complete(struct osd_request *or, void *private)
{
struct osdblk_request *orq = private;
@@ -240,7 +229,7 @@ static void osdblk_osd_complete(struct osd_request *or, void *private)
osd_end_request(or);
/* complete request passed to osdblk by block layer */
- osdblk_end_request(orq->osdev, orq, ret);
+ __blk_end_request_all(orq->rq, ret);
}
static void bio_chain_put(struct bio *chain)
@@ -260,10 +249,12 @@ static struct bio *bio_chain_clone(struct bio *old_chain, gfp_t gfpmask)
struct bio *tmp, *new_chain = NULL, *tail = NULL;
while (old_chain) {
- tmp = bio_clone(old_chain, gfpmask);
+ tmp = bio_kmalloc(gfpmask, old_chain->bi_vcnt);
if (!tmp)
goto err_out;
+ __bio_clone(tmp, old_chain);
+ gfpmask &= ~__GFP_WAIT;
tmp->bi_next = NULL;
if (!new_chain)
new_chain = tail = tmp;
@@ -293,13 +284,13 @@ static void osdblk_rq_fn(struct request_queue *q)
while (1) {
/* peek at request from block layer */
- rq = elv_next_request(q);
+ rq = blk_fetch_request(q);
if (!rq)
break;
/* filter out block requests we don't understand */
if (!blk_fs_request(rq) && !blk_barrier_rq(rq)) {
- end_request(rq, 0);
+ blk_end_request_all(rq, 0);
continue;
}
@@ -313,10 +304,13 @@ static void osdblk_rq_fn(struct request_queue *q)
do_flush = (rq->special == (void *) 0xdeadbeefUL);
do_write = (rq_data_dir(rq) == WRITE);
- /* a bio clone to be passed down to OSD request */
- bio = bio_chain_clone(rq->bio, GFP_ATOMIC);
- if (!bio)
- break;
+ if (!do_flush) { /* osd_flush does not use a bio */
+ /* a bio clone to be passed down to OSD request */
+ bio = bio_chain_clone(rq->bio, GFP_ATOMIC);
+ if (!bio)
+ break;
+ } else
+ bio = NULL;
/* alloc internal OSD request, for OSD command execution */
or = osd_start_request(osdev->osd, GFP_ATOMIC);
@@ -335,11 +329,11 @@ static void osdblk_rq_fn(struct request_queue *q)
osd_req_flush_object(or, &osdev->obj,
OSD_CDB_FLUSH_ALL, 0, 0);
else if (do_write)
- osd_req_write(or, &osdev->obj, bio,
- rq->sector * 512ULL);
+ osd_req_write(or, &osdev->obj, blk_rq_pos(rq) * 512ULL,
+ bio, blk_rq_bytes(rq));
else
- osd_req_read(or, &osdev->obj, bio,
- rq->sector * 512ULL);
+ osd_req_read(or, &osdev->obj, blk_rq_pos(rq) * 512ULL,
+ bio, blk_rq_bytes(rq));
/* begin OSD command execution */
if (osd_async_op(or, osdblk_osd_complete, orq,
@@ -527,7 +521,7 @@ static ssize_t class_osdblk_add(struct class *c, const char *buf, size_t count)
}
/* build OSD credential */
- osd_make_credential(osdev->obj_cred, &osdev->obj);
+ osdblk_make_credential(osdev->obj_cred, &osdev->obj);
/* register our block device */
irc = register_blkdev(0, osdev->name);
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH 1/2 version 4] osdblk: a Linux block device for OSD objects 2009-05-21 14:06 [PATCH 0/2 version 4] osdblk: a Linux block device for OSD objects Boaz Harrosh @ 2009-05-21 14:09 ` Boaz Harrosh 2009-05-21 14:11 ` [PATCH 2/2] osdblk: Adjust queue limits to lower device's limits Boaz Harrosh 2009-05-21 22:41 ` [PATCH 0/2 version 4] osdblk: a Linux block device for OSD objects Jeff Garzik 2 siblings, 0 replies; 9+ messages in thread From: Boaz Harrosh @ 2009-05-21 14:09 UTC (permalink / raw) To: Jeff Garzik, linux-scsi, open-osd mailing-list; +Cc: Jens Axboe From: Jeff Garzik <jeff@garzik.org> TODO: Commit log NOT-Signed-off-by: Jeff Garzik <jgarzik@redhat.com> WILL-Signed-off-by: Boaz Harrosh <bharrosh@panasas.com> --- drivers/block/Kconfig | 16 ++ drivers/block/Makefile | 1 + drivers/block/osdblk.c | 663 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 680 insertions(+), 0 deletions(-) create mode 100644 drivers/block/osdblk.c diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index f42fa50..15d1b94 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -298,6 +298,22 @@ config BLK_DEV_NBD If unsure, say N. +config BLK_DEV_OSD + tristate "OSD object-as-blkdev support" + depends on SCSI_OSD_INITIATOR + ---help--- + Saying Y or M here will allow the exporting of a single SCSI + OSD (object-based storage) object as a Linux block device. + + For example, if you create a 2G object on an OSD device, + you can then use this module to present that 2G object as + a Linux block device. + + To compile this driver as a module, choose M here: the + module will be called osdblk. + + If unsure, say N. 
+ config BLK_DEV_SX8 tristate "Promise SATA SX8 support" depends on PCI diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 7755a5e..cdaa3f8 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -23,6 +23,7 @@ obj-$(CONFIG_XILINX_SYSACE) += xsysace.o obj-$(CONFIG_CDROM_PKTCDVD) += pktcdvd.o obj-$(CONFIG_MG_DISK) += mg_disk.o obj-$(CONFIG_SUNVDC) += sunvdc.o +obj-$(CONFIG_BLK_DEV_OSD) += osdblk.o obj-$(CONFIG_BLK_DEV_UMEM) += umem.o obj-$(CONFIG_BLK_DEV_NBD) += nbd.o diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c new file mode 100644 index 0000000..531d234 --- /dev/null +++ b/drivers/block/osdblk.c @@ -0,0 +1,663 @@ + +/* + osdblk.c -- Export a single SCSI OSD object as a Linux block device + + + Copyright 2009 Red Hat, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; see the file COPYING. If not, write to + the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + + + Instructions for use + -------------------- + + 1) Map a Linux block device to an existing OSD object. + + In this example, we will use partition id 1234, object id 5678, + OSD device /dev/osd1. + + $ echo "1234 5678 /dev/osd1" > /sys/class/osdblk/add + + + 2) List all active blkdev<->object mappings. + + In this example, we have performed step #1 twice, creating two blkdevs, + mapped to two separate OSD objects. 
+ + $ cat /sys/class/osdblk/list + 0 174 1234 5678 /dev/osd1 + 1 179 1994 897123 /dev/osd0 + + The columns, in order, are: + - blkdev unique id + - blkdev assigned major + - OSD object partition id + - OSD object id + - OSD device + + + 3) Remove an active blkdev<->object mapping. + + In this example, we remove the mapping with blkdev unique id 1. + + $ echo 1 > /sys/class/osdblk/remove + + + NOTE: The actual creation and deletion of OSD objects is outside the scope + of this driver. + + */ + +#include <linux/kernel.h> +#include <linux/device.h> +#include <linux/module.h> +#include <linux/fs.h> +#include <scsi/osd_initiator.h> +#include <scsi/osd_attributes.h> +#include <scsi/osd_sec.h> + +#define DRV_NAME "osdblk" +#define PFX DRV_NAME ": " + +struct osdblk_device; + +enum { + OSDBLK_MINORS_PER_MAJOR = 256, /* max minors per blkdev */ + OSDBLK_MAX_REQ = 32, /* max parallel requests */ + OSDBLK_OP_TIMEOUT = 4 * 60, /* sync OSD req timeout */ +}; + +struct osdblk_request { + struct request *rq; /* blk layer request */ + struct bio *bio; /* cloned bio */ + struct osdblk_device *osdev; /* associated blkdev */ +}; + +struct osdblk_device { + int id; /* blkdev unique id */ + + int major; /* blkdev assigned major */ + struct gendisk *disk; /* blkdev's gendisk and rq */ + struct request_queue *q; + + struct osd_dev *osd; /* associated OSD */ + + char name[32]; /* blkdev name, e.g. 
osdblk34 */ + + spinlock_t lock; /* queue lock */ + + struct osd_obj_id obj; /* OSD partition, obj id */ + uint8_t obj_cred[OSD_CAP_LEN]; /* OSD cred */ + + struct osdblk_request req[OSDBLK_MAX_REQ]; /* request table */ + + struct list_head node; + + char osd_path[0]; /* OSD device path */ +}; + +static struct class *class_osdblk; /* /sys/class/osdblk */ +static struct mutex ctl_mutex; /* Serialize open/close/setup/teardown */ +static LIST_HEAD(osdblkdev_list); + +static struct block_device_operations osdblk_bd_ops = { + .owner = THIS_MODULE, +}; + +static const struct osd_attr g_attr_logical_length = ATTR_DEF( + OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8); + +static void osdblk_make_credential(u8 cred_a[OSD_CAP_LEN], + const struct osd_obj_id *obj) +{ + osd_sec_init_nosec_doall_caps(cred_a, obj, false, true); +} + +/* copied from exofs; move to libosd? */ +/* + * Perform a synchronous OSD operation. copied from exofs; move to libosd? + */ +static int osd_sync_op(struct osd_request *or, int timeout, uint8_t *credential) +{ + int ret; + + or->timeout = timeout; + ret = osd_finalize_request(or, 0, credential, NULL); + if (ret) + return ret; + + ret = osd_execute_request(or); + + /* osd_req_decode_sense(or, ret); */ + return ret; +} + +/* + * Perform an asynchronous OSD operation. copied from exofs; move to libosd? + */ +static int osd_async_op(struct osd_request *or, osd_req_done_fn *async_done, + void *caller_context, u8 *cred) +{ + int ret; + + ret = osd_finalize_request(or, 0, cred, NULL); + if (ret) + return ret; + + ret = osd_execute_request_async(or, async_done, caller_context); + + return ret; +} + +/* copied from exofs; move to libosd? 
*/ +static int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr) +{ + struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */ + void *iter = NULL; + int nelem; + + do { + nelem = 1; + osd_req_decode_get_attr_list(or, &cur_attr, &nelem, &iter); + if ((cur_attr.attr_page == attr->attr_page) && + (cur_attr.attr_id == attr->attr_id)) { + attr->len = cur_attr.len; + attr->val_ptr = cur_attr.val_ptr; + return 0; + } + } while (iter); + + return -EIO; +} + +static int osdblk_get_obj_size(struct osdblk_device *osdev, u64 *size_out) +{ + struct osd_request *or; + struct osd_attr attr; + int ret; + + /* start request */ + or = osd_start_request(osdev->osd, GFP_KERNEL); + if (!or) + return -ENOMEM; + + /* create a get-attributes(length) request */ + osd_req_get_attributes(or, &osdev->obj); + + osd_req_add_get_attr_list(or, &g_attr_logical_length, 1); + + /* execute op synchronously */ + ret = osd_sync_op(or, OSDBLK_OP_TIMEOUT, osdev->obj_cred); + if (ret) + goto out; + + /* extract length from returned attribute info */ + attr = g_attr_logical_length; + ret = extract_attr_from_req(or, &attr); + if (ret) + goto out; + + *size_out = get_unaligned_be64(attr.val_ptr); + +out: + osd_end_request(or); + return ret; + +} + +static void osdblk_osd_complete(struct osd_request *or, void *private) +{ + struct osdblk_request *orq = private; + struct osd_sense_info osi; + int ret = osd_req_decode_sense(or, &osi); + + if (ret) + ret = -EIO; + + /* complete OSD request */ + osd_end_request(or); + + /* complete request passed to osdblk by block layer */ + __blk_end_request_all(orq->rq, ret); +} + +static void bio_chain_put(struct bio *chain) +{ + struct bio *tmp; + + while (chain) { + tmp = chain; + chain = chain->bi_next; + + bio_put(tmp); + } +} + +static struct bio *bio_chain_clone(struct bio *old_chain, gfp_t gfpmask) +{ + struct bio *tmp, *new_chain = NULL, *tail = NULL; + + while (old_chain) { + tmp = bio_kmalloc(gfpmask, old_chain->bi_vcnt); + if (!tmp) + 
goto err_out; + + __bio_clone(tmp, old_chain); + gfpmask &= ~__GFP_WAIT; + tmp->bi_next = NULL; + if (!new_chain) + new_chain = tail = tmp; + else { + tail->bi_next = tmp; + tail = tmp; + } + + old_chain = old_chain->bi_next; + } + + return new_chain; + +err_out: + bio_chain_put(new_chain); + return NULL; +} + +static void osdblk_rq_fn(struct request_queue *q) +{ + struct osdblk_device *osdev = q->queuedata; + struct request *rq; + struct osdblk_request *orq; + struct osd_request *or; + struct bio *bio; + int do_write, do_flush; + + while (1) { + /* peek at request from block layer */ + rq = blk_fetch_request(q); + if (!rq) + break; + + /* filter out block requests we don't understand */ + if (!blk_fs_request(rq) && !blk_barrier_rq(rq)) { + blk_end_request_all(rq, 0); + continue; + } + + /* deduce our operation (read, write, flush) */ + /* I wish the block layer simplified cmd_type/cmd_flags/cmd[] + * into a clearly defined set of RPC commands: + * read, write, flush, scsi command, power mgmt req, + * driver-specific, etc. 
+ */ + + do_flush = (rq->special == (void *) 0xdeadbeefUL); + do_write = (rq_data_dir(rq) == WRITE); + + if (!do_flush) { /* osd_flush does not use a bio */ + /* a bio clone to be passed down to OSD request */ + bio = bio_chain_clone(rq->bio, GFP_ATOMIC); + if (!bio) + break; + } else + bio = NULL; + + /* alloc internal OSD request, for OSD command execution */ + or = osd_start_request(osdev->osd, GFP_ATOMIC); + if (!or) { + bio_chain_put(bio); + break; + } + + orq = &osdev->req[rq->tag]; + orq->rq = rq; + orq->bio = bio; + orq->osdev = osdev; + + /* init OSD command: flush, write or read */ + if (do_flush) + osd_req_flush_object(or, &osdev->obj, + OSD_CDB_FLUSH_ALL, 0, 0); + else if (do_write) + osd_req_write(or, &osdev->obj, blk_rq_pos(rq) * 512ULL, + bio, blk_rq_bytes(rq)); + else + osd_req_read(or, &osdev->obj, blk_rq_pos(rq) * 512ULL, + bio, blk_rq_bytes(rq)); + + /* begin OSD command execution */ + if (osd_async_op(or, osdblk_osd_complete, orq, + osdev->obj_cred)) { + osd_end_request(or); + blk_requeue_request(q, rq); + bio_chain_put(bio); + } + + /* remove the special 'flush' marker, now that the command + * is executing + */ + rq->special = NULL; + } +} + +static void osdblk_prepare_flush(struct request_queue *q, struct request *rq) +{ + /* add driver-specific marker, to indicate that this request + * is a flush command + */ + rq->special = (void *) 0xdeadbeefUL; +} + +static void osdblk_free_disk(struct osdblk_device *osdev) +{ + struct gendisk *disk = osdev->disk; + + if (!disk) + return; + + if (disk->flags & GENHD_FL_UP) + del_gendisk(disk); + if (disk->queue) + blk_cleanup_queue(disk->queue); + put_disk(disk); +} + +static int osdblk_init_disk(struct osdblk_device *osdev) +{ + struct gendisk *disk; + struct request_queue *q; + int rc; + u64 obj_size = 0; + + /* contact OSD, request size info about the object being mapped */ + rc = osdblk_get_obj_size(osdev, &obj_size); + if (rc) + return rc; + + /* create gendisk info */ + disk = 
alloc_disk(OSDBLK_MINORS_PER_MAJOR); + if (!disk) + return -ENOMEM; + + sprintf(disk->disk_name, DRV_NAME "/%d", osdev->id); + disk->major = osdev->major; + disk->first_minor = 0; + disk->fops = &osdblk_bd_ops; + disk->private_data = osdev; + + /* init rq */ + q = blk_init_queue(osdblk_rq_fn, &osdev->lock); + if (!q) { + put_disk(disk); + return -ENOMEM; + } + + /* switch queue to TCQ mode; allocate tag map */ + rc = blk_queue_init_tags(q, OSDBLK_MAX_REQ, NULL); + if (rc) { + blk_cleanup_queue(q); + put_disk(disk); + return rc; + } + + blk_queue_prep_rq(q, blk_queue_start_tag); + blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH, osdblk_prepare_flush); + + disk->queue = q; + + q->queuedata = osdev; + + osdev->disk = disk; + osdev->q = q; + + /* finally, announce the disk to the world */ + set_capacity(disk, obj_size); + add_disk(disk); + + return 0; +} + +/******************************************************************** + /sys/class/osdblk/ + add map OSD object to blkdev + remove unmap OSD object + list show mappings + *******************************************************************/ + +static void class_osdblk_release(struct class *cls) +{ + kfree(cls); +} + +static ssize_t class_osdblk_list(struct class *c, char *data) +{ + int n = 0; + struct list_head *tmp; + + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); + + list_for_each(tmp, &osdblkdev_list) { + struct osdblk_device *osdev; + + osdev = list_entry(tmp, struct osdblk_device, node); + + n += sprintf(data+n, "%d %d %llu %llu %s\n", + osdev->id, + osdev->major, + osdev->obj.partition, + osdev->obj.id, + osdev->osd_path); + } + + mutex_unlock(&ctl_mutex); + return n; +} + +static ssize_t class_osdblk_add(struct class *c, const char *buf, size_t count) +{ + struct osdblk_device *osdev; + ssize_t rc; + int irc, new_id = 0; + struct list_head *tmp; + + if (!try_module_get(THIS_MODULE)) + return -ENODEV; + + /* new osdblk_device object */ + osdev = kzalloc(sizeof(*osdev) + strlen(buf) + 1, GFP_KERNEL); + 
if (!osdev) { + rc = -ENOMEM; + goto err_out_mod; + } + + /* static osdblk_device initialization */ + spin_lock_init(&osdev->lock); + INIT_LIST_HEAD(&osdev->node); + + /* generate unique id: find highest unique id, add one */ + + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); + + list_for_each(tmp, &osdblkdev_list) { + struct osdblk_device *osdev; + + osdev = list_entry(tmp, struct osdblk_device, node); + if (osdev->id > new_id) + new_id = osdev->id + 1; + } + + osdev->id = new_id; + + /* add to global list */ + list_add_tail(&osdev->node, &osdblkdev_list); + + mutex_unlock(&ctl_mutex); + + /* parse add command */ + if (sscanf(buf, "%llu %llu %s", &osdev->obj.partition, &osdev->obj.id, + osdev->osd_path) != 3) { + rc = -EINVAL; + goto err_out_slot; + } + + /* initialize rest of new object */ + sprintf(osdev->name, DRV_NAME "%d", osdev->id); + + /* contact requested OSD */ + osdev->osd = osduld_path_lookup(osdev->osd_path); + if (IS_ERR(osdev->osd)) { + rc = PTR_ERR(osdev->osd); + goto err_out_slot; + } + + /* build OSD credential */ + osdblk_make_credential(osdev->obj_cred, &osdev->obj); + + /* register our block device */ + irc = register_blkdev(0, osdev->name); + if (irc < 0) { + rc = irc; + goto err_out_osd; + } + + osdev->major = irc; + + /* set up and announce blkdev mapping */ + rc = osdblk_init_disk(osdev); + if (rc) + goto err_out_blkdev; + + return 0; + +err_out_blkdev: + unregister_blkdev(osdev->major, osdev->name); +err_out_osd: + osduld_put_device(osdev->osd); +err_out_slot: + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); + list_del_init(&osdev->node); + mutex_unlock(&ctl_mutex); + + kfree(osdev); +err_out_mod: + module_put(THIS_MODULE); + return rc; +} + +static ssize_t class_osdblk_remove(struct class *c, const char *buf, + size_t count) +{ + struct osdblk_device *osdev = NULL; + int target_id, rc; + unsigned long ul; + struct list_head *tmp; + + rc = strict_strtoul(buf, 10, &ul); + if (rc) + return rc; + + /* convert to int; abort if we 
lost anything in the conversion */ + target_id = (int) ul; + if (target_id != ul) + return -EINVAL; + + /* remove object from list immediately */ + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); + + list_for_each(tmp, &osdblkdev_list) { + osdev = list_entry(tmp, struct osdblk_device, node); + if (osdev->id == target_id) { + list_del_init(&osdev->node); + break; + } + osdev = NULL; + } + + mutex_unlock(&ctl_mutex); + + if (!osdev) + return -ENOENT; + + /* clean up and free blkdev and associated OSD connection */ + osdblk_free_disk(osdev); + unregister_blkdev(osdev->major, osdev->name); + osduld_put_device(osdev->osd); + kfree(osdev); + + /* release module ref */ + module_put(THIS_MODULE); + + return 0; +} + +static struct class_attribute class_osdblk_attrs[] = { + __ATTR(add, 0200, NULL, class_osdblk_add), + __ATTR(remove, 0200, NULL, class_osdblk_remove), + __ATTR(list, 0444, class_osdblk_list, NULL), + __ATTR_NULL +}; + +static int osdblk_sysfs_init(void) +{ + int ret = 0; + + /* + * create control files in sysfs + * /sys/class/osdblk/... + */ + class_osdblk = kzalloc(sizeof(*class_osdblk), GFP_KERNEL); + if (!class_osdblk) + return -ENOMEM; + + class_osdblk->name = DRV_NAME; + class_osdblk->owner = THIS_MODULE; + class_osdblk->class_release = class_osdblk_release; + class_osdblk->class_attrs = class_osdblk_attrs; + + ret = class_register(class_osdblk); + if (ret) { + kfree(class_osdblk); + class_osdblk = NULL; + printk(PFX "failed to create class osdblk\n"); + return ret; + } + + return 0; +} + +static void osdblk_sysfs_cleanup(void) +{ + if (class_osdblk) + class_destroy(class_osdblk); + class_osdblk = NULL; +} + +static int __init osdblk_init(void) +{ + int rc; + + rc = osdblk_sysfs_init(); + if (rc) + return rc; + + return 0; +} + +static void __exit osdblk_exit(void) +{ + osdblk_sysfs_cleanup(); +} + +module_init(osdblk_init); +module_exit(osdblk_exit); + -- 1.6.2.1 ^ permalink raw reply related [flat|nested] 9+ messages in thread
* [PATCH 2/2] osdblk: Adjust queue limits to lower device's limits 2009-05-21 14:06 [PATCH 0/2 version 4] osdblk: a Linux block device for OSD objects Boaz Harrosh 2009-05-21 14:09 ` [PATCH 1/2 " Boaz Harrosh @ 2009-05-21 14:11 ` Boaz Harrosh 2009-05-21 22:41 ` [PATCH 0/2 version 4] osdblk: a Linux block device for OSD objects Jeff Garzik 2 siblings, 0 replies; 9+ messages in thread From: Boaz Harrosh @ 2009-05-21 14:11 UTC (permalink / raw) To: Jeff Garzik, linux-scsi, open-osd mailing-list; +Cc: Jens Axboe Call blk_queue_stack_limits() to copy queue limits from the underlying osd scsi_device. This is absolutely needed because osdblk cannot sleep when allocating a lower-request and therefore cannot be bouncing. TODO: Dynamic changes of limits to the lower device queue will not be reflected in the upper driver. Signed-off-by: Boaz Harrosh <bharrosh@panasas.com> --- drivers/block/osdblk.c | 7 +++++++ 1 files changed, 7 insertions(+), 0 deletions(-) diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c index 531d234..0da8ce0 100644 --- a/drivers/block/osdblk.c +++ b/drivers/block/osdblk.c @@ -66,6 +66,7 @@ #include <scsi/osd_initiator.h> #include <scsi/osd_attributes.h> #include <scsi/osd_sec.h> +#include <scsi/scsi_device.h> #define DRV_NAME "osdblk" #define PFX DRV_NAME ": " @@ -410,6 +411,12 @@ static int osdblk_init_disk(struct osdblk_device *osdev) return rc; } + /* Set our limits to the lower device limits, because osdblk cannot + * sleep when allocating a lower-request and therefore cannot be + * bouncing. + */ + blk_queue_stack_limits(q, osd_request_queue(osdev->osd)); + blk_queue_prep_rq(q, blk_queue_start_tag); blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH, osdblk_prepare_flush); -- 1.6.2.1 ^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH 0/2 version 4] osdblk: a Linux block device for OSD objects 2009-05-21 14:06 [PATCH 0/2 version 4] osdblk: a Linux block device for OSD objects Boaz Harrosh 2009-05-21 14:09 ` [PATCH 1/2 " Boaz Harrosh 2009-05-21 14:11 ` [PATCH 2/2] osdblk: Adjust queue limits to lower device's limits Boaz Harrosh @ 2009-05-21 22:41 ` Jeff Garzik 2009-05-26 7:43 ` Boaz Harrosh 2 siblings, 1 reply; 9+ messages in thread From: Jeff Garzik @ 2009-05-21 22:41 UTC (permalink / raw) To: Boaz Harrosh; +Cc: linux-scsi, open-osd mailing-list, Jens Axboe Boaz Harrosh wrote: > I'm posting for review a new version of the osdblk driver. What's new? > > * Once block/for-2.6.31 and all pending osd patches hit mainline. this new version > is ready for submission. > - The relevant osd patches have been posted on the mailing list, but I'll send an orderly > set for scsi-misc and scsi-post-merge on Sunday. > - All the prerequisite block patches are already in Jens's tree. > > * Below is the diff from Jeff's last version of the patch. these things have changed: > {SQUASHME: osdblk} Block and OSD Api fixups and bug fixes > > - Block API changes from Tejuns revamps > - OSD Api changes for supporting bio-chaining > - do_flush requests do not need bio clonning > (And might not have any so prevent such a crash) > - osdblk_make_credential is here to stay > - Use bio_kmalloc and avoid the bio_alloc dead/live locks. > TODO: Split request into smaller chunks if allocations fail. > - Only use __GFP_WAIT on first bio allocation. (Not relevant since > __GFP_WAIT is not used) > > * Added an extra patch: > - [PATCH 2/2] osdblk: Adjust queue limits to lower device's limits > > This is ontop of the post-merge tree. Jeff? will you push this driver > through your tree? > > What is left is to bang some serious testing on this driver. I'll do > that next. The changes look reasonable to me... if you wanted to get it into your tree and push it with other OSD stuff, that would be fine to me. 
I think you are in a better position to deal with all the pre-req's, and in a better position to test osdblk more completely. Have you messed around with the user tools yet? osdblk needs a tool that creates an OSD object of a specified size, etc. Jeff ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 0/2 version 4] osdblk: a Linux block device for OSD objects 2009-05-21 22:41 ` [PATCH 0/2 version 4] osdblk: a Linux block device for OSD objects Jeff Garzik @ 2009-05-26 7:43 ` Boaz Harrosh 2009-05-28 22:31 ` Jeff Garzik 0 siblings, 1 reply; 9+ messages in thread From: Boaz Harrosh @ 2009-05-26 7:43 UTC (permalink / raw) To: Jeff Garzik; +Cc: linux-scsi, open-osd mailing-list, Jens Axboe On 05/22/2009 01:41 AM, Jeff Garzik wrote: > Boaz Harrosh wrote: >> I'm posting for review a new version of the osdblk driver. What's new? >> >> * Once block/for-2.6.31 and all pending osd patches hit mainline. this new version >> is ready for submission. >> - The relevant osd patches have been posted on the mailing list, but I'll send an orderly >> set for scsi-misc and scsi-post-merge on Sunday. >> - All the prerequisite block patches are already in Jens's tree. >> >> * Below is the diff from Jeff's last version of the patch. these things have changed: >> {SQUASHME: osdblk} Block and OSD Api fixups and bug fixes >> >> - Block API changes from Tejuns revamps >> - OSD Api changes for supporting bio-chaining >> - do_flush requests do not need bio clonning >> (And might not have any so prevent such a crash) >> - osdblk_make_credential is here to stay >> - Use bio_kmalloc and avoid the bio_alloc dead/live locks. >> TODO: Split request into smaller chunks if allocations fail. >> - Only use __GFP_WAIT on first bio allocation. (Not relevant since >> __GFP_WAIT is not used) >> >> * Added an extra patch: >> - [PATCH 2/2] osdblk: Adjust queue limits to lower device's limits >> >> This is ontop of the post-merge tree. Jeff? will you push this driver >> through your tree? >> >> What is left is to bang some serious testing on this driver. I'll do >> that next. > > The changes look reasonable to me... if you wanted to get it into your > tree and push it with other OSD stuff, that would be fine to me. 
> > I think you are in a better position to deal with all the pre-req's, and > in a better position to test osdblk more completely. > > Have you messed around with the user tools yet? osdblk needs a tool > that creates an OSD object of a specified size, etc. > > Jeff > Thanks Jeff. So is this a: Signed-off-by: Jeff Garzik <jgarzik@redhat.com> But please send me a commit log, or should I scribble one? About the user-mode tool: Sorry I'm so very busy with the pNFS layout driver and export that I do not have time for it right now. For testing I just use exofs, create a file and dd to some offset to make it of some size. Very very stupid I know, but easy. (The obj-id I can guess as I know the code) Tell me if you absolutely need a fast hack that just takes the ids and size and creates one object, for now. Boaz ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 0/2 version 4] osdblk: a Linux block device for OSD objects 2009-05-26 7:43 ` Boaz Harrosh @ 2009-05-28 22:31 ` Jeff Garzik 2009-05-31 9:50 ` Boaz Harrosh 0 siblings, 1 reply; 9+ messages in thread From: Jeff Garzik @ 2009-05-28 22:31 UTC (permalink / raw) To: Boaz Harrosh; +Cc: linux-scsi, open-osd mailing-list, Jens Axboe Boaz Harrosh wrote: > On 05/22/2009 01:41 AM, Jeff Garzik wrote: >> Boaz Harrosh wrote: >>> I'm posting for review a new version of the osdblk driver. What's new? >>> >>> * Once block/for-2.6.31 and all pending osd patches hit mainline. this new version >>> is ready for submission. >>> - The relevant osd patches have been posted on the mailing list, but I'll send an orderly >>> set for scsi-misc and scsi-post-merge on Sunday. >>> - All the prerequisite block patches are already in Jens's tree. >>> >>> * Below is the diff from Jeff's last version of the patch. these things have changed: >>> {SQUASHME: osdblk} Block and OSD Api fixups and bug fixes >>> >>> - Block API changes from Tejuns revamps >>> - OSD Api changes for supporting bio-chaining >>> - do_flush requests do not need bio clonning >>> (And might not have any so prevent such a crash) >>> - osdblk_make_credential is here to stay >>> - Use bio_kmalloc and avoid the bio_alloc dead/live locks. >>> TODO: Split request into smaller chunks if allocations fail. >>> - Only use __GFP_WAIT on first bio allocation. (Not relevant since >>> __GFP_WAIT is not used) >>> >>> * Added an extra patch: >>> - [PATCH 2/2] osdblk: Adjust queue limits to lower device's limits >>> >>> This is ontop of the post-merge tree. Jeff? will you push this driver >>> through your tree? >>> >>> What is left is to bang some serious testing on this driver. I'll do >>> that next. >> The changes look reasonable to me... if you wanted to get it into your >> tree and push it with other OSD stuff, that would be fine to me. 
>> >> I think you are in a better position to deal with all the pre-req's, and >> in a better position to test osdblk more completely. >> >> Have you messed around with the user tools yet? osdblk needs a tool >> that creates an OSD object of a specified size, etc. >> >> Jeff >> > > Thanks Jeff. > > So is this a: > Signed-off-by: Jeff Garzik <jgarzik@redhat.com> Yes, if it works in your testing. Please make sure the commit is "From: Jeff Garzik" as well, thanks. > But please send me a commit log, or should I scribble one? Just a one-line "add osdblk, block driver for OSD objects" should be fine. > About the user-mode tool: Sorry I'm so very busy with the pNFS > layout driver and export that I do not have time for it right now. > > For testing I just use exofs, create a file and dd to some offset to > make it of some size. Very very stupid I know, but easy. (The obj-id > I can guess as I know the code) > > Tell me if you absolutely need a fast hack that just takes the ids > and size and creates one object, for now. Some simple tool like that is needed; a user shouldn't have to know exofs just to be able to use osdblk :) If nothing else appears, I'll whip something up before 2.6.31, but I was of course hoping to talk you into it, since you could do it faster and better than me :) Jeff ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 0/2 version 4] osdblk: a Linux block device for OSD objects 2009-05-28 22:31 ` Jeff Garzik @ 2009-05-31 9:50 ` Boaz Harrosh 2009-06-10 12:52 ` Jeff Garzik 0 siblings, 1 reply; 9+ messages in thread From: Boaz Harrosh @ 2009-05-31 9:50 UTC (permalink / raw) To: Jeff Garzik; +Cc: linux-scsi, open-osd mailing-list, Jens Axboe On 05/29/2009 01:31 AM, Jeff Garzik wrote: > Boaz Harrosh wrote: >> On 05/22/2009 01:41 AM, Jeff Garzik wrote: >>> Boaz Harrosh wrote: >>>> I'm posting for review a new version of the osdblk driver. What's new? >>>> >>>> * Once block/for-2.6.31 and all pending osd patches hit mainline. this new version >>>> is ready for submission. >>>> - The relevant osd patches have been posted on the mailing list, but I'll send an orderly >>>> set for scsi-misc and scsi-post-merge on Sunday. >>>> - All the prerequisite block patches are already in Jens's tree. >>>> >>>> * Below is the diff from Jeff's last version of the patch. these things have changed: >>>> {SQUASHME: osdblk} Block and OSD Api fixups and bug fixes >>>> >>>> - Block API changes from Tejuns revamps >>>> - OSD Api changes for supporting bio-chaining >>>> - do_flush requests do not need bio clonning >>>> (And might not have any so prevent such a crash) >>>> - osdblk_make_credential is here to stay >>>> - Use bio_kmalloc and avoid the bio_alloc dead/live locks. >>>> TODO: Split request into smaller chunks if allocations fail. >>>> - Only use __GFP_WAIT on first bio allocation. (Not relevant since >>>> __GFP_WAIT is not used) >>>> >>>> * Added an extra patch: >>>> - [PATCH 2/2] osdblk: Adjust queue limits to lower device's limits >>>> >>>> This is ontop of the post-merge tree. Jeff? will you push this driver >>>> through your tree? >>>> >>>> What is left is to bang some serious testing on this driver. I'll do >>>> that next. >>> The changes look reasonable to me... if you wanted to get it into your >>> tree and push it with other OSD stuff, that would be fine to me. 
>>> >>> I think you are in a better position to deal with all the pre-req's, and >>> in a better position to test osdblk more completely. >>> >>> Have you messed around with the user tools yet? osdblk needs a tool >>> that creates an OSD object of a specified size, etc. >>> >>> Jeff >>> >> Thanks Jeff. >> >> So is this a: >> Signed-off-by: Jeff Garzik <jgarzik@redhat.com> > > Yes, if it works in your testing. Please make sure the commit is "From: > Jeff Garzik" as well, thanks. > > >> But please send me a commit log, or should I scribble one? > > Just a one-line "add osdblk, block driver for OSD objects" should be fine. > > >> About the user-mode tool: Sorry I'm so very busy with the pNFS >> layout driver and export that I do not have time for it right now. >> >> For testing I just use exofs, create a file and dd to some offset to >> make it of some size. Very very stupid I know, but easy. (The obj-id >> I can guess as I know the code) >> >> Tell me if you absolutely need a fast hack that just takes the ids >> and size and creates one object, for now. > > Some simple tool like that is needed; a user shouldn't have to know > exofs just to be able to use osdblk :) > > If nothing else appears, I'll whip something up before 2.6.31, but I was > of course hoping to talk you into it, since you could do it faster and > better than me :) > > Jeff > OK I will release something Quick-And-Dirty end of this week. It should do those three tasks osdblk needs: Create Resize Delete object@partition@device Boaz ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 0/2 version 4] osdblk: a Linux block device for OSD objects 2009-05-31 9:50 ` Boaz Harrosh @ 2009-06-10 12:52 ` Jeff Garzik 2009-06-10 13:33 ` Boaz Harrosh 0 siblings, 1 reply; 9+ messages in thread From: Jeff Garzik @ 2009-06-10 12:52 UTC (permalink / raw) To: Boaz Harrosh; +Cc: linux-scsi, open-osd mailing-list, Jens Axboe Boaz Harrosh wrote: > OK I will release something Quick-And-Dirty end of this week. > It should do those three tasks osdblk needs: > Create Resize Delete object@partition@device That works, thanks! Honestly, only create and delete are really _needed_ IMO. Jeff ^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH 0/2 version 4] osdblk: a Linux block device for OSD objects 2009-06-10 12:52 ` Jeff Garzik @ 2009-06-10 13:33 ` Boaz Harrosh 0 siblings, 0 replies; 9+ messages in thread From: Boaz Harrosh @ 2009-06-10 13:33 UTC (permalink / raw) To: Jeff Garzik; +Cc: linux-scsi, open-osd mailing-list, Jens Axboe On 06/10/2009 03:52 PM, Jeff Garzik wrote: > Boaz Harrosh wrote: >> OK I will release something Quick-And-Dirty end of this week. >> It should do those three tasks osdblk needs: >> Create Resize Delete object@partition@device > > That works, thanks! Honestly, only create and delete are really > _needed_ IMO. > > Jeff > > I'll be on this ASAP. I have a working draft that just needs cleanup and submission, which is queued behind all these other pressing things. Boaz ^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2009-06-10 13:33 UTC | newest] Thread overview: 9+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2009-05-21 14:06 [PATCH 0/2 version 4] osdblk: a Linux block device for OSD objects Boaz Harrosh 2009-05-21 14:09 ` [PATCH 1/2 " Boaz Harrosh 2009-05-21 14:11 ` [PATCH 2/2] osdblk: Adjust queue limits to lower device's limits Boaz Harrosh 2009-05-21 22:41 ` [PATCH 0/2 version 4] osdblk: a Linux block device for OSD objects Jeff Garzik 2009-05-26 7:43 ` Boaz Harrosh 2009-05-28 22:31 ` Jeff Garzik 2009-05-31 9:50 ` Boaz Harrosh 2009-06-10 12:52 ` Jeff Garzik 2009-06-10 13:33 ` Boaz Harrosh
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).