* [RFC] Add user scatter gather I/O to SG_IO
@ 2005-06-10 23:59 James Bottomley
2005-06-12 22:19 ` Mike Christie
2005-06-13 6:55 ` Jens Axboe
0 siblings, 2 replies; 4+ messages in thread
From: James Bottomley @ 2005-06-10 23:59 UTC (permalink / raw)
To: Mike Christie, Jens Axboe; +Cc: SCSI Mailing List
This should be the last in the series of patches implementing
scsi_wait_req and scsi_do_req in the block layer. It adds
scatter/gather I/O from the user as a new feature of SG_IO. It also
exports the API for this which can now be used by the sg driver.
The patch is lightly tested, but appears to work fine.
James
diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c
--- a/drivers/block/ll_rw_blk.c
+++ b/drivers/block/ll_rw_blk.c
@@ -2152,6 +2152,61 @@ struct request *blk_rq_map_user(request_
EXPORT_SYMBOL(blk_rq_map_user);
/**
+ * blk_rq_map_user_iov - map user data to a request, for REQ_BLOCK_PC usage
+ * @q: request queue where request should be inserted
+ * @rw: READ or WRITE data
+ * @iov: pointer to the iovec
+ * @iov_count: number of elements in the iovec
+ *
+ * Description:
+ * Data will be mapped directly for zero copy io, if possible. Otherwise
+ * a kernel bounce buffer is used.
+ *
+ * A matching blk_rq_unmap_user() must be issued at the end of io, while
+ * still in process context.
+ *
+ * Note: The mapped bio may need to be bounced through blk_queue_bounce()
+ * before being submitted to the device, as pages mapped may be out of
+ * reach. It's the caller's responsibility to make sure this happens. The
+ * original bio must be passed back in to blk_rq_unmap_user() for proper
+ * unmapping.
+ */
+struct request *blk_rq_map_user_iov(request_queue_t *q, int rw,
+ struct sg_iovec *iov, int iov_count)
+{
+ struct request *rq;
+ struct bio *bio;
+
+ if (!iov || iov_count <= 0)
+ return ERR_PTR(-EINVAL);
+
+ rq = blk_get_request(q, rw, __GFP_WAIT);
+ if (!rq)
+ return ERR_PTR(-ENOMEM);
+
+ /* we don't allow misaligned data like bio_map_user() does. If the
+ * user is using sg, they're expected to know the alignment constraints
+ * and respect them accordingly */
+ bio = bio_map_user_iov(q, NULL, iov, iov_count, rw == READ);
+
+ if (!IS_ERR(bio)) {
+ rq->bio = rq->biotail = bio;
+ blk_rq_bio_prep(q, rq, bio);
+
+ rq->buffer = rq->data = NULL;
+ rq->data_len = bio->bi_size;
+ return rq;
+ }
+
+ /*
+ * bio is the err-ptr
+ */
+ blk_put_request(rq);
+ return (struct request *) bio;
+}
+EXPORT_SYMBOL(blk_rq_map_user_iov);
+
+/**
* blk_rq_unmap_user - unmap a request with user data
* @rq: request to be unmapped
* @bio: bio for the request
@@ -2231,6 +2286,21 @@ struct request *blk_rq_map_kern(request_
EXPORT_SYMBOL(blk_rq_map_kern);
+void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk,
+ struct request *rq, int at_head,
+ void (*done)(struct request *))
+{
+ int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
+
+ rq->rq_disk = bd_disk;
+
+
+ rq->flags |= REQ_NOMERGE;
+ rq->end_io = done;
+ elv_add_request(q, rq, where, 1);
+ generic_unplug_device(q);
+}
+
/**
* blk_execute_rq - insert a request into queue for execution
* @q: queue to insert the request in
@@ -2245,13 +2315,10 @@ EXPORT_SYMBOL(blk_rq_map_kern);
int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk,
struct request *rq, int at_head)
{
- int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
DECLARE_COMPLETION(wait);
char sense[SCSI_SENSE_BUFFERSIZE];
int err = 0;
- rq->rq_disk = bd_disk;
-
/*
* we need an extra reference to the request, so we can look at
* it after io completion
@@ -2264,11 +2331,8 @@ int blk_execute_rq(request_queue_t *q, s
rq->sense_len = 0;
}
- rq->flags |= REQ_NOMERGE;
rq->waiting = &wait;
- rq->end_io = blk_end_sync_rq;
- elv_add_request(q, rq, where, 1);
- generic_unplug_device(q);
+ blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq);
wait_for_completion(&wait);
rq->waiting = NULL;
diff --git a/drivers/block/scsi_ioctl.c b/drivers/block/scsi_ioctl.c
--- a/drivers/block/scsi_ioctl.c
+++ b/drivers/block/scsi_ioctl.c
@@ -231,9 +231,6 @@ static int sg_io(struct file *file, requ
if (verify_command(file, cmd))
return -EPERM;
- /*
- * we'll do that later
- */
if (hdr->iovec_count)
return -EOPNOTSUPP;
@@ -241,7 +238,7 @@ static int sg_io(struct file *file, requ
return -EIO;
reading = writing = 0;
- if (hdr->dxfer_len) {
+ if (hdr->dxfer_len)
switch (hdr->dxfer_direction) {
default:
return -EINVAL;
@@ -256,14 +253,29 @@ static int sg_io(struct file *file, requ
break;
}
+ if (hdr->iovec_count) {
+ const int size = sizeof(struct sg_iovec) * hdr->iovec_count;
+ struct sg_iovec *iov = kmalloc(size, GFP_KERNEL);
+ if (!iov)
+ return -ENOMEM;
+
+ if (copy_from_user(iov, hdr->dxferp, size)) {
+ kfree(iov);
+ return -EFAULT;
+ }
+ rq = blk_rq_map_user_iov(q, writing ? WRITE : READ, iov,
+ hdr->iovec_count);
+ kfree(iov);
+ } else if (hdr->dxfer_len) {
rq = blk_rq_map_user(q, writing ? WRITE : READ, hdr->dxferp,
hdr->dxfer_len);
- if (IS_ERR(rq))
- return PTR_ERR(rq);
} else
rq = blk_get_request(q, READ, __GFP_WAIT);
+ if (IS_ERR(rq))
+ return PTR_ERR(rq);
+
/*
* fill in request structure
*/
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -1230,6 +1230,9 @@ static void scsi_eh_offline_sdevs(struct
scmd->device->channel,
scmd->device->id,
scmd->device->lun);
+ /* try to reset the bus and the card to a sane state */
+ scsi_try_bus_reset(scmd);
+ scsi_try_host_reset(scmd);
scsi_device_set_state(scmd->device, SDEV_OFFLINE);
if (scsi_eh_eflags_chk(scmd, SCSI_EH_CANCEL_CMD)) {
/*
diff --git a/fs/bio.c b/fs/bio.c
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -25,6 +25,7 @@
#include <linux/module.h>
#include <linux/mempool.h>
#include <linux/workqueue.h>
+#include <scsi/sg.h> /* for struct sg_iovec */
#define BIO_POOL_SIZE 256
@@ -549,22 +550,34 @@ out_bmd:
return ERR_PTR(ret);
}
-static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev,
- unsigned long uaddr, unsigned int len,
- int write_to_vm)
+static struct bio *__bio_map_user_iov(request_queue_t *q,
+ struct block_device *bdev,
+ struct sg_iovec *iov, int iov_count,
+ int write_to_vm)
{
- unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
- unsigned long start = uaddr >> PAGE_SHIFT;
- const int nr_pages = end - start;
- int ret, offset, i;
+ int i, j;
+ int nr_pages = 0;
struct page **pages;
struct bio *bio;
+ int cur_page = 0;
+ int ret, offset;
- /*
- * transfer and buffer must be aligned to at least hardsector
- * size for now, in the future we can relax this restriction
- */
- if ((uaddr & queue_dma_alignment(q)) || (len & queue_dma_alignment(q)))
+ for (i = 0; i < iov_count; i++) {
+ unsigned long uaddr = (unsigned long)iov[i].iov_base;
+ unsigned long len = iov[i].iov_len;
+ unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ unsigned long start = uaddr >> PAGE_SHIFT;
+
+ nr_pages += end - start;
+ /*
+ * transfer and buffer must be aligned to at least hardsector
+ * size for now, in the future we can relax this restriction
+ */
+ if ((uaddr & queue_dma_alignment(q)) || (len & queue_dma_alignment(q)))
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (!nr_pages)
return ERR_PTR(-EINVAL);
bio = bio_alloc(GFP_KERNEL, nr_pages);
@@ -576,42 +589,54 @@ static struct bio *__bio_map_user(reques
if (!pages)
goto out;
- down_read(&current->mm->mmap_sem);
- ret = get_user_pages(current, current->mm, uaddr, nr_pages,
- write_to_vm, 0, pages, NULL);
- up_read(&current->mm->mmap_sem);
+ memset(pages, 0, nr_pages * sizeof(struct page *));
- if (ret < nr_pages)
- goto out;
-
- bio->bi_bdev = bdev;
+ for (i = 0; i < iov_count; i++) {
+ unsigned long uaddr = (unsigned long)iov[i].iov_base;
+ unsigned long len = iov[i].iov_len;
+ unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ unsigned long start = uaddr >> PAGE_SHIFT;
+ const int local_nr_pages = end - start;
+ const int page_limit = cur_page + local_nr_pages;
+
+ down_read(&current->mm->mmap_sem);
+ ret = get_user_pages(current, current->mm, uaddr,
+ local_nr_pages,
+ write_to_vm, 0, &pages[cur_page], NULL);
+ up_read(&current->mm->mmap_sem);
+
+ if (ret < local_nr_pages)
+ goto out_unmap;
+
+
+ offset = uaddr & ~PAGE_MASK;
+ for (j = cur_page; j < page_limit; j++) {
+ unsigned int bytes = PAGE_SIZE - offset;
+
+ if (len <= 0)
+ break;
+
+ if (bytes > len)
+ bytes = len;
+
+ /*
+ * sorry...
+ */
+ if (__bio_add_page(q, bio, pages[j], bytes, offset) < bytes)
+ break;
- offset = uaddr & ~PAGE_MASK;
- for (i = 0; i < nr_pages; i++) {
- unsigned int bytes = PAGE_SIZE - offset;
-
- if (len <= 0)
- break;
-
- if (bytes > len)
- bytes = len;
+ len -= bytes;
+ offset = 0;
+ }
+ cur_page = j;
/*
- * sorry...
+ * release the pages we didn't map into the bio, if any
*/
- if (__bio_add_page(q, bio, pages[i], bytes, offset) < bytes)
- break;
-
- len -= bytes;
- offset = 0;
+ while (j < page_limit)
+ page_cache_release(pages[j++]);
}
- /*
- * release the pages we didn't map into the bio, if any
- */
- while (i < nr_pages)
- page_cache_release(pages[i++]);
-
kfree(pages);
/*
@@ -620,9 +645,17 @@ static struct bio *__bio_map_user(reques
if (!write_to_vm)
bio->bi_rw |= (1 << BIO_RW);
+ bio->bi_bdev = bdev;
bio->bi_flags |= (1 << BIO_USER_MAPPED);
return bio;
-out:
+
+ out_unmap:
+ for (i = 0; i < nr_pages; i++) {
+ if(!pages[i])
+ break;
+ page_cache_release(pages[i]);
+ }
+ out:
kfree(pages);
bio_put(bio);
return ERR_PTR(ret);
@@ -642,9 +675,33 @@ out:
struct bio *bio_map_user(request_queue_t *q, struct block_device *bdev,
unsigned long uaddr, unsigned int len, int write_to_vm)
{
+ struct sg_iovec iov;
+
+ iov.iov_base = (__user void *)uaddr;
+ iov.iov_len = len;
+
+ return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm);
+}
+
+/**
+ * bio_map_user_iov - map user sg_iovec table into bio
+ * @q: the request_queue_t for the bio
+ * @bdev: destination block device
+ * @iov: the iovec.
+ * @iov_count: number of elements in the iovec
+ * @write_to_vm: bool indicating writing to pages or not
+ *
+ * Map the user space address into a bio suitable for io to a block
+ * device. Returns an error pointer in case of error.
+ */
+struct bio *bio_map_user_iov(request_queue_t *q, struct block_device *bdev,
+ struct sg_iovec *iov, int iov_count,
+ int write_to_vm)
+{
struct bio *bio;
+ int len = 0, i;
- bio = __bio_map_user(q, bdev, uaddr, len, write_to_vm);
+ bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm);
if (IS_ERR(bio))
return bio;
@@ -657,6 +714,9 @@ struct bio *bio_map_user(request_queue_t
*/
bio_get(bio);
+ for (i = 0; i < iov_count; i++)
+ len += iov[i].iov_len;
+
if (bio->bi_size == len)
return bio;
diff --git a/include/linux/bio.h b/include/linux/bio.h
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -281,6 +281,10 @@ extern int bio_add_page(struct bio *, st
extern int bio_get_nr_vecs(struct block_device *);
extern struct bio *bio_map_user(struct request_queue *, struct block_device *,
unsigned long, unsigned int, int);
+struct sg_iovec;
+extern struct bio *bio_map_user_iov(struct request_queue *,
+ struct block_device *,
+ struct sg_iovec *, int, int);
extern void bio_unmap_user(struct bio *);
extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int,
unsigned int);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -559,6 +559,8 @@ extern void __blk_stop_queue(request_que
extern void blk_run_queue(request_queue_t *);
extern void blk_queue_activity_fn(request_queue_t *, activity_fn *, void *);
extern struct request *blk_rq_map_user(request_queue_t *, int, void __user *, unsigned int);
+struct sg_iovec;
+extern struct request *blk_rq_map_user_iov(request_queue_t *, int, struct sg_iovec *, int);
extern int blk_rq_unmap_user(struct request *, struct bio *, unsigned int);
extern struct request *blk_rq_map_kern(request_queue_t *, int, void *,
unsigned int, unsigned int);
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [RFC] Add user scatter gather I/O to SG_IO
2005-06-10 23:59 [RFC] Add user scatter gather I/O to SG_IO James Bottomley
@ 2005-06-12 22:19 ` Mike Christie
2005-06-13 6:55 ` Jens Axboe
1 sibling, 0 replies; 4+ messages in thread
From: Mike Christie @ 2005-06-12 22:19 UTC (permalink / raw)
To: James Bottomley; +Cc: Jens Axboe, SCSI Mailing List
On Fri, 2005-06-10 at 16:59, James Bottomley wrote:
> This should be the last in the series of patches implementing
> scsi_wait_req and scsi_do_req in the block layer. It adds
> scatter/gather I/O from the user as a new feature of SG_IO. It also
> exports the API for this which can now be used by the sg driver.
>
> The patch is lightly tested, but appears to work fine.
>
> James
>
>
> +void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk,
> + struct request *rq, int at_head,
> + void (*done)(struct request *))
> +{
> + int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
> +
> + rq->rq_disk = bd_disk;
> +
> +
> + rq->flags |= REQ_NOMERGE;
> + rq->end_io = done;
> + elv_add_request(q, rq, where, 1);
> + generic_unplug_device(q);
> +}
> +
This is useful for device mapper hw_hanlders. Can we get it and
__blk_put_request exported? See below.
export blk_execute_rq_nowait and __blk_put_request so device mapper
hw_handlers can use them.
diff -aurp cogito/drivers/block/ll_rw_blk.c linux-block-cogito/drivers/block/ll_rw_blk.c
--- cogito/drivers/block/ll_rw_blk.c 2005-06-11 18:48:25.000000000 -0700
+++ linux-block-cogito/drivers/block/ll_rw_blk.c 2005-06-12 14:27:34.000000000 -0700
@@ -2301,6 +2301,8 @@ void blk_execute_rq_nowait(request_queue
generic_unplug_device(q);
}
+EXPORT_SYMBOL(blk_execute_rq_nowait);
+
/**
* blk_execute_rq - insert a request into queue for execution
* @q: queue to insert the request in
@@ -2482,7 +2484,7 @@ void disk_round_stats(struct gendisk *di
/*
* queue lock must be held
*/
-static void __blk_put_request(request_queue_t *q, struct request *req)
+void __blk_put_request(request_queue_t *q, struct request *req)
{
struct request_list *rl = req->rl;
@@ -2511,6 +2513,8 @@ static void __blk_put_request(request_qu
}
}
+EXPORT_SYMBOL(__blk_put_request);
+
void blk_put_request(struct request *req)
{
/*
diff -aurp cogito/include/linux/blkdev.h linux-block-cogito/include/linux/blkdev.h
--- cogito/include/linux/blkdev.h 2005-06-11 18:51:32.000000000 -0700
+++ linux-block-cogito/include/linux/blkdev.h 2005-06-12 14:38:56.000000000 -0700
@@ -540,11 +540,15 @@ extern void blk_unregister_queue(struct
extern void register_disk(struct gendisk *dev);
extern void generic_make_request(struct bio *bio);
extern void blk_put_request(struct request *);
+extern void __blk_put_request(request_queue_t *, struct request *);
extern void blk_end_sync_rq(struct request *rq);
extern void blk_attempt_remerge(request_queue_t *, struct request *);
extern void __blk_attempt_remerge(request_queue_t *, struct request *);
extern struct request *blk_get_request(request_queue_t *, int, int);
extern void blk_insert_request(request_queue_t *, struct request *, int, void *);
+extern void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk,
+ struct request *rq, int at_head,
+ void (*done)(struct request *));
extern void blk_requeue_request(request_queue_t *, struct request *);
extern void blk_plug_device(request_queue_t *);
extern int blk_remove_plug(request_queue_t *);
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [RFC] Add user scatter gather I/O to SG_IO
2005-06-10 23:59 [RFC] Add user scatter gather I/O to SG_IO James Bottomley
2005-06-12 22:19 ` Mike Christie
@ 2005-06-13 6:55 ` Jens Axboe
2005-06-13 13:48 ` James Bottomley
1 sibling, 1 reply; 4+ messages in thread
From: Jens Axboe @ 2005-06-13 6:55 UTC (permalink / raw)
To: James Bottomley; +Cc: Mike Christie, SCSI Mailing List
On Fri, Jun 10 2005, James Bottomley wrote:
> diff --git a/drivers/block/scsi_ioctl.c b/drivers/block/scsi_ioctl.c
> --- a/drivers/block/scsi_ioctl.c
> +++ b/drivers/block/scsi_ioctl.c
> @@ -231,9 +231,6 @@ static int sg_io(struct file *file, requ
> if (verify_command(file, cmd))
> return -EPERM;
>
> - /*
> - * we'll do that later
> - */
> if (hdr->iovec_count)
> return -EOPNOTSUPP;
Didn't you want to remove more than the comment here?
> diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
> --- a/drivers/scsi/scsi_error.c
> +++ b/drivers/scsi/scsi_error.c
> @@ -1230,6 +1230,9 @@ static void scsi_eh_offline_sdevs(struct
> scmd->device->channel,
> scmd->device->id,
> scmd->device->lun);
> + /* try to reset the bus and the card to a sane state */
> + scsi_try_bus_reset(scmd);
> + scsi_try_host_reset(scmd);
> scsi_device_set_state(scmd->device, SDEV_OFFLINE);
> if (scsi_eh_eflags_chk(scmd, SCSI_EH_CANCEL_CMD)) {
Different patch?
Other than that I think it looks good, thanks. I'll get this added asap.
--
Jens Axboe
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [RFC] Add user scatter gather I/O to SG_IO
2005-06-13 6:55 ` Jens Axboe
@ 2005-06-13 13:48 ` James Bottomley
0 siblings, 0 replies; 4+ messages in thread
From: James Bottomley @ 2005-06-13 13:48 UTC (permalink / raw)
To: Jens Axboe; +Cc: Mike Christie, SCSI Mailing List
On Mon, 2005-06-13 at 08:55 +0200, Jens Axboe wrote:
> Didn't you want to remove more than the comment here?
Well ... like I said, it was lightly tested ...
I figured if I could still get SG_IO to work at all, then I'd proved
that single element SG lists worked ...
> > + /* try to reset the bus and the card to a sane state */
> > + scsi_try_bus_reset(scmd);
> > + scsi_try_host_reset(scmd);
> > scsi_device_set_state(scmd->device, SDEV_OFFLINE);
> > if (scsi_eh_eflags_chk(scmd, SCSI_EH_CANCEL_CMD)) {
>
> Different patch?
Yes, contamination, sorry.
> Other than that I think it looks good, thanks. I'll get this added asap.
Great ... let me know when you have a tree ready and I'll move to base
the SCSI changes off it.
James
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2005-06-13 13:48 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-06-10 23:59 [RFC] Add user scatter gather I/O to SG_IO James Bottomley
2005-06-12 22:19 ` Mike Christie
2005-06-13 6:55 ` Jens Axboe
2005-06-13 13:48 ` James Bottomley
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox