* [PATCH] bsg: iovec support
@ 2007-03-01 22:29 Pete Wyckoff
2007-03-19 10:41 ` FUJITA Tomonori
0 siblings, 1 reply; 10+ messages in thread
From: Pete Wyckoff @ 2007-03-01 22:29 UTC (permalink / raw)
To: Jens Axboe; +Cc: FUJITA Tomonori, linux-scsi
Support vectored IO as in SGv3. The iovec structure uses explicit
sizes to avoid the need for compat conversion.
Signed-off-by: Pete Wyckoff <pw@osc.edu>
---
My application definitely can take advantage of scatter/gather IO,
which is supported in sgv3 but not in the bsg implementation of sgv4.
I understand Tomo's concerns about code bloat and the need for
32/64 compat translations, but this will make things much easier on
users of bsg who read or write out of multiple buffers in a single
SCSI operation.
Clearly we want to avoid doing the compat work that sg.c has to do
now, so I went with __u64 for the addresses in the structures that
userspace sees. But to interface with existing bio structures, that
must be converted back to 32-bit pointers in sg_iovec (only on
32-bit architectures). In the long run, maybe we should have a
bio_map_user_iov() that works on the constant-sized sg_io_v4_vec
proposed here?
-- Pete
block/bsg.c | 132 ++++++++++++++++++++++++++++++++++++++++++---------
include/linux/bsg.h | 16 ++++++
2 files changed, 125 insertions(+), 23 deletions(-)
diff --git a/block/bsg.c b/block/bsg.c
index c85d961..8e3d6c7 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -280,6 +280,95 @@ bsg_validate_sgv4_hdr(request_queue_t *q, struct sg_io_v4 *hdr, int *rw)
}
/*
+ * Sits around blk_rq_map_user_iov so we can use an iovec type that
+ * does not require compat manipulations. For now we just clumsily
+ * remap the entire iovec if the types do not match. Later consider
+ * changing the bio map function.
+ */
+static int bsg_map_user_iovec(request_queue_t *q, struct request *rq,
+ struct sg_io_v4_vec *vec, int numvec,
+ size_t tot_len, enum dma_data_direction dir)
+{
+ struct bio *bio;
+ struct sg_iovec *iov;
+ int write_to_vm = (dir == DMA_FROM_DEVICE ? 1 : 0);
+ int must_copy_iovec = (sizeof(*iov) != sizeof(*vec));
+
+ /*
+ * For 64-bit everywhere, sg_io_v4_vec using __u64 is same as sg_iovec
+ * using void *. For 64-bit kernel with 32-bit userspace, also no
+ * translation needed as userspace is forced to use __u64. Only in the
+ * all 32-bit case will sg_iovec use 32-bit pointers and hence we
+ * must shrink our 64-bit pointers down into it.
+ */
+ if (must_copy_iovec) {
+ int i;
+ iov = kmalloc(numvec * sizeof(*iov), GFP_KERNEL);
+ for (i=0; i<numvec; i++) {
+ iov[i].iov_base = (void __user *) vec[i].iov_base;
+ iov[i].iov_len = vec[i].iov_len;
+ }
+ } else {
+ iov = (struct sg_iovec *) vec;
+ }
+
+ bio = bio_map_user_iov(q, NULL, iov, numvec, write_to_vm);
+
+ if (must_copy_iovec)
+ kfree(iov);
+
+ if (IS_ERR(bio)) {
+ dprintk("bio_map_user_iov err\n");
+ return PTR_ERR(bio);
+ }
+
+ if (bio->bi_size != tot_len) {
+ dprintk("bio->bi_size %u != len %lu\n", bio->bi_size, tot_len);
+ bio_endio(bio, bio->bi_size, 0);
+ bio_unmap_user(bio);
+ return -EINVAL;
+ }
+
+ bio_get(bio);
+ blk_rq_bio_prep_bidi(q, rq, bio, dir);
+ rq->buffer = rq->data = NULL;
+ return 0;
+}
+
+/*
+ * Map either the in or out bufs.
+ */
+static int bsg_map_data(struct request_queue *q, struct request *rq,
+ __u64 uaddr, __u32 tot_len, __u32 numiov,
+ enum dma_data_direction dir)
+{
+ int ret;
+ void __user *ubuf = (void __user *) (unsigned long) uaddr;
+
+ if (numiov) {
+ struct sg_io_v4_vec *vec;
+ size_t len = numiov * sizeof(*vec);
+
+ vec = kmalloc(len, GFP_KERNEL);
+ if (vec == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ if (copy_from_user(vec, ubuf, len)) {
+ ret = -EFAULT;
+ kfree(vec);
+ goto out;
+ }
+ ret = bsg_map_user_iovec(q, rq, vec, numiov, tot_len, dir);
+ kfree(vec);
+ } else
+ ret = blk_rq_map_user(q, rq, ubuf, tot_len);
+
+out:
+ return ret;
+}
+
+/*
* map sg_io_v4 to a request.
*/
static struct request *
@@ -288,12 +377,10 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr)
request_queue_t *q = bd->queue;
struct request *rq;
int ret, rw = 0; /* shut up gcc */
- unsigned int dxfer_len;
- void *dxferp = NULL;
- dprintk("map hdr %llx/%u %llx/%u\n", (unsigned long long) hdr->dout_xferp,
- hdr->dout_xfer_len, (unsigned long long) hdr->din_xferp,
- hdr->din_xfer_len);
+ dprintk("map hdr %llx/%u %llx/%u\n",
+ (unsigned long long) hdr->dout_xferp, hdr->dout_xfer_len,
+ (unsigned long long) hdr->din_xferp, hdr->din_xfer_len);
ret = bsg_validate_sgv4_hdr(q, hdr, &rw);
if (ret)
@@ -305,29 +392,28 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr)
rq = blk_get_request(q, rw, GFP_KERNEL);
ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, test_bit(BSG_F_WRITE_PERM,
&bd->flags));
- if (ret) {
- blk_put_request(rq);
- return ERR_PTR(ret);
- }
+ if (ret)
+ goto errout;
if (hdr->dout_xfer_len) {
- dxfer_len = hdr->dout_xfer_len;
- dxferp = (void*)(unsigned long)hdr->dout_xferp;
+ ret = bsg_map_data(q, rq, hdr->dout_xferp, hdr->dout_xfer_len,
+ hdr->dout_iovec_count, DMA_TO_DEVICE);
+ if (ret)
+ goto errout;
} else if (hdr->din_xfer_len) {
- dxfer_len = hdr->din_xfer_len;
- dxferp = (void*)(unsigned long)hdr->din_xferp;
- } else
- dxfer_len = 0;
-
- if (dxfer_len) {
- ret = blk_rq_map_user(q, rq, dxferp, dxfer_len);
- if (ret) {
- dprintk("failed map at %d\n", ret);
- blk_put_request(rq);
- rq = ERR_PTR(ret);
- }
+ ret = bsg_map_data(q, rq, hdr->din_xferp, hdr->din_xfer_len,
+ hdr->din_iovec_count, DMA_FROM_DEVICE);
+ if (ret)
+ goto errout;
}
+ goto out;
+
+errout:
+ blk_put_request(rq);
+ rq = ERR_PTR(ret);
+
+out:
return rq;
}
diff --git a/include/linux/bsg.h b/include/linux/bsg.h
index 2154a6d..3580921 100644
--- a/include/linux/bsg.h
+++ b/include/linux/bsg.h
@@ -16,6 +16,8 @@ struct sg_io_v4 {
__u64 response; /* [i], [*o] {SCSI: (auto)sense data} */
/* "din_" for data in (from device); "dout_" for data out (to device) */
+ __u32 dout_iovec_count; /* [i] =0 -> "flat" data transfer */
+ __u32 din_iovec_count; /* [i] */
__u32 dout_xfer_len; /* [i] bytes to be transferred to device */
__u32 din_xfer_len; /* [i] bytes to be transferred from device */
__u64 dout_xferp; /* [i], [*i] */
@@ -40,6 +42,20 @@ struct sg_io_v4 {
__u32 padding;
};
+/*
+ * Vector of address/length pairs, used when dout_iovec_count (or din_)
+ * is non-zero. In that case, dout_xferp is a list of struct sg_io_v4_vec
+ * and dout_iovec_count is the number of entries in that list. dout_xfer_len
+ * is the total length of the list. Note the use of u64 instead of a
+ * native pointer to avoid compat issues, and padding to avoid structure
+ * alignment problems.
+ */
+struct sg_io_v4_vec {
+ __u64 iov_base;
+ __u32 iov_len;
+ __u32 __pad1;
+};
+
#ifdef __KERNEL__
#if defined(CONFIG_BLK_DEV_BSG)
--
1.5.0.2
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH] bsg: iovec support
2007-03-01 22:29 [PATCH] bsg: iovec support Pete Wyckoff
@ 2007-03-19 10:41 ` FUJITA Tomonori
2007-03-19 12:56 ` Douglas Gilbert
0 siblings, 1 reply; 10+ messages in thread
From: FUJITA Tomonori @ 2007-03-19 10:41 UTC (permalink / raw)
To: pw; +Cc: jens.axboe, fujita.tomonori, linux-scsi
From: Pete Wyckoff <pw@osc.edu>
Subject: [PATCH] bsg: iovec support
Date: Thu, 1 Mar 2007 17:29:08 -0500
> Support vectored IO as in SGv3. The iovec structure uses explicit
> sizes to avoid the need for compat conversion.
>
> Signed-off-by: Pete Wyckoff <pw@osc.edu>
> ---
>
> My application definitely can take advantage of scatter/gather IO,
> which is supported in sgv3 but not in the bsg implementation of sgv4.
> I understand Tomo's concerns about code bloat and the need for
> 32/64 compat translations, but this will make things much easier on
> users of bsg who read or write out of multiple buffers in a single
> SCSI operation.
(snip)
> + * Vector of address/length pairs, used when dout_iovec_count (or din_)
> + * is non-zero. In that case, dout_xferp is a list of struct sg_io_v4_vec
> + * and dout_iovec_count is the number of entries in that list. dout_xfer_len
> + * is the total length of the list. Note the use of u64 instead of a
> + * native pointer to avoid compat issues, and padding to avoid structure
> + * alignment problems.
> + */
> +struct sg_io_v4_vec {
> + __u64 iov_base;
> + __u32 iov_len;
> + __u32 __pad1;
> +};
I don't think that it's a good idea to add a new scatter/gather
structure and export it to user space.
bsg can support scatter/gather IO with ioctl (SG_IO) easily (I mean,
without adding ugly compat code to bsg.c). I guess that SG_IO doesn't
work for you because it works synchronously. However, all system calls
might work asynchronously in the future.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] bsg: iovec support
2007-03-19 10:41 ` FUJITA Tomonori
@ 2007-03-19 12:56 ` Douglas Gilbert
2007-03-19 13:34 ` FUJITA Tomonori
0 siblings, 1 reply; 10+ messages in thread
From: Douglas Gilbert @ 2007-03-19 12:56 UTC (permalink / raw)
To: FUJITA Tomonori; +Cc: pw, jens.axboe, linux-scsi
FUJITA Tomonori wrote:
> From: Pete Wyckoff <pw@osc.edu>
> Subject: [PATCH] bsg: iovec support
> Date: Thu, 1 Mar 2007 17:29:08 -0500
>
>> Support vectored IO as in SGv3. The iovec structure uses explicit
>> sizes to avoid the need for compat conversion.
>>
>> Signed-off-by: Pete Wyckoff <pw@osc.edu>
>> ---
>>
>> My application definitely can take advantage of scatter/gather IO,
>> which is supported in sgv3 but not in the bsg implementation of sgv4.
>> I understand Tomo's concerns about code bloat and the need for
>> 32/64 compat translations, but this will make things much easier on
>> users of bsg who read or write out of multiple buffers in a single
>> SCSI operation.
>
> (snip)
>
>> + * Vector of address/length pairs, used when dout_iovec_count (or din_)
>> + * is non-zero. In that case, dout_xferp is a list of struct sg_io_v4_vec
>> + * and dout_iovec_count is the number of entries in that list. dout_xfer_len
>> + * is the total length of the list. Note the use of u64 instead of a
>> + * native pointer to avoid compat issues, and padding to avoid structure
>> + * alignment problems.
>> + */
>> +struct sg_io_v4_vec {
>> + __u64 iov_base;
>> + __u32 iov_len;
>> + __u32 __pad1;
>> +};
>
> I don't think that it's a good idea to add a new scatter/gather
> structure and export it to user space.
User space scatter gather is not a new feature.
It is defined and works in sg v3.
It was also partially defined in sg v4 and dropped
out in the bsg implementation. I agree with Pete that
it should be put back.
Pete is also suggesting (shown above) a revised sg_io_vec
structure that uses a uint64_t for the pointer to simplify
32, 64 bit thunking.
> bsg can support scatter/gather IO with ioctl (SG_IO) easily (I mean,
> without adding ugly compat code to bsg.c). I guess that SG_IO doesn't
> work for you because it works synchronously. However, all system calls
> might work asynchronously in the future.
User space scatter gather is completely decoupled from
in-kernel scatter gather lists built for HBA DMA
engines. Same technique but at different levels.
Someone might user space scatter gather to efficiently
fetch several OSD objects implemented in a block device
as adjacent blocks
Doug Gilbert
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] bsg: iovec support
2007-03-19 12:56 ` Douglas Gilbert
@ 2007-03-19 13:34 ` FUJITA Tomonori
2007-03-19 14:04 ` Douglas Gilbert
0 siblings, 1 reply; 10+ messages in thread
From: FUJITA Tomonori @ 2007-03-19 13:34 UTC (permalink / raw)
To: dougg; +Cc: fujita.tomonori, pw, jens.axboe, linux-scsi
From: Douglas Gilbert <dougg@torque.net>
Subject: Re: [PATCH] bsg: iovec support
Date: Mon, 19 Mar 2007 08:56:39 -0400
> FUJITA Tomonori wrote:
> > From: Pete Wyckoff <pw@osc.edu>
> > Subject: [PATCH] bsg: iovec support
> > Date: Thu, 1 Mar 2007 17:29:08 -0500
> >
> >> Support vectored IO as in SGv3. The iovec structure uses explicit
> >> sizes to avoid the need for compat conversion.
> >>
> >> Signed-off-by: Pete Wyckoff <pw@osc.edu>
> >> ---
> >>
> >> My application definitely can take advantage of scatter/gather IO,
> >> which is supported in sgv3 but not in the bsg implementation of sgv4.
> >> I understand Tomo's concerns about code bloat and the need for
> >> 32/64 compat translations, but this will make things much easier on
> >> users of bsg who read or write out of multiple buffers in a single
> >> SCSI operation.
> >
> > (snip)
> >
> >> + * Vector of address/length pairs, used when dout_iovec_count (or din_)
> >> + * is non-zero. In that case, dout_xferp is a list of struct sg_io_v4_vec
> >> + * and dout_iovec_count is the number of entries in that list. dout_xfer_len
> >> + * is the total length of the list. Note the use of u64 instead of a
> >> + * native pointer to avoid compat issues, and padding to avoid structure
> >> + * alignment problems.
> >> + */
> >> +struct sg_io_v4_vec {
> >> + __u64 iov_base;
> >> + __u32 iov_len;
> >> + __u32 __pad1;
> >> +};
> >
> > I don't think that it's a good idea to add a new scatter/gather
> > structure and export it to user space.
>
> User space scatter gather is not a new feature.
> It is defined and works in sg v3.
>
> It was also partially defined in sg v4 and dropped
> out in the bsg implementation. I agree with Pete that
> it should be put back.
I'm fine with supporting iovec (though I don't like it).
> Pete is also suggesting (shown above) a revised sg_io_vec
> structure that uses a uint64_t for the pointer to simplify
> 32, 64 bit thunking.
All I said is that it would be better to use the existing compat
infrastructure (sg_build_iovec, sg_ioctl_trans, etc in
fs/compat_ioctl.c) instead of adding another compat code.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] bsg: iovec support
2007-03-19 13:34 ` FUJITA Tomonori
@ 2007-03-19 14:04 ` Douglas Gilbert
2007-03-19 14:21 ` FUJITA Tomonori
0 siblings, 1 reply; 10+ messages in thread
From: Douglas Gilbert @ 2007-03-19 14:04 UTC (permalink / raw)
To: FUJITA Tomonori; +Cc: pw, jens.axboe, linux-scsi
FUJITA Tomonori wrote:
> From: Douglas Gilbert <dougg@torque.net>
> Subject: Re: [PATCH] bsg: iovec support
> Date: Mon, 19 Mar 2007 08:56:39 -0400
>
>> FUJITA Tomonori wrote:
>>> From: Pete Wyckoff <pw@osc.edu>
>>> Subject: [PATCH] bsg: iovec support
>>> Date: Thu, 1 Mar 2007 17:29:08 -0500
>>>
>>>> Support vectored IO as in SGv3. The iovec structure uses explicit
>>>> sizes to avoid the need for compat conversion.
>>>>
>>>> Signed-off-by: Pete Wyckoff <pw@osc.edu>
>>>> ---
>>>>
>>>> My application definitely can take advantage of scatter/gather IO,
>>>> which is supported in sgv3 but not in the bsg implementation of sgv4.
>>>> I understand Tomo's concerns about code bloat and the need for
>>>> 32/64 compat translations, but this will make things much easier on
>>>> users of bsg who read or write out of multiple buffers in a single
>>>> SCSI operation.
>>> (snip)
>>>
>>>> + * Vector of address/length pairs, used when dout_iovec_count (or din_)
>>>> + * is non-zero. In that case, dout_xferp is a list of struct sg_io_v4_vec
>>>> + * and dout_iovec_count is the number of entries in that list. dout_xfer_len
>>>> + * is the total length of the list. Note the use of u64 instead of a
>>>> + * native pointer to avoid compat issues, and padding to avoid structure
>>>> + * alignment problems.
>>>> + */
>>>> +struct sg_io_v4_vec {
>>>> + __u64 iov_base;
>>>> + __u32 iov_len;
>>>> + __u32 __pad1;
>>>> +};
>>> I don't think that it's a good idea to add a new scatter/gather
>>> structure and export it to user space.
>> User space scatter gather is not a new feature.
>> It is defined and works in sg v3.
>>
>> It was also partially defined in sg v4 and dropped
>> out in the bsg implementation. I agree with Pete that
>> it should be put back.
>
> I'm fine with supporting iovec (though I don't like it).
Tomo,
You don't need to support it if you don't want to.
So if din_iovec_count or dout_iovec_count are other
than zero, bsg can return an error.
By dropping those fields, other implementations are
precluded from supporting that feature.
>> Pete is also suggesting (shown above) a revised sg_io_vec
>> structure that uses a uint64_t for the pointer to simplify
>> 32, 64 bit thunking.
>
> All I said is that it would be better to use the existing compat
> infrastructure (sg_build_iovec, sg_ioctl_trans, etc in
> fs/compat_ioctl.c) instead of adding another compat code.
Won't sg v4 make this even a bigger mess, at least
initially anyway?
Doug Gilbert
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] bsg: iovec support
2007-03-19 14:04 ` Douglas Gilbert
@ 2007-03-19 14:21 ` FUJITA Tomonori
2007-03-19 18:07 ` [PATCH v2] bsg: iovec support with compat Pete Wyckoff
2007-03-19 18:13 ` [PATCH v2] bsg: iovec support with explicit u64 Pete Wyckoff
0 siblings, 2 replies; 10+ messages in thread
From: FUJITA Tomonori @ 2007-03-19 14:21 UTC (permalink / raw)
To: dougg; +Cc: fujita.tomonori, pw, jens.axboe, linux-scsi
From: Douglas Gilbert <dougg@torque.net>
Subject: Re: [PATCH] bsg: iovec support
Date: Mon, 19 Mar 2007 10:04:45 -0400
> >> Pete is also suggesting (shown above) a revised sg_io_vec
> >> structure that uses a uint64_t for the pointer to simplify
> >> 32, 64 bit thunking.
> >
> > All I said is that it would be better to use the existing compat
> > infrastructure (sg_build_iovec, sg_ioctl_trans, etc in
> > fs/compat_ioctl.c) instead of adding another compat code.
>
> Won't sg v4 make this even a bigger mess, at least
> initially anyway?
Inventing a new iovec structure like sg_io_v4_vec and something like
blk_rq_map_user_iov_sgv4 sounds a mess.
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH v2] bsg: iovec support with compat
2007-03-19 14:21 ` FUJITA Tomonori
@ 2007-03-19 18:07 ` Pete Wyckoff
2007-03-19 18:22 ` FUJITA Tomonori
2007-03-19 18:13 ` [PATCH v2] bsg: iovec support with explicit u64 Pete Wyckoff
1 sibling, 1 reply; 10+ messages in thread
From: Pete Wyckoff @ 2007-03-19 18:07 UTC (permalink / raw)
To: FUJITA Tomonori; +Cc: dougg, jens.axboe, linux-scsi
fujita.tomonori@lab.ntt.co.jp wrote on Mon, 19 Mar 2007 23:21 +0900:
> From: Douglas Gilbert <dougg@torque.net>
> Subject: Re: [PATCH] bsg: iovec support
> Date: Mon, 19 Mar 2007 10:04:45 -0400
>
> > >> Pete is also suggesting (shown above) a revised sg_io_vec
> > >> structure that uses a uint64_t for the pointer to simplify
> > >> 32, 64 bit thunking.
> > >
> > > All I said is that it would be better to use the existing compat
> > > infrastructure (sg_build_iovec, sg_ioctl_trans, etc in
> > > fs/compat_ioctl.c) instead of adding another compat code.
> >
> > Won't sg v4 make this even a bigger mess, at least
> > initially anyway?
>
> Inventing a new iovec structure like sg_io_v4_vec and something like
> blk_rq_map_user_iov_sgv4 sounds a mess.
It is sort of a mess to have new blk mapping routine for a new iovec
type. But I very much did not want to introduce the need for
another compat conversion. But, I took a look at how it would be to
pass the existing sg_iovec into bsg.
Adding a new bsg_write_compat would be bad. There is lots of
parsing and setup before we even get to the possibility of iovec
data mapping. Reusing just sg_build_ioctl from compat_ioctl.c is
also suboptimal as this function is built around the idea of a
contiguous sg_io_hdr + iovec in userspace. The function is small
enough that splitting it into a generic + ioctl-specific part would
add too much abstraction to be worth it.
Here is the patch to use sg_iovec, with its userspace void * and
size_t, and the CONFIG_COMPAT code to fixup 32-bit userspace. I'm
not fond of having __u64 for non-iovec buffer representations, and
void * for iovec buffer representations, but it saves having to
build an sg_iovec just to call into the existing blk_rq_map_user_iov.
Comments?
-- Pete
Support vectored IO as in SGv3. This uses the standard struct sg_iovec
(same as struct iovec) and converts if necessary for CONFIG_COMPAT.
Signed-off-by: Pete Wyckoff <pw@osc.edu>
---
block/bsg.c | 105 +++++++++++++++++++++++++++++++++++++++-----------
include/linux/bsg.h | 2 +
2 files changed, 84 insertions(+), 23 deletions(-)
diff --git a/block/bsg.c b/block/bsg.c
index f1ea258..5a31062 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -25,6 +25,7 @@
#include <linux/percpu.h>
#include <linux/uio.h>
#include <linux/bsg.h>
+#include <linux/compat.h>
#include <scsi/scsi.h>
#include <scsi/scsi_ioctl.h>
@@ -280,6 +281,65 @@ bsg_validate_sgv4_hdr(request_queue_t *q, struct sg_io_v4 *hdr, int *rw)
}
/*
+ * Map either the in or out bufs. Handle compat conversion for
+ * iovec too.
+ */
+static int bsg_map_data(struct request_queue *q, struct request *rq,
+ __u64 uaddr, __u32 tot_len, __u32 numiov,
+ enum dma_data_direction dir)
+{
+ int ret;
+ void __user *ubuf = (void __user *) (unsigned long) uaddr;
+ struct sg_iovec fastiov[8], *iov = fastiov;
+
+ if (numiov == 0) {
+ ret = blk_rq_map_user(q, rq, ubuf, tot_len);
+ goto out;
+ }
+
+ if (numiov >= ARRAY_SIZE(fastiov)) {
+ iov = kmalloc(numiov * sizeof(*iov), GFP_KERNEL);
+ if (iov == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+
+#ifdef CONFIG_COMPAT
+ if (sizeof(struct compat_iovec) != sizeof(*iov)) {
+ struct compat_iovec __user *compat_iov = ubuf;
+ int i;
+ for (i=0; i<numiov; i++) {
+ compat_uptr_t cbase;
+ compat_size_t clen;
+ if (get_user(cbase, &compat_iov[i].iov_base)
+ || get_user(clen, &compat_iov[i].iov_len)) {
+ ret = -EFAULT;
+ goto outfree;
+ }
+ iov[i].iov_base = compat_ptr(cbase);
+ iov[i].iov_len = clen;
+ }
+ goto map;
+ }
+#endif
+
+ if (copy_from_user(iov, ubuf, numiov * sizeof(*iov))) {
+ ret = -EFAULT;
+ goto outfree;
+ }
+map:
+ ret = blk_rq_map_user_iov(q, rq, iov, numiov, tot_len);
+
+outfree:
+ if (iov != fastiov)
+ kfree(iov);
+
+out:
+ return ret;
+}
+
+/*
* map sg_io_v4 to a request.
*/
static struct request *
@@ -288,12 +348,12 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr)
request_queue_t *q = bd->queue;
struct request *rq;
int ret, rw = 0; /* shut up gcc */
- unsigned int dxfer_len;
- void *dxferp = NULL;
- dprintk("map hdr %llx/%u %llx/%u\n", (unsigned long long) hdr->dout_xferp,
- hdr->dout_xfer_len, (unsigned long long) hdr->din_xferp,
- hdr->din_xfer_len);
+ dprintk("map hdr %llx/%u/%u %llx/%u/%u\n",
+ (unsigned long long) hdr->dout_xferp, hdr->dout_xfer_len,
+ hdr->dout_iovec_count,
+ (unsigned long long) hdr->din_xferp, hdr->din_xfer_len,
+ hdr->din_iovec_count);
ret = bsg_validate_sgv4_hdr(q, hdr, &rw);
if (ret)
@@ -305,29 +365,28 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr)
rq = blk_get_request(q, rw, GFP_KERNEL);
ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, test_bit(BSG_F_WRITE_PERM,
&bd->flags));
- if (ret) {
- blk_put_request(rq);
- return ERR_PTR(ret);
- }
+ if (ret)
+ goto errout;
if (hdr->dout_xfer_len) {
- dxfer_len = hdr->dout_xfer_len;
- dxferp = (void*)(unsigned long)hdr->dout_xferp;
+ ret = bsg_map_data(q, rq, hdr->dout_xferp, hdr->dout_xfer_len,
+ hdr->dout_iovec_count, DMA_TO_DEVICE);
+ if (ret)
+ goto errout;
} else if (hdr->din_xfer_len) {
- dxfer_len = hdr->din_xfer_len;
- dxferp = (void*)(unsigned long)hdr->din_xferp;
- } else
- dxfer_len = 0;
-
- if (dxfer_len) {
- ret = blk_rq_map_user(q, rq, dxferp, dxfer_len);
- if (ret) {
- dprintk("failed map at %d\n", ret);
- blk_put_request(rq);
- rq = ERR_PTR(ret);
- }
+ ret = bsg_map_data(q, rq, hdr->din_xferp, hdr->din_xfer_len,
+ hdr->din_iovec_count, DMA_FROM_DEVICE);
+ if (ret)
+ goto errout;
}
+ goto out;
+
+errout:
+ blk_put_request(rq);
+ rq = ERR_PTR(ret);
+
+out:
return rq;
}
diff --git a/include/linux/bsg.h b/include/linux/bsg.h
index 51152cb..6bd7311 100644
--- a/include/linux/bsg.h
+++ b/include/linux/bsg.h
@@ -16,6 +16,8 @@ struct sg_io_v4 {
__u64 response; /* [i], [*o] {SCSI: (auto)sense data} */
/* "din_" for data in (from device); "dout_" for data out (to device) */
+ __u32 dout_iovec_count; /* [i] =0 -> "flat" data transfer */
+ __u32 din_iovec_count; /* [i] >0 -> dxfer is struct sg_iovec * */
__u32 dout_xfer_len; /* [i] bytes to be transferred to device */
__u32 din_xfer_len; /* [i] bytes to be transferred from device */
__u64 dout_xferp; /* [i], [*i] */
--
1.5.0.3
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH v2] bsg: iovec support with explicit u64
2007-03-19 14:21 ` FUJITA Tomonori
2007-03-19 18:07 ` [PATCH v2] bsg: iovec support with compat Pete Wyckoff
@ 2007-03-19 18:13 ` Pete Wyckoff
1 sibling, 0 replies; 10+ messages in thread
From: Pete Wyckoff @ 2007-03-19 18:13 UTC (permalink / raw)
To: FUJITA Tomonori; +Cc: dougg, jens.axboe, linux-scsi
pw@osc.edu wrote on Mon, 19 Mar 2007 14:07 -0400:
> Here is the patch to use sg_iovec, with its userspace void * and
> size_t, and the CONFIG_COMPAT code to fixup 32-bit userspace. I'm
> not fond of having __u64 for non-iovec buffer representations, and
> void * for iovec buffer representations, but it saves having to
> build an sg_iovec just to call into the existing blk_rq_map_user_iov.
Just for comparison, a cleaned up version of the earlier patch that
does not require compat conversion. Instead, use a new struct
sg_v4_iovec with explicit u64/u32 representation for user pointer
and length.
Perhaps later we might change blk_rq_map_user_iov to take
sg_v4_iovec and let sgv3 convert its sg_iovec into that.
-- Pete
Support vectored IO as in SGv3. The iovec structure uses explicit
sizes to avoid the need for compat conversion.
Signed-off-by: Pete Wyckoff <pw@osc.edu>
---
block/bsg.c | 95 ++++++++++++++++++++++++++++++++++++++------------
include/linux/bsg.h | 14 +++++++
2 files changed, 86 insertions(+), 23 deletions(-)
diff --git a/block/bsg.c b/block/bsg.c
index f1ea258..e334f75 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -280,6 +280,56 @@ bsg_validate_sgv4_hdr(request_queue_t *q, struct sg_io_v4 *hdr, int *rw)
}
/*
+ * Map either the in or out bufs. Convert sg_v4_iovec to sg_iovec
+ * to use existing blk_rq_map infrastructure. We could do a copy-in-place
+ * conversion on 64-bit kernels, just by zeroing the top half of sg_iovec's
+ * iov_len, but do not, for simplicity.
+ */
+static int bsg_map_data(struct request_queue *q, struct request *rq,
+ __u64 uaddr, __u32 tot_len, __u32 numiov,
+ enum dma_data_direction dir)
+{
+ int i, ret;
+ void __user *ubuf = (void __user *) (unsigned long) uaddr;
+ struct sg_iovec fastiov[8], *iov = fastiov;
+ struct sg_v4_iovec v4_fastiov[8], *v4_iov = v4_fastiov;
+
+ if (numiov == 0) {
+ ret = blk_rq_map_user(q, rq, ubuf, tot_len);
+ goto out;
+ }
+
+ if (numiov >= ARRAY_SIZE(fastiov)) {
+ iov = kmalloc(numiov * (sizeof(*iov) + sizeof(*v4_iov)),
+ GFP_KERNEL);
+ if (iov == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ v4_iov = (void *) &iov[numiov];
+ }
+
+ if (copy_from_user(v4_iov, ubuf, numiov * sizeof(*v4_iov))) {
+ ret = -EFAULT;
+ goto outfree;
+ }
+
+ for (i=0; i<numiov; i++) {
+ iov[i].iov_base = (void *)(unsigned long) v4_iov[i].iov_base;
+ iov[i].iov_len = v4_iov[i].iov_len;
+ }
+
+ ret = blk_rq_map_user_iov(q, rq, iov, numiov, tot_len);
+
+outfree:
+ if (iov != fastiov)
+ kfree(iov);
+
+out:
+ return ret;
+}
+
+/*
* map sg_io_v4 to a request.
*/
static struct request *
@@ -288,12 +338,12 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr)
request_queue_t *q = bd->queue;
struct request *rq;
int ret, rw = 0; /* shut up gcc */
- unsigned int dxfer_len;
- void *dxferp = NULL;
- dprintk("map hdr %llx/%u %llx/%u\n", (unsigned long long) hdr->dout_xferp,
- hdr->dout_xfer_len, (unsigned long long) hdr->din_xferp,
- hdr->din_xfer_len);
+ dprintk("map hdr %llx/%u/%u %llx/%u/%u\n",
+ (unsigned long long) hdr->dout_xferp, hdr->dout_xfer_len,
+ hdr->dout_iovec_count,
+ (unsigned long long) hdr->din_xferp, hdr->din_xfer_len,
+ hdr->din_iovec_count);
ret = bsg_validate_sgv4_hdr(q, hdr, &rw);
if (ret)
@@ -305,29 +355,28 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr)
rq = blk_get_request(q, rw, GFP_KERNEL);
ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, test_bit(BSG_F_WRITE_PERM,
&bd->flags));
- if (ret) {
- blk_put_request(rq);
- return ERR_PTR(ret);
- }
+ if (ret)
+ goto errout;
if (hdr->dout_xfer_len) {
- dxfer_len = hdr->dout_xfer_len;
- dxferp = (void*)(unsigned long)hdr->dout_xferp;
+ ret = bsg_map_data(q, rq, hdr->dout_xferp, hdr->dout_xfer_len,
+ hdr->dout_iovec_count, DMA_TO_DEVICE);
+ if (ret)
+ goto errout;
} else if (hdr->din_xfer_len) {
- dxfer_len = hdr->din_xfer_len;
- dxferp = (void*)(unsigned long)hdr->din_xferp;
- } else
- dxfer_len = 0;
-
- if (dxfer_len) {
- ret = blk_rq_map_user(q, rq, dxferp, dxfer_len);
- if (ret) {
- dprintk("failed map at %d\n", ret);
- blk_put_request(rq);
- rq = ERR_PTR(ret);
- }
+ ret = bsg_map_data(q, rq, hdr->din_xferp, hdr->din_xfer_len,
+ hdr->din_iovec_count, DMA_FROM_DEVICE);
+ if (ret)
+ goto errout;
}
+ goto out;
+
+errout:
+ blk_put_request(rq);
+ rq = ERR_PTR(ret);
+
+out:
return rq;
}
diff --git a/include/linux/bsg.h b/include/linux/bsg.h
index 51152cb..7c11e67 100644
--- a/include/linux/bsg.h
+++ b/include/linux/bsg.h
@@ -16,6 +16,8 @@ struct sg_io_v4 {
__u64 response; /* [i], [*o] {SCSI: (auto)sense data} */
/* "din_" for data in (from device); "dout_" for data out (to device) */
+ __u32 dout_iovec_count; /* [i] =0 -> "flat" data transfer */
+ __u32 din_iovec_count; /* [i] >0 -> dxfer is struct sg_v4_iovec * */
__u32 dout_xfer_len; /* [i] bytes to be transferred to device */
__u32 din_xfer_len; /* [i] bytes to be transferred from device */
__u64 dout_xferp; /* [i], [*i] */
@@ -40,6 +42,18 @@ struct sg_io_v4 {
__u32 padding;
};
+/*
+ * Vector of address/length pairs, used when dout_iovec_count (or din_)
+ * is non-zero. In that case, dout_xferp is a list of struct sg_v4_iovec
+ * and dout_iovec_count is the number of entries in that list. dout_xfer_len
+ * is the total length of the list.
+ */
+struct sg_v4_iovec {
+ __u64 iov_base;
+ __u32 iov_len;
+ __u32 __pad1;
+};
+
#ifdef __KERNEL__
#if defined(CONFIG_BLK_DEV_BSG)
--
1.5.0.3
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH v2] bsg: iovec support with compat
2007-03-19 18:07 ` [PATCH v2] bsg: iovec support with compat Pete Wyckoff
@ 2007-03-19 18:22 ` FUJITA Tomonori
2007-03-21 15:34 ` Pete Wyckoff
0 siblings, 1 reply; 10+ messages in thread
From: FUJITA Tomonori @ 2007-03-19 18:22 UTC (permalink / raw)
To: pw; +Cc: fujita.tomonori, dougg, jens.axboe, linux-scsi
From: Pete Wyckoff <pw@osc.edu>
Subject: [PATCH v2] bsg: iovec support with compat
Date: Mon, 19 Mar 2007 14:07:27 -0400
> fujita.tomonori@lab.ntt.co.jp wrote on Mon, 19 Mar 2007 23:21 +0900:
> > From: Douglas Gilbert <dougg@torque.net>
> > Subject: Re: [PATCH] bsg: iovec support
> > Date: Mon, 19 Mar 2007 10:04:45 -0400
> >
> > > >> Pete is also suggesting (shown above) a revised sg_io_vec
> > > >> structure that uses a uint64_t for the pointer to simplify
> > > >> 32, 64 bit thunking.
> > > >
> > > > All I said is that it would be better to use the existing compat
> > > > infrastructure (sg_build_iovec, sg_ioctl_trans, etc in
> > > > fs/compat_ioctl.c) instead of adding another compat code.
> > >
> > > Won't sg v4 make this even a bigger mess, at least
> > > initially anyway?
> >
> > Inventing a new iovec structure like sg_io_v4_vec and something like
> > blk_rq_map_user_iov_sgv4 sounds a mess.
>
> It is sort of a mess to have new blk mapping routine for a new iovec
> type. But I very much did not want to introduce the need for
> another compat conversion. But, I took a look at how it would be to
> pass the existing sg_iovec into bsg.
>
> Adding a new bsg_write_compat would be bad. There is lots of
> parsing and setup before we even get to the possibility of iovec
> data mapping. Reusing just sg_build_ioctl from compat_ioctl.c is
> also suboptimal as this function is built around the idea of a
> contiguous sg_io_hdr + iovec in userspace. The function is small
> enough that splitting it into a generic + ioctl-specific part would
> add too much abstraction to be worth it.
>
> Here is the patch to use sg_iovec, with its userspace void * and
> size_t, and the CONFIG_COMPAT code to fixup 32-bit userspace. I'm
> not fond of having __u64 for non-iovec buffer representations, and
> void * for iovec buffer representations, but it saves having to
> build an sg_iovec just to call into the existing blk_rq_map_user_iov.
>
> Comments?
The compat code should not go to bsg.c.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v2] bsg: iovec support with compat
2007-03-19 18:22 ` FUJITA Tomonori
@ 2007-03-21 15:34 ` Pete Wyckoff
0 siblings, 0 replies; 10+ messages in thread
From: Pete Wyckoff @ 2007-03-21 15:34 UTC (permalink / raw)
To: FUJITA Tomonori; +Cc: dougg, jens.axboe, linux-scsi
fujita.tomonori@lab.ntt.co.jp wrote on Tue, 20 Mar 2007 03:22 +0900:
> From: Pete Wyckoff <pw@osc.edu>
> Subject: [PATCH v2] bsg: iovec support with compat
> Date: Mon, 19 Mar 2007 14:07:27 -0400
>
> > Adding a new bsg_write_compat would be bad. There is lots of
> > parsing and setup before we even get to the possibility of iovec
> > data mapping. Reusing just sg_build_ioctl from compat_ioctl.c is
> > also suboptimal as this function is built around the idea of a
> > contiguous sg_io_hdr + iovec in userspace. The function is small
> > enough that splitting it into a generic + ioctl-specific part would
> > add too much abstraction to be worth it.
> >
> > Here is the patch to use sg_iovec, with its userspace void * and
> > size_t, and the CONFIG_COMPAT code to fixup 32-bit userspace. I'm
> > not fond of having __u64 for non-iovec buffer representations, and
> > void * for iovec buffer representations, but it saves having to
> > build an sg_iovec just to call into the existing blk_rq_map_user_iov.
> >
> > Comments?
>
> The compat code should not go to bsg.c.
Agreed. I dislike the entire approach of reusing sg_iovec in bsg,
but thought I'd work it through to see what it is like.
The compat code cannot go in compat_ioctl.c or similar, as explained
above. And it seems wrong to leave it out and silently break on
compat-requiring setups.
Using sg_iovec with bsg is bad from a user perspective too.
sg_iovec pointers are (void *). sg_io_v4 pointers are __u64.
sg_iovec lengths are size_t (64-bit). sg_io_v4 lengths are __u32.
Please consider instead the original proposal of a new iovec
type for bsg. It is less complex than using existing sg_iovec.
http://article.gmane.org/gmane.linux.scsi/30461
There is exactly one user of blk_rq_map_user_iov() in the tree:
sg_io() in scsi_ioctl.c. I could convert that to sg_v4_iovec now
too. The helper function bio_map_user_iov() is only used by
blk_rq_map_user_iov() and can also be fixed. The only use we have
for struct sg_iovec is this one sg_io() caller, and sg.c. Let's
migrate to sg_v4_iovec at the same time we switch from sgv3 to sgv4.
-- Pete
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2007-03-21 15:34 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-03-01 22:29 [PATCH] bsg: iovec support Pete Wyckoff
2007-03-19 10:41 ` FUJITA Tomonori
2007-03-19 12:56 ` Douglas Gilbert
2007-03-19 13:34 ` FUJITA Tomonori
2007-03-19 14:04 ` Douglas Gilbert
2007-03-19 14:21 ` FUJITA Tomonori
2007-03-19 18:07 ` [PATCH v2] bsg: iovec support with compat Pete Wyckoff
2007-03-19 18:22 ` FUJITA Tomonori
2007-03-21 15:34 ` Pete Wyckoff
2007-03-19 18:13 ` [PATCH v2] bsg: iovec support with explicit u64 Pete Wyckoff
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).