* [PATCH] bsg: iovec support
@ 2007-03-01 22:29 Pete Wyckoff
2007-03-19 10:41 ` FUJITA Tomonori
0 siblings, 1 reply; 10+ messages in thread
From: Pete Wyckoff @ 2007-03-01 22:29 UTC (permalink / raw)
To: Jens Axboe; +Cc: FUJITA Tomonori, linux-scsi
Support vectored IO as in SGv3. The iovec structure uses explicit
sizes to avoid the need for compat conversion.
Signed-off-by: Pete Wyckoff <pw@osc.edu>
---
My application definitely can take advantage of scatter/gather IO,
which is supported in sgv3 but not in the bsg implementation of sgv4.
I understand Tomo's concerns about code bloat and the need for
32/64 compat translations, but this will make things much easier on
users of bsg who read or write out of multiple buffers in a single
SCSI operation.
Clearly we want to avoid doing the compat work that sg.c has to do
now, so I went with __u64 for the addresses in the structures that
userspace sees. But to interface with existing bio structures, that
must be converted back to 32-bit pointers in sg_iovec (only on
32-bit architectures). In the long run, maybe we should have a
bio_map_user_iov() that works on the constant-sized sg_io_v4_vec
proposed here?
-- Pete
block/bsg.c | 132 ++++++++++++++++++++++++++++++++++++++++++---------
include/linux/bsg.h | 16 ++++++
2 files changed, 125 insertions(+), 23 deletions(-)
diff --git a/block/bsg.c b/block/bsg.c
index c85d961..8e3d6c7 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -280,6 +280,95 @@ bsg_validate_sgv4_hdr(request_queue_t *q, struct sg_io_v4 *hdr, int *rw)
}
/*
+ * Sits around blk_rq_map_user_iov so we can use an iovec type that
+ * does not require compat manipulations. For now we just clumsily
+ * remap the entire iovec if the types do not match. Later consider
+ * changing the bio map function.
+ */
+static int bsg_map_user_iovec(request_queue_t *q, struct request *rq,
+ struct sg_io_v4_vec *vec, int numvec,
+ size_t tot_len, enum dma_data_direction dir)
+{
+ struct bio *bio;
+ struct sg_iovec *iov;
+ int write_to_vm = (dir == DMA_FROM_DEVICE ? 1 : 0);
+ int must_copy_iovec = (sizeof(*iov) != sizeof(*vec));
+
+ /*
+ * For 64-bit everywhere, sg_io_v4_vec using __u64 is same as sg_iovec
+ * using void *. For 64-bit kernel with 32-bit userspace, also no
+ * translation needed as userspace is forced to use __u64. Only in the
+ * all 32-bit case will sg_iovec use 32-bit pointers and hence we
+ * must shrink our 64-bit pointers down into it.
+ */
+ if (must_copy_iovec) {
+ int i;
+ iov = kmalloc(numvec * sizeof(*iov), GFP_KERNEL);
+ for (i=0; i<numvec; i++) {
+ iov[i].iov_base = (void __user *) vec[i].iov_base;
+ iov[i].iov_len = vec[i].iov_len;
+ }
+ } else {
+ iov = (struct sg_iovec *) vec;
+ }
+
+ bio = bio_map_user_iov(q, NULL, iov, numvec, write_to_vm);
+
+ if (must_copy_iovec)
+ kfree(iov);
+
+ if (IS_ERR(bio)) {
+ dprintk("bio_map_user_iov err\n");
+ return PTR_ERR(bio);
+ }
+
+ if (bio->bi_size != tot_len) {
+ dprintk("bio->bi_size %u != len %lu\n", bio->bi_size, tot_len);
+ bio_endio(bio, bio->bi_size, 0);
+ bio_unmap_user(bio);
+ return -EINVAL;
+ }
+
+ bio_get(bio);
+ blk_rq_bio_prep_bidi(q, rq, bio, dir);
+ rq->buffer = rq->data = NULL;
+ return 0;
+}
+
+/*
+ * Map either the in or out bufs.
+ */
+static int bsg_map_data(struct request_queue *q, struct request *rq,
+ __u64 uaddr, __u32 tot_len, __u32 numiov,
+ enum dma_data_direction dir)
+{
+ int ret;
+ void __user *ubuf = (void __user *) (unsigned long) uaddr;
+
+ if (numiov) {
+ struct sg_io_v4_vec *vec;
+ size_t len = numiov * sizeof(*vec);
+
+ vec = kmalloc(len, GFP_KERNEL);
+ if (vec == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ if (copy_from_user(vec, ubuf, len)) {
+ ret = -EFAULT;
+ kfree(vec);
+ goto out;
+ }
+ ret = bsg_map_user_iovec(q, rq, vec, numiov, tot_len, dir);
+ kfree(vec);
+ } else
+ ret = blk_rq_map_user(q, rq, ubuf, tot_len);
+
+out:
+ return ret;
+}
+
+/*
* map sg_io_v4 to a request.
*/
static struct request *
@@ -288,12 +377,10 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr)
request_queue_t *q = bd->queue;
struct request *rq;
int ret, rw = 0; /* shut up gcc */
- unsigned int dxfer_len;
- void *dxferp = NULL;
- dprintk("map hdr %llx/%u %llx/%u\n", (unsigned long long) hdr->dout_xferp,
- hdr->dout_xfer_len, (unsigned long long) hdr->din_xferp,
- hdr->din_xfer_len);
+ dprintk("map hdr %llx/%u %llx/%u\n",
+ (unsigned long long) hdr->dout_xferp, hdr->dout_xfer_len,
+ (unsigned long long) hdr->din_xferp, hdr->din_xfer_len);
ret = bsg_validate_sgv4_hdr(q, hdr, &rw);
if (ret)
@@ -305,29 +392,28 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr)
rq = blk_get_request(q, rw, GFP_KERNEL);
ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, test_bit(BSG_F_WRITE_PERM,
&bd->flags));
- if (ret) {
- blk_put_request(rq);
- return ERR_PTR(ret);
- }
+ if (ret)
+ goto errout;
if (hdr->dout_xfer_len) {
- dxfer_len = hdr->dout_xfer_len;
- dxferp = (void*)(unsigned long)hdr->dout_xferp;
+ ret = bsg_map_data(q, rq, hdr->dout_xferp, hdr->dout_xfer_len,
+ hdr->dout_iovec_count, DMA_TO_DEVICE);
+ if (ret)
+ goto errout;
} else if (hdr->din_xfer_len) {
- dxfer_len = hdr->din_xfer_len;
- dxferp = (void*)(unsigned long)hdr->din_xferp;
- } else
- dxfer_len = 0;
-
- if (dxfer_len) {
- ret = blk_rq_map_user(q, rq, dxferp, dxfer_len);
- if (ret) {
- dprintk("failed map at %d\n", ret);
- blk_put_request(rq);
- rq = ERR_PTR(ret);
- }
+ ret = bsg_map_data(q, rq, hdr->din_xferp, hdr->din_xfer_len,
+ hdr->din_iovec_count, DMA_FROM_DEVICE);
+ if (ret)
+ goto errout;
}
+ goto out;
+
+errout:
+ blk_put_request(rq);
+ rq = ERR_PTR(ret);
+
+out:
return rq;
}
diff --git a/include/linux/bsg.h b/include/linux/bsg.h
index 2154a6d..3580921 100644
--- a/include/linux/bsg.h
+++ b/include/linux/bsg.h
@@ -16,6 +16,8 @@ struct sg_io_v4 {
__u64 response; /* [i], [*o] {SCSI: (auto)sense data} */
/* "din_" for data in (from device); "dout_" for data out (to device) */
+ __u32 dout_iovec_count; /* [i] =0 -> "flat" data transfer */
+ __u32 din_iovec_count; /* [i] */
__u32 dout_xfer_len; /* [i] bytes to be transferred to device */
__u32 din_xfer_len; /* [i] bytes to be transferred from device */
__u64 dout_xferp; /* [i], [*i] */
@@ -40,6 +42,20 @@ struct sg_io_v4 {
__u32 padding;
};
+/*
+ * Vector of address/length pairs, used when dout_iovec_count (or din_)
+ * is non-zero. In that case, dout_xferp is a list of struct sg_io_v4_vec
+ * and dout_iovec_count is the number of entries in that list. dout_xfer_len
+ * is the total length of the list. Note the use of u64 instead of a
+ * native pointer to avoid compat issues, and padding to avoid structure
+ * alignment problems.
+ */
+struct sg_io_v4_vec {
+ __u64 iov_base;
+ __u32 iov_len;
+ __u32 __pad1;
+};
+
#ifdef __KERNEL__
#if defined(CONFIG_BLK_DEV_BSG)
--
1.5.0.2
^ permalink raw reply related [flat|nested] 10+ messages in thread* Re: [PATCH] bsg: iovec support
2007-03-01 22:29 [PATCH] bsg: iovec support Pete Wyckoff
@ 2007-03-19 10:41 ` FUJITA Tomonori
2007-03-19 12:56 ` Douglas Gilbert
0 siblings, 1 reply; 10+ messages in thread
From: FUJITA Tomonori @ 2007-03-19 10:41 UTC (permalink / raw)
To: pw; +Cc: jens.axboe, fujita.tomonori, linux-scsi
From: Pete Wyckoff <pw@osc.edu>
Subject: [PATCH] bsg: iovec support
Date: Thu, 1 Mar 2007 17:29:08 -0500
> Support vectored IO as in SGv3. The iovec structure uses explicit
> sizes to avoid the need for compat conversion.
>
> Signed-off-by: Pete Wyckoff <pw@osc.edu>
> ---
>
> My application definitely can take advantage of scatter/gather IO,
> which is supported in sgv3 but not in the bsg implementation of sgv4.
> I understand Tomo's concerns about code bloat and the need for
> 32/64 compat translations, but this will make things much easier on
> users of bsg who read or write out of multiple buffers in a single
> SCSI operation.
(snip)
> + * Vector of address/length pairs, used when dout_iovec_count (or din_)
> + * is non-zero. In that case, dout_xferp is a list of struct sg_io_v4_vec
> + * and dout_iovec_count is the number of entries in that list. dout_xfer_len
> + * is the total length of the list. Note the use of u64 instead of a
> + * native pointer to avoid compat issues, and padding to avoid structure
> + * alignment problems.
> + */
> +struct sg_io_v4_vec {
> + __u64 iov_base;
> + __u32 iov_len;
> + __u32 __pad1;
> +};
I don't think that it's a good idea to add a new scatter/gather
structure and export it to user space.
bsg can support scatter/gather IO with ioctl (SG_IO) easily (I mean,
without adding ugly compat code to bsg.c). I guess that SG_IO doesn't
work for you because it works synchronously. However, all system calls
might work asynchronously in the future.
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [PATCH] bsg: iovec support
2007-03-19 10:41 ` FUJITA Tomonori
@ 2007-03-19 12:56 ` Douglas Gilbert
2007-03-19 13:34 ` FUJITA Tomonori
0 siblings, 1 reply; 10+ messages in thread
From: Douglas Gilbert @ 2007-03-19 12:56 UTC (permalink / raw)
To: FUJITA Tomonori; +Cc: pw, jens.axboe, linux-scsi
FUJITA Tomonori wrote:
> From: Pete Wyckoff <pw@osc.edu>
> Subject: [PATCH] bsg: iovec support
> Date: Thu, 1 Mar 2007 17:29:08 -0500
>
>> Support vectored IO as in SGv3. The iovec structure uses explicit
>> sizes to avoid the need for compat conversion.
>>
>> Signed-off-by: Pete Wyckoff <pw@osc.edu>
>> ---
>>
>> My application definitely can take advantage of scatter/gather IO,
>> which is supported in sgv3 but not in the bsg implementation of sgv4.
>> I understand Tomo's concerns about code bloat and the need for
>> 32/64 compat translations, but this will make things much easier on
>> users of bsg who read or write out of multiple buffers in a single
>> SCSI operation.
>
> (snip)
>
>> + * Vector of address/length pairs, used when dout_iovec_count (or din_)
>> + * is non-zero. In that case, dout_xferp is a list of struct sg_io_v4_vec
>> + * and dout_iovec_count is the number of entries in that list. dout_xfer_len
>> + * is the total length of the list. Note the use of u64 instead of a
>> + * native pointer to avoid compat issues, and padding to avoid structure
>> + * alignment problems.
>> + */
>> +struct sg_io_v4_vec {
>> + __u64 iov_base;
>> + __u32 iov_len;
>> + __u32 __pad1;
>> +};
>
> I don't think that it's a good idea to add a new scatter/gather
> structure and export it to user space.
User space scatter gather is not a new feature.
It is defined and works in sg v3.
It was also partially defined in sg v4 and dropped
out in the bsg implementation. I agree with Pete that
it should be put back.
Pete is also suggesting (shown above) a revised sg_io_vec
structure that uses a uint64_t for the pointer to simplify
32, 64 bit thunking.
> bsg can support scatter/gather IO with ioctl (SG_IO) easily (I mean,
> without adding ugly compat code to bsg.c). I guess that SG_IO doesn't
> work for you because it works synchronously. However, all system calls
> might work asynchronously in the future.
User space scatter gather is completely decoupled from
in-kernel scatter gather lists built for HBA DMA
engines. Same technique but at different levels.
Someone might user space scatter gather to efficiently
fetch several OSD objects implemented in a block device
as adjacent blocks
Doug Gilbert
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [PATCH] bsg: iovec support
2007-03-19 12:56 ` Douglas Gilbert
@ 2007-03-19 13:34 ` FUJITA Tomonori
2007-03-19 14:04 ` Douglas Gilbert
0 siblings, 1 reply; 10+ messages in thread
From: FUJITA Tomonori @ 2007-03-19 13:34 UTC (permalink / raw)
To: dougg; +Cc: fujita.tomonori, pw, jens.axboe, linux-scsi
From: Douglas Gilbert <dougg@torque.net>
Subject: Re: [PATCH] bsg: iovec support
Date: Mon, 19 Mar 2007 08:56:39 -0400
> FUJITA Tomonori wrote:
> > From: Pete Wyckoff <pw@osc.edu>
> > Subject: [PATCH] bsg: iovec support
> > Date: Thu, 1 Mar 2007 17:29:08 -0500
> >
> >> Support vectored IO as in SGv3. The iovec structure uses explicit
> >> sizes to avoid the need for compat conversion.
> >>
> >> Signed-off-by: Pete Wyckoff <pw@osc.edu>
> >> ---
> >>
> >> My application definitely can take advantage of scatter/gather IO,
> >> which is supported in sgv3 but not in the bsg implementation of sgv4.
> >> I understand Tomo's concerns about code bloat and the need for
> >> 32/64 compat translations, but this will make things much easier on
> >> users of bsg who read or write out of multiple buffers in a single
> >> SCSI operation.
> >
> > (snip)
> >
> >> + * Vector of address/length pairs, used when dout_iovec_count (or din_)
> >> + * is non-zero. In that case, dout_xferp is a list of struct sg_io_v4_vec
> >> + * and dout_iovec_count is the number of entries in that list. dout_xfer_len
> >> + * is the total length of the list. Note the use of u64 instead of a
> >> + * native pointer to avoid compat issues, and padding to avoid structure
> >> + * alignment problems.
> >> + */
> >> +struct sg_io_v4_vec {
> >> + __u64 iov_base;
> >> + __u32 iov_len;
> >> + __u32 __pad1;
> >> +};
> >
> > I don't think that it's a good idea to add a new scatter/gather
> > structure and export it to user space.
>
> User space scatter gather is not a new feature.
> It is defined and works in sg v3.
>
> It was also partially defined in sg v4 and dropped
> out in the bsg implementation. I agree with Pete that
> it should be put back.
I'm fine with supporting iovec (though I don't like it).
> Pete is also suggesting (shown above) a revised sg_io_vec
> structure that uses a uint64_t for the pointer to simplify
> 32, 64 bit thunking.
All I said is that it would be better to use the existing compat
infrastructure (sg_build_iovec, sg_ioctl_trans, etc in
fs/compat_ioctl.c) instead of adding another compat code.
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [PATCH] bsg: iovec support
2007-03-19 13:34 ` FUJITA Tomonori
@ 2007-03-19 14:04 ` Douglas Gilbert
2007-03-19 14:21 ` FUJITA Tomonori
0 siblings, 1 reply; 10+ messages in thread
From: Douglas Gilbert @ 2007-03-19 14:04 UTC (permalink / raw)
To: FUJITA Tomonori; +Cc: pw, jens.axboe, linux-scsi
FUJITA Tomonori wrote:
> From: Douglas Gilbert <dougg@torque.net>
> Subject: Re: [PATCH] bsg: iovec support
> Date: Mon, 19 Mar 2007 08:56:39 -0400
>
>> FUJITA Tomonori wrote:
>>> From: Pete Wyckoff <pw@osc.edu>
>>> Subject: [PATCH] bsg: iovec support
>>> Date: Thu, 1 Mar 2007 17:29:08 -0500
>>>
>>>> Support vectored IO as in SGv3. The iovec structure uses explicit
>>>> sizes to avoid the need for compat conversion.
>>>>
>>>> Signed-off-by: Pete Wyckoff <pw@osc.edu>
>>>> ---
>>>>
>>>> My application definitely can take advantage of scatter/gather IO,
>>>> which is supported in sgv3 but not in the bsg implementation of sgv4.
>>>> I understand Tomo's concerns about code bloat and the need for
>>>> 32/64 compat translations, but this will make things much easier on
>>>> users of bsg who read or write out of multiple buffers in a single
>>>> SCSI operation.
>>> (snip)
>>>
>>>> + * Vector of address/length pairs, used when dout_iovec_count (or din_)
>>>> + * is non-zero. In that case, dout_xferp is a list of struct sg_io_v4_vec
>>>> + * and dout_iovec_count is the number of entries in that list. dout_xfer_len
>>>> + * is the total length of the list. Note the use of u64 instead of a
>>>> + * native pointer to avoid compat issues, and padding to avoid structure
>>>> + * alignment problems.
>>>> + */
>>>> +struct sg_io_v4_vec {
>>>> + __u64 iov_base;
>>>> + __u32 iov_len;
>>>> + __u32 __pad1;
>>>> +};
>>> I don't think that it's a good idea to add a new scatter/gather
>>> structure and export it to user space.
>> User space scatter gather is not a new feature.
>> It is defined and works in sg v3.
>>
>> It was also partially defined in sg v4 and dropped
>> out in the bsg implementation. I agree with Pete that
>> it should be put back.
>
> I'm fine with supporting iovec (though I don't like it).
Tomo,
You don't need to support it if you don't want to.
So if din_iovec_count or dout_iovec_count are other
than zero, bsg can return an error.
By dropping those fields, other implementations are
precluded from supporting that feature.
>> Pete is also suggesting (shown above) a revised sg_io_vec
>> structure that uses a uint64_t for the pointer to simplify
>> 32, 64 bit thunking.
>
> All I said is that it would be better to use the existing compat
> infrastructure (sg_build_iovec, sg_ioctl_trans, etc in
> fs/compat_ioctl.c) instead of adding another compat code.
Won't sg v4 make this even a bigger mess, at least
initially anyway?
Doug Gilbert
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [PATCH] bsg: iovec support
2007-03-19 14:04 ` Douglas Gilbert
@ 2007-03-19 14:21 ` FUJITA Tomonori
2007-03-19 18:07 ` [PATCH v2] bsg: iovec support with compat Pete Wyckoff
2007-03-19 18:13 ` [PATCH v2] bsg: iovec support with explicit u64 Pete Wyckoff
0 siblings, 2 replies; 10+ messages in thread
From: FUJITA Tomonori @ 2007-03-19 14:21 UTC (permalink / raw)
To: dougg; +Cc: fujita.tomonori, pw, jens.axboe, linux-scsi
From: Douglas Gilbert <dougg@torque.net>
Subject: Re: [PATCH] bsg: iovec support
Date: Mon, 19 Mar 2007 10:04:45 -0400
> >> Pete is also suggesting (shown above) a revised sg_io_vec
> >> structure that uses a uint64_t for the pointer to simplify
> >> 32, 64 bit thunking.
> >
> > All I said is that it would be better to use the existing compat
> > infrastructure (sg_build_iovec, sg_ioctl_trans, etc in
> > fs/compat_ioctl.c) instead of adding another compat code.
>
> Won't sg v4 make this even a bigger mess, at least
> initially anyway?
Inventing a new iovec structure like sg_io_v4_vec and something like
blk_rq_map_user_iov_sgv4 sounds a mess.
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH v2] bsg: iovec support with compat
2007-03-19 14:21 ` FUJITA Tomonori
@ 2007-03-19 18:07 ` Pete Wyckoff
2007-03-19 18:22 ` FUJITA Tomonori
2007-03-19 18:13 ` [PATCH v2] bsg: iovec support with explicit u64 Pete Wyckoff
1 sibling, 1 reply; 10+ messages in thread
From: Pete Wyckoff @ 2007-03-19 18:07 UTC (permalink / raw)
To: FUJITA Tomonori; +Cc: dougg, jens.axboe, linux-scsi
fujita.tomonori@lab.ntt.co.jp wrote on Mon, 19 Mar 2007 23:21 +0900:
> From: Douglas Gilbert <dougg@torque.net>
> Subject: Re: [PATCH] bsg: iovec support
> Date: Mon, 19 Mar 2007 10:04:45 -0400
>
> > >> Pete is also suggesting (shown above) a revised sg_io_vec
> > >> structure that uses a uint64_t for the pointer to simplify
> > >> 32, 64 bit thunking.
> > >
> > > All I said is that it would be better to use the existing compat
> > > infrastructure (sg_build_iovec, sg_ioctl_trans, etc in
> > > fs/compat_ioctl.c) instead of adding another compat code.
> >
> > Won't sg v4 make this even a bigger mess, at least
> > initially anyway?
>
> Inventing a new iovec structure like sg_io_v4_vec and something like
> blk_rq_map_user_iov_sgv4 sounds a mess.
It is sort of a mess to have new blk mapping routine for a new iovec
type. But I very much did not want to introduce the need for
another compat conversion. But, I took a look at how it would be to
pass the existing sg_iovec into bsg.
Adding a new bsg_write_compat would be bad. There is lots of
parsing and setup before we even get to the possibility of iovec
data mapping. Reusing just sg_build_ioctl from compat_ioctl.c is
also suboptimal as this function is built around the idea of a
contiguous sg_io_hdr + iovec in userspace. The function is small
enough that splitting it into a generic + ioctl-specific part would
add too much abstraction to be worth it.
Here is the patch to use sg_iovec, with its userspace void * and
size_t, and the CONFIG_COMPAT code to fixup 32-bit userspace. I'm
not fond of having __u64 for non-iovec buffer representations, and
void * for iovec buffer representations, but it saves having to
build an sg_iovec just to call into the existing blk_rq_map_user_iov.
Comments?
-- Pete
Support vectored IO as in SGv3. This uses the standard struct sg_iovec
(same as struct iovec) and converts if necessary for CONFIG_COMPAT.
Signed-off-by: Pete Wyckoff <pw@osc.edu>
---
block/bsg.c | 105 +++++++++++++++++++++++++++++++++++++++-----------
include/linux/bsg.h | 2 +
2 files changed, 84 insertions(+), 23 deletions(-)
diff --git a/block/bsg.c b/block/bsg.c
index f1ea258..5a31062 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -25,6 +25,7 @@
#include <linux/percpu.h>
#include <linux/uio.h>
#include <linux/bsg.h>
+#include <linux/compat.h>
#include <scsi/scsi.h>
#include <scsi/scsi_ioctl.h>
@@ -280,6 +281,65 @@ bsg_validate_sgv4_hdr(request_queue_t *q, struct sg_io_v4 *hdr, int *rw)
}
/*
+ * Map either the in or out bufs. Handle compat conversion for
+ * iovec too.
+ */
+static int bsg_map_data(struct request_queue *q, struct request *rq,
+ __u64 uaddr, __u32 tot_len, __u32 numiov,
+ enum dma_data_direction dir)
+{
+ int ret;
+ void __user *ubuf = (void __user *) (unsigned long) uaddr;
+ struct sg_iovec fastiov[8], *iov = fastiov;
+
+ if (numiov == 0) {
+ ret = blk_rq_map_user(q, rq, ubuf, tot_len);
+ goto out;
+ }
+
+ if (numiov >= ARRAY_SIZE(fastiov)) {
+ iov = kmalloc(numiov * sizeof(*iov), GFP_KERNEL);
+ if (iov == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+
+#ifdef CONFIG_COMPAT
+ if (sizeof(struct compat_iovec) != sizeof(*iov)) {
+ struct compat_iovec __user *compat_iov = ubuf;
+ int i;
+ for (i=0; i<numiov; i++) {
+ compat_uptr_t cbase;
+ compat_size_t clen;
+ if (get_user(cbase, &compat_iov[i].iov_base)
+ || get_user(clen, &compat_iov[i].iov_len)) {
+ ret = -EFAULT;
+ goto outfree;
+ }
+ iov[i].iov_base = compat_ptr(cbase);
+ iov[i].iov_len = clen;
+ }
+ goto map;
+ }
+#endif
+
+ if (copy_from_user(iov, ubuf, numiov * sizeof(*iov))) {
+ ret = -EFAULT;
+ goto outfree;
+ }
+map:
+ ret = blk_rq_map_user_iov(q, rq, iov, numiov, tot_len);
+
+outfree:
+ if (iov != fastiov)
+ kfree(iov);
+
+out:
+ return ret;
+}
+
+/*
* map sg_io_v4 to a request.
*/
static struct request *
@@ -288,12 +348,12 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr)
request_queue_t *q = bd->queue;
struct request *rq;
int ret, rw = 0; /* shut up gcc */
- unsigned int dxfer_len;
- void *dxferp = NULL;
- dprintk("map hdr %llx/%u %llx/%u\n", (unsigned long long) hdr->dout_xferp,
- hdr->dout_xfer_len, (unsigned long long) hdr->din_xferp,
- hdr->din_xfer_len);
+ dprintk("map hdr %llx/%u/%u %llx/%u/%u\n",
+ (unsigned long long) hdr->dout_xferp, hdr->dout_xfer_len,
+ hdr->dout_iovec_count,
+ (unsigned long long) hdr->din_xferp, hdr->din_xfer_len,
+ hdr->din_iovec_count);
ret = bsg_validate_sgv4_hdr(q, hdr, &rw);
if (ret)
@@ -305,29 +365,28 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr)
rq = blk_get_request(q, rw, GFP_KERNEL);
ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, test_bit(BSG_F_WRITE_PERM,
&bd->flags));
- if (ret) {
- blk_put_request(rq);
- return ERR_PTR(ret);
- }
+ if (ret)
+ goto errout;
if (hdr->dout_xfer_len) {
- dxfer_len = hdr->dout_xfer_len;
- dxferp = (void*)(unsigned long)hdr->dout_xferp;
+ ret = bsg_map_data(q, rq, hdr->dout_xferp, hdr->dout_xfer_len,
+ hdr->dout_iovec_count, DMA_TO_DEVICE);
+ if (ret)
+ goto errout;
} else if (hdr->din_xfer_len) {
- dxfer_len = hdr->din_xfer_len;
- dxferp = (void*)(unsigned long)hdr->din_xferp;
- } else
- dxfer_len = 0;
-
- if (dxfer_len) {
- ret = blk_rq_map_user(q, rq, dxferp, dxfer_len);
- if (ret) {
- dprintk("failed map at %d\n", ret);
- blk_put_request(rq);
- rq = ERR_PTR(ret);
- }
+ ret = bsg_map_data(q, rq, hdr->din_xferp, hdr->din_xfer_len,
+ hdr->din_iovec_count, DMA_FROM_DEVICE);
+ if (ret)
+ goto errout;
}
+ goto out;
+
+errout:
+ blk_put_request(rq);
+ rq = ERR_PTR(ret);
+
+out:
return rq;
}
diff --git a/include/linux/bsg.h b/include/linux/bsg.h
index 51152cb..6bd7311 100644
--- a/include/linux/bsg.h
+++ b/include/linux/bsg.h
@@ -16,6 +16,8 @@ struct sg_io_v4 {
__u64 response; /* [i], [*o] {SCSI: (auto)sense data} */
/* "din_" for data in (from device); "dout_" for data out (to device) */
+ __u32 dout_iovec_count; /* [i] =0 -> "flat" data transfer */
+ __u32 din_iovec_count; /* [i] >0 -> dxfer is struct sg_iovec * */
__u32 dout_xfer_len; /* [i] bytes to be transferred to device */
__u32 din_xfer_len; /* [i] bytes to be transferred from device */
__u64 dout_xferp; /* [i], [*i] */
--
1.5.0.3
^ permalink raw reply related [flat|nested] 10+ messages in thread* Re: [PATCH v2] bsg: iovec support with compat
2007-03-19 18:07 ` [PATCH v2] bsg: iovec support with compat Pete Wyckoff
@ 2007-03-19 18:22 ` FUJITA Tomonori
2007-03-21 15:34 ` Pete Wyckoff
0 siblings, 1 reply; 10+ messages in thread
From: FUJITA Tomonori @ 2007-03-19 18:22 UTC (permalink / raw)
To: pw; +Cc: fujita.tomonori, dougg, jens.axboe, linux-scsi
From: Pete Wyckoff <pw@osc.edu>
Subject: [PATCH v2] bsg: iovec support with compat
Date: Mon, 19 Mar 2007 14:07:27 -0400
> fujita.tomonori@lab.ntt.co.jp wrote on Mon, 19 Mar 2007 23:21 +0900:
> > From: Douglas Gilbert <dougg@torque.net>
> > Subject: Re: [PATCH] bsg: iovec support
> > Date: Mon, 19 Mar 2007 10:04:45 -0400
> >
> > > >> Pete is also suggesting (shown above) a revised sg_io_vec
> > > >> structure that uses a uint64_t for the pointer to simplify
> > > >> 32, 64 bit thunking.
> > > >
> > > > All I said is that it would be better to use the existing compat
> > > > infrastructure (sg_build_iovec, sg_ioctl_trans, etc in
> > > > fs/compat_ioctl.c) instead of adding another compat code.
> > >
> > > Won't sg v4 make this even a bigger mess, at least
> > > initially anyway?
> >
> > Inventing a new iovec structure like sg_io_v4_vec and something like
> > blk_rq_map_user_iov_sgv4 sounds a mess.
>
> It is sort of a mess to have new blk mapping routine for a new iovec
> type. But I very much did not want to introduce the need for
> another compat conversion. But, I took a look at how it would be to
> pass the existing sg_iovec into bsg.
>
> Adding a new bsg_write_compat would be bad. There is lots of
> parsing and setup before we even get to the possibility of iovec
> data mapping. Reusing just sg_build_ioctl from compat_ioctl.c is
> also suboptimal as this function is built around the idea of a
> contiguous sg_io_hdr + iovec in userspace. The function is small
> enough that splitting it into a generic + ioctl-specific part would
> add too much abstraction to be worth it.
>
> Here is the patch to use sg_iovec, with its userspace void * and
> size_t, and the CONFIG_COMPAT code to fixup 32-bit userspace. I'm
> not fond of having __u64 for non-iovec buffer representations, and
> void * for iovec buffer representations, but it saves having to
> build an sg_iovec just to call into the existing blk_rq_map_user_iov.
>
> Comments?
The compat code should not go to bsg.c.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v2] bsg: iovec support with compat
2007-03-19 18:22 ` FUJITA Tomonori
@ 2007-03-21 15:34 ` Pete Wyckoff
0 siblings, 0 replies; 10+ messages in thread
From: Pete Wyckoff @ 2007-03-21 15:34 UTC (permalink / raw)
To: FUJITA Tomonori; +Cc: dougg, jens.axboe, linux-scsi
fujita.tomonori@lab.ntt.co.jp wrote on Tue, 20 Mar 2007 03:22 +0900:
> From: Pete Wyckoff <pw@osc.edu>
> Subject: [PATCH v2] bsg: iovec support with compat
> Date: Mon, 19 Mar 2007 14:07:27 -0400
>
> > Adding a new bsg_write_compat would be bad. There is lots of
> > parsing and setup before we even get to the possibility of iovec
> > data mapping. Reusing just sg_build_ioctl from compat_ioctl.c is
> > also suboptimal as this function is built around the idea of a
> > contiguous sg_io_hdr + iovec in userspace. The function is small
> > enough that splitting it into a generic + ioctl-specific part would
> > add too much abstraction to be worth it.
> >
> > Here is the patch to use sg_iovec, with its userspace void * and
> > size_t, and the CONFIG_COMPAT code to fixup 32-bit userspace. I'm
> > not fond of having __u64 for non-iovec buffer representations, and
> > void * for iovec buffer representations, but it saves having to
> > build an sg_iovec just to call into the existing blk_rq_map_user_iov.
> >
> > Comments?
>
> The compat code should not go to bsg.c.
Agreed. I dislike the entire approach of reusing sg_iovec in bsg,
but thought I'd work it through to see what it is like.
The compat code cannot go in compat_ioctl.c or similar, as explained
above. And it seems wrong to leave it out and silently break on
compat-requiring setups.
Using sg_iovec with bsg is bad from a user perspective too.
sg_iovec pointers are (void *). sg_io_v4 pointers are __u64.
sg_iovec lengths are size_t (64-bit). sg_io_v4 lengths are __u32.
Please consider instead the original proposal of a new iovec
type for bsg. It is less complex than using existing sg_iovec.
http://article.gmane.org/gmane.linux.scsi/30461
There is exactly one user of blk_rq_map_user_iov() in the tree:
sg_io() in scsi_ioctl.c. I could convert that to sg_v4_iovec now
too. The helper function bio_map_user_iov() is only used by
blk_rq_map_user_iov() and can also be fixed. The only use we have
for struct sg_iovec is this one sg_io() caller, and sg.c. Let's
migrate to sg_v4_iovec at the same time we switch from sgv3 to sgv4.
-- Pete
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH v2] bsg: iovec support with explicit u64
2007-03-19 14:21 ` FUJITA Tomonori
2007-03-19 18:07 ` [PATCH v2] bsg: iovec support with compat Pete Wyckoff
@ 2007-03-19 18:13 ` Pete Wyckoff
1 sibling, 0 replies; 10+ messages in thread
From: Pete Wyckoff @ 2007-03-19 18:13 UTC (permalink / raw)
To: FUJITA Tomonori; +Cc: dougg, jens.axboe, linux-scsi
pw@osc.edu wrote on Mon, 19 Mar 2007 14:07 -0400:
> Here is the patch to use sg_iovec, with its userspace void * and
> size_t, and the CONFIG_COMPAT code to fixup 32-bit userspace. I'm
> not fond of having __u64 for non-iovec buffer representations, and
> void * for iovec buffer representations, but it saves having to
> build an sg_iovec just to call into the existing blk_rq_map_user_iov.
Just for comparison, a cleaned up version of the earlier patch that
does not require compat conversion. Instead, use a new struct
sg_v4_iovec with explicit u64/u32 representation for user pointer
and length.
Perhaps later we might change blk_rq_map_user_iov to take
sg_v4_iovec and let sgv3 convert its sg_iovec into that.
-- Pete
Support vectored IO as in SGv3. The iovec structure uses explicit
sizes to avoid the need for compat conversion.
Signed-off-by: Pete Wyckoff <pw@osc.edu>
---
block/bsg.c | 95 ++++++++++++++++++++++++++++++++++++++------------
include/linux/bsg.h | 14 +++++++
2 files changed, 86 insertions(+), 23 deletions(-)
diff --git a/block/bsg.c b/block/bsg.c
index f1ea258..e334f75 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -280,6 +280,56 @@ bsg_validate_sgv4_hdr(request_queue_t *q, struct sg_io_v4 *hdr, int *rw)
}
/*
+ * Map either the in or out bufs. Convert sg_v4_iovec to sg_iovec
+ * to use existing blk_rq_map infrastructure. We could do a copy-in-place
+ * conversion on 64-bit kernels, just by zeroing the top half of sg_iovec's
+ * iov_len, but do not, for simplicity.
+ */
+static int bsg_map_data(struct request_queue *q, struct request *rq,
+ __u64 uaddr, __u32 tot_len, __u32 numiov,
+ enum dma_data_direction dir)
+{
+ int i, ret;
+ void __user *ubuf = (void __user *) (unsigned long) uaddr;
+ struct sg_iovec fastiov[8], *iov = fastiov;
+ struct sg_v4_iovec v4_fastiov[8], *v4_iov = v4_fastiov;
+
+ if (numiov == 0) {
+ ret = blk_rq_map_user(q, rq, ubuf, tot_len);
+ goto out;
+ }
+
+ if (numiov >= ARRAY_SIZE(fastiov)) {
+ iov = kmalloc(numiov * (sizeof(*iov) + sizeof(*v4_iov)),
+ GFP_KERNEL);
+ if (iov == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ v4_iov = (void *) &iov[numiov];
+ }
+
+ if (copy_from_user(v4_iov, ubuf, numiov * sizeof(*v4_iov))) {
+ ret = -EFAULT;
+ goto outfree;
+ }
+
+ for (i=0; i<numiov; i++) {
+ iov[i].iov_base = (void *)(unsigned long) v4_iov[i].iov_base;
+ iov[i].iov_len = v4_iov[i].iov_len;
+ }
+
+ ret = blk_rq_map_user_iov(q, rq, iov, numiov, tot_len);
+
+outfree:
+ if (iov != fastiov)
+ kfree(iov);
+
+out:
+ return ret;
+}
+
+/*
* map sg_io_v4 to a request.
*/
static struct request *
@@ -288,12 +338,12 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr)
request_queue_t *q = bd->queue;
struct request *rq;
int ret, rw = 0; /* shut up gcc */
- unsigned int dxfer_len;
- void *dxferp = NULL;
- dprintk("map hdr %llx/%u %llx/%u\n", (unsigned long long) hdr->dout_xferp,
- hdr->dout_xfer_len, (unsigned long long) hdr->din_xferp,
- hdr->din_xfer_len);
+ dprintk("map hdr %llx/%u/%u %llx/%u/%u\n",
+ (unsigned long long) hdr->dout_xferp, hdr->dout_xfer_len,
+ hdr->dout_iovec_count,
+ (unsigned long long) hdr->din_xferp, hdr->din_xfer_len,
+ hdr->din_iovec_count);
ret = bsg_validate_sgv4_hdr(q, hdr, &rw);
if (ret)
@@ -305,29 +355,28 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr)
rq = blk_get_request(q, rw, GFP_KERNEL);
ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, test_bit(BSG_F_WRITE_PERM,
&bd->flags));
- if (ret) {
- blk_put_request(rq);
- return ERR_PTR(ret);
- }
+ if (ret)
+ goto errout;
if (hdr->dout_xfer_len) {
- dxfer_len = hdr->dout_xfer_len;
- dxferp = (void*)(unsigned long)hdr->dout_xferp;
+ ret = bsg_map_data(q, rq, hdr->dout_xferp, hdr->dout_xfer_len,
+ hdr->dout_iovec_count, DMA_TO_DEVICE);
+ if (ret)
+ goto errout;
} else if (hdr->din_xfer_len) {
- dxfer_len = hdr->din_xfer_len;
- dxferp = (void*)(unsigned long)hdr->din_xferp;
- } else
- dxfer_len = 0;
-
- if (dxfer_len) {
- ret = blk_rq_map_user(q, rq, dxferp, dxfer_len);
- if (ret) {
- dprintk("failed map at %d\n", ret);
- blk_put_request(rq);
- rq = ERR_PTR(ret);
- }
+ ret = bsg_map_data(q, rq, hdr->din_xferp, hdr->din_xfer_len,
+ hdr->din_iovec_count, DMA_FROM_DEVICE);
+ if (ret)
+ goto errout;
}
+ goto out;
+
+errout:
+ blk_put_request(rq);
+ rq = ERR_PTR(ret);
+
+out:
return rq;
}
diff --git a/include/linux/bsg.h b/include/linux/bsg.h
index 51152cb..7c11e67 100644
--- a/include/linux/bsg.h
+++ b/include/linux/bsg.h
@@ -16,6 +16,8 @@ struct sg_io_v4 {
__u64 response; /* [i], [*o] {SCSI: (auto)sense data} */
/* "din_" for data in (from device); "dout_" for data out (to device) */
+ __u32 dout_iovec_count; /* [i] =0 -> "flat" data transfer */
+ __u32 din_iovec_count; /* [i] >0 -> dxfer is struct sg_v4_iovec * */
__u32 dout_xfer_len; /* [i] bytes to be transferred to device */
__u32 din_xfer_len; /* [i] bytes to be transferred from device */
__u64 dout_xferp; /* [i], [*i] */
@@ -40,6 +42,18 @@ struct sg_io_v4 {
__u32 padding;
};
+/*
+ * Vector of address/length pairs, used when dout_iovec_count (or din_)
+ * is non-zero. In that case, dout_xferp is a list of struct sg_v4_iovec
+ * and dout_iovec_count is the number of entries in that list. dout_xfer_len
+ * is the total length of the list.
+ */
+struct sg_v4_iovec {
+ __u64 iov_base;
+ __u32 iov_len;
+ __u32 __pad1;
+};
+
#ifdef __KERNEL__
#if defined(CONFIG_BLK_DEV_BSG)
--
1.5.0.3
^ permalink raw reply related [flat|nested] 10+ messages in thread
end of thread, other threads:[~2007-03-21 15:34 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-03-01 22:29 [PATCH] bsg: iovec support Pete Wyckoff
2007-03-19 10:41 ` FUJITA Tomonori
2007-03-19 12:56 ` Douglas Gilbert
2007-03-19 13:34 ` FUJITA Tomonori
2007-03-19 14:04 ` Douglas Gilbert
2007-03-19 14:21 ` FUJITA Tomonori
2007-03-19 18:07 ` [PATCH v2] bsg: iovec support with compat Pete Wyckoff
2007-03-19 18:22 ` FUJITA Tomonori
2007-03-21 15:34 ` Pete Wyckoff
2007-03-19 18:13 ` [PATCH v2] bsg: iovec support with explicit u64 Pete Wyckoff
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.