linux-scsi.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] bsg: iovec support
@ 2007-03-01 22:29 Pete Wyckoff
  2007-03-19 10:41 ` FUJITA Tomonori
  0 siblings, 1 reply; 10+ messages in thread
From: Pete Wyckoff @ 2007-03-01 22:29 UTC (permalink / raw)
  To: Jens Axboe; +Cc: FUJITA Tomonori, linux-scsi

Support vectored IO as in SGv3.  The iovec structure uses explicit
sizes to avoid the need for compat conversion.

Signed-off-by: Pete Wyckoff <pw@osc.edu>
---

My application definitely can take advantage of scatter/gather IO,
which is supported in sgv3 but not in the bsg implementation of sgv4.
I understand Tomo's concerns about code bloat and the need for
32/64 compat translations, but this will make things much easier on
users of bsg who read or write out of multiple buffers in a single
SCSI operation.

Clearly we want to avoid doing the compat work that sg.c has to do
now, so I went with __u64 for the addresses in the structures that
userspace sees.  But to interface with existing bio structures, that
must be converted back to 32-bit pointers in sg_iovec (only on
32-bit architectures).  In the long run, maybe we should have a
bio_map_user_iov() that works on the constant-sized sg_io_v4_vec
proposed here?

		-- Pete


 block/bsg.c         |  132 ++++++++++++++++++++++++++++++++++++++++++---------
 include/linux/bsg.h |   16 ++++++
 2 files changed, 125 insertions(+), 23 deletions(-)

diff --git a/block/bsg.c b/block/bsg.c
index c85d961..8e3d6c7 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -280,6 +280,95 @@ bsg_validate_sgv4_hdr(request_queue_t *q, struct sg_io_v4 *hdr, int *rw)
 }
 
 /*
+ * Sits around blk_rq_map_user_iov so we can use an iovec type that
+ * does not require compat manipulations.  For now we just clumsily
+ * remap the entire iovec if the types do not match.  Later consider
+ * changing the bio map function.
+ */
+static int bsg_map_user_iovec(request_queue_t *q, struct request *rq,
+			      struct sg_io_v4_vec *vec, int numvec,
+			      size_t tot_len, enum dma_data_direction dir)
+{
+	struct bio *bio;
+	struct sg_iovec *iov;
+	int write_to_vm = (dir == DMA_FROM_DEVICE ? 1 : 0);
+	int must_copy_iovec = (sizeof(*iov) != sizeof(*vec));
+
+	/*
+	 * For 64-bit everywhere, sg_io_v4_vec using __u64 is same as sg_iovec
+	 * using void *.  For 64-bit kernel with 32-bit userspace, also no
+	 * translation needed as userspace is forced to use __u64.  Only in the
+	 * all 32-bit case will sg_iovec use 32-bit pointers and hence we
+	 * must shrink our 64-bit pointers down into it.
+	 */
+	if (must_copy_iovec) {
+		int i;
+		iov = kmalloc(numvec * sizeof(*iov), GFP_KERNEL);
+		for (i=0; i<numvec; i++) {
+			iov[i].iov_base = (void __user *) vec[i].iov_base;
+			iov[i].iov_len = vec[i].iov_len;
+		}
+	} else {
+		iov = (struct sg_iovec *) vec;
+	}
+
+	bio = bio_map_user_iov(q, NULL, iov, numvec, write_to_vm);
+
+	if (must_copy_iovec)
+		kfree(iov);
+
+	if (IS_ERR(bio)) {
+		dprintk("bio_map_user_iov err\n");
+		return PTR_ERR(bio);
+	}
+
+	if (bio->bi_size != tot_len) {
+		dprintk("bio->bi_size %u != len %lu\n", bio->bi_size, tot_len);
+		bio_endio(bio, bio->bi_size, 0);
+		bio_unmap_user(bio);
+		return -EINVAL;
+	}
+
+	bio_get(bio);
+	blk_rq_bio_prep_bidi(q, rq, bio, dir);
+	rq->buffer = rq->data = NULL;
+	return 0;
+}
+
+/*
+ * Map either the in or out bufs.
+ */
+static int bsg_map_data(struct request_queue *q, struct request *rq,
+			__u64 uaddr, __u32 tot_len, __u32 numiov,
+			enum dma_data_direction dir)
+{
+	int ret;
+	void __user *ubuf = (void __user *) (unsigned long) uaddr;
+
+	if (numiov) {
+		struct sg_io_v4_vec *vec;
+		size_t len = numiov * sizeof(*vec);
+
+		vec = kmalloc(len, GFP_KERNEL);
+		if (vec == NULL) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		if (copy_from_user(vec, ubuf, len)) {
+			ret = -EFAULT;
+			kfree(vec);
+			goto out;
+		}
+		ret = bsg_map_user_iovec(q, rq, vec, numiov, tot_len, dir);
+		kfree(vec);
+	} else
+		ret = blk_rq_map_user(q, rq, ubuf, tot_len);
+
+out:
+	return ret;
+}
+
+/*
  * map sg_io_v4 to a request.
  */
 static struct request *
@@ -288,12 +377,10 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr)
 	request_queue_t *q = bd->queue;
 	struct request *rq;
 	int ret, rw = 0; /* shut up gcc */
-	unsigned int dxfer_len;
-	void *dxferp = NULL;
 
-	dprintk("map hdr %llx/%u %llx/%u\n", (unsigned long long) hdr->dout_xferp,
-		hdr->dout_xfer_len, (unsigned long long) hdr->din_xferp,
-		hdr->din_xfer_len);
+	dprintk("map hdr %llx/%u %llx/%u\n",
+	        (unsigned long long) hdr->dout_xferp, hdr->dout_xfer_len,
+		(unsigned long long) hdr->din_xferp, hdr->din_xfer_len);
 
 	ret = bsg_validate_sgv4_hdr(q, hdr, &rw);
 	if (ret)
@@ -305,29 +392,28 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr)
 	rq = blk_get_request(q, rw, GFP_KERNEL);
 	ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, test_bit(BSG_F_WRITE_PERM,
 						       &bd->flags));
-	if (ret) {
-		blk_put_request(rq);
-		return ERR_PTR(ret);
-	}
+	if (ret)
+		goto errout;
 
 	if (hdr->dout_xfer_len) {
-		dxfer_len = hdr->dout_xfer_len;
-		dxferp = (void*)(unsigned long)hdr->dout_xferp;
+		ret = bsg_map_data(q, rq, hdr->dout_xferp, hdr->dout_xfer_len,
+		                   hdr->dout_iovec_count, DMA_TO_DEVICE);
+		if (ret)
+			goto errout;
 	} else if (hdr->din_xfer_len) {
-		dxfer_len = hdr->din_xfer_len;
-		dxferp = (void*)(unsigned long)hdr->din_xferp;
-	} else
-		dxfer_len = 0;
-
-	if (dxfer_len) {
-		ret = blk_rq_map_user(q, rq, dxferp, dxfer_len);
-		if (ret) {
-			dprintk("failed map at %d\n", ret);
-			blk_put_request(rq);
-			rq = ERR_PTR(ret);
-		}
+		ret = bsg_map_data(q, rq, hdr->din_xferp, hdr->din_xfer_len,
+		                   hdr->din_iovec_count, DMA_FROM_DEVICE);
+		if (ret)
+			goto errout;
 	}
 
+	goto out;
+
+errout:
+	blk_put_request(rq);
+	rq = ERR_PTR(ret);
+
+out:
 	return rq;
 }
 
diff --git a/include/linux/bsg.h b/include/linux/bsg.h
index 2154a6d..3580921 100644
--- a/include/linux/bsg.h
+++ b/include/linux/bsg.h
@@ -16,6 +16,8 @@ struct sg_io_v4 {
 	__u64 response;		/* [i], [*o] {SCSI: (auto)sense data} */
 
 	/* "din_" for data in (from device); "dout_" for data out (to device) */
+	__u32 dout_iovec_count; /* [i] =0 -> "flat" data transfer */
+	__u32 din_iovec_count;  /* [i] */
 	__u32 dout_xfer_len;	/* [i] bytes to be transferred to device */
 	__u32 din_xfer_len;	/* [i] bytes to be transferred from device */
 	__u64 dout_xferp;	/* [i], [*i] */
@@ -40,6 +42,20 @@ struct sg_io_v4 {
 	__u32 padding;
 };
 
+/*
+ * Vector of address/length pairs, used when dout_iovec_count (or din_)
+ * is non-zero.  In that case, dout_xferp is a list of struct sg_io_v4_vec
+ * and dout_iovec_count is the number of entries in that list.  dout_xfer_len
+ * is the total length of the list.  Note the use of u64 instead of a
+ * native pointer to avoid compat issues, and padding to avoid structure
+ * alignment problems.
+ */
+struct sg_io_v4_vec {
+	__u64 iov_base;
+	__u32 iov_len;
+	__u32 __pad1;
+};
+
 #ifdef __KERNEL__
 
 #if defined(CONFIG_BLK_DEV_BSG)
-- 
1.5.0.2


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH] bsg: iovec support
  2007-03-01 22:29 [PATCH] bsg: iovec support Pete Wyckoff
@ 2007-03-19 10:41 ` FUJITA Tomonori
  2007-03-19 12:56   ` Douglas Gilbert
  0 siblings, 1 reply; 10+ messages in thread
From: FUJITA Tomonori @ 2007-03-19 10:41 UTC (permalink / raw)
  To: pw; +Cc: jens.axboe, fujita.tomonori, linux-scsi

From: Pete Wyckoff <pw@osc.edu>
Subject: [PATCH] bsg: iovec support
Date: Thu, 1 Mar 2007 17:29:08 -0500

> Support vectored IO as in SGv3.  The iovec structure uses explicit
> sizes to avoid the need for compat conversion.
> 
> Signed-off-by: Pete Wyckoff <pw@osc.edu>
> ---
> 
> My application definitely can take advantage of scatter/gather IO,
> which is supported in sgv3 but not in the bsg implementation of sgv4.
> I understand Tomo's concerns about code bloat and the need for
> 32/64 compat translations, but this will make things much easier on
> users of bsg who read or write out of multiple buffers in a single
> SCSI operation.

(snip)

> + * Vector of address/length pairs, used when dout_iovec_count (or din_)
> + * is non-zero.  In that case, dout_xferp is a list of struct sg_io_v4_vec
> + * and dout_iovec_count is the number of entries in that list.  dout_xfer_len
> + * is the total length of the list.  Note the use of u64 instead of a
> + * native pointer to avoid compat issues, and padding to avoid structure
> + * alignment problems.
> + */
> +struct sg_io_v4_vec {
> +	__u64 iov_base;
> +	__u32 iov_len;
> +	__u32 __pad1;
> +};

I don't think that it's a good idea to add a new scatter/gather
structure and export it to user space.

bsg can support scatter/gather IO with ioctl (SG_IO) easily (I mean,
without adding ugly compat code to bsg.c). I guess that SG_IO doesn't
work for you because it works synchronously. However, all system calls
might work asynchronously in the future.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] bsg: iovec support
  2007-03-19 10:41 ` FUJITA Tomonori
@ 2007-03-19 12:56   ` Douglas Gilbert
  2007-03-19 13:34     ` FUJITA Tomonori
  0 siblings, 1 reply; 10+ messages in thread
From: Douglas Gilbert @ 2007-03-19 12:56 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: pw, jens.axboe, linux-scsi

FUJITA Tomonori wrote:
> From: Pete Wyckoff <pw@osc.edu>
> Subject: [PATCH] bsg: iovec support
> Date: Thu, 1 Mar 2007 17:29:08 -0500
> 
>> Support vectored IO as in SGv3.  The iovec structure uses explicit
>> sizes to avoid the need for compat conversion.
>>
>> Signed-off-by: Pete Wyckoff <pw@osc.edu>
>> ---
>>
>> My application definitely can take advantage of scatter/gather IO,
>> which is supported in sgv3 but not in the bsg implementation of sgv4.
>> I understand Tomo's concerns about code bloat and the need for
>> 32/64 compat translations, but this will make things much easier on
>> users of bsg who read or write out of multiple buffers in a single
>> SCSI operation.
> 
> (snip)
> 
>> + * Vector of address/length pairs, used when dout_iovec_count (or din_)
>> + * is non-zero.  In that case, dout_xferp is a list of struct sg_io_v4_vec
>> + * and dout_iovec_count is the number of entries in that list.  dout_xfer_len
>> + * is the total length of the list.  Note the use of u64 instead of a
>> + * native pointer to avoid compat issues, and padding to avoid structure
>> + * alignment problems.
>> + */
>> +struct sg_io_v4_vec {
>> +	__u64 iov_base;
>> +	__u32 iov_len;
>> +	__u32 __pad1;
>> +};
> 
> I don't think that it's a good idea to add a new scatter/gather
> structure and export it to user space.

User space scatter gather is not a new feature.
It is defined and works in sg v3.

It was also partially defined in sg v4 and dropped
out in the bsg implementation. I agree with Pete that
it should be put back.

Pete is also suggesting (shown above) a revised sg_io_vec
structure that uses a uint64_t for the pointer to simplify
32, 64 bit thunking.

> bsg can support scatter/gather IO with ioctl (SG_IO) easily (I mean,
> without adding ugly compat code to bsg.c). I guess that SG_IO doesn't
> work for you because it works synchronously. However, all system calls
> might work asynchronously in the future.

User space scatter gather is completely decoupled from
in-kernel scatter gather lists built for HBA DMA
engines. Same technique but at different levels.

Someone might user space scatter gather to efficiently
fetch several OSD objects implemented in a block device
as adjacent blocks

Doug Gilbert



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] bsg: iovec support
  2007-03-19 12:56   ` Douglas Gilbert
@ 2007-03-19 13:34     ` FUJITA Tomonori
  2007-03-19 14:04       ` Douglas Gilbert
  0 siblings, 1 reply; 10+ messages in thread
From: FUJITA Tomonori @ 2007-03-19 13:34 UTC (permalink / raw)
  To: dougg; +Cc: fujita.tomonori, pw, jens.axboe, linux-scsi

From: Douglas Gilbert <dougg@torque.net>
Subject: Re: [PATCH] bsg: iovec support
Date: Mon, 19 Mar 2007 08:56:39 -0400

> FUJITA Tomonori wrote:
> > From: Pete Wyckoff <pw@osc.edu>
> > Subject: [PATCH] bsg: iovec support
> > Date: Thu, 1 Mar 2007 17:29:08 -0500
> > 
> >> Support vectored IO as in SGv3.  The iovec structure uses explicit
> >> sizes to avoid the need for compat conversion.
> >>
> >> Signed-off-by: Pete Wyckoff <pw@osc.edu>
> >> ---
> >>
> >> My application definitely can take advantage of scatter/gather IO,
> >> which is supported in sgv3 but not in the bsg implementation of sgv4.
> >> I understand Tomo's concerns about code bloat and the need for
> >> 32/64 compat translations, but this will make things much easier on
> >> users of bsg who read or write out of multiple buffers in a single
> >> SCSI operation.
> > 
> > (snip)
> > 
> >> + * Vector of address/length pairs, used when dout_iovec_count (or din_)
> >> + * is non-zero.  In that case, dout_xferp is a list of struct sg_io_v4_vec
> >> + * and dout_iovec_count is the number of entries in that list.  dout_xfer_len
> >> + * is the total length of the list.  Note the use of u64 instead of a
> >> + * native pointer to avoid compat issues, and padding to avoid structure
> >> + * alignment problems.
> >> + */
> >> +struct sg_io_v4_vec {
> >> +	__u64 iov_base;
> >> +	__u32 iov_len;
> >> +	__u32 __pad1;
> >> +};
> > 
> > I don't think that it's a good idea to add a new scatter/gather
> > structure and export it to user space.
> 
> User space scatter gather is not a new feature.
> It is defined and works in sg v3.
> 
> It was also partially defined in sg v4 and dropped
> out in the bsg implementation. I agree with Pete that
> it should be put back.

I'm fine with supporting iovec (though I don't like it).


> Pete is also suggesting (shown above) a revised sg_io_vec
> structure that uses a uint64_t for the pointer to simplify
> 32, 64 bit thunking.

All I said is that it would be better to use the existing compat
infrastructure (sg_build_iovec, sg_ioctl_trans, etc in
fs/compat_ioctl.c) instead of adding another compat code.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] bsg: iovec support
  2007-03-19 13:34     ` FUJITA Tomonori
@ 2007-03-19 14:04       ` Douglas Gilbert
  2007-03-19 14:21         ` FUJITA Tomonori
  0 siblings, 1 reply; 10+ messages in thread
From: Douglas Gilbert @ 2007-03-19 14:04 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: pw, jens.axboe, linux-scsi

FUJITA Tomonori wrote:
> From: Douglas Gilbert <dougg@torque.net>
> Subject: Re: [PATCH] bsg: iovec support
> Date: Mon, 19 Mar 2007 08:56:39 -0400
> 
>> FUJITA Tomonori wrote:
>>> From: Pete Wyckoff <pw@osc.edu>
>>> Subject: [PATCH] bsg: iovec support
>>> Date: Thu, 1 Mar 2007 17:29:08 -0500
>>>
>>>> Support vectored IO as in SGv3.  The iovec structure uses explicit
>>>> sizes to avoid the need for compat conversion.
>>>>
>>>> Signed-off-by: Pete Wyckoff <pw@osc.edu>
>>>> ---
>>>>
>>>> My application definitely can take advantage of scatter/gather IO,
>>>> which is supported in sgv3 but not in the bsg implementation of sgv4.
>>>> I understand Tomo's concerns about code bloat and the need for
>>>> 32/64 compat translations, but this will make things much easier on
>>>> users of bsg who read or write out of multiple buffers in a single
>>>> SCSI operation.
>>> (snip)
>>>
>>>> + * Vector of address/length pairs, used when dout_iovec_count (or din_)
>>>> + * is non-zero.  In that case, dout_xferp is a list of struct sg_io_v4_vec
>>>> + * and dout_iovec_count is the number of entries in that list.  dout_xfer_len
>>>> + * is the total length of the list.  Note the use of u64 instead of a
>>>> + * native pointer to avoid compat issues, and padding to avoid structure
>>>> + * alignment problems.
>>>> + */
>>>> +struct sg_io_v4_vec {
>>>> +	__u64 iov_base;
>>>> +	__u32 iov_len;
>>>> +	__u32 __pad1;
>>>> +};
>>> I don't think that it's a good idea to add a new scatter/gather
>>> structure and export it to user space.
>> User space scatter gather is not a new feature.
>> It is defined and works in sg v3.
>>
>> It was also partially defined in sg v4 and dropped
>> out in the bsg implementation. I agree with Pete that
>> it should be put back.
> 
> I'm fine with supporting iovec (though I don't like it).

Tomo,
You don't need to support it if you don't want to.
So if din_iovec_count or dout_iovec_count are other
than zero, bsg can return an error.

By dropping those fields, other implementations are
precluded from supporting that feature.

>> Pete is also suggesting (shown above) a revised sg_io_vec
>> structure that uses a uint64_t for the pointer to simplify
>> 32, 64 bit thunking.
> 
> All I said is that it would be better to use the existing compat
> infrastructure (sg_build_iovec, sg_ioctl_trans, etc in
> fs/compat_ioctl.c) instead of adding another compat code.

Won't sg v4 make this even a bigger mess, at least
initially anyway?

Doug Gilbert



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] bsg: iovec support
  2007-03-19 14:04       ` Douglas Gilbert
@ 2007-03-19 14:21         ` FUJITA Tomonori
  2007-03-19 18:07           ` [PATCH v2] bsg: iovec support with compat Pete Wyckoff
  2007-03-19 18:13           ` [PATCH v2] bsg: iovec support with explicit u64 Pete Wyckoff
  0 siblings, 2 replies; 10+ messages in thread
From: FUJITA Tomonori @ 2007-03-19 14:21 UTC (permalink / raw)
  To: dougg; +Cc: fujita.tomonori, pw, jens.axboe, linux-scsi

From: Douglas Gilbert <dougg@torque.net>
Subject: Re: [PATCH] bsg: iovec support
Date: Mon, 19 Mar 2007 10:04:45 -0400

> >> Pete is also suggesting (shown above) a revised sg_io_vec
> >> structure that uses a uint64_t for the pointer to simplify
> >> 32, 64 bit thunking.
> > 
> > All I said is that it would be better to use the existing compat
> > infrastructure (sg_build_iovec, sg_ioctl_trans, etc in
> > fs/compat_ioctl.c) instead of adding another compat code.
> 
> Won't sg v4 make this even a bigger mess, at least
> initially anyway?

Inventing a new iovec structure like sg_io_v4_vec and something like
blk_rq_map_user_iov_sgv4 sounds a mess.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH v2] bsg: iovec support with compat
  2007-03-19 14:21         ` FUJITA Tomonori
@ 2007-03-19 18:07           ` Pete Wyckoff
  2007-03-19 18:22             ` FUJITA Tomonori
  2007-03-19 18:13           ` [PATCH v2] bsg: iovec support with explicit u64 Pete Wyckoff
  1 sibling, 1 reply; 10+ messages in thread
From: Pete Wyckoff @ 2007-03-19 18:07 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: dougg, jens.axboe, linux-scsi

fujita.tomonori@lab.ntt.co.jp wrote on Mon, 19 Mar 2007 23:21 +0900:
> From: Douglas Gilbert <dougg@torque.net>
> Subject: Re: [PATCH] bsg: iovec support
> Date: Mon, 19 Mar 2007 10:04:45 -0400
> 
> > >> Pete is also suggesting (shown above) a revised sg_io_vec
> > >> structure that uses a uint64_t for the pointer to simplify
> > >> 32, 64 bit thunking.
> > > 
> > > All I said is that it would be better to use the existing compat
> > > infrastructure (sg_build_iovec, sg_ioctl_trans, etc in
> > > fs/compat_ioctl.c) instead of adding another compat code.
> > 
> > Won't sg v4 make this even a bigger mess, at least
> > initially anyway?
> 
> Inventing a new iovec structure like sg_io_v4_vec and something like
> blk_rq_map_user_iov_sgv4 sounds a mess.

It is sort of a mess to have new blk mapping routine for a new iovec
type.  But I very much did not want to introduce the need for
another compat conversion.  But, I took a look at how it would be to
pass the existing sg_iovec into bsg.

Adding a new bsg_write_compat would be bad.  There is lots of
parsing and setup before we even get to the possibility of iovec
data mapping.  Reusing just sg_build_ioctl from compat_ioctl.c is
also suboptimal as this function is built around the idea of a
contiguous sg_io_hdr + iovec in userspace.  The function is small
enough that splitting it into a generic + ioctl-specific part would
add too much abstraction to be worth it.

Here is the patch to use sg_iovec, with its userspace void * and
size_t, and the CONFIG_COMPAT code to fixup 32-bit userspace.  I'm
not fond of having __u64 for non-iovec buffer representations, and
void * for iovec buffer representations, but it saves having to
build an sg_iovec just to call into the existing blk_rq_map_user_iov.

Comments?

		-- Pete


Support vectored IO as in SGv3.  This uses the standard struct sg_iovec
(same as struct iovec) and converts if necessary for CONFIG_COMPAT.

Signed-off-by: Pete Wyckoff <pw@osc.edu>
---
 block/bsg.c         |  105 +++++++++++++++++++++++++++++++++++++++-----------
 include/linux/bsg.h |    2 +
 2 files changed, 84 insertions(+), 23 deletions(-)

diff --git a/block/bsg.c b/block/bsg.c
index f1ea258..5a31062 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -25,6 +25,7 @@
 #include <linux/percpu.h>
 #include <linux/uio.h>
 #include <linux/bsg.h>
+#include <linux/compat.h>
 
 #include <scsi/scsi.h>
 #include <scsi/scsi_ioctl.h>
@@ -280,6 +281,65 @@ bsg_validate_sgv4_hdr(request_queue_t *q, struct sg_io_v4 *hdr, int *rw)
 }
 
 /*
+ * Map either the in or out bufs.  Handle compat conversion for
+ * iovec too.
+ */
+static int bsg_map_data(struct request_queue *q, struct request *rq,
+			__u64 uaddr, __u32 tot_len, __u32 numiov,
+			enum dma_data_direction dir)
+{
+	int ret;
+	void __user *ubuf = (void __user *) (unsigned long) uaddr;
+	struct sg_iovec fastiov[8], *iov = fastiov;
+
+	if (numiov == 0) {
+		ret = blk_rq_map_user(q, rq, ubuf, tot_len);
+		goto out;
+	}
+
+	if (numiov >= ARRAY_SIZE(fastiov)) {
+		iov = kmalloc(numiov * sizeof(*iov), GFP_KERNEL);
+		if (iov == NULL) {
+			ret = -ENOMEM;
+			goto out;
+		}
+	}
+
+#ifdef CONFIG_COMPAT
+	if (sizeof(struct compat_iovec) != sizeof(*iov)) {
+		struct compat_iovec __user *compat_iov = ubuf;
+		int i;
+		for (i=0; i<numiov; i++) {
+			compat_uptr_t cbase;
+			compat_size_t clen;
+			if (get_user(cbase, &compat_iov[i].iov_base)
+			 || get_user(clen, &compat_iov[i].iov_len)) {
+				ret = -EFAULT;
+				goto outfree;
+			}
+			iov[i].iov_base = compat_ptr(cbase);
+			iov[i].iov_len = clen;
+		}
+		goto map;
+	}
+#endif
+
+	if (copy_from_user(iov, ubuf, numiov * sizeof(*iov))) {
+		ret = -EFAULT;
+		goto outfree;
+	}
+map:
+	ret = blk_rq_map_user_iov(q, rq, iov, numiov, tot_len);
+
+outfree:
+	if (iov != fastiov)
+		kfree(iov);
+
+out:
+	return ret;
+}
+
+/*
  * map sg_io_v4 to a request.
  */
 static struct request *
@@ -288,12 +348,12 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr)
 	request_queue_t *q = bd->queue;
 	struct request *rq;
 	int ret, rw = 0; /* shut up gcc */
-	unsigned int dxfer_len;
-	void *dxferp = NULL;
 
-	dprintk("map hdr %llx/%u %llx/%u\n", (unsigned long long) hdr->dout_xferp,
-		hdr->dout_xfer_len, (unsigned long long) hdr->din_xferp,
-		hdr->din_xfer_len);
+	dprintk("map hdr %llx/%u/%u %llx/%u/%u\n",
+	        (unsigned long long) hdr->dout_xferp, hdr->dout_xfer_len,
+		hdr->dout_iovec_count,
+		(unsigned long long) hdr->din_xferp, hdr->din_xfer_len,
+		hdr->din_iovec_count);
 
 	ret = bsg_validate_sgv4_hdr(q, hdr, &rw);
 	if (ret)
@@ -305,29 +365,28 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr)
 	rq = blk_get_request(q, rw, GFP_KERNEL);
 	ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, test_bit(BSG_F_WRITE_PERM,
 						       &bd->flags));
-	if (ret) {
-		blk_put_request(rq);
-		return ERR_PTR(ret);
-	}
+	if (ret)
+		goto errout;
 
 	if (hdr->dout_xfer_len) {
-		dxfer_len = hdr->dout_xfer_len;
-		dxferp = (void*)(unsigned long)hdr->dout_xferp;
+		ret = bsg_map_data(q, rq, hdr->dout_xferp, hdr->dout_xfer_len,
+		                   hdr->dout_iovec_count, DMA_TO_DEVICE);
+		if (ret)
+			goto errout;
 	} else if (hdr->din_xfer_len) {
-		dxfer_len = hdr->din_xfer_len;
-		dxferp = (void*)(unsigned long)hdr->din_xferp;
-	} else
-		dxfer_len = 0;
-
-	if (dxfer_len) {
-		ret = blk_rq_map_user(q, rq, dxferp, dxfer_len);
-		if (ret) {
-			dprintk("failed map at %d\n", ret);
-			blk_put_request(rq);
-			rq = ERR_PTR(ret);
-		}
+		ret = bsg_map_data(q, rq, hdr->din_xferp, hdr->din_xfer_len,
+		                   hdr->din_iovec_count, DMA_FROM_DEVICE);
+		if (ret)
+			goto errout;
 	}
 
+	goto out;
+
+errout:
+	blk_put_request(rq);
+	rq = ERR_PTR(ret);
+
+out:
 	return rq;
 }
 
diff --git a/include/linux/bsg.h b/include/linux/bsg.h
index 51152cb..6bd7311 100644
--- a/include/linux/bsg.h
+++ b/include/linux/bsg.h
@@ -16,6 +16,8 @@ struct sg_io_v4 {
 	__u64 response;		/* [i], [*o] {SCSI: (auto)sense data} */
 
 	/* "din_" for data in (from device); "dout_" for data out (to device) */
+	__u32 dout_iovec_count; /* [i] =0 -> "flat" data transfer */
+	__u32 din_iovec_count;  /* [i] >0 -> dxfer is struct sg_iovec * */
 	__u32 dout_xfer_len;	/* [i] bytes to be transferred to device */
 	__u32 din_xfer_len;	/* [i] bytes to be transferred from device */
 	__u64 dout_xferp;	/* [i], [*i] */
-- 
1.5.0.3


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH v2] bsg: iovec support with explicit u64
  2007-03-19 14:21         ` FUJITA Tomonori
  2007-03-19 18:07           ` [PATCH v2] bsg: iovec support with compat Pete Wyckoff
@ 2007-03-19 18:13           ` Pete Wyckoff
  1 sibling, 0 replies; 10+ messages in thread
From: Pete Wyckoff @ 2007-03-19 18:13 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: dougg, jens.axboe, linux-scsi

pw@osc.edu wrote on Mon, 19 Mar 2007 14:07 -0400:
> Here is the patch to use sg_iovec, with its userspace void * and
> size_t, and the CONFIG_COMPAT code to fixup 32-bit userspace.  I'm
> not fond of having __u64 for non-iovec buffer representations, and
> void * for iovec buffer representations, but it saves having to
> build an sg_iovec just to call into the existing blk_rq_map_user_iov.

Just for comparison, a cleaned up version of the earlier patch that
does not require compat conversion.  Instead, use a new struct
sg_v4_iovec with explicit u64/u32 representation for user pointer
and length.

Perhaps later we might change blk_rq_map_user_iov to take
sg_v4_iovec and let sgv3 convert its sg_iovec into that.

		-- Pete


Support vectored IO as in SGv3.  The iovec structure uses explicit
sizes to avoid the need for compat conversion.

Signed-off-by: Pete Wyckoff <pw@osc.edu>
---
 block/bsg.c         |   95 ++++++++++++++++++++++++++++++++++++++------------
 include/linux/bsg.h |   14 +++++++
 2 files changed, 86 insertions(+), 23 deletions(-)

diff --git a/block/bsg.c b/block/bsg.c
index f1ea258..e334f75 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -280,6 +280,56 @@ bsg_validate_sgv4_hdr(request_queue_t *q, struct sg_io_v4 *hdr, int *rw)
 }
 
 /*
+ * Map either the in or out bufs.  Convert sg_v4_iovec to sg_iovec
+ * to use existing blk_rq_map infrastructure.  We could do a copy-in-place
+ * conversion on 64-bit kernels, just by zeroing the top half of sg_iovec's
+ * iov_len, but do not, for simplicity.
+ */
+static int bsg_map_data(struct request_queue *q, struct request *rq,
+			__u64 uaddr, __u32 tot_len, __u32 numiov,
+			enum dma_data_direction dir)
+{
+	int i, ret;
+	void __user *ubuf = (void __user *) (unsigned long) uaddr;
+	struct sg_iovec fastiov[8], *iov = fastiov;
+	struct sg_v4_iovec v4_fastiov[8], *v4_iov = v4_fastiov;
+
+	if (numiov == 0) {
+		ret = blk_rq_map_user(q, rq, ubuf, tot_len);
+		goto out;
+	}
+
+	if (numiov >= ARRAY_SIZE(fastiov)) {
+		iov = kmalloc(numiov * (sizeof(*iov) + sizeof(*v4_iov)),
+			      GFP_KERNEL);
+		if (iov == NULL) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		v4_iov = (void *) &iov[numiov];
+	}
+
+	if (copy_from_user(v4_iov, ubuf, numiov * sizeof(*v4_iov))) {
+		ret = -EFAULT;
+		goto outfree;
+	}
+
+	for (i=0; i<numiov; i++) {
+		iov[i].iov_base = (void *)(unsigned long) v4_iov[i].iov_base;
+		iov[i].iov_len = v4_iov[i].iov_len;
+	}
+
+	ret = blk_rq_map_user_iov(q, rq, iov, numiov, tot_len);
+
+outfree:
+	if (iov != fastiov)
+		kfree(iov);
+
+out:
+	return ret;
+}
+
+/*
  * map sg_io_v4 to a request.
  */
 static struct request *
@@ -288,12 +338,12 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr)
 	request_queue_t *q = bd->queue;
 	struct request *rq;
 	int ret, rw = 0; /* shut up gcc */
-	unsigned int dxfer_len;
-	void *dxferp = NULL;
 
-	dprintk("map hdr %llx/%u %llx/%u\n", (unsigned long long) hdr->dout_xferp,
-		hdr->dout_xfer_len, (unsigned long long) hdr->din_xferp,
-		hdr->din_xfer_len);
+	dprintk("map hdr %llx/%u/%u %llx/%u/%u\n",
+	        (unsigned long long) hdr->dout_xferp, hdr->dout_xfer_len,
+		hdr->dout_iovec_count,
+		(unsigned long long) hdr->din_xferp, hdr->din_xfer_len,
+		hdr->din_iovec_count);
 
 	ret = bsg_validate_sgv4_hdr(q, hdr, &rw);
 	if (ret)
@@ -305,29 +355,28 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr)
 	rq = blk_get_request(q, rw, GFP_KERNEL);
 	ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, test_bit(BSG_F_WRITE_PERM,
 						       &bd->flags));
-	if (ret) {
-		blk_put_request(rq);
-		return ERR_PTR(ret);
-	}
+	if (ret)
+		goto errout;
 
 	if (hdr->dout_xfer_len) {
-		dxfer_len = hdr->dout_xfer_len;
-		dxferp = (void*)(unsigned long)hdr->dout_xferp;
+		ret = bsg_map_data(q, rq, hdr->dout_xferp, hdr->dout_xfer_len,
+		                   hdr->dout_iovec_count, DMA_TO_DEVICE);
+		if (ret)
+			goto errout;
 	} else if (hdr->din_xfer_len) {
-		dxfer_len = hdr->din_xfer_len;
-		dxferp = (void*)(unsigned long)hdr->din_xferp;
-	} else
-		dxfer_len = 0;
-
-	if (dxfer_len) {
-		ret = blk_rq_map_user(q, rq, dxferp, dxfer_len);
-		if (ret) {
-			dprintk("failed map at %d\n", ret);
-			blk_put_request(rq);
-			rq = ERR_PTR(ret);
-		}
+		ret = bsg_map_data(q, rq, hdr->din_xferp, hdr->din_xfer_len,
+		                   hdr->din_iovec_count, DMA_FROM_DEVICE);
+		if (ret)
+			goto errout;
 	}
 
+	goto out;
+
+errout:
+	blk_put_request(rq);
+	rq = ERR_PTR(ret);
+
+out:
 	return rq;
 }
 
diff --git a/include/linux/bsg.h b/include/linux/bsg.h
index 51152cb..7c11e67 100644
--- a/include/linux/bsg.h
+++ b/include/linux/bsg.h
@@ -16,6 +16,8 @@ struct sg_io_v4 {
 	__u64 response;		/* [i], [*o] {SCSI: (auto)sense data} */
 
 	/* "din_" for data in (from device); "dout_" for data out (to device) */
+	__u32 dout_iovec_count; /* [i] =0 -> "flat" data transfer */
+	__u32 din_iovec_count;  /* [i] >0 -> dxfer is struct sg_v4_iovec * */
 	__u32 dout_xfer_len;	/* [i] bytes to be transferred to device */
 	__u32 din_xfer_len;	/* [i] bytes to be transferred from device */
 	__u64 dout_xferp;	/* [i], [*i] */
@@ -40,6 +42,18 @@ struct sg_io_v4 {
 	__u32 padding;
 };
 
+/*
+ * Vector of address/length pairs, used when dout_iovec_count (or din_)
+ * is non-zero.  In that case, dout_xferp is a list of struct sg_v4_iovec
+ * and dout_iovec_count is the number of entries in that list.  dout_xfer_len
+ * is the total length of the list.
+ */
+struct sg_v4_iovec {
+	__u64 iov_base;
+	__u32 iov_len;
+	__u32 __pad1;
+};
+
 #ifdef __KERNEL__
 
 #if defined(CONFIG_BLK_DEV_BSG)
-- 
1.5.0.3


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] bsg: iovec support with compat
  2007-03-19 18:07           ` [PATCH v2] bsg: iovec support with compat Pete Wyckoff
@ 2007-03-19 18:22             ` FUJITA Tomonori
  2007-03-21 15:34               ` Pete Wyckoff
  0 siblings, 1 reply; 10+ messages in thread
From: FUJITA Tomonori @ 2007-03-19 18:22 UTC (permalink / raw)
  To: pw; +Cc: fujita.tomonori, dougg, jens.axboe, linux-scsi

From: Pete Wyckoff <pw@osc.edu>
Subject: [PATCH v2] bsg: iovec support with compat
Date: Mon, 19 Mar 2007 14:07:27 -0400

> fujita.tomonori@lab.ntt.co.jp wrote on Mon, 19 Mar 2007 23:21 +0900:
> > From: Douglas Gilbert <dougg@torque.net>
> > Subject: Re: [PATCH] bsg: iovec support
> > Date: Mon, 19 Mar 2007 10:04:45 -0400
> > 
> > > >> Pete is also suggesting (shown above) a revised sg_io_vec
> > > >> structure that uses a uint64_t for the pointer to simplify
> > > >> 32, 64 bit thunking.
> > > > 
> > > > All I said is that it would be better to use the existing compat
> > > > infrastructure (sg_build_iovec, sg_ioctl_trans, etc in
> > > > fs/compat_ioctl.c) instead of adding another compat code.
> > > 
> > > Won't sg v4 make this even a bigger mess, at least
> > > initially anyway?
> > 
> > Inventing a new iovec structure like sg_io_v4_vec and something like
> > blk_rq_map_user_iov_sgv4 sounds a mess.
> 
> It is sort of a mess to have new blk mapping routine for a new iovec
> type.  But I very much did not want to introduce the need for
> another compat conversion.  But, I took a look at how it would be to
> pass the existing sg_iovec into bsg.
> 
> Adding a new bsg_write_compat would be bad.  There is lots of
> parsing and setup before we even get to the possibility of iovec
> data mapping.  Reusing just sg_build_ioctl from compat_ioctl.c is
> also suboptimal as this function is built around the idea of a
> contiguous sg_io_hdr + iovec in userspace.  The function is small
> enough that splitting it into a generic + ioctl-specific part would
> add too much abstraction to be worth it.
> 
> Here is the patch to use sg_iovec, with its userspace void * and
> size_t, and the CONFIG_COMPAT code to fixup 32-bit userspace.  I'm
> not fond of having __u64 for non-iovec buffer representations, and
> void * for iovec buffer representations, but it saves having to
> build an sg_iovec just to call into the existing blk_rq_map_user_iov.
> 
> Comments?

The compat code should not go to bsg.c.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2] bsg: iovec support with compat
  2007-03-19 18:22             ` FUJITA Tomonori
@ 2007-03-21 15:34               ` Pete Wyckoff
  0 siblings, 0 replies; 10+ messages in thread
From: Pete Wyckoff @ 2007-03-21 15:34 UTC (permalink / raw)
  To: FUJITA Tomonori; +Cc: dougg, jens.axboe, linux-scsi

fujita.tomonori@lab.ntt.co.jp wrote on Tue, 20 Mar 2007 03:22 +0900:
> From: Pete Wyckoff <pw@osc.edu>
> Subject: [PATCH v2] bsg: iovec support with compat
> Date: Mon, 19 Mar 2007 14:07:27 -0400
> 
> > Adding a new bsg_write_compat would be bad.  There is lots of
> > parsing and setup before we even get to the possibility of iovec
> > data mapping.  Reusing just sg_build_ioctl from compat_ioctl.c is
> > also suboptimal as this function is built around the idea of a
> > contiguous sg_io_hdr + iovec in userspace.  The function is small
> > enough that splitting it into a generic + ioctl-specific part would
> > add too much abstraction to be worth it.
> > 
> > Here is the patch to use sg_iovec, with its userspace void * and
> > size_t, and the CONFIG_COMPAT code to fixup 32-bit userspace.  I'm
> > not fond of having __u64 for non-iovec buffer representations, and
> > void * for iovec buffer representations, but it saves having to
> > build an sg_iovec just to call into the existing blk_rq_map_user_iov.
> > 
> > Comments?
> 
> The compat code should not go to bsg.c.

Agreed.  I dislike the entire approach of reusing sg_iovec in bsg,
but thought I'd work it through to see what it is like.

The compat code cannot go in compat_ioctl.c or similar, as explained
above.  And it seems wrong to leave it out and silently break on
compat-requiring setups.

Using sg_iovec with bsg is bad from a user perspective too.
sg_iovec pointers are (void *).  sg_io_v4 pointers are __u64.
sg_iovec lengths are size_t (64-bit).  sg_io_v4 lengths are __u32.

Please consider instead the original proposal of a new iovec
type for bsg.  It is less complex than using existing sg_iovec.
http://article.gmane.org/gmane.linux.scsi/30461

There is exactly one user of blk_rq_map_user_iov() in the tree:
sg_io() in scsi_ioctl.c.  I could convert that to sg_v4_iovec now
too.  The helper function bio_map_user_iov() is only used by
blk_rq_map_user_iov() and can also be fixed.  The only use we have
for struct sg_iovec is this one sg_io() caller, and sg.c.  Let's
migrate to sg_v4_iovec at the same time we switch from sgv3 to sgv4.

		-- Pete

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2007-03-21 15:34 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-03-01 22:29 [PATCH] bsg: iovec support Pete Wyckoff
2007-03-19 10:41 ` FUJITA Tomonori
2007-03-19 12:56   ` Douglas Gilbert
2007-03-19 13:34     ` FUJITA Tomonori
2007-03-19 14:04       ` Douglas Gilbert
2007-03-19 14:21         ` FUJITA Tomonori
2007-03-19 18:07           ` [PATCH v2] bsg: iovec support with compat Pete Wyckoff
2007-03-19 18:22             ` FUJITA Tomonori
2007-03-21 15:34               ` Pete Wyckoff
2007-03-19 18:13           ` [PATCH v2] bsg: iovec support with explicit u64 Pete Wyckoff

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).