public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] SG_IO readcd and various bugs
@ 2003-05-30 13:02 Jens Axboe
  2003-05-30 13:47 ` Markus Plail
  0 siblings, 1 reply; 15+ messages in thread
From: Jens Axboe @ 2003-05-30 13:02 UTC (permalink / raw)
  To: Linux Kernel, Markus Plail; +Cc: Linus Torvalds

Hi,

Below patch should make readcd (and others) behave, please test.

===== drivers/block/ioctl.c 1.54 vs edited =====
--- 1.54/drivers/block/ioctl.c	Sat Apr 26 00:16:28 2003
+++ edited/drivers/block/ioctl.c	Fri May 30 10:25:55 2003
@@ -207,11 +207,8 @@
 		set_device_ro(bdev, n);
 		return 0;
 	default:
-		if (disk->fops->ioctl) {
-			ret = disk->fops->ioctl(inode, file, cmd, arg);
-			if (ret != -EINVAL)
-				return ret;
-		}
+		if (disk->fops->ioctl)
+			return disk->fops->ioctl(inode, file, cmd, arg);
 	}
 	return -ENOTTY;
 }
===== drivers/block/scsi_ioctl.c 1.27 vs edited =====
--- 1.27/drivers/block/scsi_ioctl.c	Sat May 17 12:05:21 2003
+++ edited/drivers/block/scsi_ioctl.c	Fri May 30 15:00:08 2003
@@ -68,7 +68,6 @@
 
 	rq->flags |= REQ_NOMERGE;
 	rq->waiting = &wait;
-        drive_stat_acct(rq, rq->nr_sectors, 1);
 	elv_add_request(q, rq, 1, 1);
 	generic_unplug_device(q);
 	wait_for_completion(&wait);
@@ -99,7 +98,7 @@
 
 static int sg_get_timeout(request_queue_t *q)
 {
-	return q->sg_timeout;
+	return q->sg_timeout / (HZ / USER_HZ);
 }
 
 static int sg_set_timeout(request_queue_t *q, int *p)
@@ -107,7 +106,7 @@
 	int timeout, err = get_user(timeout, p);
 
 	if (!err)
-		q->sg_timeout = timeout;
+		q->sg_timeout = timeout * (HZ / USER_HZ);
 
 	return err;
 }
@@ -121,10 +120,14 @@
 {
 	int size, err = get_user(size, p);
 
-	if (!err)
-		q->sg_reserved_size = size;
+	if (err)
+		return err;
 
-	return err;
+	if (size > (q->max_sectors << 9))
+		return -EINVAL;
+
+	q->sg_reserved_size = size;
+	return 0;
 }
 
 /*
@@ -139,16 +142,14 @@
 static int sg_io(request_queue_t *q, struct block_device *bdev,
 		 struct sg_io_hdr *uptr)
 {
-	unsigned long uaddr, start_time;
-	int reading, writing, nr_sectors;
+	unsigned long start_time;
+	int reading, writing;
 	struct sg_io_hdr hdr;
 	struct request *rq;
 	struct bio *bio;
 	char sense[SCSI_SENSE_BUFFERSIZE];
 	void *buffer;
 
-	if (!access_ok(VERIFY_WRITE, uptr, sizeof(*uptr)))
-		return -EFAULT;
 	if (copy_from_user(&hdr, uptr, sizeof(*uptr)))
 		return -EFAULT;
 
@@ -156,11 +157,6 @@
 		return -EINVAL;
 	if (hdr.cmd_len > sizeof(rq->cmd))
 		return -EINVAL;
-	if (!access_ok(VERIFY_READ, hdr.cmdp, hdr.cmd_len))
-		return -EFAULT;
-
-	if (hdr.dxfer_len > 65536)
-		return -EINVAL;
 
 	/*
 	 * we'll do that later
@@ -168,7 +164,9 @@
 	if (hdr.iovec_count)
 		return -EOPNOTSUPP;
 
-	nr_sectors = 0;
+	if (hdr.dxfer_len > (q->max_sectors << 9))
+		return -EIO;
+
 	reading = writing = 0;
 	buffer = NULL;
 	bio = NULL;
@@ -189,19 +187,12 @@
 			break;
 		}
 
-		uaddr = (unsigned long) hdr.dxferp;
-		/* writing to device -> reading from vm */
-		if (writing && !access_ok(VERIFY_READ, uaddr, bytes))
-			return -EFAULT;
-		/* reading from device -> writing to vm */
-		else if (reading && !access_ok(VERIFY_WRITE, uaddr, bytes))
-			return -EFAULT;
-
 		/*
 		 * first try to map it into a bio. reading from device will
 		 * be a write to vm.
 		 */
-		bio = bio_map_user(bdev, uaddr, hdr.dxfer_len, reading);
+		bio = bio_map_user(bdev, (unsigned long) hdr.dxferp,
+				   hdr.dxfer_len, reading);
 
 		/*
 		 * if bio setup failed, fall back to slow approach
@@ -211,10 +202,11 @@
 			if (!buffer)
 				return -ENOMEM;
 
-			nr_sectors = bytes >> 9;
-			if (writing)
-				copy_from_user(buffer,hdr.dxferp,hdr.dxfer_len);
-			else
+			if (writing) {
+				if (copy_from_user(buffer, hdr.dxferp,
+						   hdr.dxfer_len))
+					goto out_buffer;
+			} else
 				memset(buffer, 0, hdr.dxfer_len);
 		}
 	}
@@ -225,7 +217,8 @@
 	 * fill in request structure
 	 */
 	rq->cmd_len = hdr.cmd_len;
-	copy_from_user(rq->cmd, hdr.cmdp, hdr.cmd_len);
+	if (copy_from_user(rq->cmd, hdr.cmdp, hdr.cmd_len))
+		goto out_request;
 	if (sizeof(rq->cmd) != hdr.cmd_len)
 		memset(rq->cmd + hdr.cmd_len, 0, sizeof(rq->cmd) - hdr.cmd_len);
 
@@ -235,18 +228,15 @@
 
 	rq->flags |= REQ_BLOCK_PC;
 
-	rq->hard_nr_sectors = rq->nr_sectors = nr_sectors;
-	rq->hard_cur_sectors = rq->current_nr_sectors = nr_sectors;
-
-	rq->bio = rq->biotail = bio;
+	rq->bio = rq->biotail = NULL;
 
 	if (bio)
 		blk_rq_bio_prep(q, rq, bio);
 
-	rq->data_len = hdr.dxfer_len;
 	rq->data = buffer;
+	rq->data_len = hdr.dxfer_len;
 
-	rq->timeout = hdr.timeout;
+	rq->timeout = (hdr.timeout * HZ) / 1000;
 	if (!rq->timeout)
 		rq->timeout = q->sg_timeout;
 	if (!rq->timeout)
@@ -273,7 +263,7 @@
 	if (hdr.masked_status || hdr.host_status || hdr.driver_status)
 		hdr.info |= SG_INFO_CHECK;
 	hdr.resid = rq->data_len;
-	hdr.duration = (jiffies - start_time) * (1000 / HZ);
+	hdr.duration = ((jiffies - start_time) * 1000) / HZ;
 	hdr.sb_len_wr = 0;
 
 	if (rq->sense_len && hdr.sbp) {
@@ -286,17 +276,25 @@
 
 	blk_put_request(rq);
 
-	copy_to_user(uptr, &hdr, sizeof(*uptr));
+	if (copy_to_user(uptr, &hdr, sizeof(*uptr)))
+		goto out_buffer;
 
 	if (buffer) {
 		if (reading)
-			copy_to_user(hdr.dxferp, buffer, hdr.dxfer_len);
+			if (copy_to_user(hdr.dxferp, buffer, hdr.dxfer_len))
+				goto out_buffer;
 
 		kfree(buffer);
 	}
+
 	/* may not have succeeded, but output values written to control
 	 * structure (struct sg_io_hdr).  */
 	return 0;
+out_request:
+	blk_put_request(rq);
+out_buffer:
+	kfree(buffer);
+	return -EFAULT;
 }
 
 #define FORMAT_UNIT_TIMEOUT		(2 * 60 * 60 * HZ)
===== drivers/ide/ide-cd.c 1.46 vs edited =====
--- 1.46/drivers/ide/ide-cd.c	Thu May  8 10:39:34 2003
+++ edited/drivers/ide/ide-cd.c	Fri May 30 14:38:59 2003
@@ -666,8 +666,10 @@
 		struct cdrom_info *info = drive->driver_data;
 		void *sense = &info->sense_data;
 		
-		if (failed && failed->sense)
+		if (failed && failed->sense) {
 			sense = failed->sense;
+			failed->sense_len = rq->sense_len;
+		}
 
 		cdrom_analyze_sense_data(drive, failed, sense);
 	}
@@ -723,7 +725,7 @@
 		 * scsi status byte
 		 */
 		if ((rq->flags & REQ_BLOCK_PC) && !rq->errors)
-			rq->errors = CHECK_CONDITION;
+			rq->errors = SAM_STAT_CHECK_CONDITION;
 
 		/* Check for tray open. */
 		if (sense_key == NOT_READY) {
@@ -1609,10 +1611,18 @@
 
 static void post_transform_command(struct request *req)
 {
-	char *ibuf = req->buffer;
 	u8 *c = req->cmd;
+	char *ibuf;
 
 	if (!blk_pc_request(req))
+		return;
+
+	if (req->bio)
+		ibuf = bio_data(req->bio);
+	else
+		ibuf = req->data;
+
+	if (!ibuf)
 		return;
 
 	/*
===== fs/bio.c 1.45 vs edited =====
--- 1.45/fs/bio.c	Tue May 27 03:53:39 2003
+++ edited/fs/bio.c	Fri May 30 14:32:56 2003
@@ -538,12 +538,6 @@
 	bio = __bio_map_user(bdev, uaddr, len, write_to_vm);
 
 	if (bio) {
-		if (bio->bi_size < len) {
-			bio_endio(bio, bio->bi_size, 0);
-			bio_unmap_user(bio, 0);
-			return NULL;
-		}
-
 		/*
 		 * subtle -- if __bio_map_user() ended up bouncing a bio,
 		 * it would normally disappear when its bi_end_io is run.
@@ -551,6 +545,12 @@
 		 * reference to it
 		 */
 		bio_get(bio);
+
+		if (bio->bi_size < len) {
+			bio_endio(bio, bio->bi_size, 0);
+			bio_unmap_user(bio, 0);
+			return NULL;
+		}
 	}
 
 	return bio;

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 15+ messages in thread
* Re: [PATCH] SG_IO readcd and various bugs
@ 2003-05-31  8:23 Douglas Gilbert
  2003-05-31 10:57 ` Jens Axboe
  0 siblings, 1 reply; 15+ messages in thread
From: Douglas Gilbert @ 2003-05-31  8:23 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-scsi

[-- Attachment #1: Type: text/plain, Size: 3289 bytes --]

Jens Axboe wrote:
 > On Fri, May 30 2003, Markus Plail wrote:
 > > On Fri, 30 May 2003, Markus Plail wrote:
 > >
 > > > The patch makes readcd work just fine here :-) Many thanks!
 > >
 > > Just realized that C2 scans don't yet work.
 >
 > Updated patch, please give that a shot. These sense_len wasn't
 > being set correctly.

Jens,
At the end of this post is an incremental patch on
top of your most recent one.


Here are some timing and CPU utilization numbers on the
combined patches. Reading
1 GB data from the beginning of a Fujitsu MAM3184MP SCSI
disk whose streaming speed for outer zones is about
57 MB/sec (according to Storage Review). Each atomic read
is 64 KB (and "of=." is sg_dd shorthand for "of=/dev/null").

Here is a normal read (i.e. treating /dev/sda as a normal file):
  $ /usr/bin/time sg_dd if=/dev/sda of=. bs=512 time=1 count=2M
  time to transfer data was 17.821534 secs, 57.46 MB/sec
  0.00user 4.37system 0:17.82elapsed 24%CPU

The transfer time is a little fast due to cache hits. The
24% CPU utilization is the price paid for those cache hits.

Next using O_DIRECT:
  $ /usr/bin/time sg_dd if=/dev/sda of=. bs=512 time=1 count=2M
      odir=1
  time to transfer data was 18.003662 secs, 56.88 MB/sec
  0.00user 0.52system 0:18.00elapsed 2%CPU

The time to transfer is about right and the CPU utilization is
down to 2% (0.52 seconds).

Next using the block layer SG_IO command:
  $ /usr/bin/time sg_dd if=/dev/sda of=. bs=512 time=1 count=2M
      blk_sgio=1
  time to transfer data was 18.780551 secs, 54.52 MB/sec
  0.00user 6.40system 0:18.78elapsed 34%CPU

The throughput is worse and the CPU utilization is now
worse than a normal file.

Setting the "dio=1" flag in sg_dd page aligns its buffers
which causes bio_map_user() to succeed (in
drivers/block/scsi_ioctl.c:sg_io()). In other words it turns
on direct I/O:
  $ /usr/bin/time sg_dd if=/dev/sda of=. bs=512 time=1 count=2M
      blk_sgio=1 dio=1
  time to transfer data was 17.899802 secs, 57.21 MB/sec
  0.00user 0.31system 0:17.90elapsed 1%CPU

So this result is comparable with O_DIRECT on the normal
file. CPU utilization is down to 1% (0.31 seonds).


With the latest changes from Jens (mainly dropping the
artificial 64 KB per operation limit) the maximum
element size in the block layer SG_IO is:
   - 128 KB when direct I/O is not used (i.e. the user
     space buffer does not meet bio_map_user()'s
     requirements). This seems to be the largest buffer
     kmalloc() will allow (in my tests)
   - (sg_tablesize * page_size) when direct I/O is used.
     My test was with the sym53c8xx_2 driver in which
     sg_tablesize==96 so my largest element size was 384 KB



Incremental patch (on top of Jens's 2nd patch in this
thread) changelog:
   - change version number (effectively to 3.7.29) so
     apps can distinguish if the want (current sg driver
     version is 3.5.29). The main thing that app do is
     check the version >= 3.0.0 as that implies the
     SG_IO ioctl is there.
   - reject requests for mmap()-ed I/O with a "not
     supported error"
   - if direct I/O is requested, send back via info
     field whether it was done (or fell back to indirect
     I/O). This is what happens in the sg driver.
   - ultra-paranoid buffer zeroing (in padding at end)

Doug Gilbert

[-- Attachment #2: blk_sg_io2570ja_dg.diff --]
[-- Type: text/plain, Size: 2332 bytes --]

--- linux/drivers/block/scsi_ioctl.c	2003-05-31 13:50:04.000000000 +1000
+++ linux/drivers/block/scsi_ioctl.c2570dpg2	2003-05-31 14:13:15.000000000 +1000
@@ -82,7 +82,7 @@
 
 static int sg_get_version(int *p)
 {
-	static int sg_version_num = 30527;
+	static int sg_version_num = 30729;   /* version 3.7.* to distinguish */
 	return put_user(sg_version_num, p);
 }
 
@@ -143,7 +143,7 @@
 		 struct sg_io_hdr *uptr)
 {
 	unsigned long start_time;
-	int reading, writing;
+	int reading, writing, dio;
 	struct sg_io_hdr hdr;
 	struct request *rq;
 	struct bio *bio;
@@ -163,11 +163,14 @@
 	 */
 	if (hdr.iovec_count)
 		return -EOPNOTSUPP;
+	if (hdr.flags & SG_FLAG_MMAP_IO)
+		return -EOPNOTSUPP;
 
 	if (hdr.dxfer_len > (q->max_sectors << 9))
 		return -EIO;
 
 	reading = writing = 0;
+	dio = 0;
 	buffer = NULL;
 	bio = NULL;
 	if (hdr.dxfer_len) {
@@ -194,10 +197,12 @@
 		bio = bio_map_user(bdev, (unsigned long) hdr.dxferp,
 				   hdr.dxfer_len, reading);
 
-		/*
-		 * if bio setup failed, fall back to slow approach
-		 */
-		if (!bio) {
+		if (bio)
+			dio = 1;
+		else {
+			/*
+			 * if bio setup failed, fall back to slow approach
+			 */
 			buffer = kmalloc(bytes, q->bounce_gfp | GFP_USER);
 			if (!buffer)
 				return -ENOMEM;
@@ -206,8 +211,11 @@
 				if (copy_from_user(buffer, hdr.dxferp,
 						   hdr.dxfer_len))
 					goto out_buffer;
+				if (bytes > hdr.dxfer_len)
+					memset((char *)buffer + hdr.dxfer_len,
+					       0, bytes - hdr.dxfer_len);
 			} else
-				memset(buffer, 0, hdr.dxfer_len);
+				memset(buffer, 0, bytes);
 		}
 	}
 
@@ -257,11 +265,13 @@
 	hdr.status = rq->errors;	
 	hdr.masked_status = (hdr.status >> 1) & 0x1f;
 	hdr.msg_status = 0;
-	hdr.host_status = 0;
+	hdr.host_status = 0;	/* "lost nexus" error could go here */
 	hdr.driver_status = 0;
 	hdr.info = 0;
 	if (hdr.masked_status || hdr.host_status || hdr.driver_status)
 		hdr.info |= SG_INFO_CHECK;
+	if (dio && (hdr.flags & SG_FLAG_DIRECT_IO))
+		hdr.info |= SG_INFO_DIRECT_IO;
 	hdr.resid = rq->data_len;
 	hdr.duration = ((jiffies - start_time) * 1000) / HZ;
 	hdr.sb_len_wr = 0;
@@ -286,7 +296,7 @@
 		kfree(buffer);
 	}
 
-	/* may not have succeeded, but output values written to control
+	/* may not have succeeded, but output status written to control
 	 * structure (struct sg_io_hdr).  */
 	return 0;
 out_request:

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2003-06-03  8:16 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2003-05-30 13:02 [PATCH] SG_IO readcd and various bugs Jens Axboe
2003-05-30 13:47 ` Markus Plail
2003-05-30 13:52   ` Markus Plail
2003-05-30 14:58     ` Jens Axboe
2003-05-30 16:57       ` Markus Plail
2003-06-01  7:50         ` uaca
2003-06-01  9:18           ` Markus Plail
2003-06-01 10:19             ` uaca
  -- strict thread matches above, loose matches on Subject: below --
2003-05-31  8:23 Douglas Gilbert
2003-05-31 10:57 ` Jens Axboe
2003-06-01  7:39   ` Douglas Gilbert
2003-06-02  7:27     ` Jens Axboe
2003-06-03  5:23       ` Douglas Gilbert
2003-06-03  5:39         ` Nick Piggin
2003-06-03  8:29         ` Jens Axboe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox