public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] 2.5.21 IDE 91
  2002-06-09  5:42 Linux 2.5.21 Linus Torvalds
@ 2002-06-14 14:02 ` Martin Dalecki
  2002-06-14 15:17   ` Jens Axboe
  0 siblings, 1 reply; 36+ messages in thread
From: Martin Dalecki @ 2002-06-14 14:02 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Kernel Mailing List

[-- Attachment #1: Type: text/plain, Size: 915 bytes --]

Thu Jun 13 22:59:54 CEST 2002 ide-clean-91

- Realize that the only place where ata_do_taskfile gets used is ide-disk.c
   move it and its "friends' over there.

- Unify the do_request method for disk devices. This saves quite a lot of code.

- Make task_muin_intr and task_in_intr use the same busy status checks on
   entry.

- Unfold get_command at the single only place where it's used.

- Add missing __ata_end_request on kill_rq path.

- Rename udma_tcq_taskfile() to udma_tcq_init to make the code look like to
   normal udma_init. Revert the logics of udma_init and it's
   implementations to mirror that of udma_tcq_init().

- Fix a tinny bug in pmac_udma_init() where it was reporting the wrong value up
   on failure.

- Revert the logics of udma_start(). It's called from udma_init context.
   Realize that it is always returning ide_started. Make it self and the
   implementations of it return void.


[-- Attachment #2: ide-clean-91.diff --]
[-- Type: text/plain, Size: 38509 bytes --]

diff -urN linux-2.5.21/drivers/ide/alim15x3.c linux/drivers/ide/alim15x3.c
--- linux-2.5.21/drivers/ide/alim15x3.c	2002-06-14 12:45:13.000000000 +0200
+++ linux/drivers/ide/alim15x3.c	2002-06-14 14:23:39.000000000 +0200
@@ -259,7 +259,7 @@
 static int ali15x3_udma_init(struct ata_device *drive, struct request *rq)
 {
 	if ((m5229_revision < 0xC2) && (drive->type != ATA_DISK))
-		return 1;	/* try PIO instead of DMA */
+		return ide_stopped;	/* try PIO instead of DMA */
 
 	return udma_pci_init(drive, rq);
 }
diff -urN linux-2.5.21/drivers/ide/hpt34x.c linux/drivers/ide/hpt34x.c
--- linux-2.5.21/drivers/ide/hpt34x.c	2002-06-14 12:45:13.000000000 +0200
+++ linux/drivers/ide/hpt34x.c	2002-06-14 14:20:16.000000000 +0200
@@ -175,7 +175,7 @@
 	u8 cmd;
 
 	if (!(count = udma_new_table(drive, rq)))
-		return 1;	/* try PIO instead of DMA */
+		return ide_stopped;	/* try PIO instead of DMA */
 
 	if (rq_data_dir(rq) == READ)
 		cmd = 0x09;
@@ -192,7 +192,7 @@
 		OUT_BYTE((cmd == 0x09) ? WIN_READDMA : WIN_WRITEDMA, IDE_COMMAND_REG);
 	}
 
-	return 0;
+	return ide_started;
 }
 #endif
 
diff -urN linux-2.5.21/drivers/ide/hpt366.c linux/drivers/ide/hpt366.c
--- linux-2.5.21/drivers/ide/hpt366.c	2002-06-14 12:45:13.000000000 +0200
+++ linux/drivers/ide/hpt366.c	2002-06-14 15:11:39.000000000 +0200
@@ -858,7 +858,7 @@
 	udelay(10);
 }
 
-static int hpt370_udma_start(struct ata_device *drive, struct request *__rq)
+static void hpt370_udma_start(struct ata_device *drive, struct request *__rq)
 {
 	struct ata_channel *ch = drive->channel;
 
@@ -870,8 +870,6 @@
 	 */
 
 	outb(inb(ch->dma_base) | 1, ch->dma_base);	/* start DMA */
-
-	return 0;
 }
 
 static void do_timeout_irq(struct ata_device *drive)
diff -urN linux-2.5.21/drivers/ide/icside.c linux/drivers/ide/icside.c
--- linux-2.5.21/drivers/ide/icside.c	2002-06-14 12:45:00.000000000 +0200
+++ linux/drivers/ide/icside.c	2002-06-14 15:11:08.000000000 +0200
@@ -447,18 +447,13 @@
 	return get_dma_residue(ch->hw.dma) != 0;
 }
 
-static int icside_dma_start(struct ata_device *drive, struct request *rq)
+static void icside_dma_start(struct ata_device *drive, struct request *rq)
 {
 	struct ata_channel *ch = drive->channel;
 
-	/*
-	 * We can not enable DMA on both channels.
-	 */
+	/* We can not enable DMA on both channels simultaneously. */
 	BUG_ON(dma_channel_active(ch->hw.dma));
-
 	enable_dma(ch->hw.dma);
-
-	return 0;
 }
 
 /*
@@ -524,10 +519,10 @@
 	u8 int cmd;
 
 	if (icside_dma_common(drive, rq, DMA_MODE_WRITE))
-		return 1;
+		return ide_stopped;
 
 	if (drive->type != ATA_DISK)
-		return 0;
+		return ide_started;
 
 	ata_set_handler(drive, icside_dmaintr, WAIT_CMD, NULL);
 
@@ -543,7 +538,7 @@
 
 	enable_dma(ch->hw.dma);
 
-	return 0;
+	return ide_started;
 }
 
 static int icside_irq_status(struct ata_device *drive)
diff -urN linux-2.5.21/drivers/ide/ide.c linux/drivers/ide/ide.c
--- linux-2.5.21/drivers/ide/ide.c	2002-06-14 12:45:13.000000000 +0200
+++ linux/drivers/ide/ide.c	2002-06-14 13:18:44.000000000 +0200
@@ -702,7 +702,8 @@
 			spin_unlock_irq(ch->lock);
 			ata_ops(drive)->end_request(drive, rq, 0);
 			spin_lock_irq(ch->lock);
-		}
+		} else
+			__ata_end_request(drive, rq, 0, 0);
 	} else
 		__ata_end_request(drive, rq, 0, 0);
 
diff -urN linux-2.5.21/drivers/ide/ide-cd.c linux/drivers/ide/ide-cd.c
--- linux-2.5.21/drivers/ide/ide-cd.c	2002-06-14 12:45:13.000000000 +0200
+++ linux/drivers/ide/ide-cd.c	2002-06-14 14:23:18.000000000 +0200
@@ -741,7 +741,7 @@
 	else {
 		if (info->dma) {
 			if (info->cmd == READ || info->cmd == WRITE)
-				info->dma = !udma_init(drive, rq);
+				info->dma = udma_init(drive, rq);
 			else
 				printk("ide-cd: DMA set, but not allowed\n");
 		}
diff -urN linux-2.5.21/drivers/ide/ide-disk.c linux/drivers/ide/ide-disk.c
--- linux-2.5.21/drivers/ide/ide-disk.c	2002-06-14 12:45:13.000000000 +0200
+++ linux/drivers/ide/ide-disk.c	2002-06-14 14:18:25.000000000 +0200
@@ -102,40 +102,44 @@
 	spin_lock_irqsave(ch->lock, flags);
 
 	if (!ata_status(drive, DATA_READY, BAD_R_STAT)) {
-		if (drive->status & (ERR_STAT|DRQ_STAT)) {
+		if (drive->status & (ERR_STAT | DRQ_STAT)) {
 			spin_unlock_irqrestore(ch->lock, flags);
 
 			return ata_error(drive, rq, __FUNCTION__);
 		}
 
-		if (!(drive->status & BUSY_STAT)) {
-//			printk("task_in_intr to Soon wait for next interrupt\n");
-			ata_set_handler(drive, task_in_intr, WAIT_CMD, NULL);
-			spin_unlock_irqrestore(ch->lock, flags);
+		/* no data yet, so wait for another interrupt */
+		ata_set_handler(drive, task_in_intr, WAIT_CMD, NULL);
 
-			return ide_started;
+		ret = ide_started;
+	} else {
+
+		//	printk("Read: %p, rq->current_nr_sectors: %d\n", buf, (int) rq->current_nr_sectors);
+		{
+			unsigned long flags;
+			char *buf;
+
+			buf = ide_map_rq(rq, &flags);
+			ata_read(drive, buf, SECTOR_WORDS);
+			ide_unmap_rq(rq, buf, &flags);
 		}
-	}
 
-//	printk("Read: %p, rq->current_nr_sectors: %d\n", buf, (int) rq->current_nr_sectors);
-	{
-		unsigned long flags;
-		char *buf;
-
-		buf = ide_map_rq(rq, &flags);
-		ata_read(drive, buf, SECTOR_WORDS);
-		ide_unmap_rq(rq, buf, &flags);
-	}
+		/* First segment of the request is complete. note that this does not
+		 * necessarily mean that the entire request is done!! this is only true
+		 * if ata_end_request() returns 0.
+		 */
+		rq->errors = 0;
+		--rq->current_nr_sectors;
 
-	/* First segment of the request is complete. note that this does not
-	 * necessarily mean that the entire request is done!! this is only true
-	 * if ata_end_request() returns 0.
-	 */
+		if (rq->current_nr_sectors <= 0) {
+			if (!__ata_end_request(drive, rq, 1, 0)) {
+			//		printk("Request Ended stat: %02x\n", drive->status);
+				spin_unlock_irqrestore(ch->lock, flags);
+
+				return ide_stopped;
+			}
+		}
 
-	if (--rq->current_nr_sectors <= 0 && !__ata_end_request(drive, rq, 1, 0)) {
-//		printk("Request Ended stat: %02x\n", drive->status);
-		ret = ide_stopped;
-	} else {
 		/* still data left to transfer */
 		ata_set_handler(drive, task_in_intr,  WAIT_CMD, NULL);
 
@@ -197,7 +201,7 @@
 
 	spin_lock_irqsave(ch->lock, flags);
 	if (!ata_status(drive, DATA_READY, BAD_R_STAT)) {
-		if (drive->status & (ERR_STAT|DRQ_STAT)) {
+		if (drive->status & (ERR_STAT | DRQ_STAT)) {
 			spin_unlock_irqrestore(ch->lock, flags);
 
 			return ata_error(drive, rq, __FUNCTION__);
@@ -235,16 +239,16 @@
 
 			rq->errors = 0;
 			rq->current_nr_sectors -= nsect;
-			msect -= nsect;
 
 			/* FIXME: this seems buggy */
-			if (!rq->current_nr_sectors) {
+			if (rq->current_nr_sectors <= 0) {
 				if (!__ata_end_request(drive, rq, 1, 0)) {
 					spin_unlock_irqrestore(ch->lock, flags);
 
 					return ide_stopped;
 				}
 			}
+			msect -= nsect;
 		} while (msect);
 
 		/* more data left */
@@ -343,212 +347,138 @@
 }
 
 /*
- * Decode with physical ATA command to use and setup associated data.
+ * Channel lock should be held on entry.
  */
-static u8 get_command(struct ata_device *drive, struct ata_taskfile *ar, int cmd)
+static ide_startstop_t __do_request(struct ata_device *drive,
+		struct ata_taskfile *ar, struct request *rq)
 {
-	int lba48bit = (drive->id->cfs_enable_2 & 0x0400) ? 1 : 0;
-
-#if 1
-	lba48bit = drive->addressing;
-#endif
-
-	if (lba48bit) {
-		if (cmd == READ) {
-			ar->command_type = IDE_DRIVE_TASK_IN;
-			if (drive->using_tcq) {
-				return WIN_READDMA_QUEUED_EXT;
-			} else if (drive->using_dma) {
-				return WIN_READDMA_EXT;
-			} else if (drive->mult_count) {
-				ar->handler = task_mulin_intr;
-				return WIN_MULTREAD_EXT;
-			} else {
-				ar->handler = task_in_intr;
-				return WIN_READ_EXT;
-			}
-		} else if (cmd == WRITE) {
-			ar->command_type = IDE_DRIVE_TASK_RAW_WRITE;
-			if (drive->using_tcq) {
-				return WIN_WRITEDMA_QUEUED_EXT;
-			} else if (drive->using_dma) {
-				return WIN_WRITEDMA_EXT;
-			} else if (drive->mult_count) {
-				ar->handler = task_mulout_intr;
-				return WIN_MULTWRITE_EXT;
-			} else {
-				ar->handler = task_out_intr;
-				return WIN_WRITE_EXT;
-			}
-		}
-	} else {
-		if (cmd == READ) {
-			ar->command_type = IDE_DRIVE_TASK_IN;
-			if (drive->using_tcq) {
-				return WIN_READDMA_QUEUED;
-			} else if (drive->using_dma) {
-				return WIN_READDMA;
-			} else if (drive->mult_count) {
-				ar->handler = task_in_intr;
-				return WIN_MULTREAD;
-			} else {
-				ar->handler = task_in_intr;
-				return WIN_READ;
-			}
-		} else if (cmd == WRITE) {
-			ar->command_type = IDE_DRIVE_TASK_RAW_WRITE;
-			if (drive->using_tcq) {
-				return WIN_WRITEDMA_QUEUED;
-			} else if (drive->using_dma) {
-				return WIN_WRITEDMA;
-			} else if (drive->mult_count) {
-				ar->handler = task_mulout_intr;
-				return WIN_MULTWRITE;
-			} else {
-				ar->handler = task_out_intr;
-				return WIN_WRITE;
-			}
-		}
-	}
-
-	/* not reached! */
-	return WIN_NOP;
-}
-
-static ide_startstop_t chs_do_request(struct ata_device *drive, struct request *rq, sector_t block)
-{
-	struct ata_taskfile args;
-	int sectors;
-
-	unsigned int track	= (block / drive->sect);
-	unsigned int sect	= (block % drive->sect) + 1;
-	unsigned int head	= (track % drive->head);
-	unsigned int cyl	= (track / drive->head);
-
-	sectors = rq->nr_sectors;
-	if (sectors == 256)
-		sectors = 0;
-
-	memset(&args, 0, sizeof(args));
-
-	if (blk_rq_tagged(rq)) {
-		args.taskfile.feature = sectors;
-		args.taskfile.sector_count = rq->tag << 3;
-	} else
-		args.taskfile.sector_count = sectors;
-
-	args.taskfile.sector_number = sect;
-	args.taskfile.low_cylinder = cyl;
-	args.taskfile.high_cylinder = (cyl>>8);
-
-	args.taskfile.device_head = head;
-	args.taskfile.device_head |= drive->select.all;
-	args.cmd = get_command(drive, &args, rq_data_dir(rq));
-
-#ifdef DEBUG
-	printk("%s: %sing: ", drive->name,
-		(rq_data_dir(rq)==READ) ? "read" : "writ");
-	if (lba)	printk("LBAsect=%lld, ", block);
-	else		printk("CHS=%d/%d/%d, ", cyl, head, sect);
-	printk("sectors=%ld, ", rq->nr_sectors);
-	printk("buffer=%p\n", rq->buffer);
-#endif
-
-	rq->special = &args;
-
-	return ata_do_taskfile(drive, &args, rq);
-}
-
-static ide_startstop_t lba28_do_request(struct ata_device *drive, struct request *rq, sector_t block)
-{
-	struct ata_taskfile args;
-	int sectors;
-
-	sectors = rq->nr_sectors;
-	if (sectors == 256)
-		sectors = 0;
-
-	memset(&args, 0, sizeof(args));
-
-	if (blk_rq_tagged(rq)) {
-		args.taskfile.feature = sectors;
-		args.taskfile.sector_count = rq->tag << 3;
-	} else
-		args.taskfile.sector_count = sectors;
-
-	args.taskfile.sector_number = block;
-	args.taskfile.low_cylinder = (block >>= 8);
-
-	args.taskfile.high_cylinder = (block >>= 8);
+	struct hd_driveid *id = drive->id;
 
-	args.taskfile.device_head = ((block >> 8) & 0x0f);
-	args.taskfile.device_head |= drive->select.all;
-	args.cmd = get_command(drive, &args, rq_data_dir(rq));
+	/* (ks/hs): Moved to start, do not use for multiple out commands.
+	 * FIXME: why not?! */
+	if (!(ar->cmd == CFA_WRITE_MULTI_WO_ERASE ||
+	      ar->cmd == WIN_MULTWRITE ||
+	      ar->cmd == WIN_MULTWRITE_EXT)) {
+		ata_irq_enable(drive, 1);
+		ata_mask(drive);
+	}
+
+	if ((id->command_set_2 & 0x0400) && (id->cfs_enable_2 & 0x0400) &&
+	    (drive->addressing == 1))
+		ata_out_regfile(drive, &ar->hobfile);
+
+	ata_out_regfile(drive, &ar->taskfile);
+
+	OUT_BYTE((ar->taskfile.device_head & (drive->addressing ? 0xE0 : 0xEF)) | drive->select.all,
+			IDE_SELECT_REG);
+
+	if (ar->XXX_handler) {
+		struct ata_channel *ch = drive->channel;
+
+		ata_set_handler(drive, ar->XXX_handler, WAIT_CMD, NULL);
+		OUT_BYTE(ar->cmd, IDE_COMMAND_REG);
+
+		/* FIXME: Warning check for race between handler and prehandler
+		 * for writing first block of data.  however since we are well
+		 * inside the boundaries of the seek, we should be okay.
+		 *
+		 * FIXME: Replace the switch by using a proper command_type.
+		 */
 
-#ifdef DEBUG
-	printk("%s: %sing: ", drive->name,
-		(rq_data_dir(rq)==READ) ? "read" : "writ");
-	if (lba)	printk("LBAsect=%lld, ", block);
-	else		printk("CHS=%d/%d/%d, ", cyl, head, sect);
-	printk("sectors=%ld, ", rq->nr_sectors);
-	printk("buffer=%p\n", rq->buffer);
-#endif
+		if (ar->cmd == CFA_WRITE_SECT_WO_ERASE ||
+		    ar->cmd == WIN_WRITE ||
+		    ar->cmd == WIN_WRITE_EXT ||
+		    ar->cmd == WIN_WRITE_VERIFY ||
+		    ar->cmd == WIN_WRITE_BUFFER ||
+		    ar->cmd == WIN_DOWNLOAD_MICROCODE ||
+		    ar->cmd == CFA_WRITE_MULTI_WO_ERASE ||
+		    ar->cmd == WIN_MULTWRITE ||
+		    ar->cmd == WIN_MULTWRITE_EXT) {
+			ide_startstop_t startstop;
+
+			if (ata_status_poll(drive, DATA_READY, drive->bad_wstat,
+						WAIT_DRQ, rq, &startstop)) {
+				printk(KERN_ERR "%s: no DRQ after issuing %s\n",
+						drive->name, drive->mult_count ? "MULTWRITE" : "WRITE");
 
-	rq->special = &args;
+				return startstop;
+			}
 
-	return ata_do_taskfile(drive, &args, rq);
-}
+			/* FIXME: This doesn't make the slightest sense.
+			 * (ks/hs): Fixed Multi Write
+			 */
+			if (!(ar->cmd == CFA_WRITE_MULTI_WO_ERASE ||
+			      ar->cmd == WIN_MULTWRITE ||
+			      ar->cmd == WIN_MULTWRITE_EXT)) {
+				unsigned long flags;
+				char *buf = ide_map_rq(rq, &flags);
 
-/*
- * 268435455  == 137439 MB or 28bit limit
- * 320173056  == 163929 MB or 48bit addressing
- * 1073741822 == 549756 MB or 48bit addressing fake drive
- */
+				/* For Write_sectors we need to stuff the first sector */
+				ata_write(drive, buf, SECTOR_WORDS);
 
-static ide_startstop_t lba48_do_request(struct ata_device *drive, struct request *rq, sector_t block)
-{
-	struct ata_taskfile args;
-	int sectors;
+				rq->current_nr_sectors--;
+				ide_unmap_rq(rq, buf, &flags);
 
-	sectors = rq->nr_sectors;
-	if (sectors == 65536)
-		sectors = 0;
+				return ide_started;
+			} else {
+				int i;
+				int ret;
 
-	memset(&args, 0, sizeof(args));
+				/* Polling wait until the drive is ready.
+				 *
+				 * Stuff the first sector(s) by calling the
+				 * handler driectly therafter.
+				 *
+				 * FIXME: Replace hard-coded 100, what about
+				 * error handling?
+				 */
+
+				for (i = 0; i < 100; ++i) {
+					if (drive_is_ready(drive))
+						break;
+				}
+				if (!drive_is_ready(drive)) {
+					printk(KERN_ERR "DISASTER WAITING TO HAPPEN!\n");
+				}
+				/* FIXME: make this unlocking go away*/
+				spin_unlock_irq(ch->lock);
+				ret =  ar->XXX_handler(drive, rq);
+				spin_lock_irq(ch->lock);
 
-	if (blk_rq_tagged(rq)) {
-		args.taskfile.feature = sectors;
-		args.hobfile.feature = sectors >> 8;
-		args.taskfile.sector_count = rq->tag << 3;
+				return ret;
+			}
+		}
 	} else {
-		args.taskfile.sector_count = sectors;
-		args.hobfile.sector_count = sectors >> 8;
-	}
-
-	args.taskfile.sector_number = block;		/* low lba */
-	args.taskfile.low_cylinder = (block >>= 8);	/* mid lba */
-	args.taskfile.high_cylinder = (block >>= 8);	/* hi  lba */
-	args.taskfile.device_head = drive->select.all;
-
-	args.hobfile.sector_number = (block >>= 8);	/* low lba */
-	args.hobfile.low_cylinder = (block >>= 8);	/* mid lba */
-	args.hobfile.high_cylinder = (block >>= 8);	/* hi  lba */
-	args.hobfile.device_head = drive->select.all;
+		/*
+		 * FIXME: This is a gross hack, need to unify tcq dma proc and
+		 * regular dma proc. It should now be easier.
+		 *
+		 * FIXME: Handle the alternateives by a command type.
+		 */
 
-	args.cmd = get_command(drive, &args, rq_data_dir(rq));
+		if (!drive->using_dma)
+			return ide_started;
 
-#ifdef DEBUG
-	printk("%s: %sing: ", drive->name,
-		(rq_data_dir(rq)==READ) ? "read" : "writ");
-	if (lba)	printk("LBAsect=%lld, ", block);
-	else		printk("CHS=%d/%d/%d, ", cyl, head, sect);
-	printk("sectors=%ld, ", rq->nr_sectors);
-	printk("buffer=%p\n",rq->buffer);
+		/* for dma commands we don't set the handler */
+		if (ar->cmd == WIN_WRITEDMA ||
+		    ar->cmd == WIN_WRITEDMA_EXT ||
+		    ar->cmd == WIN_READDMA ||
+		    ar->cmd == WIN_READDMA_EXT)
+			return udma_init(drive, rq);
+#ifdef CONFIG_BLK_DEV_IDE_TCQ
+		else if (ar->cmd == WIN_WRITEDMA_QUEUED ||
+			 ar->cmd == WIN_WRITEDMA_QUEUED_EXT ||
+			 ar->cmd == WIN_READDMA_QUEUED ||
+			 ar->cmd == WIN_READDMA_QUEUED_EXT)
+			return udma_tcq_init(drive, rq);
 #endif
+		else {
+			printk(KERN_ERR "%s: unknown command %x\n", __FUNCTION__, ar->cmd);
+			return ide_stopped;
+		}
+	}
 
-	rq->special = &args;
-
-	return ata_do_taskfile(drive, &args, rq);
+	return ide_started;
 }
 
 /*
@@ -560,10 +490,13 @@
  */
 static ide_startstop_t idedisk_do_request(struct ata_device *drive, struct request *rq, sector_t block)
 {
+	struct ata_taskfile args;
+	unsigned int sectors;
+
 	/* This issues a special drive command.
 	 */
 	if (rq->flags & REQ_SPECIAL)
-		return ata_do_taskfile(drive, rq->special, rq);
+		return __do_request(drive, rq->special, rq);
 
 	/* FIXME: this check doesn't make sense */
 	if (!(rq->flags & REQ_CMD)) {
@@ -597,15 +530,150 @@
 		}
 	}
 
-	if ((drive->id->cfs_enable_2 & 0x0400) && (drive->addressing))
-		return lba48_do_request(drive, rq, block);
-	else if (drive->select.b.lba)
-		return lba28_do_request(drive, rq, block);
-	else
-		return chs_do_request(drive, rq, block);
+	memset(&args, 0, sizeof(args));
+	sectors = rq->nr_sectors;
+	/* Dispatch depending up on the drive access method. */
+	if ((drive->id->cfs_enable_2 & 0x0400) && (drive->addressing)) {
+		/* LBA 48 bit */
+		/*
+		 * 268435455  == 137439 MB or 28bit limit
+		 * 320173056  == 163929 MB or 48bit addressing
+		 * 1073741822 == 549756 MB or 48bit addressing fake drive
+		 */
+		if (sectors == 65536)
+			sectors = 0;
+
+		if (blk_rq_tagged(rq)) {
+			args.taskfile.feature = sectors;
+			args.hobfile.feature = sectors >> 8;
+			args.taskfile.sector_count = rq->tag << 3;
+		} else {
+			args.taskfile.sector_count = sectors;
+			args.hobfile.sector_count = sectors >> 8;
+		}
+
+		args.taskfile.sector_number = block;		/* low lba */
+		args.taskfile.low_cylinder = (block >>= 8);	/* mid lba */
+		args.taskfile.high_cylinder = (block >>= 8);	/* hi  lba */
+		args.taskfile.device_head = drive->select.all;
+
+		args.hobfile.sector_number = (block >>= 8);	/* low lba */
+		args.hobfile.low_cylinder = (block >>= 8);	/* mid lba */
+		args.hobfile.high_cylinder = (block >>= 8);	/* hi  lba */
+	} else if (drive->select.b.lba) {
+		/* LBA 28 bit  */
+		if (sectors == 256)
+			sectors = 0;
+
+		if (blk_rq_tagged(rq)) {
+			args.taskfile.feature = sectors;
+			args.taskfile.sector_count = rq->tag << 3;
+		} else
+			args.taskfile.sector_count = sectors;
+
+		args.taskfile.sector_number = block;
+		args.taskfile.low_cylinder = (block >>= 8);
+		args.taskfile.high_cylinder = (block >>= 8);
+		args.taskfile.device_head = ((block >> 8) & 0x0f);
+	} else {
+		/* CHS */
+		unsigned int track	= (block / drive->sect);
+		unsigned int sect	= (block % drive->sect) + 1;
+		unsigned int head	= (track % drive->head);
+		unsigned int cyl	= (track / drive->head);
+
+		if (sectors == 256)
+			sectors = 0;
+
+		if (blk_rq_tagged(rq)) {
+			args.taskfile.feature = sectors;
+			args.taskfile.sector_count = rq->tag << 3;
+		} else
+			args.taskfile.sector_count = sectors;
+
+		args.taskfile.sector_number = sect;
+		args.taskfile.low_cylinder = cyl;
+		args.taskfile.high_cylinder = (cyl>>8);
+		args.taskfile.device_head = head;
+	}
+	args.taskfile.device_head |= drive->select.all;
+
+	/*
+	 * Decode with physical ATA command to use and setup associated data.
+	 */
+
+	if (rq_data_dir(rq) == READ) {
+		args.command_type = IDE_DRIVE_TASK_IN;
+		if (drive->addressing) {
+			if (drive->using_tcq) {
+				args.cmd = WIN_READDMA_QUEUED_EXT;
+			} else if (drive->using_dma) {
+				args.cmd = WIN_READDMA_EXT;
+			} else if (drive->mult_count) {
+				args.XXX_handler = task_mulin_intr;
+				args.cmd = WIN_MULTREAD_EXT;
+			} else {
+				args.XXX_handler = task_in_intr;
+				args.cmd = WIN_READ_EXT;
+			}
+		} else {
+			if (drive->using_tcq) {
+				args.cmd = WIN_READDMA_QUEUED;
+			} else if (drive->using_dma) {
+				args.cmd = WIN_READDMA;
+			} else if (drive->mult_count) {
+				/* FIXME : Shouldn't this be task_mulin_intr?! */
+				args.XXX_handler = task_in_intr;
+				args.cmd = WIN_MULTREAD;
+			} else {
+				args.XXX_handler = task_in_intr;
+				args.cmd = WIN_READ;
+			}
+		}
+	} else {
+		args.command_type = IDE_DRIVE_TASK_RAW_WRITE;
+		if (drive->addressing) {
+			if (drive->using_tcq) {
+				args.cmd = WIN_WRITEDMA_QUEUED_EXT;
+			} else if (drive->using_dma) {
+				args.cmd = WIN_WRITEDMA_EXT;
+			} else if (drive->mult_count) {
+				args.XXX_handler = task_mulout_intr;
+				args.cmd = WIN_MULTWRITE_EXT;
+			} else {
+				args.XXX_handler = task_out_intr;
+				args.cmd = WIN_WRITE_EXT;
+			}
+		} else {
+			if (drive->using_tcq) {
+				args.cmd = WIN_WRITEDMA_QUEUED;
+			} else if (drive->using_dma) {
+				args.cmd = WIN_WRITEDMA;
+			} else if (drive->mult_count) {
+				args.XXX_handler = task_mulout_intr;
+				args.cmd = WIN_MULTWRITE;
+			} else {
+				args.XXX_handler = task_out_intr;
+				args.cmd = WIN_WRITE;
+			}
+		}
+	}
+
+
+#ifdef DEBUG
+	printk("%s: %sing: ", drive->name,
+			(rq_data_dir(rq)==READ) ? "read" : "writ");
+	if (lba)	printk("LBAsect=%lld, ", block);
+	else		printk("CHS=%d/%d/%d, ", cyl, head, sect);
+	printk("sectors=%ld, ", rq->nr_sectors);
+	printk("buffer=%p\n", rq->buffer);
+#endif
+	rq->special = &args;
+
+	return __do_request(drive, &args, rq);
 }
 
-static int idedisk_open(struct inode *inode, struct file *filp, struct ata_device *drive)
+static int idedisk_open(struct inode *inode, struct file *__fp, struct ata_device *drive)
 {
 	MOD_INC_USE_COUNT;
 	if (drive->removable && drive->usage == 1) {
diff -urN linux-2.5.21/drivers/ide/ide-floppy.c linux/drivers/ide/ide-floppy.c
--- linux-2.5.21/drivers/ide/ide-floppy.c	2002-06-14 12:45:13.000000000 +0200
+++ linux/drivers/ide/ide-floppy.c	2002-06-14 14:21:59.000000000 +0200
@@ -1102,7 +1102,7 @@
 		udma_enable(drive, 0, 1);
 
 	if (test_bit (PC_DMA_RECOMMENDED, &pc->flags) && drive->using_dma)
-		dma_ok = !udma_init(drive, rq);
+		dma_ok = udma_init(drive, rq);
 #endif
 
 	ata_irq_enable(drive, 1);
diff -urN linux-2.5.21/drivers/ide/ide-pmac.c linux/drivers/ide/ide-pmac.c
--- linux-2.5.21/drivers/ide/ide-pmac.c	2002-06-14 12:45:00.000000000 +0200
+++ linux/drivers/ide/ide-pmac.c	2002-06-14 15:03:31.000000000 +0200
@@ -1365,7 +1365,8 @@
 	 */
 	ix = pmac_ide_find(drive);
 	if (ix < 0)
-		return 0;
+		return ide_started;
+
 	dma = pmac_ide[ix].dma_regs;
 	ata4 = (pmac_ide[ix].kind == controller_kl_ata4 ||
 		pmac_ide[ix].kind == controller_kl_ata4_80);
@@ -1373,7 +1374,8 @@
 	out_le32(&dma->control, (RUN << 16) | RUN);
 	/* Make sure it gets to the controller right now */
 	(void)in_le32(&dma->control);
-	return 0;
+
+	return ide_started;
 }
 
 static int pmac_udma_stop(struct ata_device *drive)
@@ -1411,7 +1413,7 @@
 	 */
 	ix = pmac_ide_find(drive);
 	if (ix < 0)
-		return 0;
+		return ide_stopped;
 
 	if (rq_data_dir(rq) == READ)
 		reading = 1;
@@ -1423,7 +1425,7 @@
 		pmac_ide[ix].kind == controller_kl_ata4_80);
 
 	if (!pmac_ide_build_dmatable(drive, rq, ix, !reading))
-		return 1;
+		return ide_stopped;
 	/* Apple adds 60ns to wrDataSetup on reads */
 	if (ata4 && (pmac_ide[ix].timings[unit] & TR_66_UDMA_EN)) {
 		out_le32((unsigned *)(IDE_DATA_REG + IDE_TIMING_CONFIG + _IO_BASE),
@@ -1433,7 +1435,7 @@
 	}
 	drive->waiting_for_dma = 1;
 	if (drive->type != ATA_DISK)
-		return 0;
+		return ide_started;
 
 	ata_set_handler(drive, ide_dma_intr, WAIT_CMD, NULL);
 	if ((rq->flags & REQ_SPECIAL) &&
@@ -1447,7 +1449,9 @@
 		OUT_BYTE(reading ? WIN_READDMA : WIN_WRITEDMA, IDE_COMMAND_REG);
 	}
 
-	return udma_start(drive, rq);
+	udma_start(drive, rq);
+
+	return ide_started;
 }
 
 /*
diff -urN linux-2.5.21/drivers/ide/ide-tape.c linux/drivers/ide/ide-tape.c
--- linux-2.5.21/drivers/ide/ide-tape.c	2002-06-14 12:45:13.000000000 +0200
+++ linux/drivers/ide/ide-tape.c	2002-06-14 14:20:30.000000000 +0200
@@ -2290,7 +2290,7 @@
 		udma_enable(drive, 0, 1);
 	}
 	if (test_bit (PC_DMA_RECOMMENDED, &pc->flags) && drive->using_dma)
-		dma_ok = !udma_init(drive, rq);
+		dma_ok = udma_init(drive, rq);
 #endif
 
 	ata_irq_enable(drive, 1);
diff -urN linux-2.5.21/drivers/ide/ide-taskfile.c linux/drivers/ide/ide-taskfile.c
--- linux-2.5.21/drivers/ide/ide-taskfile.c	2002-06-14 12:45:13.000000000 +0200
+++ linux/drivers/ide/ide-taskfile.c	2002-06-14 02:04:27.000000000 +0200
@@ -177,145 +177,6 @@
 }
 
 /*
- * Channel lock should be held on entry.
- */
-ide_startstop_t ata_do_taskfile(struct ata_device *drive,
-		struct ata_taskfile *ar, struct request *rq)
-{
-	struct hd_driveid *id = drive->id;
-
-	/* (ks/hs): Moved to start, do not use for multiple out commands.
-	 * FIXME: why not?! */
-	if (!(ar->cmd == CFA_WRITE_MULTI_WO_ERASE ||
-	      ar->cmd == WIN_MULTWRITE ||
-	      ar->cmd == WIN_MULTWRITE_EXT)) {
-		ata_irq_enable(drive, 1);
-		ata_mask(drive);
-	}
-
-	if ((id->command_set_2 & 0x0400) && (id->cfs_enable_2 & 0x0400) &&
-	    (drive->addressing == 1))
-		ata_out_regfile(drive, &ar->hobfile);
-
-	ata_out_regfile(drive, &ar->taskfile);
-
-	OUT_BYTE((ar->taskfile.device_head & (drive->addressing ? 0xE0 : 0xEF)) | drive->select.all,
-			IDE_SELECT_REG);
-
-	if (ar->handler) {
-		struct ata_channel *ch = drive->channel;
-
-		/* This is apparently supposed to reset the wait timeout for
-		 * the interrupt to accur.
-		 */
-
-		ata_set_handler(drive, ar->handler, WAIT_CMD, NULL);
-		OUT_BYTE(ar->cmd, IDE_COMMAND_REG);
-
-		/* FIXME: Warning check for race between handler and prehandler
-		 * for writing first block of data.  however since we are well
-		 * inside the boundaries of the seek, we should be okay.
-		 *
-		 * FIXME: Replace the switch by using a proper command_type.
-		 */
-
-		if (ar->cmd == CFA_WRITE_SECT_WO_ERASE ||
-		    ar->cmd == WIN_WRITE ||
-		    ar->cmd == WIN_WRITE_EXT ||
-		    ar->cmd == WIN_WRITE_VERIFY ||
-		    ar->cmd == WIN_WRITE_BUFFER ||
-		    ar->cmd == WIN_DOWNLOAD_MICROCODE ||
-		    ar->cmd == CFA_WRITE_MULTI_WO_ERASE ||
-		    ar->cmd == WIN_MULTWRITE ||
-		    ar->cmd == WIN_MULTWRITE_EXT) {
-			ide_startstop_t startstop;
-
-			if (ata_status_poll(drive, DATA_READY, drive->bad_wstat,
-						WAIT_DRQ, rq, &startstop)) {
-				printk(KERN_ERR "%s: no DRQ after issuing %s\n",
-						drive->name, drive->mult_count ? "MULTWRITE" : "WRITE");
-
-				return startstop;
-			}
-
-			/* FIXME: This doesn't make the slightest sense.
-			 * (ks/hs): Fixed Multi Write
-			 */
-			if (!(ar->cmd == CFA_WRITE_MULTI_WO_ERASE ||
-			      ar->cmd == WIN_MULTWRITE ||
-			      ar->cmd == WIN_MULTWRITE_EXT)) {
-				unsigned long flags;
-				char *buf = ide_map_rq(rq, &flags);
-
-				/* For Write_sectors we need to stuff the first sector */
-				ata_write(drive, buf, SECTOR_WORDS);
-
-				rq->current_nr_sectors--;
-				ide_unmap_rq(rq, buf, &flags);
-
-				return ide_started;
-			} else {
-				int i;
-				int ret;
-
-				/* Polling wait until the drive is ready.
-				 *
-				 * Stuff the first sector(s) by calling the
-				 * handler driectly therafter.
-				 *
-				 * FIXME: Replace hard-coded 100, what about
-				 * error handling?
-				 */
-
-				for (i = 0; i < 100; ++i) {
-					if (drive_is_ready(drive))
-						break;
-				}
-				if (!drive_is_ready(drive)) {
-					printk(KERN_ERR "DISASTER WAITING TO HAPPEN!\n");
-				}
-				/* FIXME: make this unlocking go away*/
-				spin_unlock_irq(ch->lock);
-				ret =  ar->handler(drive, rq);
-				spin_lock_irq(ch->lock);
-
-				return ret;
-			}
-		}
-	} else {
-		/*
-		 * FIXME: This is a gross hack, need to unify tcq dma proc and
-		 * regular dma proc. It should now be easier.
-		 *
-		 * FIXME: Handle the alternateives by a command type.
-		 */
-
-		if (!drive->using_dma)
-			return ide_started;
-
-		/* for dma commands we don't set the handler */
-		if (ar->cmd == WIN_WRITEDMA ||
-		    ar->cmd == WIN_WRITEDMA_EXT ||
-		    ar->cmd == WIN_READDMA ||
-		    ar->cmd == WIN_READDMA_EXT)
-			return !udma_init(drive, rq);
-#ifdef CONFIG_BLK_DEV_IDE_TCQ
-		else if (ar->cmd == WIN_WRITEDMA_QUEUED ||
-			 ar->cmd == WIN_WRITEDMA_QUEUED_EXT ||
-			 ar->cmd == WIN_READDMA_QUEUED ||
-			 ar->cmd == WIN_READDMA_QUEUED_EXT)
-			return udma_tcq_taskfile(drive, rq);
-#endif
-		else {
-			printk(KERN_ERR "%s: unknown command %x\n", __FUNCTION__, ar->cmd);
-			return ide_stopped;
-		}
-	}
-
-	return ide_started;
-}
-
-/*
  * This function issues a special IDE device request onto the request queue.
  *
  * If action is ide_wait, then the rq is queued at the end of the request
@@ -436,7 +297,7 @@
 	struct request req;
 
 	ar->command_type = IDE_DRIVE_TASK_NO_DATA;
-	ar->handler = ata_special_intr;
+	ar->XXX_handler = ata_special_intr;
 
 	memset(&req, 0, sizeof(req));
 	req.flags = REQ_SPECIAL;
@@ -449,6 +310,5 @@
 EXPORT_SYMBOL(ide_do_drive_cmd);
 EXPORT_SYMBOL(ata_read);
 EXPORT_SYMBOL(ata_write);
-EXPORT_SYMBOL(ata_do_taskfile);
 EXPORT_SYMBOL(ata_special_intr);
 EXPORT_SYMBOL(ide_raw_taskfile);
diff -urN linux-2.5.21/drivers/ide/ioctl.c linux/drivers/ide/ioctl.c
--- linux-2.5.21/drivers/ide/ioctl.c	2002-06-14 12:45:00.000000000 +0200
+++ linux/drivers/ide/ioctl.c	2002-06-14 02:08:02.000000000 +0200
@@ -54,9 +54,6 @@
 	if (copy_from_user(vals, (void *)arg, 4))
 		return -EFAULT;
 
-	memset(&req, 0, sizeof(req));
-	req.flags = REQ_SPECIAL;
-
 	memset(&args, 0, sizeof(args));
 
 	args.taskfile.feature = vals[2];
@@ -83,10 +80,14 @@
 
 	/* Issue ATA command and wait for completion.
 	 */
-	args.handler = ata_special_intr;
+	args.command_type = IDE_DRIVE_TASK_NO_DATA;
+	args.XXX_handler = ata_special_intr;
 
-	req.buffer = argbuf + 4;
+	memset(&req, 0, sizeof(req));
+	req.flags = REQ_SPECIAL;
 	req.special = &args;
+
+	req.buffer = argbuf + 4;
 	err = ide_do_drive_cmd(drive, &req, ide_wait);
 
 	argbuf[0] = drive->status;
diff -urN linux-2.5.21/drivers/ide/ns87415.c linux/drivers/ide/ns87415.c
--- linux-2.5.21/drivers/ide/ns87415.c	2002-06-09 07:28:49.000000000 +0200
+++ linux/drivers/ide/ns87415.c	2002-06-14 14:27:42.000000000 +0200
@@ -105,12 +105,12 @@
 {
 	ns87415_prepare_drive(drive, 1);	/* select DMA xfer */
 
-	if (!udma_pci_init(drive, rq))		/* use standard DMA stuff */
-		return 0;
+	if (udma_pci_init(drive, rq))		/* use standard DMA stuff */
+		return ide_started;
 
 	ns87415_prepare_drive(drive, 0);	/* DMA failed: select PIO xfer */
 
-	return 1;
+	return ide_stopped;
 }
 
 static int ns87415_udma_setup(struct ata_device *drive)
diff -urN linux-2.5.21/drivers/ide/pcidma.c linux/drivers/ide/pcidma.c
--- linux-2.5.21/drivers/ide/pcidma.c	2002-06-14 12:45:03.000000000 +0200
+++ linux/drivers/ide/pcidma.c	2002-06-14 15:12:30.000000000 +0200
@@ -420,13 +420,11 @@
 	struct ata_channel *ch = drive->channel;
 	unsigned long dma_base = ch->dma_base;
 
-	/* Note that this is done *after* the cmd has
-	 * been issued to the drive, as per the BM-IDE spec.
-	 * The Promise Ultra33 doesn't work correctly when
-	 * we do this part before issuing the drive cmd.
+	/* Note that this is done *after* the cmd has been issued to the drive,
+	 * as per the BM-IDE spec.  The Promise Ultra33 doesn't work correctly
+	 * when we do this part before issuing the drive cmd.
 	 */
-	outb(inb(dma_base)|1, dma_base);		/* start DMA */
-	return 0;
+	outb(inb(dma_base) | 1, dma_base);	/* start DMA */
 }
 
 /*
@@ -545,11 +543,11 @@
 	u8 cmd;
 
 	if (ata_start_dma(drive, rq))
-		return 1;
+		return ide_stopped;
 
 	/* No DMA transfers on ATAPI devices. */
 	if (drive->type != ATA_DISK)
-		return 0;
+		return ide_started;
 
 	if (rq_data_dir(rq) == READ)
 		cmd = 0x08;
@@ -562,7 +560,9 @@
 	else
 		outb(cmd ? WIN_READDMA : WIN_WRITEDMA, IDE_COMMAND_REG);
 
-	return udma_start(drive, rq);
+	udma_start(drive, rq);
+
+	return ide_started;
 }
 
 EXPORT_SYMBOL(ide_dma_intr);
diff -urN linux-2.5.21/drivers/ide/pdc202xx.c linux/drivers/ide/pdc202xx.c
--- linux-2.5.21/drivers/ide/pdc202xx.c	2002-06-14 12:45:13.000000000 +0200
+++ linux/drivers/ide/pdc202xx.c	2002-06-14 15:13:37.000000000 +0200
@@ -573,8 +573,6 @@
 	 */
 
 	outb(inb(ch->dma_base) | 1, ch->dma_base); /* start DMA */
-
-	return 0;
 }
 
 int pdc202xx_udma_stop(struct ata_device *drive)
diff -urN linux-2.5.21/drivers/ide/pdc4030.c linux/drivers/ide/pdc4030.c
--- linux-2.5.21/drivers/ide/pdc4030.c	2002-06-14 12:45:13.000000000 +0200
+++ linux/drivers/ide/pdc4030.c	2002-06-14 02:09:46.000000000 +0200
@@ -784,7 +784,7 @@
 	args.taskfile.high_cylinder	= (block>>=8);
 	args.taskfile.device_head	= ((block>>8)&0x0f)|drive->select.all;
 	args.cmd = (rq_data_dir(rq) == READ) ? PROMISE_READ : PROMISE_WRITE;
-	args.handler	= NULL;
+	args.XXX_handler	= NULL;
 	rq->special	= &args;
 
 	return do_pdc4030_io(drive, &args, rq);
diff -urN linux-2.5.21/drivers/ide/sl82c105.c linux/drivers/ide/sl82c105.c
--- linux-2.5.21/drivers/ide/sl82c105.c	2002-06-09 07:28:39.000000000 +0200
+++ linux/drivers/ide/sl82c105.c	2002-06-14 14:22:23.000000000 +0200
@@ -209,6 +209,7 @@
 static int sl82c105_dma_init(struct ata_device *drive, struct request *rq)
 {
 	sl82c105_reset_host(drive->channel->pci_dev);
+
 	return udma_pci_init(drive, rq);
 }
 
diff -urN linux-2.5.21/drivers/ide/tcq.c linux/drivers/ide/tcq.c
--- linux-2.5.21/drivers/ide/tcq.c	2002-06-14 12:45:03.000000000 +0200
+++ linux/drivers/ide/tcq.c	2002-06-14 15:04:59.000000000 +0200
@@ -84,7 +84,7 @@
 {
 	struct ata_channel *ch = drive->channel;
 	request_queue_t *q = &drive->queue;
-	struct ata_taskfile *args;
+	struct ata_taskfile *ar;
 	struct request *rq;
 	unsigned long flags;
 
@@ -110,8 +110,8 @@
 	 * executed before any new commands are started. issue a NOP
 	 * to clear internal queue on drive.
 	 */
-	args = kmalloc(sizeof(*args), GFP_ATOMIC);
-	if (!args) {
+	ar = kmalloc(sizeof(*ar), GFP_ATOMIC);
+	if (!ar) {
 		printk(KERN_ERR "ATA: %s: failed to issue NOP\n", drive->name);
 		goto out;
 	}
@@ -126,10 +126,10 @@
 	 */
 	BUG_ON(!rq);
 
-	rq->special = args;
-	args->cmd = WIN_NOP;
-	args->handler = tcq_nop_handler;
-	args->command_type = IDE_DRIVE_TASK_NO_DATA;
+	rq->special = ar;
+	ar->cmd = WIN_NOP;
+	ar->XXX_handler = tcq_nop_handler;
+	ar->command_type = IDE_DRIVE_TASK_NO_DATA;
 
 	rq->rq_dev = mk_kdev(drive->channel->major, (drive->select.b.unit)<<PARTN_BITS);
 	_elv_add_request(q, rq, 0, 0);
@@ -539,10 +539,9 @@
 		return ide_stopped;
 
 	set_irq(drive, ide_dmaq_intr);
-	if (!udma_start(drive, rq))
-		return ide_started;
+	udma_start(drive, rq);
 
-	return ide_stopped;
+	return ide_started;
 }
 
 /*
@@ -550,7 +549,7 @@
  *
  * Channel lock should be held.
  */
-ide_startstop_t udma_tcq_taskfile(struct ata_device *drive, struct request *rq)
+ide_startstop_t udma_tcq_init(struct ata_device *drive, struct request *rq)
 {
 	u8 stat;
 	u8 feat;
diff -urN linux-2.5.21/drivers/ide/trm290.c linux/drivers/ide/trm290.c
--- linux-2.5.21/drivers/ide/trm290.c	2002-06-14 12:45:00.000000000 +0200
+++ linux/drivers/ide/trm290.c	2002-06-14 15:12:48.000000000 +0200
@@ -179,7 +179,6 @@
 static int trm290_udma_start(struct ata_device *drive, struct request *__rq)
 {
 	/* Nothing to be done here. */
-	return 0;
 }
 
 static int trm290_udma_stop(struct ata_device *drive)
@@ -210,7 +209,7 @@
 #ifdef TRM290_NO_DMA_WRITES
 		trm290_prepare_drive(drive, 0);	/* select PIO xfer */
 
-		return 1;
+		return ide_stopped;
 #endif
 	} else {
 		reading = 2;
@@ -219,7 +218,7 @@
 
 	if (!(count = udma_new_table(drive, rq))) {
 		trm290_prepare_drive(drive, 0);	/* select PIO xfer */
-		return 1;	/* try PIO instead of DMA */
+		return ide_stopped;	/* try PIO instead of DMA */
 	}
 
 	trm290_prepare_drive(drive, 1);	/* select DMA xfer */
@@ -233,7 +232,7 @@
 		outb(reading ? WIN_READDMA : WIN_WRITEDMA, IDE_COMMAND_REG);
 	}
 
-	return 0;
+	return ide_started;
 }
 
 static int trm290_udma_irq_status(struct ata_device *drive)
diff -urN linux-2.5.21/drivers/scsi/ide-scsi.c linux/drivers/scsi/ide-scsi.c
--- linux-2.5.21/drivers/scsi/ide-scsi.c	2002-06-14 12:45:13.000000000 +0200
+++ linux/drivers/scsi/ide-scsi.c	2002-06-14 14:18:13.000000000 +0200
@@ -444,7 +444,7 @@
 	bcount = min(pc->request_transfer, 63 * 1024);		/* Request to transfer the entire buffer at once */
 
 	if (drive->using_dma && rq->bio)
-		dma_ok = !udma_init(drive, rq);
+		dma_ok = udma_init(drive, rq);
 
 	ata_select(drive, 10);
 	ata_irq_enable(drive, 1);
diff -urN linux-2.5.21/include/linux/ide.h linux/include/linux/ide.h
--- linux-2.5.21/include/linux/ide.h	2002-06-14 12:45:13.000000000 +0200
+++ linux/include/linux/ide.h	2002-06-14 15:14:20.000000000 +0200
@@ -459,9 +459,9 @@
 	int (*udma_setup)(struct ata_device *);
 
 	void (*udma_enable)(struct ata_device *, int, int);
-	int (*udma_start) (struct ata_device *, struct request *rq);
+	void (*udma_start) (struct ata_device *, struct request *);
 	int (*udma_stop) (struct ata_device *);
-	int (*udma_init) (struct ata_device *, struct request *rq);
+	int (*udma_init) (struct ata_device *, struct request *);
 	int (*udma_irq_status) (struct ata_device *);
 	void (*udma_timeout) (struct ata_device *);
 	void (*udma_irq_lost) (struct ata_device *);
@@ -651,15 +651,12 @@
 	struct hd_drive_task_hdr  hobfile;
 	u8 cmd;					/* actual ATA command */
 	int command_type;
-	ide_startstop_t (*handler)(struct ata_device *, struct request *);
+	ide_startstop_t (*XXX_handler)(struct ata_device *, struct request *);
 };
 
 extern void ata_read(struct ata_device *, void *, unsigned int);
 extern void ata_write(struct ata_device *, void *, unsigned int);
 
-extern ide_startstop_t ata_do_taskfile(struct ata_device *,
-	struct ata_taskfile *, struct request *);
-
 /*
  * Special Flagged Register Validation Caller
  */
@@ -751,9 +748,9 @@
 	drive->channel->udma_enable(drive, on, verbose);
 }
 
-static inline int udma_start(struct ata_device *drive, struct request *rq)
+static inline void udma_start(struct ata_device *drive, struct request *rq)
 {
-	return drive->channel->udma_start(drive, rq);
+	drive->channel->udma_start(drive, rq);
 }
 
 static inline int udma_stop(struct ata_device *drive)
@@ -764,7 +761,7 @@
 /*
  * Initiate actual DMA data transfer. The direction is encoded in the request.
  */
-static inline int udma_init(struct ata_device *drive, struct request *rq)
+static inline ide_startstop_t udma_init(struct ata_device *drive, struct request *rq)
 {
 	return drive->channel->udma_init(drive, rq);
 }
@@ -802,7 +799,7 @@
 extern int udma_black_list(struct ata_device *);
 extern int udma_white_list(struct ata_device *);
 
-extern ide_startstop_t udma_tcq_taskfile(struct ata_device *, struct request *);
+extern ide_startstop_t udma_tcq_init(struct ata_device *, struct request *);
 extern int udma_tcq_enable(struct ata_device *, int);
 
 extern ide_startstop_t ide_dma_intr(struct ata_device *, struct request *);

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] 2.5.21 IDE 91
  2002-06-14 14:02 ` [PATCH] 2.5.21 IDE 91 Martin Dalecki
@ 2002-06-14 15:17   ` Jens Axboe
  2002-06-14 15:42     ` John Weber
                       ` (4 more replies)
  0 siblings, 5 replies; 36+ messages in thread
From: Jens Axboe @ 2002-06-14 15:17 UTC (permalink / raw)
  To: Martin Dalecki; +Cc: Linus Torvalds, Kernel Mailing List

On Fri, Jun 14 2002, Martin Dalecki wrote:
> Thu Jun 13 22:59:54 CEST 2002 ide-clean-91
> 
> - Realize that the only place where ata_do_taskfile gets used is ide-disk.c
>   move it and its "friends' over there.

Ehm, isn't that a bit odd? The typical "I'm not looking at the
interface, if only one person is currently using it then heck lets move
it" Martin approach (refer to prep_rq_fn cdb builder as well)

The above is just a minor thing, the reason I'm mailing is really:

- current 2.5 bk deadlocks reading partition info off disk. Again a
  locking problem I suppose, to be honest I just got very tired when
  seeing it happen and didn't want to spend tim looking into it.

I thought IDE locking couldn't get worse than 2.4, but I think you are
well into doing just that. What exactly are you plans with the channel
locks? Right now, to me, it seems you are extending the use of them to
the point where they would be used to serialize the entire request
operation start? Heck, ide-cd is even holding the channel lock when
outputting the cdb now.

- ata_end_request(). why didn't you just remove the last argument to
  __ata_end_request() instead just changing the comment saying why we
  pass nr_secs == 0 in from some sites?

- what's the reasoning behind moving the active request into the
  ata_device?! we are serializing requests for ata_device's on the
  ata_channel anyways, which is why it made sense to have the active
  request there.

And finally a small plea for more testing. Do you even test before
blindly sending patches off to Linus?! Sometimes just watching how
quickly these big patches appears makes it impossible that they have
gotten any kind of testing other than the 'hey it compiles', which I
think it just way too little for something that could possible screw
peoples data up very badly. Frankly, _I'm_ too scared to run 2.5 IDE
currently. The success ratio of posted over working patches is too big.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] 2.5.21 IDE 91
  2002-06-14 15:17   ` Jens Axboe
@ 2002-06-14 15:42     ` John Weber
  2002-06-14 15:43     ` Dave Jones
                       ` (3 subsequent siblings)
  4 siblings, 0 replies; 36+ messages in thread
From: John Weber @ 2002-06-14 15:42 UTC (permalink / raw)
  To: Jens Axboe, Martin Dalecki; +Cc: Kernel Mailing List

  > And finally a small plea for more testing. Do you even test before
> blindly sending patches off to Linus?! Sometimes just watching how
> quickly these big patches appears makes it impossible that they have
> gotten any kind of testing other than the 'hey it compiles', which I
> think it just way too little for something that could possible screw
> peoples data up very badly. Frankly, _I'm_ too scared to run 2.5 IDE
> currently. The success ratio of posted over working patches is too big.

I run all all of Martin's patches on my machine, and I haven't run into
any catastrophic problems (by this I mean problems that cause data loss 
though I realize that this may not be much of a standard).

I rather like the fact that Martin releases early and often... it 
usually means that I get a fix right away. I like the "release early and 
often" approach in a development kernel branch since I do not believe 
that releasing less often has any correlation to the stability of the 
product released (look at Windows :).

Martin or Jens, are there any test suites that you would like me to run?

I am currently too stupid to be able to aid in the development directly 
(lord knows i'm trying to get up to speed), but, in the meantime, I can 
test the crap out of a kernel :).

  -o)  J o h n   W e b e r
  /\\ john.weber@linuxhq.com
_\/v http://www.linuxhq.com/people/weber/


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] 2.5.21 IDE 91
  2002-06-14 15:17   ` Jens Axboe
  2002-06-14 15:42     ` John Weber
@ 2002-06-14 15:43     ` Dave Jones
  2002-06-14 16:06       ` Bartlomiej Zolnierkiewicz
  2002-06-14 17:56       ` Linus Torvalds
  2002-06-14 15:56     ` Benjamin LaHaise
                       ` (2 subsequent siblings)
  4 siblings, 2 replies; 36+ messages in thread
From: Dave Jones @ 2002-06-14 15:43 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Martin Dalecki, Linus Torvalds, Kernel Mailing List

On Fri, Jun 14, 2002 at 05:17:03PM +0200, Jens Axboe wrote:

 > And finally a small plea for more testing. Do you even test before
 > blindly sending patches off to Linus?! Sometimes just watching how
 > quickly these big patches appears makes it impossible that they have
 > gotten any kind of testing other than the 'hey it compiles', which I
 > think it just way too little for something that could possible screw
 > peoples data up very badly. Frankly, _I'm_ too scared to run 2.5 IDE
 > currently. The success ratio of posted over working patches is too big.

Ditto. As we rapidly approach the 100th incarnation of Martin's IDE
patches, there are still cases where we die within seconds of booting
on some old PIO-only boxes for eg.  There seems to be far too much
"lets rip this out as it doesn't do much anyway" rather than fixing
known problems.

When the IDE carnage first started back circa 2.5.3, I had contemplated
not merging *any* of the IDE patches, just so that people who want to
work on other areas could have something solid to build upon.
I regret not following through on that instinct.

        Dave.

-- 
| Dave Jones.        http://www.codemonkey.org.uk
| SuSE Labs

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] 2.5.21 IDE 91
  2002-06-14 15:17   ` Jens Axboe
  2002-06-14 15:42     ` John Weber
  2002-06-14 15:43     ` Dave Jones
@ 2002-06-14 15:56     ` Benjamin LaHaise
  2002-06-14 16:04       ` Dave Jones
  2002-06-14 16:09       ` Bartlomiej Zolnierkiewicz
  2002-06-14 16:15     ` Martin Dalecki
  2002-06-14 16:43     ` Linus Torvalds
  4 siblings, 2 replies; 36+ messages in thread
From: Benjamin LaHaise @ 2002-06-14 15:56 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Martin Dalecki, Linus Torvalds, Kernel Mailing List

On Fri, Jun 14, 2002 at 05:17:03PM +0200, Jens Axboe wrote:
> And finally a small plea for more testing. Do you even test before
> blindly sending patches off to Linus?! Sometimes just watching how
> quickly these big patches appears makes it impossible that they have
> gotten any kind of testing other than the 'hey it compiles', which I
> think it just way too little for something that could possible screw
> peoples data up very badly. Frankly, _I'm_ too scared to run 2.5 IDE
> currently. The success ratio of posted over working patches is too big.

Add my voice to these concerns.  At the very least the code should have 
been moved into a second tree to allow people to work with the old stable 
driver as needed.

		-ben
-- 
"You will be reincarnated as a toad; and you will be much happier."

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] 2.5.21 IDE 91
  2002-06-14 15:56     ` Benjamin LaHaise
@ 2002-06-14 16:04       ` Dave Jones
  2002-06-14 17:23         ` Martin Dalecki
  2002-06-14 16:09       ` Bartlomiej Zolnierkiewicz
  1 sibling, 1 reply; 36+ messages in thread
From: Dave Jones @ 2002-06-14 16:04 UTC (permalink / raw)
  To: Benjamin LaHaise
  Cc: Jens Axboe, Martin Dalecki, Linus Torvalds, Kernel Mailing List

On Fri, Jun 14, 2002 at 11:56:34AM -0400, Benjamin LaHaise wrote:

 > Add my voice to these concerns.  At the very least the code should have 
 > been moved into a second tree to allow people to work with the old stable 
 > driver as needed.

*nod*, with periodic known-good _tested_ bits getting merged to
mainline, to avoid the need for an IDE merge flag day as has been
the norm in the past.

        Dave
-- 
| Dave Jones.        http://www.codemonkey.org.uk
| SuSE Labs

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] 2.5.21 IDE 91
  2002-06-14 15:43     ` Dave Jones
@ 2002-06-14 16:06       ` Bartlomiej Zolnierkiewicz
  2002-06-14 16:33         ` Martin Dalecki
  2002-06-14 17:56       ` Linus Torvalds
  1 sibling, 1 reply; 36+ messages in thread
From: Bartlomiej Zolnierkiewicz @ 2002-06-14 16:06 UTC (permalink / raw)
  To: Dave Jones
  Cc: Jens Axboe, Martin Dalecki, Linus Torvalds, Kernel Mailing List


Just my 0.02 plns...

On Fri, 14 Jun 2002, Dave Jones wrote:

> On Fri, Jun 14, 2002 at 05:17:03PM +0200, Jens Axboe wrote:
>
>  > And finally a small plea for more testing. Do you even test before
>  > blindly sending patches off to Linus?! Sometimes just watching how
>  > quickly these big patches appears makes it impossible that they have
>  > gotten any kind of testing other than the 'hey it compiles', which I
>  > think it just way too little for something that could possible screw
>  > peoples data up very badly. Frankly, _I'm_ too scared to run 2.5 IDE
>  > currently. The success ratio of posted over working patches is too big.
>

I review every single patch from Martin and they are usually ok,
but it is generally good habbit to wait for next patch before running
previous one (just to see if there was any bugs in previous one)...

> Ditto. As we rapidly approach the 100th incarnation of Martin's IDE
> patches, there are still cases where we die within seconds of booting
> on some old PIO-only boxes for eg.  There seems to be far too much
> "lets rip this out as it doesn't do much anyway" rather than fixing
> known problems.

I will try to fix them one by one soon...

>
> When the IDE carnage first started back circa 2.5.3, I had contemplated
> not merging *any* of the IDE patches, just so that people who want to
> work on other areas could have something solid to build upon.
> I regret not following through on that instinct.
>

Your mistake, Dave ;)

>         Dave.
>
> --
> | Dave Jones.        http://www.codemonkey.org.uk
> | SuSE Labs
> -

--
Bartlomiej


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] 2.5.21 IDE 91
  2002-06-14 15:56     ` Benjamin LaHaise
  2002-06-14 16:04       ` Dave Jones
@ 2002-06-14 16:09       ` Bartlomiej Zolnierkiewicz
  1 sibling, 0 replies; 36+ messages in thread
From: Bartlomiej Zolnierkiewicz @ 2002-06-14 16:09 UTC (permalink / raw)
  To: Benjamin LaHaise
  Cc: Jens Axboe, Martin Dalecki, Linus Torvalds, Kernel Mailing List


You can just copy old drives/ide/ + include/linux/[ide.h, hdreg.h]
+ asm/ide.h and it should compile/work?...

On Fri, 14 Jun 2002, Benjamin LaHaise wrote:

> On Fri, Jun 14, 2002 at 05:17:03PM +0200, Jens Axboe wrote:
> > And finally a small plea for more testing. Do you even test before
> > blindly sending patches off to Linus?! Sometimes just watching how
> > quickly these big patches appears makes it impossible that they have
> > gotten any kind of testing other than the 'hey it compiles', which I
> > think it just way too little for something that could possible screw
> > peoples data up very badly. Frankly, _I'm_ too scared to run 2.5 IDE
> > currently. The success ratio of posted over working patches is too big.
>
> Add my voice to these concerns.  At the very least the code should have
> been moved into a second tree to allow people to work with the old stable
> driver as needed.
>
> 		-ben
> --
> "You will be reincarnated as a toad; and you will be much happier."
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
>


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] 2.5.21 IDE 91
  2002-06-14 15:17   ` Jens Axboe
                       ` (2 preceding siblings ...)
  2002-06-14 15:56     ` Benjamin LaHaise
@ 2002-06-14 16:15     ` Martin Dalecki
  2002-06-15  8:15       ` Jens Axboe
  2002-06-14 16:43     ` Linus Torvalds
  4 siblings, 1 reply; 36+ messages in thread
From: Martin Dalecki @ 2002-06-14 16:15 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Linus Torvalds, Kernel Mailing List

Użytkownik Jens Axboe napisał:
> On Fri, Jun 14 2002, Martin Dalecki wrote:
> 
>>Thu Jun 13 22:59:54 CEST 2002 ide-clean-91
>>
>>- Realize that the only place where ata_do_taskfile gets used is ide-disk.c
>>  move it and its "friends' over there.
> 
> 
> Ehm, isn't that a bit odd? The typical "I'm not looking at the
> interface, if only one person is currently using it then heck lets move
> it" Martin approach (refer to prep_rq_fn cdb builder as well)

Yes this is the usual Marcin aproach. The fscking best road to bloat
is providing interfaces "on the heap" and "just in case", which
then nobody sane uses. Wen one discovers later that they are inadequate,
we try to support them until the end of days becouse
some silly python based incompetently written setup application
turns out to love to having you know what with.

And since application level programming expierence shows me that in 99%
of the cases where interfaces are designed in front of beeing used
they turn out to be inadequate I don't do it.

Won't to see some silly things like the situation described above?
Please just count the number of ioctl for the /dev/random.
90% of them qualify as "debuggin during developement" or are
working against the purpose of the thing. Just one example.
Once another even more striking is the whole /proc/ mess.

> The above is just a minor thing, the reason I'm mailing is really:
> 
> - current 2.5 bk deadlocks reading partition info off disk. Again a
>   locking problem I suppose, to be honest I just got very tired when
>   seeing it happen and didn't want to spend tim looking into it.

2.5.21 + the patches I did doesn't. Likely it's the driverfs?

> I thought IDE locking couldn't get worse than 2.4, but I think you are
> well into doing just that. What exactly are you plans with the channel
> locks? Right now, to me, it seems you are extending the use of them to
> the point where they would be used to serialize the entire request
> operation start? Heck, ide-cd is even holding the channel lock when
> outputting the cdb now.

After extracting out 80% of the host controller register file
access one has to realize that in reality we where releasing the lock
just to regain it immediately. Or alternatively accessing them
without any lock protection at all. (Same goes for BIO ummer layer
memmbers.) This is why they are pushed up. It's just avoiding the
"windows" between unlock and immediate relock and making the real
behaviour more "obvious". You have just realized this.

2.4 prevents the locking problems basically by georgeously
disabling IRQs. Too fine grained locking is a futile exercise.
Unless I see the time spent in system state during concurrent disk
access going really up (it doesn't right now), I don't see any thing
bad in making the locking more coarse. Locks don't came for free and
having fine grained locking isn't justifying itself.

Another "usual Marcin approach" - don't optimze for the sake of it.
See futile unlikely() tagging and inlining in tcq.c for example.
I don't do somethig like that. I have just written too much
numerical code which was really time constrained to do something
like this before looking at benchmarks.
Really constrained means having a program running 7 or "just"
5 *days*. This can make a difference, a difference in hard real
money on the range of multiple kEUR!

Finally - unless there appears some aother way to block access to
busy devices on the generic block layer I do it the only way
we have right now - spinlocks. (... looking forward to working
queue pugging and the work done by Adam richter).
Unless there is a sane way to signal partial completion - we will
be doing it at once. Unless we have a sane async io infrastructure
most of the above will be likely not solved anyway.

> - ata_end_request(). why didn't you just remove the last argument to
>   __ata_end_request() instead just changing the comment saying why we
>   pass nr_secs == 0 in from some sites?

One step after another. Watch for hard_xxx members from struch request
to see why I hesitated please.

> - what's the reasoning behind moving the active request into the
>   ata_device?! we are serializing requests for ata_device's on the
>   ata_channel anyways, which is why it made sense to have the active
>   request there.

Becouse it is going to go away altogether. We need it there
just only for the purpose of the default ide_timer_expiry function, which
is subject to go away since a long time. And finally becouse
it doesn't hurt.

Further on I refer you to the discussion we had (or was it Linus?)
once about the fact that attaching physical properties of the
device to the request queue and replicating those parameters in
every single request struct, is well ... "unpleasant" on behalf of the
upper layers. Loop devices expose the same problem.
Once again just grepping for hard_ memebers of the struct
request makes it obvious.

Somce people say that using the gratest common denominator in
the case of the loop devices is the  solution,
but I think that it's rather a work around.

> And finally a small plea for more testing. Do you even test before
> blindly sending patches off to Linus?! Sometimes just watching how
> quickly these big patches appears makes it impossible that they have
> gotten any kind of testing other than the 'hey it compiles', which I
> think it just way too little for something that could possible screw
> peoples data up very badly. Frankly, _I'm_ too scared to run 2.5 IDE
> currently. The success ratio of posted over working patches is too big.

Fact is: many of the patches are just big becouse they contain
host chip handling cleanups done by others, and becouse
we have just too many different drivers for the same purpose:
ATAPI packet command devices. Which I'm more and more tempted
to scarp... in favour of just ide-scsi.c. But that's another
story. (Adam J. Richter is givng me constant preassure to do just that and I 
start really tending to admitt that he is just right.)

As of testing. Well at least I can assure you that I'm eating my dogfood,
since I run constantly on the patches. (Heck even the time I write this.)
But I don't use kernels from the BK repository at all.
In fact I just don't use BK at all and I don't intend too.


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] 2.5.21 IDE 91
@ 2002-06-14 16:29 Hron, Randall
  2002-06-14 23:32 ` Andrew Morton
  0 siblings, 1 reply; 36+ messages in thread
From: Hron, Randall @ 2002-06-14 16:29 UTC (permalink / raw)
  To: 'john.weber@linuxhq.com'; +Cc: 'linux-kernel@vger.kernel.org'

> (lord knows i'm trying to get up to speed), but, in the meantime, I can
> test the crap out of a kernel :).

tiobench is having trouble completing in kernels >= 2.5.19 on my K6-2
384 mb ram, IDE test system.  The parameters I'm using are:

tiobench.pl --size 2048 --numruns 3 --threads 1 --threads 32 --threads 64
--threads 128

--size depends on ram and disk space.

Early in 2.5, dbench 192 would exercise a bug or two.
(requires about 5GB of disk space)

Linux Test Project's runalltests.sh has occasionally triggered a bug.

2.5 took a drop in dbench throughput recently.

dbench ext2 128 processes       Average         High            Low(MB/sec)
2.5.19                           18.60           21.69           14.58
2.5.20                           12.89           13.15           12.79
2.5.21                           12.67           12.94           12.51

If you want to benchmark some stuff while you're stress testing,
http://home.earthlink.net/~rwhron/kernel/index.html might be a
starting point for ideas.




^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] 2.5.21 IDE 91
  2002-06-14 16:06       ` Bartlomiej Zolnierkiewicz
@ 2002-06-14 16:33         ` Martin Dalecki
  0 siblings, 0 replies; 36+ messages in thread
From: Martin Dalecki @ 2002-06-14 16:33 UTC (permalink / raw)
  To: Bartlomiej Zolnierkiewicz
  Cc: Dave Jones, Jens Axboe, Linus Torvalds, Kernel Mailing List

Użytkownik Bartlomiej Zolnierkiewicz napisał:
> Just my 0.02 plns...
> 
> On Fri, 14 Jun 2002, Dave Jones wrote:
> 
> 
>>On Fri, Jun 14, 2002 at 05:17:03PM +0200, Jens Axboe wrote:
>>
>> > And finally a small plea for more testing. Do you even test before
>> > blindly sending patches off to Linus?! Sometimes just watching how
>> > quickly these big patches appears makes it impossible that they have
>> > gotten any kind of testing other than the 'hey it compiles', which I
>> > think it just way too little for something that could possible screw
>> > peoples data up very badly. Frankly, _I'm_ too scared to run 2.5 IDE
>> > currently. The success ratio of posted over working patches is too big.
> 
> I review every single patch from Martin and they are usually ok,
> but it is generally good habbit to wait for next patch before running
> previous one (just to see if there was any bugs in previous one)...

And I'm gald you do. You know that I admit if I screw
things up even in my change logs. Finally I'm sometimes just
the shield for screwups done by others. (No problem I can
take the heat :-).

>>Ditto. As we rapidly approach the 100th incarnation of Martin's IDE
>>patches, there are still cases where we die within seconds of booting
>>on some old PIO-only boxes for eg.  There seems to be far too much
>>"lets rip this out as it doesn't do much anyway" rather than fixing
>>known problems.
> 
> 
> I will try to fix them one by one soon...

Many people just do not realize that I don't hunt for the
easy fruits. I tend to solve fundamental things first before
getting down. (See for example the TCQ in and out
discussion I had with Jens - it turned out to be much
better to do the infrastructure thing at same time instead of
hacking up on the existing thing.)

Down to the point - there was *no way in hell* to
solve the PIO problems util:

1. The basic datastructures we have now where there.

2. The locking issues get resolved and take place on a sane
level.

3. The number of possible code flows got sanitized.

Heck, contrary to the habbits of many others I'm constantly
trying to explain even the reasoning behing the changes I do.
Not just what gets changes.

But fortunately right now we are indeed "getting just there" as
the fact shows that the last series of the IDE patches is moving
more and more down to the actual transport layer. And since
the overall structure starts to become more and more
clean even more and more people start to work together with
me on not just some very well separated areas like for
example host chip timing but the overall infrastructure as well.
(Adam, Bartek - I'm glad you are around :-).

Everybody out there proclaiming that this could have
all been solved far earlier by some kind of "easy" fix had just
enough of time to prove me worng on this issue.

Unless I get complains from people who work with me together
on the code itself I indeed tend to follow my own agenda
of prio.

It's that simple.


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] 2.5.21 IDE 91
  2002-06-14 15:17   ` Jens Axboe
                       ` (3 preceding siblings ...)
  2002-06-14 16:15     ` Martin Dalecki
@ 2002-06-14 16:43     ` Linus Torvalds
  2002-06-14 16:47       ` Martin Dalecki
  2002-06-15  8:19       ` Jens Axboe
  4 siblings, 2 replies; 36+ messages in thread
From: Linus Torvalds @ 2002-06-14 16:43 UTC (permalink / raw)
  To: Jens Axboe; +Cc: Martin Dalecki, Kernel Mailing List



On Fri, 14 Jun 2002, Jens Axboe wrote:
>
> - current 2.5 bk deadlocks reading partition info off disk. Again a
>   locking problem I suppose, to be honest I just got very tired when
>   seeing it happen and didn't want to spend tim looking into it.

Hmm. There's a bug in "balance_irq()" if you are trying to run a SMP
kernel on an UP machine right now, and it _might_ be that the lockup has
nothing to do with the IDE layer, but simple with the first PCI interrupt
(as opposed to local timer interrupt) coming in.

One-liner from Zwane Mwaikambo (cut-and-paste, so space is wrong, please
apply by hand).

--- linux-2.5.19/arch/i386/kernel/io_apic.c.orig        Fri Jun 14 17:43:20 2002
+++ linux-2.5.19/arch/i386/kernel/io_apic.c     Fri Jun 14 17:42:23 2002
@@ -251,7 +251,7 @@
        irq_balance_t *entry = irq_balance + irq;
        unsigned long now = jiffies;

-       if (unlikely(entry->timestamp != now)) {
+       if ((entry->timestamp != now) && (smp_num_cpus > 1)) {
                unsigned long allowed_mask;
                int random_number;

I don't know. Might be the IDE code too, of course.

		Linus


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] 2.5.21 IDE 91
  2002-06-14 16:43     ` Linus Torvalds
@ 2002-06-14 16:47       ` Martin Dalecki
  2002-06-15  8:19       ` Jens Axboe
  1 sibling, 0 replies; 36+ messages in thread
From: Martin Dalecki @ 2002-06-14 16:47 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Jens Axboe, Kernel Mailing List

Użytkownik Linus Torvalds napisał:
> 
> On Fri, 14 Jun 2002, Jens Axboe wrote:
> 
>>- current 2.5 bk deadlocks reading partition info off disk. Again a
>>  locking problem I suppose, to be honest I just got very tired when
>>  seeing it happen and didn't want to spend tim looking into it.
> 
> 
> Hmm. There's a bug in "balance_irq()" if you are trying to run a SMP
> kernel on an UP machine right now, and it _might_ be that the lockup has
> nothing to do with the IDE layer, but simple with the first PCI interrupt
> (as opposed to local timer interrupt) coming in.

...

> I don't know. Might be the IDE code too, of course.

Just to complete the picture: I'm running SMP kernels on
UP for the sake of spinlock debugging and compilatoin coverage too.
But as I have already stated - I run my own patches on
top of the last offical release instead of BK contents.



^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] 2.5.21 IDE 91
@ 2002-06-14 17:09 Andries.Brouwer
  2002-06-14 17:15 ` Martin Dalecki
  0 siblings, 1 reply; 36+ messages in thread
From: Andries.Brouwer @ 2002-06-14 17:09 UTC (permalink / raw)
  To: axboe, dalecki; +Cc: linux-kernel, torvalds

> Frankly, _I'm_ too scared to run 2.5 IDE currently.

Backups, that is what you need. Or a scratch machine.

This is what vanilla 2.5.21 can do to your filesystem
(after a reboot and a e2fsck -a):

% ls /lost+found
#10416
#104719
#104724
#10537
#10540
#10547
#10548
#10549
#10550
#10551
#106768
#108545
#108550
#108576
...
(thousands and thousands of files - not lost, only their
names suffered a bit...)

But, to be fair, only a small part of the damage is due
to the kernel. Afterwards, e2fsck made things much worse.

Andries


(Vanilla 2.5.21 does not compile, you say?
OK, 2.5.21 together with the addition of the #include lines
that make it compile.)

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] 2.5.21 IDE 91
  2002-06-14 17:09 Andries.Brouwer
@ 2002-06-14 17:15 ` Martin Dalecki
  0 siblings, 0 replies; 36+ messages in thread
From: Martin Dalecki @ 2002-06-14 17:15 UTC (permalink / raw)
  To: Andries.Brouwer; +Cc: axboe, linux-kernel, torvalds

Użytkownik Andries.Brouwer@cwi.nl napisał:
>>Frankly, _I'm_ too scared to run 2.5 IDE currently.
> 
> 
> Backups, that is what you need. Or a scratch machine.
> 
> This is what vanilla 2.5.21 can do to your filesystem
> (after a reboot and a e2fsck -a):
> 
> % ls /lost+found
> #10416
> #104719

> ...
> (thousands and thousands of files - not lost, only their
> names suffered a bit...)
> 
> But, to be fair, only a small part of the damage is due
> to the kernel. Afterwards, e2fsck made things much worse.

Just curious: Becouse dir entires beeing
affected looks like the effect of the recent changes to
the VFS.


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] 2.5.21 IDE 91
  2002-06-14 16:04       ` Dave Jones
@ 2002-06-14 17:23         ` Martin Dalecki
  0 siblings, 0 replies; 36+ messages in thread
From: Martin Dalecki @ 2002-06-14 17:23 UTC (permalink / raw)
  To: Dave Jones
  Cc: Benjamin LaHaise, Jens Axboe, Linus Torvalds, Kernel Mailing List

Użytkownik Dave Jones napisał:
> On Fri, Jun 14, 2002 at 11:56:34AM -0400, Benjamin LaHaise wrote:
> 
>  > Add my voice to these concerns.  At the very least the code should have 
>  > been moved into a second tree to allow people to work with the old stable 
>  > driver as needed.
> 
> *nod*, with periodic known-good _tested_ bits getting merged to
> mainline, to avoid the need for an IDE merge flag day as has been
> the norm in the past.

And they where the best way to basically halt the whole thing.
Right now the only thing you would achieve would be to kick out
the other people with wich I work *together*.
Oh perhaps noone of them would have got involved...
I simply don't like to idea of lonely developement on a separate
small iland called a "dedicated mailing list". And I don't see any
need for it - the traffic on LKML isn't that high if one
learn how to use mozillas filtering facilities.




^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] 2.5.21 IDE 91
  2002-06-14 15:43     ` Dave Jones
  2002-06-14 16:06       ` Bartlomiej Zolnierkiewicz
@ 2002-06-14 17:56       ` Linus Torvalds
  1 sibling, 0 replies; 36+ messages in thread
From: Linus Torvalds @ 2002-06-14 17:56 UTC (permalink / raw)
  To: Dave Jones; +Cc: Jens Axboe, Martin Dalecki, Kernel Mailing List



On Fri, 14 Jun 2002, Dave Jones wrote:
> On Fri, Jun 14, 2002 at 05:17:03PM +0200, Jens Axboe wrote:
>
>  > And finally a small plea for more testing. Do you even test before
>  > blindly sending patches off to Linus?! Sometimes just watching how
>  > quickly these big patches appears makes it impossible that they have
>  > gotten any kind of testing other than the 'hey it compiles', which I
>  > think it just way too little for something that could possible screw
>  > peoples data up very badly. Frankly, _I'm_ too scared to run 2.5 IDE
>  > currently. The success ratio of posted over working patches is too big.
>
> Ditto. As we rapidly approach the 100th incarnation of Martin's IDE
> patches, there are still cases where we die within seconds of booting
> on some old PIO-only boxes for eg.  There seems to be far too much
> "lets rip this out as it doesn't do much anyway" rather than fixing
> known problems.

Are we perhaps forgetting the _point_ of open source?

We're not supposed to be writing code and then releasing it when it is
done. We _want_ incremental changes, and open breakage.

Do bugs happen? Should we expect locking problems if the author is working
on changing the locking (which everybody who has ever looked at the code
admits was totally broken)?

YES.

Am I personally disturbed over the fact that PIO is still totally broken?
Yes. I don't like that part at all, but I do know that Martin is aware of
it, and I keep pushing on him all the time. Martin claims to be on top of
it, and getting close to be able to fix it. I haven't seen any real reason
to doubt him on that so far. We'll get there.

		Linus


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] 2.5.21 IDE 91
@ 2002-06-14 18:36 Andries.Brouwer
  0 siblings, 0 replies; 36+ messages in thread
From: Andries.Brouwer @ 2002-06-14 18:36 UTC (permalink / raw)
  To: Andries.Brouwer, dalecki; +Cc: axboe, linux-kernel, torvalds

>>>Frankly, _I'm_ too scared to run 2.5 IDE currently.

>> Backups, that is what you need. Or a scratch machine.

> Just curious: Becouse dir entires beeing affected
> looks like the effect of the recent changes to the VFS.

I am not ready to blame any part of the kernel.
Don't read an implicit complaint about IDE into my note.
No, just a warning: this is what vanilla 2.5.21 can do.

Andries

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] 2.5.21 IDE 91
  2002-06-14 16:29 Hron, Randall
@ 2002-06-14 23:32 ` Andrew Morton
  0 siblings, 0 replies; 36+ messages in thread
From: Andrew Morton @ 2002-06-14 23:32 UTC (permalink / raw)
  To: Hron, Randall
  Cc: 'john.weber@linuxhq.com',
	'linux-kernel@vger.kernel.org'

"Hron, Randall" wrote:
> 
> > (lord knows i'm trying to get up to speed), but, in the meantime, I can
> > test the crap out of a kernel :).
> 
> tiobench is having trouble completing in kernels >= 2.5.19 on my K6-2
> 384 mb ram, IDE test system.  The parameters I'm using are:
> 
> tiobench.pl --size 2048 --numruns 3 --threads 1 --threads 32 --threads 64
> --threads 128
> 
> --size depends on ram and disk space.
> 
> Early in 2.5, dbench 192 would exercise a bug or two.
> (requires about 5GB of disk space)
> 
> Linux Test Project's runalltests.sh has occasionally triggered a bug.

Is this still happening?  What was the bug?
 
> 2.5 took a drop in dbench throughput recently.
> 
> dbench ext2 128 processes       Average         High            Low(MB/sec)

Is this still with 384 megs of memory?

> 2.5.19                           18.60           21.69           14.58
> 2.5.20                           12.89           13.15           12.79
> 2.5.21                           12.67           12.94           12.51
> 

One possibile culprit here is the doubling of the request queue size
in 2.5.20.  A long time ago it was 1024 slots.  Then it went to
128.  That's where it is in Marcelo kernels.  Then -ac kernels
went up to 1024 because they have read-latency2.  Somehow 2.5 found
itself at 256 slots.  In 2.5.20 it slealthily snuck up to 512
slots.  I didn't squeak about this because I was interested to see what
effect it would have.

Does this patch get the throughput back?


Index: drivers/block/ll_rw_blk.c
===================================================================
RCS file: /opt/cvs/lk/drivers/block/ll_rw_blk.c,v
retrieving revision 1.66
diff -u -r1.66 ll_rw_blk.c
--- drivers/block/ll_rw_blk.c	9 Jun 2002 07:13:15 -0000	1.66
+++ drivers/block/ll_rw_blk.c	14 Jun 2002 23:35:54 -0000
@@ -1974,6 +1974,8 @@
 	if (queue_nr_requests > 512)
 		queue_nr_requests = 512;
 
+	queue_nr_requests = 256;
+
 	/*
 	 * Batch frees according to queue length
 	 */


-

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] 2.5.21 IDE 91
  2002-06-14 16:15     ` Martin Dalecki
@ 2002-06-15  8:15       ` Jens Axboe
  0 siblings, 0 replies; 36+ messages in thread
From: Jens Axboe @ 2002-06-15  8:15 UTC (permalink / raw)
  To: Martin Dalecki; +Cc: Linus Torvalds, Kernel Mailing List

On Fri, Jun 14 2002, Martin Dalecki wrote:
> >- current 2.5 bk deadlocks reading partition info off disk. Again a
> >  locking problem I suppose, to be honest I just got very tired when
> >  seeing it happen and didn't want to spend tim looking into it.
> 
> 2.5.21 + the patches I did doesn't. Likely it's the driverfs?

This particular problem I don't know, I'm simply just out lining it.

> >I thought IDE locking couldn't get worse than 2.4, but I think you are
> >well into doing just that. What exactly are you plans with the channel
> >locks? Right now, to me, it seems you are extending the use of them to
> >the point where they would be used to serialize the entire request
> >operation start? Heck, ide-cd is even holding the channel lock when
> >outputting the cdb now.
> 
> After extracting out 80% of the host controller register file
> access one has to realize that in reality we where releasing the lock
> just to regain it immediately. Or alternatively accessing them
> without any lock protection at all. (Same goes for BIO ummer layer
> memmbers.) This is why they are pushed up. It's just avoiding the
> "windows" between unlock and immediate relock and making the real
> behaviour more "obvious". You have just realized this.

I know the locking needs to be reworked, it just very much looks like
that you are just extending the scope of the channel lock to basically
be held the entire way through. I'm hoping this is just a transition?

> 2.4 prevents the locking problems basically by georgeously
> disabling IRQs. Too fine grained locking is a futile exercise.

Yep

> Unless I see the time spent in system state during concurrent disk
> access going really up (it doesn't right now), I don't see any thing
> bad in making the locking more coarse. Locks don't came for free and
> having fine grained locking isn't justifying itself.

That's silly. Most of the "locking" required is just serializing access
to the channel (or interface). Just making the locking coarse and
grabbing it to do just that is pretty dumb.

> Another "usual Marcin approach" - don't optimze for the sake of it.
> See futile unlikely() tagging and inlining in tcq.c for example.
> I don't do somethig like that. I have just written too much
> numerical code which was really time constrained to do something
> like this before looking at benchmarks.
> Really constrained means having a program running 7 or "just"
> 5 *days*. This can make a difference, a difference in hard real
> money on the range of multiple kEUR!

Listing to complaints about unlikely() as micro-optimization from
someone who doesn't think that using a spin lock to serialize looong
operations is a problem doesn't carry a lot of weight, sorry. I don't
know why you bring this up again, I've already stated my case in the
last mail and I see no reason to repeat it.

> Finally - unless there appears some aother way to block access to
> busy devices on the generic block layer I do it the only way
> we have right now - spinlocks. (... looking forward to working
> queue pugging and the work done by Adam richter).

First of all, queue plugging works now. Second, this is no excuse for
using a spin lock to serialize request starts?! Please tell me you don't
really mean this. At worst you could have spurious request_fn runs
before, but that's hardly a big problem. I know the start/stop queue is
a nicer interface (that's why it's there now), but there's nothing wrong
with simply recalling your queueing function once a request completes.
That's how it was done before too. In short, this is not an argument, it
has little relevance to this case. And the big-bio stuff, how is that
relevant?

> Unless there is a sane way to signal partial completion - we will
> be doing it at once. Unless we have a sane async io infrastructure
> most of the above will be likely not solved anyway.

??

> >  ata_device?! we are serializing requests for ata_device's on the
> >  ata_channel anyways, which is why it made sense to have the active
> >  request there.
> 
> Becouse it is going to go away altogether. We need it there
> just only for the purpose of the default ide_timer_expiry function, which
> is subject to go away since a long time. And finally becouse
> it doesn't hurt.

Because it doesn't hurt? From the same book of code writing an
optimization? Again, the serialization point is the channel, why move
the active request to the drive?! To me, this is loosing information
and making the whole thing more confusing.

> Further on I refer you to the discussion we had (or was it Linus?)
> once about the fact that attaching physical properties of the
> device to the request queue and replicating those parameters in
> every single request struct, is well ... "unpleasant" on behalf of the

The drive <-> queue relationship has no bearing on the active request
serialization.

> upper layers. Loop devices expose the same problem.

Please explain the loop case?

> Once again just grepping for hard_ memebers of the struct
> request makes it obvious.

Do you understand why we have the hard_ members? It seems you don't,
because I don't see how that is remotely related to this. The hard_
members are just there so that low level drivers can screw with the
nr_sectors and current_nr_sectors as much as they want, and the block
layer can still maintain a consistent request state regardless of what
happens.

> Somce people say that using the gratest common denominator in
> the case of the loop devices is the  solution,
> but I think that it's rather a work around.

This sounds like nonsense.

> >And finally a small plea for more testing. Do you even test before
> >blindly sending patches off to Linus?! Sometimes just watching how
> >quickly these big patches appears makes it impossible that they have
> >gotten any kind of testing other than the 'hey it compiles', which I
> >think it just way too little for something that could possible screw
> >peoples data up very badly. Frankly, _I'm_ too scared to run 2.5 IDE
> >currently. The success ratio of posted over working patches is too big.
> 
> Fact is: many of the patches are just big becouse they contain
> host chip handling cleanups done by others, and becouse
> we have just too many different drivers for the same purpose:
> ATAPI packet command devices. Which I'm more and more tempted
> to scarp... in favour of just ide-scsi.c. But that's another
> story. (Adam J. Richter is givng me constant preassure to do just that and 
> I start really tending to admitt that he is just right.)

Yeah I know that they are mostly big because of cleanups, and I'm not
worried about that at all. But even when just maybe 10% of the patch is
changing stuff radically, problems can sneak in. Plus, there appears to
be no clear goal in what you are doing. Things change in one direction
one day, back in another the next day. I have a hard time seeing how
this will lead to a cleaner and more maintainable code base.

> As of testing. Well at least I can assure you that I'm eating my dogfood,
> since I run constantly on the patches. (Heck even the time I write this.)

That's good for you, but a single person testing is usually not enough.
Especially not with stuff like IDE where there are sooo many different
pieces of hardware and setups out there. I'm not saying that you should
do complete validation of the code every single time, but maybe having a
bleding edge tree and a linus tree would go along way. Then you could
ship cleanups as much as you want, but do the more radical changes a bit
slower. I think that would also solve your direction problem, letting
the more radical changes mature a bit before shipping it.

> But I don't use kernels from the BK repository at all.
> In fact I just don't use BK at all and I don't intend too.

Doesn't matter. Fact is, you don't know when Linus will tag the BK tree
as the next release. Or how many of your ide-xx patches are in at that
point.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] 2.5.21 IDE 91
  2002-06-14 16:43     ` Linus Torvalds
  2002-06-14 16:47       ` Martin Dalecki
@ 2002-06-15  8:19       ` Jens Axboe
  1 sibling, 0 replies; 36+ messages in thread
From: Jens Axboe @ 2002-06-15  8:19 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Martin Dalecki, Kernel Mailing List

On Fri, Jun 14 2002, Linus Torvalds wrote:
> 
> 
> On Fri, 14 Jun 2002, Jens Axboe wrote:
> >
> > - current 2.5 bk deadlocks reading partition info off disk. Again a
> >   locking problem I suppose, to be honest I just got very tired when
> >   seeing it happen and didn't want to spend tim looking into it.
> 
> Hmm. There's a bug in "balance_irq()" if you are trying to run a SMP
> kernel on an UP machine right now, and it _might_ be that the lockup has
> nothing to do with the IDE layer, but simple with the first PCI interrupt
> (as opposed to local timer interrupt) coming in.
> 
> One-liner from Zwane Mwaikambo (cut-and-paste, so space is wrong, please
> apply by hand).
> 
> --- linux-2.5.19/arch/i386/kernel/io_apic.c.orig        Fri Jun 14 17:43:20 2002
> +++ linux-2.5.19/arch/i386/kernel/io_apic.c     Fri Jun 14 17:42:23 2002
> @@ -251,7 +251,7 @@
>         irq_balance_t *entry = irq_balance + irq;
>         unsigned long now = jiffies;
> 
> -       if (unlikely(entry->timestamp != now)) {
> +       if ((entry->timestamp != now) && (smp_num_cpus > 1)) {
>                 unsigned long allowed_mask;
>                 int random_number;
> 
> I don't know. Might be the IDE code too, of course.

If it's just a SMP kernel on UP, then that's not the problem here. This
was SMP kernel on SMP machine.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] 2.5.21 IDE 91
@ 2002-06-15 10:42 rwhron
  2002-06-15 17:17 ` Jens Axboe
  0 siblings, 1 reply; 36+ messages in thread
From: rwhron @ 2002-06-15 10:42 UTC (permalink / raw)
  To: akpm; +Cc: linux-kernel

>> > test the crap out of a kernel :).

>> Linux Test Project's runalltests.sh has occasionally triggered a bug.

> Is this still happening?  What was the bug?

That's just a general suggestion with regard to kernel testing.
Recent 2.5.x hasn't had a problem completing LTP runalltests.sh.  
I've used LTP to narrow down a couple early 2.4.x reiserfs bugs to
a specific test case.  LTP has occasionally triggered oops and
livelocks too.  It's a useful regression test. 

>> 2.5 took a drop in dbench throughput recently.
>>
>> dbench ext2 128 processes       Average         High            Low(MB/sec)

> Is this still with 384 megs of memory?

Yes. 

> 2.5.19                           18.60           21.69           14.58
> 2.5.20                           12.89           13.15           12.79

> One possibile culprit here is the doubling of the request queue size
> in 2.5.20.  A long time ago it was 1024 slots.  Then it went to
> 128.  That's where it is in Marcelo kernels.  Then -ac kernels
> went up to 1024 because they have read-latency2.  Somehow 2.5 found
> itself at 256 slots.  In 2.5.20 it slealthily snuck up to 512
> slots.  I didn't squeak about this because I was interested to see what
> effect it would have.

Interesting.  I've seen read-latency2 drop dbench throughput in -aa
kernels (but I use it anyway).  I'd like to capture the request
queue size.  Is there a command or file in /proc that displays 
it, or should I just grep drivers/block/ll_rw_blk.c?

> Does this patch get the throughput back?

I will try that next.

-- 
Randy Hron


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] 2.5.21 IDE 91
@ 2002-06-15 11:41 rwhron
  2002-06-15 11:50 ` Dave Jones
  0 siblings, 1 reply; 36+ messages in thread
From: rwhron @ 2002-06-15 11:41 UTC (permalink / raw)
  To: davej; +Cc: linux-kernel

> When the IDE carnage first started back circa 2.5.3, I had contemplated
> not merging *any* of the IDE patches, just so that people who want to
> work on other areas could have something solid to build upon.
> I regret not following through on that instinct.

I give the -dj series a vote of "good taste".   In my testing they have
been reliable.  Recently, 2.5.20-dj[34] completed all my tests, whereas
2.5.{19,20,21} haven't.   I realize breakage in the development series
is expected and sometimes good.  Nonetheless, "two thumbs up" for -dj.

-- 
Randy Hron


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] 2.5.21 IDE 91
  2002-06-15 11:41 rwhron
@ 2002-06-15 11:50 ` Dave Jones
  0 siblings, 0 replies; 36+ messages in thread
From: Dave Jones @ 2002-06-15 11:50 UTC (permalink / raw)
  To: rwhron; +Cc: linux-kernel

On Sat, Jun 15, 2002 at 07:41:06AM -0400, rwhron@earthlink.net wrote:
 > > When the IDE carnage first started back circa 2.5.3, I had contemplated
 > > not merging *any* of the IDE patches, just so that people who want to
 > > work on other areas could have something solid to build upon.
 > > I regret not following through on that instinct.
 > 
 > I give the -dj series a vote of "good taste".   In my testing they have
 > been reliable.  Recently, 2.5.20-dj[34] completed all my tests, whereas
 > 2.5.{19,20,21} haven't.   I realize breakage in the development series
 > is expected and sometimes good.  Nonetheless, "two thumbs up" for -dj.

That's interesting. What exactly was failing ? It'd be in everyones
interests to get those bits pushed to Linus sooner.

        Dave.

-- 
| Dave Jones.        http://www.codemonkey.org.uk
| SuSE Labs

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] 2.5.21 IDE 91
@ 2002-06-15 12:05 rwhron
  2002-06-17  8:40 ` Andrew Morton
  0 siblings, 1 reply; 36+ messages in thread
From: rwhron @ 2002-06-15 12:05 UTC (permalink / raw)
  To: davej; +Cc: linux-kernel

>> Recently, 2.5.20-dj[34] completed all my tests, whereas
>> 2.5.{19,20,21} haven't.   I realize breakage in the development series
>> is expected and sometimes good.  Nonetheless, "two thumbs up" for -dj.

> That's interesting. What exactly was failing ? It'd be in everyones
> interests to get those bits pushed to Linus sooner.

tiobench.pl --size 2048 --numruns 3 --threads 128  # 384 MB ram in machine

The ssh session running vmstat no longer updates.  Console won't
give a new "login" prompt with <Enter>.  <sysrq T> prints a 
trace (which I haven't captured - it's really long with > 128 processes).

Does <sysrq T> need any post processing to convert addresses to
something more useful?  I'll save it on 2.5.22 if it happens.

-- 
Randy Hron


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] 2.5.21 IDE 91
  2002-06-15 10:42 rwhron
@ 2002-06-15 17:17 ` Jens Axboe
  0 siblings, 0 replies; 36+ messages in thread
From: Jens Axboe @ 2002-06-15 17:17 UTC (permalink / raw)
  To: rwhron; +Cc: akpm, linux-kernel

On Sat, Jun 15 2002, rwhron@earthlink.net wrote:
> > One possibile culprit here is the doubling of the request queue size
> > in 2.5.20.  A long time ago it was 1024 slots.  Then it went to
> > 128.  That's where it is in Marcelo kernels.  Then -ac kernels
> > went up to 1024 because they have read-latency2.  Somehow 2.5 found
> > itself at 256 slots.  In 2.5.20 it slealthily snuck up to 512
> > slots.  I didn't squeak about this because I was interested to see what
> > effect it would have.
> 
> Interesting.  I've seen read-latency2 drop dbench throughput in -aa
> kernels (but I use it anyway).  I'd like to capture the request
> queue size.  Is there a command or file in /proc that displays 
> it, or should I just grep drivers/block/ll_rw_blk.c?

There is currently no way you can capture the queue request state. In
the past I've done sysrq hacks to display it, but that's about it.

-- 
Jens Axboe


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] 2.5.21 IDE 91
@ 2002-06-15 21:00 rwhron
  2002-06-15 21:00 ` William Lee Irwin III
  2002-06-15 21:23 ` Andrew Morton
  0 siblings, 2 replies; 36+ messages in thread
From: rwhron @ 2002-06-15 21:00 UTC (permalink / raw)
  To: akpm; +Cc: linux-kernel

> Does this patch get the throughput back?

That makes all the difference to dbench.  Throughput
for dbench 128 up over 40% compared to vanilla 2.5.21.
-- 
Randy Hron


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] 2.5.21 IDE 91
  2002-06-15 21:00 rwhron
@ 2002-06-15 21:00 ` William Lee Irwin III
  2002-06-15 21:23 ` Andrew Morton
  1 sibling, 0 replies; 36+ messages in thread
From: William Lee Irwin III @ 2002-06-15 21:00 UTC (permalink / raw)
  To: rwhron; +Cc: akpm, linux-kernel

At some point in the past, akpm wrote:
>> Does this patch get the throughput back?

On Sat, Jun 15, 2002 at 05:00:09PM -0400, rwhron@earthlink.net wrote:
> That makes all the difference to dbench.  Throughput
> for dbench 128 up over 40% compared to vanilla 2.5.21.

How does it do against the prior 2.5 kernels?


Cheers,
Bill

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] 2.5.21 IDE 91
  2002-06-15 21:00 rwhron
  2002-06-15 21:00 ` William Lee Irwin III
@ 2002-06-15 21:23 ` Andrew Morton
  1 sibling, 0 replies; 36+ messages in thread
From: Andrew Morton @ 2002-06-15 21:23 UTC (permalink / raw)
  To: rwhron; +Cc: linux-kernel

rwhron@earthlink.net wrote:
> 
> > Does this patch get the throughput back?
> 
> That makes all the difference to dbench.  Throughput
> for dbench 128 up over 40% compared to vanilla 2.5.21.

ho hum.  Now we need to work out why a larger request queue
whacks dbench, whether it penalises workloads which we actually
care about and if so, what the appropriate size really should be.
If indeed that algorithms are optimal.  urgh.

Thanks again, Randy.

-

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] 2.5.21 IDE 91
@ 2002-06-16 10:52 rwhron
  0 siblings, 0 replies; 36+ messages in thread
From: rwhron @ 2002-06-16 10:52 UTC (permalink / raw)
  To: davej; +Cc: linux-kernel

> That's interesting. What exactly was failing ? It'd be in everyones
> interests to get those bits pushed to Linus sooner.

This time it was tiobench with 32 threads.  The <sysrq Tasks>
and a few other details on the livelock are at:

http://home.earthlink.net/~rwhron/kernel/2.5.21-sysrq.txt

If the System.map file is useful, it is at:
http://home.earthlink.net/~rwhron/kernel/System.map-2.5.21.bz2

-- 
Randy Hron


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] 2.5.21 IDE 91
@ 2002-06-16 11:05 rwhron
  0 siblings, 0 replies; 36+ messages in thread
From: rwhron @ 2002-06-16 11:05 UTC (permalink / raw)
  To: wli; +Cc: linux-kernel

> At some point in the past, akpm wrote:
>>> Does this patch get the throughput back?

>> That makes all the difference to dbench.  

> How does it do against the prior 2.5 kernels?

dbench ext2 128    Average 
2.5.1-dj13           8.04 mb/sec 
2.5.2-dj6            8.59  
2.5.4-dj2            8.45  
2.5.16              11.06  
2.5.18-akpm         18.11  
2.5.18-wli          18.54  
2.5.19              18.60  
2.5.19-wli          18.54  
2.5.19-wli3         20.93  
2.5.20              12.89  
2.5.20-dj4          10.58  
2.5.21              12.67  
2.5.21-akpm         19.65

-- 
Randy Hron


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] 2.5.21 IDE 91
@ 2002-06-16 13:03 rwhron
  0 siblings, 0 replies; 36+ messages in thread
From: rwhron @ 2002-06-16 13:03 UTC (permalink / raw)
  To: linux-kernel

This oops occured after rebooting into 2.5.21, compiling a kernel,
then init 6.

flushing ide devices: hda <1>Unable to handle kernel NULL pointer dereference at virtual address 00000004
c0199a8b
*pde = 00000000
Oops: 0002
CPU:    0
EIP:    0010:[<c0199a8b>]    Not tainted
Using defaults from ksymoops -t elf32-i386 -a i386
EFLAGS: 00010286
eax: c02c1944   ebx: c02c192c   ecx: 00000000   edx: 00000000
esi: c025cc60   edi: 00000001   ebp: 00000001   esp: d01fde58
ds: 0018   es: 0018   ss: 0018
Stack: c02c192c c02c17d4 c0199d0e c02c192c c02c17d0 c01c6b1c c02c192c c02c17d0
       c01c072b c02c17d0 c025c77c 00000000 00000001 bffffd54 c011910e c025c77c
       00000001 00000000 d01fc000 080498c0 fee1dead c01193ee c02a1b48 00000001
Call Trace: [<c0199d0e>] [<c01c6b1c>] [<c01c072b>] [<c011910e>] [<c01193ee>]
   [<c01dbaa3>] [<c01d7ab5>] [<c01ffaac>] [<c01ffdb9>] [<c01d0e57>] [<c013c264>]
   [<c013ccfc>] [<c013b016>] [<c012c654>] [<c012b6f5>] [<c012b745>] [<c01069f7>]
Code: 89 4a 04 89 11 89 43 18 89 40 04 83 7e 38 00 74 09 53 8b 46


>>EIP; c0199a8b <device_detach+23/4c>   <=====

>>eax; c02c1944 <ide_hwifs+2a4/3d90>
>>ebx; c02c192c <ide_hwifs+28c/3d90>
>>esi; c025cc60 <idedisk_devdrv+0/60>
>>esp; d01fde58 <END_OF_CODE+ff35f3c/????>

Trace; c0199d0e <put_device+4a/7c>
Trace; c01c6b1c <idedisk_cleanup+1c/60>
Trace; c01c072b <ata_sys_notify+9f/d4>
Trace; c011910e <notifier_call_chain+1e/38>
Trace; c01193ee <sys_reboot+c2/1d0>
Trace; c01dbaa3 <rtmsg_ifinfo+6f/74>
Trace; c01d7ab5 <dev_change_flags+f1/fc>
Trace; c01ffaac <devinet_ioctl+348/6b4>
Trace; c01ffdb9 <devinet_ioctl+655/6b4>
Trace; c01d0e57 <sock_destroy_inode+13/18>
Trace; c013c264 <destroy_inode+2c/4c>
Trace; c013ccfc <generic_forget_inode+c4/cc>
Trace; c013b016 <dput+126/144>
Trace; c012c654 <fput+bc/e0>
Trace; c012b6f5 <filp_close+59/64>
Trace; c012b745 <sys_close+45/58>
Trace; c01069f7 <syscall_call+7/b>

Code;  c0199a8b <device_detach+23/4c>
00000000 <_EIP>:
Code;  c0199a8b <device_detach+23/4c>   <=====
   0:   89 4a 04                  mov    %ecx,0x4(%edx)   <=====
Code;  c0199a8e <device_detach+26/4c>
   3:   89 11                     mov    %edx,(%ecx)
Code;  c0199a90 <device_detach+28/4c>
   5:   89 43 18                  mov    %eax,0x18(%ebx)
Code;  c0199a93 <device_detach+2b/4c>
   8:   89 40 04                  mov    %eax,0x4(%eax)
Code;  c0199a96 <device_detach+2e/4c>
   b:   83 7e 38 00               cmpl   $0x0,0x38(%esi)
Code;  c0199a9a <device_detach+32/4c>
   f:   74 09                     je     1a <_EIP+0x1a> c0199aa5 <device_detach+3d/4c>
Code;  c0199a9c <device_detach+34/4c>
  11:   53                        push   %ebx
Code;  c0199a9d <device_detach+35/4c>
  12:   8b 46 00                  mov    0x0(%esi),%eax


-- 
Randy Hron


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] 2.5.21 IDE 91
@ 2002-06-16 16:36 rwhron
  0 siblings, 0 replies; 36+ messages in thread
From: rwhron @ 2002-06-16 16:36 UTC (permalink / raw)
  To: linux-kernel; +Cc: davej, axboe, akpm

There is a stack trace with function names from the livelock 
during tiotest on 2.5.21 at:

http://home.earthlink.net/~rwhron/kernel/2.5.21.tasktrace

The task trace was create with:
http://home.earthlink.net/~rwhron/kernel/tasktrace

-- 
Randy Hron


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] 2.5.21 IDE 91
  2002-06-15 12:05 rwhron
@ 2002-06-17  8:40 ` Andrew Morton
  0 siblings, 0 replies; 36+ messages in thread
From: Andrew Morton @ 2002-06-17  8:40 UTC (permalink / raw)
  To: rwhron; +Cc: davej, linux-kernel

rwhron@earthlink.net wrote:
> 
> >> Recently, 2.5.20-dj[34] completed all my tests, whereas
> >> 2.5.{19,20,21} haven't.   I realize breakage in the development series
> >> is expected and sometimes good.  Nonetheless, "two thumbs up" for -dj.
> 
> > That's interesting. What exactly was failing ? It'd be in everyones
> > interests to get those bits pushed to Linus sooner.
> 
> tiobench.pl --size 2048 --numruns 3 --threads 128  # 384 MB ram in machine
> 
> The ssh session running vmstat no longer updates.  Console won't
> give a new "login" prompt with <Enter>.  <sysrq T> prints a
> trace (which I haven't captured - it's really long with > 128 processes).
> 
> Does <sysrq T> need any post processing to convert addresses to
> something more useful?  I'll save it on 2.5.22 if it happens.

Well I dunno, Randy.  Works fine here, on aic7xxx SCSI and hpt366 IDE.

>From your trace it would seem that writeout completion has not
occurred against one or more pages.  Could be that the device
driver lost an interrupt, or it failed to deliver completion
for one or more BIO segments, or something screwed up at the
VFS level.

I am (of course ;)) disinclined to believe the latter, mainly
because of the amount of testing I do here.  Eight-hour Cerberus
runs on quad CPU, five IDE disks and six SCSI disks all chugging
along, no probs.

Is it reproducible?   Are you able to try it on a different machine?
On other disks in the same machine? 

-

^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] 2.5.21 IDE 91
@ 2002-06-17 13:16 rwhron
  0 siblings, 0 replies; 36+ messages in thread
From: rwhron @ 2002-06-17 13:16 UTC (permalink / raw)
  To: akpm; +Cc: linux-kernel

>> tiobench.pl --size 2048 --numruns 3 --threads 128  # 384 MB ram in machine

> From your trace it would seem that writeout completion has not
> occurred against one or more pages.  Could be that the device
> driver lost an interrupt, or it failed to deliver completion
> for one or more BIO segments, or something screwed up at the
> VFS level.

> I am (of course ;)) disinclined to believe the latter, mainly
> because of the amount of testing I do here.  Eight-hour Cerberus
> runs on quad CPU, five IDE disks and six SCSI disks all chugging
> along, no probs.

> Is it reproducible?   Are you able to try it on a different machine?
> On other disks in the same machine?

tiobench had a similar livelock in 2.5.19, 2.5.19 + 2 versions of
wli's lazy-buddy allocator, 2.5.20, 2.5.21, and 2.5.21 with the
request queue size change we talked about.  However, when I tried
it just now on a different disk and filesystem type (reiser instead
of ext2) it didn't happen.  The non-reproduced livelock didn't
have any of the previous stress testing run against the machine
though.

Since the tiobench livelock appeared in 2.5.19, the following
kernels have completed all tests.

2.4.19-pre10
2.5.20-dj3
2.4.19-pre10-ac2
2.4.19-pre10-aa1
2.5.20-dj4
2.4.19-pre10-jam2
2.4.19-pre10-jam2-O2
2.4.19-pre10-jam2-O3
2.4.18-cmpr-cache
2.4.19-pre10-mjc1

2.5.22 is out now and I'm trying that.  If it has a problem, I'm
inclined to rebuild the ext2 that tiobench, osdb, and dbench run 
on.  (may not help, but it won't hurt :)

-- 
Randy Hron


^ permalink raw reply	[flat|nested] 36+ messages in thread

* Re: [PATCH] 2.5.21 IDE 91
@ 2002-06-18 13:24 rwhron
  0 siblings, 0 replies; 36+ messages in thread
From: rwhron @ 2002-06-18 13:24 UTC (permalink / raw)
  To: linux-kernel

>> Is it reproducible?   

> tiobench had a similar livelock in 2.5.19, 2.5.19 + 2 versions of
> wli's lazy-buddy allocator, 2.5.20, 2.5.21

2.5.22 completes all the tests, including tiobench.  :)

-- 
Randy Hron
http://home.earthlink.net/~rwhron/


^ permalink raw reply	[flat|nested] 36+ messages in thread

end of thread, other threads:[~2002-06-18 13:23 UTC | newest]

Thread overview: 36+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2002-06-16 13:03 [PATCH] 2.5.21 IDE 91 rwhron
  -- strict thread matches above, loose matches on Subject: below --
2002-06-18 13:24 rwhron
2002-06-17 13:16 rwhron
2002-06-16 16:36 rwhron
2002-06-16 11:05 rwhron
2002-06-16 10:52 rwhron
2002-06-15 21:00 rwhron
2002-06-15 21:00 ` William Lee Irwin III
2002-06-15 21:23 ` Andrew Morton
2002-06-15 12:05 rwhron
2002-06-17  8:40 ` Andrew Morton
2002-06-15 11:41 rwhron
2002-06-15 11:50 ` Dave Jones
2002-06-15 10:42 rwhron
2002-06-15 17:17 ` Jens Axboe
2002-06-14 18:36 Andries.Brouwer
2002-06-14 17:09 Andries.Brouwer
2002-06-14 17:15 ` Martin Dalecki
2002-06-14 16:29 Hron, Randall
2002-06-14 23:32 ` Andrew Morton
2002-06-09  5:42 Linux 2.5.21 Linus Torvalds
2002-06-14 14:02 ` [PATCH] 2.5.21 IDE 91 Martin Dalecki
2002-06-14 15:17   ` Jens Axboe
2002-06-14 15:42     ` John Weber
2002-06-14 15:43     ` Dave Jones
2002-06-14 16:06       ` Bartlomiej Zolnierkiewicz
2002-06-14 16:33         ` Martin Dalecki
2002-06-14 17:56       ` Linus Torvalds
2002-06-14 15:56     ` Benjamin LaHaise
2002-06-14 16:04       ` Dave Jones
2002-06-14 17:23         ` Martin Dalecki
2002-06-14 16:09       ` Bartlomiej Zolnierkiewicz
2002-06-14 16:15     ` Martin Dalecki
2002-06-15  8:15       ` Jens Axboe
2002-06-14 16:43     ` Linus Torvalds
2002-06-14 16:47       ` Martin Dalecki
2002-06-15  8:19       ` Jens Axboe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox