* ide-scsi error handling
@ 2003-05-18 23:07 Willem Riede
2003-05-19 13:01 ` Douglas Gilbert
2003-05-19 14:36 ` Randy.Dunlap
0 siblings, 2 replies; 6+ messages in thread
From: Willem Riede @ 2003-05-18 23:07 UTC (permalink / raw)
To: linux-scsi
Now that current 2.5 kernels boot again for me (haven't been able
to run current kernels for months, but I digress) I've done some
more work on the error handling in ide-scsi.
The patch below works for me "most of the time". I'd appreciate if
others try it, and report results, or inspect the patch and comment.
Against 2.5.69-bk13.
Thanks, Willem Riede.
diff -uwr linux-2.5.69-bk13/drivers/ide/ide-iops.c linux-2.5.69-bk13-wr/drivers/ide/ide-iops.c
--- linux-2.5.69-bk13/drivers/ide/ide-iops.c 2003-05-18 11:10:22.000000000 -0400
+++ linux-2.5.69-bk13-wr/drivers/ide/ide-iops.c 2003-05-18 13:51:02.000000000 -0400
@@ -1134,6 +1134,7 @@
if (hwif->reset_poll(drive)) {
printk(KERN_ERR "%s: host reset_poll failure for %s.\n",
hwif->name, drive->name);
+ hwgroup->busy--;
return ide_stopped;
}
}
@@ -1179,6 +1180,7 @@
}
}
hwgroup->poll_timeout = 0; /* done polling */
+ hwgroup->busy--;
return ide_stopped;
}
@@ -1267,6 +1269,7 @@
#if OK_TO_RESET_CONTROLLER
if (!IDE_CONTROL_REG) {
spin_unlock_irqrestore(&ide_lock, flags);
+ hwgroup->busy--;
return ide_stopped;
}
@@ -1315,6 +1318,7 @@
ide_startstop_t ide_do_reset (ide_drive_t *drive)
{
+ HWGROUP(drive)->busy++;
return do_reset1(drive, 0);
}
diff -uwr linux-2.5.69-bk13/drivers/scsi/ide-scsi.c linux-2.5.69-bk13-wr/drivers/scsi/ide-scsi.c
--- linux-2.5.69-bk13/drivers/scsi/ide-scsi.c 2003-05-18 11:11:30.000000000 -0400
+++ linux-2.5.69-bk13-wr/drivers/scsi/ide-scsi.c 2003-05-18 14:23:47.000000000 -0400
@@ -270,7 +270,7 @@
printk("]\n");
}
-static int idescsi_check_condition(ide_drive_t *drive, struct request *failed_command)
+static int idescsi_check_condition(ide_drive_t *drive, idescsi_pc_t *failed_command)
{
idescsi_scsi_t *scsi = drive_to_idescsi(drive);
idescsi_pc_t *pc;
@@ -298,8 +298,8 @@
rq->flags = REQ_SENSE;
pc->timeout = jiffies + WAIT_READY;
/* NOTE! Save the failed packet command in "rq->buffer" */
- rq->buffer = (void *) failed_command->special;
- pc->scsi_cmd = ((idescsi_pc_t *) failed_command->special)->scsi_cmd;
+ rq->buffer = (void *) failed_command;
+ pc->scsi_cmd = failed_command->scsi_cmd;
if (test_bit(IDESCSI_LOG_CMD, &scsi->log)) {
printk ("ide-scsi: %s: queue cmd = ", drive->name);
hexdump(pc->c, 6);
@@ -307,6 +307,23 @@
return ide_do_drive_cmd(drive, rq, ide_preempt);
}
+ide_startstop_t idescsi_atapi_abort (ide_drive_t *drive, const char *msg)
+{
+ struct request *rq;
+
+ if (drive == NULL || (rq = HWGROUP(drive)->rq) == NULL)
+ return ide_stopped;
+ /* retry only "normal" I/O: */
+ if (rq->flags & (REQ_DRIVE_CMD | REQ_DRIVE_TASK)) {
+ rq->errors = 1;
+ ide_end_drive_cmd(drive, BUSY_STAT, 0);
+ return ide_stopped;
+ }
+ rq->errors |= ERROR_RESET;
+ DRIVER(drive)->end_request(drive, 0, 0);
+ return ide_stopped;
+}
+
static int idescsi_end_request (ide_drive_t *drive, int uptodate, int nrsecs)
{
idescsi_scsi_t *scsi = drive_to_idescsi(drive);
@@ -342,7 +359,7 @@
} else if (rq->errors) {
if (log)
printk ("ide-scsi: %s: check condition for %lu\n", drive->name, pc->scsi_cmd->serial_number);
- if (!idescsi_check_condition(drive, rq))
+ if (!idescsi_check_condition(drive, pc))
/* we started a request sense, so we'll be back, exit for now */
return 0;
pc->scsi_cmd->result = (CHECK_CONDITION << 1) | (DID_OK << 16);
@@ -536,12 +553,8 @@
set_bit(PC_DMA_OK, &pc->flags);
if (test_bit(IDESCSI_DRQ_INTERRUPT, &scsi->flags)) {
- if (HWGROUP(drive)->handler != NULL)
- BUG();
- ide_set_handler(drive, &idescsi_transfer_pc,
- get_timeout(pc), NULL);
/* Issue the packet command */
- HWIF(drive)->OUTB(WIN_PACKETCMD, IDE_COMMAND_REG);
+ ide_execute_command(drive, WIN_PACKETCMD, &idescsi_transfer_pc, get_timeout(pc), NULL);
return ide_started;
} else {
/* Issue the packet command */
@@ -633,6 +646,7 @@
.cleanup = idescsi_cleanup,
.do_request = idescsi_do_request,
.end_request = idescsi_end_request,
+ .abort = idescsi_atapi_abort,
.drives = LIST_HEAD_INIT(idescsi_driver.drives),
};
@@ -664,8 +678,6 @@
.ioctl = idescsi_ide_ioctl,
};
-static int idescsi_attach(ide_drive_t *drive);
-
static int idescsi_slave_configure(Scsi_Device * sdp)
{
/* Configure detected device */
@@ -846,13 +858,15 @@
return 1;
}
-static int idescsi_abort (Scsi_Cmnd *cmd)
+static int idescsi_scsi_eh_abort (Scsi_Cmnd *cmd)
{
- int countdown = 8;
+ int countdown = 120; /* maximum is 12 seconds because ide interrupt timeout is 10 sec. */
unsigned long flags;
idescsi_scsi_t *scsi = scsihost_to_idescsi(cmd->device->host);
ide_drive_t *drive = scsi->drive;
+ if (!drive)
+ return FAILED;
printk (KERN_ERR "ide-scsi: abort called for %lu\n", cmd->serial_number);
while (countdown--) {
/* is cmd active?
@@ -863,56 +877,85 @@
/* yep - let's give it some more time -
* we can do that, we're in _our_ error kernel thread */
spin_unlock_irqrestore(&ide_lock, flags);
- scsi_sleep(HZ);
+#if IDESCSI_DEBUG_LOG
+ printk(KERN_WARNING "ide-scsi: waiting in abort\n");
+#endif
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(HZ/10);
continue;
}
/* no, but is it queued in the ide subsystem? */
if (elv_queue_empty(&drive->queue)) {
spin_unlock_irqrestore(&ide_lock, flags);
+#if IDESCSI_DEBUG_LOG
+ printk(KERN_WARNING "ide-scsi: abort success return\n");
+#endif
return SUCCESS;
}
spin_unlock_irqrestore(&ide_lock, flags);
+ set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(HZ/10);
}
+#if IDESCSI_DEBUG_LOG
+ printk(KERN_WARNING "ide-scsi: abort fail return\n");
+#endif
return FAILED;
}
-static int idescsi_reset (Scsi_Cmnd *cmd)
+static int idescsi_scsi_eh_reset (Scsi_Cmnd *cmd)
{
+ int countdown = 10;
unsigned long flags;
struct request *req;
idescsi_scsi_t *idescsi = scsihost_to_idescsi(cmd->device->host);
ide_drive_t *drive = idescsi->drive;
+ if (!drive)
+ return FAILED;
printk (KERN_ERR "ide-scsi: reset called for %lu\n", cmd->serial_number);
- /* first null the handler for the drive and let any process
- * doing IO (on another CPU) run to (partial) completion
- * the lock prevents processing new requests */
+ /*
+ * Abort the current command on the group if there is one, taking care not to
+ * allow anything else to be queued and to die on the spot if we miss one somehow
+ */
spin_lock_irqsave(&ide_lock, flags);
- while (HWGROUP(drive)->handler) {
- HWGROUP(drive)->handler = NULL;
- schedule_timeout(1);
- }
- /* now nuke the drive queue */
- while ((req = elv_next_request(&drive->queue))) {
- blkdev_dequeue_request(req);
- end_that_request_last(req);
- }
+ DRIVER(drive)->abort(drive, "drive reset");
+ if (HWGROUP(drive)->handler)
+ BUG();
/* FIXME - this will probably leak memory */
HWGROUP(drive)->rq = NULL;
if (drive_to_idescsi(drive))
drive_to_idescsi(drive)->pc = NULL;
+ /*
+ * we use the busy flag to reserve the hwgroup for ourselves without holding
+ * the ide lock for a long time during the reset, reset will clear the busy
+ */
+ HWGROUP(drive)->busy = 1;
spin_unlock_irqrestore(&ide_lock, flags);
+#if IDESCSI_DEBUG_LOG
+ printk(KERN_WARNING "ide-scsi: drive->abort completed, now do_reset\n");
+#endif
/* finally, reset the drive (and its partner on the bus...) */
- ide_do_reset (drive);
+ (void) ide_do_reset (drive);
+ /* in theory, this can take 30 seconds, but ide_spin_wait_hwgroup waits only 3,
+ * usually, that is enough, but we call repeatedly, just to be covered */
+ while (ide_spin_wait_hwgroup(drive) && countdown--)
+ printk (KERN_INFO "ide-scsi: waiting for reset drive to complete\n");
+ /* for some reason when successful ide_spin_wait_hwgroup exits with ide_lock taken */
+ if (countdown) spin_unlock_irq(&ide_lock);
+ if (HWGROUP(drive)->handler) {
+ printk (KERN_CRIT "ide-scsi: reset drive did not complete in time\n");
+ return FAILED;
+ }
+#if IDESCSI_DEBUG_LOG
+ printk(KERN_WARNING "ide-scsi: reset success return\n");
+#endif
return SUCCESS;
}
static int idescsi_bios(struct scsi_device *sdev, struct block_device *bdev,
sector_t capacity, int *parm)
{
- idescsi_scsi_t *idescsi = scsihost_to_idescsi(sdev->host);
- ide_drive_t *drive = idescsi->drive;
+ ide_drive_t *drive = scsihost_to_idescsi(sdev->host)->drive;
if (drive->bios_cyl && drive->bios_head && drive->bios_sect) {
parm[0] = drive->bios_head;
@@ -929,8 +972,10 @@
.slave_configure = idescsi_slave_configure,
.ioctl = idescsi_ioctl,
.queuecommand = idescsi_queue,
- .eh_abort_handler = idescsi_abort,
- .eh_device_reset_handler = idescsi_reset,
+ .eh_abort_handler = idescsi_scsi_eh_abort,
+ .eh_device_reset_handler = idescsi_scsi_eh_reset,
+ .eh_bus_reset_handler = idescsi_scsi_eh_reset,
+ .eh_host_reset_handler = idescsi_scsi_eh_reset,
.bios_param = idescsi_bios,
.can_queue = 40,
.this_id = -1,
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: ide-scsi error handling
2003-05-18 23:07 ide-scsi error handling Willem Riede
@ 2003-05-19 13:01 ` Douglas Gilbert
2003-05-19 23:42 ` Willem Riede
2003-05-19 14:36 ` Randy.Dunlap
1 sibling, 1 reply; 6+ messages in thread
From: Douglas Gilbert @ 2003-05-19 13:01 UTC (permalink / raw)
To: wrlk; +Cc: linux-scsi, rddunlap
[-- Attachment #1: Type: text/plain, Size: 1011 bytes --]
Willem Riede wrote:
> Now that current 2.5 kernels boot again for me (haven't been able
> to run current kernels for months, but I digress) I've done some
> more work on the error handling in ide-scsi.
>
> The patch below works for me "most of the time". I'd appreciate if
> others try it, and report results, or inspect the patch and comment.
Willem,
As reported to you earlier I got the attached "sleeping
from illegal context" followed by an oops when I tried
to exercise ide-scsi with your patch on bk13. My setup:
- SMP kernel running on a UP machine with all kernel
debugging configured on
- PIO which is the default (can try with DMA later)
- cdrecord on an atapi writer running at the same time
as sg_dd on an atapi reader (i.e. 2 active ide-scsi
devices)
The machine was still usable after the oops (sg_dd
seg faulted).
This was the same test that broke my patches a few days
back. Randy Dunlap seemed to be able to get ide-scsi
to break with far less provocation.
Doug Gilbert
[-- Attachment #2: ide-scsi2569bk14wr.txt --]
[-- Type: text/plain, Size: 3428 bytes --]
Debug: sleeping function called from illegal context at include/linux/rwsem.h:43
Call Trace:
[<c011f92c>] __might_sleep+0x5c/0x5e
[<c011b2d8>] do_page_fault+0x78/0x4a8
[<e0823c4b>] idescsi_transfer_pc+0xfb/0x130 [ide_scsi]
[<c0233ce7>] start_request+0x107/0x160
[<c021ac05>] elv_queue_empty+0x25/0x30
[<c0233dc6>] ide_do_request+0x56/0x3e0
[<c021a9e3>] __elv_add_request+0x33/0x50
[<c011b260>] do_page_fault+0x0/0x4a8
[<c010a1c9>] error_code+0x2d/0x38
[<e0824534>] idescsi_queue+0x244/0x6d0 [ide_scsi]
[<c0246909>] __scsi_get_command+0x29/0xc0
[<c0246fc8>] scsi_dispatch_cmd+0x218/0x3e0
[<c0247320>] scsi_done+0x0/0x80
[<c024a6d0>] scsi_times_out+0x0/0x90
[<c024d946>] scsi_prep_fn+0xd6/0x150
[<c024ca70>] scsi_init_cmd_errh+0xa0/0xd0
[<c024dc06>] scsi_request_fn+0x246/0x410
[<c021a9e3>] __elv_add_request+0x33/0x50
[<c021ce1c>] blk_insert_request+0x9c/0xf0
[<c024c819>] scsi_do_req+0x49/0xa0
[<c024c673>] scsi_insert_special_req+0x33/0x40
[<e0893062>] sg_common_write+0x1c2/0x240 [sg]
[<e08944a0>] sg_cmd_done+0x0/0x330 [sg]
[<e0892df9>] sg_new_write+0x1e9/0x290 [sg]
[<c024d9fe>] scsi_request_fn+0x3e/0x410
[<e0893cbf>] sg_ioctl+0xbdf/0xe00 [sg]
[<c021aab6>] elv_next_request+0x16/0x100
[<c024d9fe>] scsi_request_fn+0x3e/0x410
[<c011da9a>] __wake_up_common+0x3a/0x60
[<c016f074>] kill_fasync+0x44/0x4d
[<e0894645>] sg_cmd_done+0x1a5/0x330 [sg]
[<c0247719>] scsi_finish_command+0xf9/0x150
[<c024748c>] scsi_softirq+0xec/0x240
[<c016f42d>] sys_ioctl+0x15d/0x2e6
[<c0118abd>] smp_apic_timer_interrupt+0xcd/0x140
[<c010975f>] syscall_call+0x7/0xb
Unable to handle kernel paging request at virtual address 6b6b6b7b
printing eip:
e0824534
*pde = 00000000
Oops: 0000 [#1]
CPU: 0
EIP: 0060:[<e0824534>] Tainted: G S
EFLAGS: 00010086
EIP is at idescsi_queue+0x244/0x6d0 [ide_scsi]
eax: 6b6b6b6b ebx: df65eb1c ecx: c0406b48 edx: 00000000
esi: dfd94998 edi: de1c3000 ebp: db233c84 esp: db233c3c
ds: 007b es: 007b ss: 0068
Process sg_dd (pid: 1900, threadinfo=db232000 task=ddcac080)
Stack: c0406b48 dffe587c 00000003 c0246909 dfdff500 00000020 0011b565 c150b400
00000000 dfd949e5 de3d7c38 dffe587c c0406b48 db233c98 0000ea60 de3d7aac
dfd94998 de3d7a80 db233cd4 c0246fc8 dfd94998 c0247320 c024a6d0 db233cb8
Call Trace:
[<c0246909>] __scsi_get_command+0x29/0xc0
[<c0246fc8>] scsi_dispatch_cmd+0x218/0x3e0
[<c0247320>] scsi_done+0x0/0x80
[<c024a6d0>] scsi_times_out+0x0/0x90
[<c024d946>] scsi_prep_fn+0xd6/0x150
[<c024ca70>] scsi_init_cmd_errh+0xa0/0xd0
[<c024dc06>] scsi_request_fn+0x246/0x410
[<c021a9e3>] __elv_add_request+0x33/0x50
[<c021ce1c>] blk_insert_request+0x9c/0xf0
[<c024c819>] scsi_do_req+0x49/0xa0
[<c024c673>] scsi_insert_special_req+0x33/0x40
[<e0893062>] sg_common_write+0x1c2/0x240 [sg]
[<e08944a0>] sg_cmd_done+0x0/0x330 [sg]
[<e0892df9>] sg_new_write+0x1e9/0x290 [sg]
[<c024d9fe>] scsi_request_fn+0x3e/0x410
[<e0893cbf>] sg_ioctl+0xbdf/0xe00 [sg]
[<c021aab6>] elv_next_request+0x16/0x100
[<c024d9fe>] scsi_request_fn+0x3e/0x410
[<c011da9a>] __wake_up_common+0x3a/0x60
[<c016f074>] kill_fasync+0x44/0x4d
[<e0894645>] sg_cmd_done+0x1a5/0x330 [sg]
[<c0247719>] scsi_finish_command+0xf9/0x150
[<c024748c>] scsi_softirq+0xec/0x240
[<c016f42d>] sys_ioctl+0x15d/0x2e6
[<c0118abd>] smp_apic_timer_interrupt+0xcd/0x140
[<c010975f>] syscall_call+0x7/0xb
Code: 8b 40 10 8b 70 34 81 7e 04 ad 4e ad de 74 1c c7 44 24 04 0f
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: ide-scsi error handling
2003-05-18 23:07 ide-scsi error handling Willem Riede
2003-05-19 13:01 ` Douglas Gilbert
@ 2003-05-19 14:36 ` Randy.Dunlap
2003-05-19 14:37 ` Randy.Dunlap
1 sibling, 1 reply; 6+ messages in thread
From: Randy.Dunlap @ 2003-05-19 14:36 UTC (permalink / raw)
To: wrlk; +Cc: linux-scsi
On Sun, 18 May 2003 19:07:06 -0400 Willem Riede <wrlk@riede.org> wrote:
| Now that current 2.5 kernels boot again for me (haven't been able
| to run current kernels for months, but I digress) I've done some
| more work on the error handling in ide-scsi.
|
| The patch below works for me "most of the time". I'd appreciate if
| others try it, and report results, or inspect the patch and comment.
|
| Against 2.5.69-bk13.
Hi,
Here's what I get after applying this patch.
Machine is dual-proc P4 1.7 GHz IBM Netfinity, with 2 ATA hard
drives (hda, hdb), CD-ROM (hdc), CD-RW (hdd), 1 SCSI hard drive.
scsi2 : SCSI host adapter emulation for IDE ATAPI devices
ide-scsi: abort called for 32
hdd: lost interrupt
ide-scsi: CoD != 0 in idescsi_pc_intr
hdd: DMA disabled
hdd: ATAPI reset complete
hdd: irq timeout: status=0x80 { Busy }
hdd: ATAPI reset complete
hdd: irq timeout: status=0x80 { Busy }
hdd: ATAPI reset complete
hdd: irq timeout: status=0x80 { Busy }
I rebooted after > 5 minutes of nothing else logged.
--
~Randy
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: ide-scsi error handling
2003-05-19 14:36 ` Randy.Dunlap
@ 2003-05-19 14:37 ` Randy.Dunlap
0 siblings, 0 replies; 6+ messages in thread
From: Randy.Dunlap @ 2003-05-19 14:37 UTC (permalink / raw)
To: wrlk; +Cc: linux-scsi
On Mon, 19 May 2003 07:36:26 -0700 "Randy.Dunlap" <rddunlap@osdl.org> wrote:
| On Sun, 18 May 2003 19:07:06 -0400 Willem Riede <wrlk@riede.org> wrote:
|
| | Now that current 2.5 kernels boot again for me (haven't been able
| | to run current kernels for months, but I digress) I've done some
| | more work on the error handling in ide-scsi.
| |
| | The patch below works for me "most of the time". I'd appreciate if
| | others try it, and report results, or inspect the patch and comment.
| |
| | Against 2.5.69-bk13.
|
| Hi,
|
| Here's what I get after applying this patch.
| Machine is dual-proc P4 1.7 GHz IBM Netfinity, with 2 ATA hard
| drives (hda, hdb), CD-ROM (hdc), CD-RW (hdd), 1 SCSI hard drive.
|
| scsi2 : SCSI host adapter emulation for IDE ATAPI devices
| ide-scsi: abort called for 32
| hdd: lost interrupt
| ide-scsi: CoD != 0 in idescsi_pc_intr
| hdd: DMA disabled
| hdd: ATAPI reset complete
| hdd: irq timeout: status=0x80 { Busy }
| hdd: ATAPI reset complete
| hdd: irq timeout: status=0x80 { Busy }
| hdd: ATAPI reset complete
| hdd: irq timeout: status=0x80 { Busy }
|
|
| I rebooted after > 5 minutes of nothing else logged.
I didn't make this clear. This is hanging during boot.
I didn't run any userspace programs.
--
~Randy
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: ide-scsi error handling
2003-05-19 13:01 ` Douglas Gilbert
@ 2003-05-19 23:42 ` Willem Riede
2003-05-20 11:24 ` Douglas Gilbert
0 siblings, 1 reply; 6+ messages in thread
From: Willem Riede @ 2003-05-19 23:42 UTC (permalink / raw)
To: dougg; +Cc: linux-scsi, rddunlap
On 2003.05.19 09:01, Douglas Gilbert wrote:
> Debug: sleeping function called from illegal context at include/linux/rwsem.h:43
> Call Trace:
> [<c011f92c>] __might_sleep+0x5c/0x5e
> [<c011b2d8>] do_page_fault+0x78/0x4a8
> [<e0823c4b>] idescsi_transfer_pc+0xfb/0x130 [ide_scsi]
A page fault while in idescsi_transfer_pc?!
What memory would be accessed that is allowed to be paged out?
By the way, I have never seen that problem. When ide-scsi fails for me, it
is in the same way Randy reports. While my change improves mean-time-to-hang
significantly on my machine, it obviously doesn't for Randy. Back to the
drawing board :-(
Thanks, Willem Riede.
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: ide-scsi error handling
2003-05-19 23:42 ` Willem Riede
@ 2003-05-20 11:24 ` Douglas Gilbert
0 siblings, 0 replies; 6+ messages in thread
From: Douglas Gilbert @ 2003-05-20 11:24 UTC (permalink / raw)
To: wrlk; +Cc: linux-scsi
[-- Attachment #1: Type: text/plain, Size: 1331 bytes --]
Willem Riede wrote:
> On 2003.05.19 09:01, Douglas Gilbert wrote:
>
>>Debug: sleeping function called from illegal context at include/linux/rwsem.h:43
>>Call Trace:
>> [<c011f92c>] __might_sleep+0x5c/0x5e
>> [<c011b2d8>] do_page_fault+0x78/0x4a8
>> [<e0823c4b>] idescsi_transfer_pc+0xfb/0x130 [ide_scsi]
>
>
> A page fault while in idescsi_transfer_pc?!
> What memory would be accessed that is allowed to be paged out?
>
> By the way, I have never seen that problem. When ide-scsi fails for me, it
> is in the same way Randy reports. While my change improves mean-time-to-hang
> significantly on my machine, it obviously doesn't for Randy. Back to the
> drawing board :-(
Willem,
When I tried today, my test went for a while then
failed with a timeout and an abort lockup (which
you reported as fixed but I don't have that fix):
hdb: irq timeout: status=0xd0 { Busy }
ide-scsi: abort called for 330982
hdb: ATAPI reset complete
<<machine lockup, alt-sysrq inactive>>
Attached is a patch to idescsi_queue(). Won't fix the
problems we are seeing now. Changes:
- returns 0 on error (not 1 which means "busy")
- yield DID_NO_CONNECT for channel, id or lun
invalid (this should fix the "responding to
multiple lun" problem often seen in lk 2.4)
- memset the whole of pc and rq to zero
Doug Gilbert
[-- Attachment #2: ide-scsi2569bk13wr_d1.diff --]
[-- Type: text/plain, Size: 1741 bytes --]
--- linux/drivers/scsi/ide-scsi.c 2003-05-19 12:30:34.000000000 +1000
+++ linux/drivers/scsi/ide-scsi.c2569bk13wr_d1 2003-05-20 20:30:46.000000000 +1000
@@ -795,11 +795,20 @@
static int idescsi_queue (Scsi_Cmnd *cmd, void (*done)(Scsi_Cmnd *))
{
+ struct scsi_device * sdev = cmd->device;
idescsi_scsi_t *scsi = scsihost_to_idescsi(cmd->device->host);
ide_drive_t *drive = scsi->drive;
struct request *rq = NULL;
idescsi_pc_t *pc = NULL;
+ if ((sdev->channel > 0) ||
+ (sdev->id >= sdev->host->max_id) ||
+ (sdev->lun >= sdev->host->max_lun)) {
+ printk(KERN_INFO "ide-scsi: channel:id:lun %d:%d:%d not "
+ "present\n", sdev->channel, sdev->id, sdev->lun);
+ cmd->result = DID_NO_CONNECT << 16;
+ goto abort1;
+ }
if (!drive) {
printk (KERN_ERR "ide-scsi: drive id %d not present\n", cmd->device->id);
goto abort;
@@ -811,9 +820,8 @@
printk (KERN_ERR "ide-scsi: %s: out of memory\n", drive->name);
goto abort;
}
-
- memset (pc->c, 0, 12);
- pc->flags = 0;
+ memset(pc, 0, sizeof(idescsi_pc_t));
+ memset(rq, 0, sizeof(struct request));
pc->rq = rq;
memcpy (pc->c, cmd->cmnd, cmd->cmd_len);
if (cmd->use_sg) {
@@ -846,16 +854,17 @@
rq->special = (char *) pc;
rq->bio = idescsi_dma_bio (drive, pc);
rq->flags = REQ_SPECIAL;
- spin_unlock_irq(cmd->device->host->host_lock);
+ spin_unlock_irq(sdev->host->host_lock);
(void) ide_do_drive_cmd (drive, rq, ide_end);
- spin_lock_irq(cmd->device->host->host_lock);
+ spin_lock_irq(sdev->host->host_lock);
return 0;
abort:
+ cmd->result = DID_ERROR << 16;
+abort1:
if (pc) kfree (pc);
if (rq) kfree (rq);
- cmd->result = DID_ERROR << 16;
done(cmd);
- return 1;
+ return 0;
}
static int idescsi_scsi_eh_abort (Scsi_Cmnd *cmd)
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2003-05-20 11:11 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2003-05-18 23:07 ide-scsi error handling Willem Riede
2003-05-19 13:01 ` Douglas Gilbert
2003-05-19 23:42 ` Willem Riede
2003-05-20 11:24 ` Douglas Gilbert
2003-05-19 14:36 ` Randy.Dunlap
2003-05-19 14:37 ` Randy.Dunlap
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox