linux-ide.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* PATA Sil680 Disabling IRQ
@ 2008-02-27  0:47 Fajun Chen
  2008-02-27  0:58 ` Jeff Garzik
  0 siblings, 1 reply; 9+ messages in thread
From: Fajun Chen @ 2008-02-27  0:47 UTC (permalink / raw)
  To: linux-ide@vger.kernel.org; +Cc: Mark Lord, Tejun Heo

[-- Attachment #1: Type: text/plain, Size: 1763 bytes --]

Hi All,

Attached is the source code to issue Identify Device through sg (courtesy of
Mark Lord).  I intentionally changed the data transfer direction from
SG_DXFER_FROM_DEV to SG_DXFER_TO_DEV to expose the problem.  When the
code is run on a SATA Sil3124 controller, it works well. But when
it is run on a PATA Sil680, the IRQ is disabled right away (see dmesg log
below). I have an old kernel (2.6.18-rc2), but I suspect the same problem
exists in the latest kernel release as well.  Please run the attached code on
your system and see what happens.

dmesg log:
[1204070913 936910] irq 15: nobody cared (try booting with the "irqpoll" option)
[1204070913 936978]  [<c0127037>] __report_bad_irq+0x2b/0x69
[1204070913 937039]  [<c012720f>] note_interrupt+0x19a/0x1d3
[1204070913 937089]  [<c0126848>] handle_IRQ_event+0x21/0x47
[1204070913 937138]  [<c01268de>] __do_IRQ+0x70/0x9f
[1204070913 937183]  [<c01044a9>] do_IRQ+0x43/0x52
[1204070913 937236]  [<c0102d7a>] common_interrupt+0x1a/0x20
[1204070913 937283]  [<c0114d58>] __do_softirq+0x27/0x6e
[1204070913 937341]  [<c0114dc1>] do_softirq+0x22/0x26
[1204070913 937386]  [<c01044ae>] do_IRQ+0x48/0x52
[1204070913 937430]  [<c0102d7a>] common_interrupt+0x1a/0x20
[1204070913 937478]  [<c01014b4>] default_idle+0x31/0x59
[1204070913 937523]  [<c010151b>] cpu_idle+0x3f/0x57
[1204070913 937568]  [<c02de63a>] start_kernel+0x2ba/0x2bc
[1204070913 937617] handlers:
[1204070913 937651] [<c01e2bb0>] (ata_interrupt+0x0/0x173)
[1204070913 937703] Disabling IRQ #15
[1204070917 801261] ata5 port frozen
[1204070917 801335] ata5.00: exception Emask 0x0 SAct 0x0 SErr 0x0
action 0x0 frozen
[1204070917 801407] ata5.00: tag 0 cmd 0xec Emask 0x6 stat 0x58 err
0x0 (timeout)
[1204070917 801476] ata5.00: lba 0x0 hob_lba 0x0 device 0x0

Thanks,
Fajun

[-- Attachment #2: sg_identify.c --]
[-- Type: application/octet-stream, Size: 3676 bytes --]

/*
 * This code is copyright 2007 by Mark Lord,
 * and is made available to all under the terms
 * of the GNU General Public License v2.
 */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/types.h>

#include <linux/fs.h>
#include <linux/hdreg.h>
#include <scsi/scsi.h>
#include <scsi/sg.h>
#include <sys/mman.h>

/* 64-bit unsigned shorthand (not referenced elsewhere in this listing) */
typedef unsigned long long u64;

/* ATA command opcodes; passed to sg_issue() as ata_op */
enum {
	ATA_CMD_PIO_IDENTIFY		= 0xec,
	ATA_CMD_PIO_PIDENTIFY		= 0xa1,
};

/* normal sector size (bytes) for PIO/DMA */
enum {
	ATA_SECT_SIZE			= 512,
};

/* ATA_16 pass-through: value stored in cdb[0], and the cdb length in bytes */
enum {
	ATA_16				= 0x85,
	ATA_16_LEN			= 16,
};

/* device register / addressing flags */
enum {
	ATA_DEV_REG_LBA			= 0x40,	/* bit 6 */
	ATA_LBA48			= 1,
};

/*
 * data transfer protocols; only basic PIO and DMA actually work.
 * Each value is pre-shifted left by one bit, ready to be stored in cdb[1].
 */
enum {
	ATA_PROTO_NON_DATA		=  3 << 1,
	ATA_PROTO_PIO_IN		=  4 << 1,
	ATA_PROTO_PIO_OUT		=  5 << 1,
	ATA_PROTO_DMA			=  6 << 1,
	ATA_PROTO_UDMA_IN		= 11 << 1,	/* unsupported */
	ATA_PROTO_UDMA_OUT		= 12 << 1,	/* unsupported */
};

/*
 * Taskfile layout for ATA_16 cdb (LBA28/LBA48):
 *
 *	cdb[ 4] = feature
 *	cdb[ 6] = nsect
 *	cdb[ 8] = lbal
 *	cdb[10] = lbam
 *	cdb[12] = lbah
 *	cdb[13] = device
 *	cdb[14] = command
 *
 * "high order byte" (hob) fields for LBA48 commands:
 *
 *	cdb[ 3] = hob_feature
 *	cdb[ 5] = hob_nsect
 *	cdb[ 7] = hob_lbal
 *	cdb[ 9] = hob_lbam
 *	cdb[11] = hob_lbah
 *
 * dxfer_direction choices:
 *
 *	SG_DXFER_TO_DEV		(writing to drive)
 *	SG_DXFER_FROM_DEV	(reading from drive)
 *	SG_DXFER_NONE		(non-data commands)
 */

/*
 * Send one single-sector ATA command through the SG_IO ioctl using an
 * ATA_16 pass-through cdb.
 *
 *	fd	open descriptor handed to ioctl(SG_IO)
 *	ata_op	ATA command opcode, stored in cdb[14]
 *	buf	data buffer; ATA_SECT_SIZE bytes are described to the kernel
 *
 * Returns 0 when the SCSI, host and driver status are all zero, and -1
 * otherwise; every failure path writes a diagnostic to stderr.
 *
 * NOTE: the cdb selects the PIO-in protocol while dxfer_direction is
 * SG_DXFER_TO_DEV.  This mismatch is deliberate: it is the reproducer for
 * the reported IRQ problem (the normal direction would be
 * SG_DXFER_FROM_DEV), so do not "fix" it here.
 */
static int sg_issue (int fd, unsigned char ata_op, void *buf)
{
	/* cdb[0] carries the ATA_16 opcode; remaining bytes start out zero */
	unsigned char cdb[ATA_16_LEN] = { ATA_16 };
	unsigned char sense[32];
	const unsigned int sector_count = 1;
	struct sg_io_hdr io;

	memset(sense, 0, sizeof(sense));

	cdb[ 1] = ATA_PROTO_PIO_IN;
	cdb[ 6] = sector_count;
	cdb[14] = ata_op;

	memset(&io, 0, sizeof(io));
	io.interface_id		= 'S';
	io.dxfer_direction	= SG_DXFER_TO_DEV;	/* intentional, see above */
	io.cmdp			= cdb;
	io.cmd_len		= ATA_16_LEN;
	io.dxferp		= buf;
	io.dxfer_len		= sector_count * ATA_SECT_SIZE;
	io.sbp			= sense;
	io.mx_sb_len		= sizeof(sense);
	io.timeout		= 5000; /* milliseconds */

	if (ioctl(fd, SG_IO, &io) < 0) {
		perror("ioctl(SG_IO)");
		return -1;
	}

	if (io.status > 0) {
		/*
		 * SCSI status is non-zero: report the sense bytes plus two
		 * bytes from the descriptor starting at sense[8]
		 * (presumably the ATA status and error registers -- confirm
		 * against the SAT specification).
		 */
		const unsigned char *desc = &sense[8];

		fprintf(stderr, "SG_IO error: SCSI sense=0x%x/%02x/%02x, ATA=0x%02x/%02x\n",
			sense[1] & 0xf, sense[2], sense[3], desc[13], desc[3]);
		return -1;
	}

	if (io.status == 0 && io.host_status == 0 && io.driver_status == 0)
		return 0; /* success */

	/* host or driver level failure we don't know how to decode yet */
	fprintf(stderr, "SG_IO returned: SCSI status=0x%x, host_status=0x%x, driver_status=0x%x\n",
		io.status, io.host_status, io.driver_status);
	return -1;
}

/*
 * Issue ATA_CMD_PIO_IDENTIFY to the device named on the command line,
 * falling back to ATA_CMD_PIO_PIDENTIFY if that fails, and hex-dump the
 * 256 returned 16-bit words, eight per line.
 *
 * Usage: <prog> <devpath>
 *
 * Exit status is 0 when one of the two commands succeeded and 1 on any
 * error (bad arguments, mmap/open failure, or both commands failing).
 */
int main (int argc, char *argv[])
{
	const char *devpath;
	unsigned short *id;
	size_t maplen;
	int i, rc, fd;

	/* validate the command line before acquiring any resources */
	if (argc != 2) {
		fprintf(stderr, "%s: bad/missing parm: expected <devpath>\n", argv[0]);
		return 1;
	}
	devpath = argv[1];

	/* one page of shared anonymous memory serves as the data buffer */
	maplen = getpagesize();
	id = mmap(NULL, maplen, PROT_READ|PROT_WRITE, MAP_SHARED|MAP_ANONYMOUS, -1, 0);
	if (id == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	fd = open(devpath, O_RDWR|O_NONBLOCK);
	if (fd == -1) {
		perror(devpath);
		munmap(id, maplen);
		return 1;
	}

	rc = sg_issue(fd, ATA_CMD_PIO_IDENTIFY, id);
	if (rc != 0)
		rc = sg_issue(fd, ATA_CMD_PIO_PIDENTIFY, id);

	if (rc == 0) {
		/* 256 words, printed 8 per line */
		for (i = 0; i < 256; i += 8) {
			const unsigned short *d = id + i;

			printf("%04x %04x %04x %04x %04x %04x %04x %04x\n",
				d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7]);
		}
	}

	/* release the descriptor and the buffer on every path */
	close(fd);
	munmap(id, maplen);
	return (rc == 0) ? 0 : 1;
}

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: PATA Sil680 Disabling IRQ
  2008-02-27  0:47 PATA Sil680 Disabling IRQ Fajun Chen
@ 2008-02-27  0:58 ` Jeff Garzik
  2008-02-28  0:20   ` Fajun Chen
  0 siblings, 1 reply; 9+ messages in thread
From: Jeff Garzik @ 2008-02-27  0:58 UTC (permalink / raw)
  To: Fajun Chen; +Cc: linux-ide@vger.kernel.org, Mark Lord, Tejun Heo

Fajun Chen wrote:
> Hi All,
> 
> Attached is the source code to Identify Device through sg (courtesy of
> Mark Lord).  I intentionally change the data transfer from
> SG_DXFER_FROM_DEV to SG_DXFER_TO_DEV to expose the problem.  When the
> code is run on SATA Sil3124 controller,  it's working well. But when
> it is run on PATA Sil680,  IRQ was disabled right away (see dmesg log
> below). I have a old kernel 2.6.18 rc2 but I suspect the same problem
> exists in latest kernel release as well.  Please execute above code in
> your system and see what happens.

root has permission to do anything -- including misprogram the device 
resulting in a lockup / data corruption / nuclear explosion.

We do not work around problems that only present themselves when a 
privileged administrator is doing something really wrong.

	Jeff




^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: PATA Sil680 Disabling IRQ
  2008-02-27  0:58 ` Jeff Garzik
@ 2008-02-28  0:20   ` Fajun Chen
  2008-02-28 20:22     ` Alan Cox
  0 siblings, 1 reply; 9+ messages in thread
From: Fajun Chen @ 2008-02-28  0:20 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: linux-ide@vger.kernel.org, Mark Lord, Tejun Heo

On 2/26/08, Jeff Garzik <jeff@garzik.org> wrote:
> Fajun Chen wrote:
> > Hi All,
> >
> > Attached is the source code to Identify Device through sg (courtesy of
> > Mark Lord).  I intentionally change the data transfer from
> > SG_DXFER_FROM_DEV to SG_DXFER_TO_DEV to expose the problem.  When the
> > code is run on SATA Sil3124 controller,  it's working well. But when
> > it is run on PATA Sil680,  IRQ was disabled right away (see dmesg log
> > below). I have a old kernel 2.6.18 rc2 but I suspect the same problem
> > exists in latest kernel release as well.  Please execute above code in
> > your system and see what happens.
>
> root has permission to do anything -- including misprogram the device
> resulting in a lockup / data corruption / nuclear explosion.
>
> We do not work around problems that only present themselves when a
> privileged administrator is doing something really wrong.
>

PIO reads/writes with the wrong direction are not the only failure mode.
The PATA Sil680 also ends up with its IRQ disabled on some unsupported
commands such as Trusted Send (0x5E), even with perfect TF data. Given
that some ATA commands are optional, we may hit this
trap even with well-programmed code.

What would it take to harden the PATA ISR code such that it fails more gracefully?

Thanks,
Fajun

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: PATA Sil680 Disabling IRQ
  2008-02-28  0:20   ` Fajun Chen
@ 2008-02-28 20:22     ` Alan Cox
  2008-02-28 22:11       ` Fajun Chen
  0 siblings, 1 reply; 9+ messages in thread
From: Alan Cox @ 2008-02-28 20:22 UTC (permalink / raw)
  To: Fajun Chen; +Cc: Jeff Garzik, linux-ide@vger.kernel.org, Mark Lord, Tejun Heo

> PIO read/writes with wrong direction is not the only failure mode,
> PATA sil680 also failed with disabling IRQ with some unsupported
> commands such as Trusted Send (0x5E) even with perfect TF data. Given
> that some ATA commands are optional,  we may have a chance to hit the
> trap even with well programmed code.

That sounds very strange - I regularly test PATA controllers with
unsupported commands and see the correct 0x04 abort patterns.

> What would take to harden the PATA ISR code such that it fails more gracefully?

First thing would be to work out why your system is behaving differently
from the others. What is the trigger here?

Alan

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: PATA Sil680 Disabling IRQ
  2008-02-28 20:22     ` Alan Cox
@ 2008-02-28 22:11       ` Fajun Chen
  2008-02-28 22:24         ` Jeff Garzik
  2008-02-28 23:10         ` Alan Cox
  0 siblings, 2 replies; 9+ messages in thread
From: Fajun Chen @ 2008-02-28 22:11 UTC (permalink / raw)
  To: Alan Cox; +Cc: Jeff Garzik, linux-ide@vger.kernel.org, Mark Lord, Tejun Heo

On 2/28/08, Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
> > PIO read/writes with wrong direction is not the only failure mode,
>  > PATA sil680 also failed with disabling IRQ with some unsupported
>  > commands such as Trusted Send (0x5E) even with perfect TF data. Given
>  > that some ATA commands are optional,  we may have a chance to hit the
>  > trap even with well programmed code.
>
>
> That sounds very strange - I regularly test PATA controlles with
>  unsupported commands and see the correct 0x04 abort patterns.
>
>
>  > What would take to harden the PATA ISR code such that it fails more gracefully?
>
>
> First thing would be to work out why your system is behaving differently
>  to the others. What is the trigger here.
>
Most unsupported commands will be aborted by the drive.
However, some unsupported commands may not be.  I suspect these
bad commands are the new ones in ATA8 issued to drives with old
firmware. For instance, can you try command 0x5E (Trusted Send PIO
data out) with sector count set to 1 and see what happens?

The blame is probably on the drives, which should have aborted these
commands. But the reality is that libata has to handle a variety of drives,
including ones with old firmware.  So the question here is whether
the libata PATA code can be more fault tolerant. It seems the weakest link
is PATA PIO, since I have not been able to reproduce the IRQ
disabling problem with DMA operations.

Just my 2 cents.

Thanks,
Fajun

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: PATA Sil680 Disabling IRQ
  2008-02-28 22:11       ` Fajun Chen
@ 2008-02-28 22:24         ` Jeff Garzik
  2008-02-28 23:10         ` Alan Cox
  1 sibling, 0 replies; 9+ messages in thread
From: Jeff Garzik @ 2008-02-28 22:24 UTC (permalink / raw)
  To: Fajun Chen; +Cc: Alan Cox, linux-ide@vger.kernel.org, Mark Lord, Tejun Heo

Fajun Chen wrote:
> On 2/28/08, Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
>>> PIO read/writes with wrong direction is not the only failure mode,
>>  > PATA sil680 also failed with disabling IRQ with some unsupported
>>  > commands such as Trusted Send (0x5E) even with perfect TF data. Given
>>  > that some ATA commands are optional,  we may have a chance to hit the
>>  > trap even with well programmed code.
>>
>>
>> That sounds very strange - I regularly test PATA controlles with
>>  unsupported commands and see the correct 0x04 abort patterns.
>>
>>
>>  > What would take to harden the PATA ISR code such that it fails more gracefully?
>>
>>
>> First thing would be to work out why your system is behaving differently
>>  to the others. What is the trigger here.
>>
> For most of unsupported commands, it will be aborted by drive.
> However, for some unsupported commands, it may not.  I suspect these
> bad commands are the new ones in ATA8 issued to some drives with old
> firmware. For instance, can you try command 0x5E (Trusted Send PIO
> data out) with sector count set to 1 and see what happens?
> 
> The blame is probably on drives which should have aborted these
> commands. But the reality is that libata will handle variety of drives
> including the ones with old firmware.  So the question here is whether
> libata PATA code can be more fault tolerate. It seems the weakest link
> is on PATA PIO since I have not been able to reproduce the IRQ
> disabling problem on DMA operations.


With regards to many controllers -- and I think sil680 is one of those
-- they snoop commands sent to the device, in order to set some internal
parameters (generally, guessing the taskfile protocol of the command).

As such, you cannot assume that all controllers will gracefully abort 
unknown commands.

Again, we are in the realm of privileged administrators creating 
scenarios which are not designed for general production use.

I support efforts to "harden" libata, but it's a judgement call when it
comes to random, unpredictable scenarios that only root may create.
There are an infinite variety of such scenarios.

	Jeff



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: PATA Sil680 Disabling IRQ
  2008-02-28 22:11       ` Fajun Chen
  2008-02-28 22:24         ` Jeff Garzik
@ 2008-02-28 23:10         ` Alan Cox
  2008-02-29  1:07           ` Fajun Chen
  1 sibling, 1 reply; 9+ messages in thread
From: Alan Cox @ 2008-02-28 23:10 UTC (permalink / raw)
  To: Fajun Chen; +Cc: Jeff Garzik, linux-ide@vger.kernel.org, Mark Lord, Tejun Heo

> For most of unsupported commands, it will be aborted by drive.
> However, for some unsupported commands, it may not.  I suspect these
> bad commands are the new ones in ATA8 issued to some drives with old
> firmware. For instance, can you try command 0x5E (Trusted Send PIO
> data out) with sector count set to 1 and see what happens?

Well the kernel as of 2.6.24 defaults to blocking treacherous computing
commands 8)

> The blame is probably on drives which should have aborted these
> commands. But the reality is that libata will handle variety of drives
> including the ones with old firmware.  So the question here is whether
> libata PATA code can be more fault tolerate. It seems the weakest link
> is on PATA PIO since I have not been able to reproduce the IRQ
> disabling problem on DMA operations.

If you send crap to a drive you will get junk as a result. Only the
superuser can do this so that behaviour is fine. The superuser can also
crash the machine a million other ways. End users cannot send arbitrary
commands to the drive.

Ditto they may know that an "unsupported" command for their ATA version
is actually a vendor private command for the specific drive they have.

Alan

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: PATA Sil680 Disabling IRQ
  2008-02-28 23:10         ` Alan Cox
@ 2008-02-29  1:07           ` Fajun Chen
  2008-02-29 11:21             ` Alan Cox
  0 siblings, 1 reply; 9+ messages in thread
From: Fajun Chen @ 2008-02-29  1:07 UTC (permalink / raw)
  To: Alan Cox; +Cc: Jeff Garzik, linux-ide@vger.kernel.org, Mark Lord, Tejun Heo

On 2/28/08, Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
> > For most of unsupported commands, it will be aborted by drive.
>  > However, for some unsupported commands, it may not.  I suspect these
>  > bad commands are the new ones in ATA8 issued to some drives with old
>  > firmware. For instance, can you try command 0x5E (Trusted Send PIO
>  > data out) with sector count set to 1 and see what happens?
>
>
> Well the kernel as of 2.6.24 defaults to blocking treacherous computing
>  commands 8)
>
Whether a command is "treacherous" really depends.  For instance,
command 0x5E is treacherous for PATA drives not supporting it but is
good on security drives supporting it.

In a perfect world, host would know what exact command to issue. But
in reality, Identify Device may not show every feature set supported,
which is the scenario I run into.  I have to use the "try and see"
approach on some drives where I tripped on the IRQ disabling problem.

>
>  > The blame is probably on drives which should have aborted these
>  > commands. But the reality is that libata will handle variety of drives
>  > including the ones with old firmware.  So the question here is whether
>  > libata PATA code can be more fault tolerate. It seems the weakest link
>  > is on PATA PIO since I have not been able to reproduce the IRQ
>  > disabling problem on DMA operations.
>
>
> If you send crap to a drive you will get junk as a result. Only the
>  superuser can do this so that behaviour is fine. The superuser can also
>  crash the machine a million other ways. End users cannot send arbitary
>  commands to the drive.
>
>  Ditto they may know that an "unsupported" command for their ATA version
>  is actually a vendor private command for the specific drive they have.
>
I agree with you and Jeff that we can not prevent superuser from
corrupting the system.  What I want to understand more is if there's
room to harden PATA PIO code in libata.  For instance, when I set
wrong data transfer direction in DMA read, I got command timeout,
which to me is a more graceful failure than IRQ disabling.  I'm not
sure if the difference of failure mode between PIO and DMA is caused
by controller or software.  If the cause is software,  it would be
nice to close the gap.

Thanks,
Fajun

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: PATA Sil680 Disabling IRQ
  2008-02-29  1:07           ` Fajun Chen
@ 2008-02-29 11:21             ` Alan Cox
  0 siblings, 0 replies; 9+ messages in thread
From: Alan Cox @ 2008-02-29 11:21 UTC (permalink / raw)
  To: Fajun Chen; +Cc: Jeff Garzik, linux-ide@vger.kernel.org, Mark Lord, Tejun Heo

> I agree with you and Jeff that we can not prevent superuser from
> corrupting the system.  What I want to understand more is if there's
> room to harden PATA PIO code in libata.  For instance, when I set
> wrong data transfer direction in DMA read, I got command timeout,
> which to me is a more graceful failure than IRQ disabling.  I'm not
> sure if the difference of failure mode between PIO and DMA is caused
> by controller or software.  If the cause is software,  it would be
> nice to close the gap.

It's almost impossible to tell - I can't duplicate the case you see. When
I issue wrong commands I see the expected timeout/fail. It may be that
Tejun's drain patches for the PIO fifo are going to help in your case; I
don't know. Nothing obvious in the SIL680 docs hit me.

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2008-02-29 11:33 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-02-27  0:47 PATA Sil680 Disabling IRQ Fajun Chen
2008-02-27  0:58 ` Jeff Garzik
2008-02-28  0:20   ` Fajun Chen
2008-02-28 20:22     ` Alan Cox
2008-02-28 22:11       ` Fajun Chen
2008-02-28 22:24         ` Jeff Garzik
2008-02-28 23:10         ` Alan Cox
2008-02-29  1:07           ` Fajun Chen
2008-02-29 11:21             ` Alan Cox

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).