* [PATCH] ahci: improve and limit spurious interrupt messages
@ 2007-01-15 9:01 Tejun Heo
2007-01-16 6:52 ` [PATCH] ahci: improve and limit spurious interrupt messages, take#2 Tejun Heo
0 siblings, 1 reply; 9+ messages in thread
From: Tejun Heo @ 2007-01-15 9:01 UTC (permalink / raw)
To: Jeff Garzik, linux-ide
We're still seeing a lot of issues with NCQ implementation in drive
firmwares. Sprious FISes during NCQ command phase occur on many
drives and some of them seem potentially dangerous (at least to me).
Until we find the solution, spurious messages can give us more info.
Improve and limit them such that more info can be reported while not
disturbing users too much.
Signed-off-by: Tejun Heo <htejun@gmail.com>
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 5998f74..00c6bcc 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -75,6 +75,7 @@ enum {
AHCI_CMD_CLR_BUSY = (1 << 10),
RX_FIS_D2H_REG = 0x40, /* offset of D2H Register FIS data */
+ RX_FIS_SDB = 0x58, /* offset of SDB FIS data */
RX_FIS_UNK = 0x60, /* offset of Unknown FIS data */
board_ahci = 0,
@@ -202,6 +203,10 @@ struct ahci_port_priv {
dma_addr_t cmd_tbl_dma;
void *rx_fis;
dma_addr_t rx_fis_dma;
+ /* for NCQ spurious interrupt analysis */
+ int ncq_saw_spurious_sdb_cnt;
+ unsigned int ncq_saw_d2h:1;
+ unsigned int ncq_saw_dmas:1;
};
static u32 ahci_scr_read (struct ata_port *ap, unsigned int sc_reg);
@@ -1126,6 +1131,7 @@ static void ahci_host_intr(struct ata_port *ap)
void __iomem *mmio = ap->host->mmio_base;
void __iomem *port_mmio = ahci_port_base(mmio, ap->port_no);
struct ata_eh_info *ehi = &ap->eh_info;
+ struct ahci_port_priv *pp = ap->private_data;
u32 status, qc_active;
int rc;
@@ -1154,17 +1160,43 @@ static void ahci_host_intr(struct ata_port *ap)
/* hmmm... a spurious interupt */
- /* some devices send D2H reg with I bit set during NCQ command phase */
- if (ap->sactive && (status & PORT_IRQ_D2H_REG_FIS))
+ /* if !NCQ, ignore. No modern ATA device has broken HSM
+ * implementation for non-NCQ commands.
+ */
+ if (!ap->sactive)
return;
- /* ignore interim PIO setup fis interrupts */
- if (ata_tag_valid(ap->active_tag) && (status & PORT_IRQ_PIOS_FIS))
- return;
+ if (status & PORT_IRQ_D2H_REG_FIS) {
+ if (!pp->ncq_saw_d2h)
+ ata_port_printk(ap, KERN_INFO,
+ "D2H reg with I during NCQ, "
+ "this message won't be printed again\n");
+ pp->ncq_saw_d2h = 1;
+ } else if (status & PORT_IRQ_DMAS_FIS) {
+ if (!pp->ncq_saw_dmas)
+ ata_port_printk(ap, KERN_INFO,
+ "DMAS FIS during NCQ, "
+ "this message won't be printed again\n");
+ pp->ncq_saw_dmas = 1;
+ } else if (status & PORT_IRQ_SDB_FIS &&
+ pp->ncq_saw_spurious_sdb_cnt < 10) {
+ /* SDB FIS containing spurious completions might be
+ * dangerous, we need to know more about them. Print
+ * more of it.
+ */
+ const u32 *f = pp->rx_fis + RX_FIS_SDB;
+
+ pp->ncq_saw_spurious_sdb_cnt++;
- if (ata_ratelimit())
+ ata_port_printk(ap, KERN_INFO, "Spurious SDB FIS during NCQ "
+ "issue=0x%x SAct=0x%x FIS=%08x:%08x%s\n",
+ readl(port_mmio + PORT_CMD_ISSUE),
+ readl(port_mmio + PORT_SCR_ACT), f[0], f[1],
+ pp->ncq_saw_spurious_sdb_cnt < 10 ?
+ "" : ", shutting up");
+ } else
ata_port_printk(ap, KERN_INFO, "spurious interrupt "
- "(irq_stat 0x%x active_tag %d sactive 0x%x)\n",
+ "(irq_stat 0x%x active_tag 0x%x sactive 0x%x)\n",
status, ap->active_tag, ap->sactive);
}
^ permalink raw reply related [flat|nested] 9+ messages in thread* [PATCH] ahci: improve and limit spurious interrupt messages, take#2
2007-01-15 9:01 [PATCH] ahci: improve and limit spurious interrupt messages Tejun Heo
@ 2007-01-16 6:52 ` Tejun Heo
2007-01-25 1:24 ` Jeff Garzik
0 siblings, 1 reply; 9+ messages in thread
From: Tejun Heo @ 2007-01-16 6:52 UTC (permalink / raw)
To: Jeff Garzik, linux-ide
We're still seeing a lot of issues with NCQ implementation in drive
firmwares. Sprious FISes during NCQ command phase occur on many
drives and some of them seem potentially dangerous (at least to me).
Until we find the solution, spurious messages can give us more info.
Improve and limit them such that more info can be reported while not
disturbing users too much.
Signed-off-by: Tejun Heo <htejun@gmail.com>
---
Updated to not use bitfields as requested. Thanks.
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 5998f74..d5ea1c3 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -75,6 +75,7 @@ enum {
AHCI_CMD_CLR_BUSY = (1 << 10),
RX_FIS_D2H_REG = 0x40, /* offset of D2H Register FIS data */
+ RX_FIS_SDB = 0x58, /* offset of SDB FIS data */
RX_FIS_UNK = 0x60, /* offset of Unknown FIS data */
board_ahci = 0,
@@ -202,6 +203,9 @@ struct ahci_port_priv {
dma_addr_t cmd_tbl_dma;
void *rx_fis;
dma_addr_t rx_fis_dma;
+ /* for NCQ spurious interrupt analysis */
+ int spurious_sdb_cnt;
+ u32 seen_status;
};
static u32 ahci_scr_read (struct ata_port *ap, unsigned int sc_reg);
@@ -1126,6 +1130,7 @@ static void ahci_host_intr(struct ata_port *ap)
void __iomem *mmio = ap->host->mmio_base;
void __iomem *port_mmio = ahci_port_base(mmio, ap->port_no);
struct ata_eh_info *ehi = &ap->eh_info;
+ struct ahci_port_priv *pp = ap->private_data;
u32 status, qc_active;
int rc;
@@ -1154,17 +1159,40 @@ static void ahci_host_intr(struct ata_port *ap)
/* hmmm... a spurious interupt */
- /* some devices send D2H reg with I bit set during NCQ command phase */
- if (ap->sactive && (status & PORT_IRQ_D2H_REG_FIS))
+ /* if !NCQ, ignore. No modern ATA device has broken HSM
+ * implementation for non-NCQ commands.
+ */
+ if (!ap->sactive)
return;
- /* ignore interim PIO setup fis interrupts */
- if (ata_tag_valid(ap->active_tag) && (status & PORT_IRQ_PIOS_FIS))
- return;
+ if ((status & PORT_IRQ_D2H_REG_FIS) &&
+ !(pp->seen_status & PORT_IRQ_D2H_REG_FIS)) {
+ ata_port_printk(ap, KERN_INFO, "D2H reg with I during NCQ, "
+ "this message won't be printed again\n");
+ pp->seen_status |= PORT_IRQ_D2H_REG_FIS;
+ } else if ((status & PORT_IRQ_DMAS_FIS) &&
+ !(pp->seen_status & PORT_IRQ_DMAS_FIS)) {
+ ata_port_printk(ap, KERN_INFO, "DMAS FIS during NCQ, "
+ "this message won't be printed again\n");
+ pp->seen_status |= PORT_IRQ_DMAS_FIS;
+ } else if (status & PORT_IRQ_SDB_FIS && pp->spurious_sdb_cnt < 10) {
+ /* SDB FIS containing spurious completions might be
+ * dangerous, we need to know more about them. Print
+ * more of it.
+ */
+ const u32 *f = pp->rx_fis + RX_FIS_SDB;
+
+ pp->spurious_sdb_cnt++;
- if (ata_ratelimit())
+ ata_port_printk(ap, KERN_INFO, "Spurious SDB FIS during NCQ "
+ "issue=0x%x SAct=0x%x FIS=%08x:%08x%s\n",
+ readl(port_mmio + PORT_CMD_ISSUE),
+ readl(port_mmio + PORT_SCR_ACT), f[0], f[1],
+ pp->spurious_sdb_cnt < 10 ?
+ "" : ", shutting up");
+ } else
ata_port_printk(ap, KERN_INFO, "spurious interrupt "
- "(irq_stat 0x%x active_tag %d sactive 0x%x)\n",
+ "(irq_stat 0x%x active_tag 0x%x sactive 0x%x)\n",
status, ap->active_tag, ap->sactive);
}
^ permalink raw reply related [flat|nested] 9+ messages in thread* Re: [PATCH] ahci: improve and limit spurious interrupt messages, take#2
2007-01-16 6:52 ` [PATCH] ahci: improve and limit spurious interrupt messages, take#2 Tejun Heo
@ 2007-01-25 1:24 ` Jeff Garzik
2007-01-25 7:55 ` Tejun Heo
0 siblings, 1 reply; 9+ messages in thread
From: Jeff Garzik @ 2007-01-25 1:24 UTC (permalink / raw)
To: Tejun Heo; +Cc: linux-ide
Tejun Heo wrote:
> - /* ignore interim PIO setup fis interrupts */
> - if (ata_tag_valid(ap->active_tag) && (status & PORT_IRQ_PIOS_FIS))
> - return;
> + if ((status & PORT_IRQ_D2H_REG_FIS) &&
> + !(pp->seen_status & PORT_IRQ_D2H_REG_FIS)) {
> + ata_port_printk(ap, KERN_INFO, "D2H reg with I during NCQ, "
> + "this message won't be printed again\n");
> + pp->seen_status |= PORT_IRQ_D2H_REG_FIS;
> + } else if ((status & PORT_IRQ_DMAS_FIS) &&
> + !(pp->seen_status & PORT_IRQ_DMAS_FIS)) {
> + ata_port_printk(ap, KERN_INFO, "DMAS FIS during NCQ, "
> + "this message won't be printed again\n");
> + pp->seen_status |= PORT_IRQ_DMAS_FIS;
> + } else if (status & PORT_IRQ_SDB_FIS && pp->spurious_sdb_cnt < 10) {
> + /* SDB FIS containing spurious completions might be
> + * dangerous, we need to know more about them. Print
> + * more of it.
> + */
> + const u32 *f = pp->rx_fis + RX_FIS_SDB;
This if/else/else tree does not take into account the possiblity that
more than one bit may be set.
Jeff
^ permalink raw reply [flat|nested] 9+ messages in thread* Re: [PATCH] ahci: improve and limit spurious interrupt messages, take#2
2007-01-25 1:24 ` Jeff Garzik
@ 2007-01-25 7:55 ` Tejun Heo
2007-01-25 10:16 ` ahci: improve and limit spurious interrupt messages, take#3 Tejun Heo
0 siblings, 1 reply; 9+ messages in thread
From: Tejun Heo @ 2007-01-25 7:55 UTC (permalink / raw)
To: Jeff Garzik; +Cc: linux-ide
Jeff Garzik wrote:
> Tejun Heo wrote:
>> - /* ignore interim PIO setup fis interrupts */
>> - if (ata_tag_valid(ap->active_tag) && (status & PORT_IRQ_PIOS_FIS))
>> - return;
>> + if ((status & PORT_IRQ_D2H_REG_FIS) &&
>> + !(pp->seen_status & PORT_IRQ_D2H_REG_FIS)) {
>> + ata_port_printk(ap, KERN_INFO, "D2H reg with I during NCQ, "
>> + "this message won't be printed again\n");
>> + pp->seen_status |= PORT_IRQ_D2H_REG_FIS;
>> + } else if ((status & PORT_IRQ_DMAS_FIS) &&
>> + !(pp->seen_status & PORT_IRQ_DMAS_FIS)) {
>> + ata_port_printk(ap, KERN_INFO, "DMAS FIS during NCQ, "
>> + "this message won't be printed again\n");
>> + pp->seen_status |= PORT_IRQ_DMAS_FIS;
>> + } else if (status & PORT_IRQ_SDB_FIS && pp->spurious_sdb_cnt < 10) {
>> + /* SDB FIS containing spurious completions might be
>> + * dangerous, we need to know more about them. Print
>> + * more of it.
>> + */
>> + const u32 *f = pp->rx_fis + RX_FIS_SDB;
>
>
> This if/else/else tree does not take into account the possiblity that
> more than one bit may be set.
Thought it wouldn't really matter. Will fix.
--
tejun
^ permalink raw reply [flat|nested] 9+ messages in thread* ahci: improve and limit spurious interrupt messages, take#3
2007-01-25 7:55 ` Tejun Heo
@ 2007-01-25 10:16 ` Tejun Heo
2007-01-25 22:24 ` Jeff Garzik
2007-01-26 2:04 ` Jeff Garzik
0 siblings, 2 replies; 9+ messages in thread
From: Tejun Heo @ 2007-01-25 10:16 UTC (permalink / raw)
To: Jeff Garzik; +Cc: linux-ide
We're still seeing a lot of issues with NCQ implementation in drive
firmwares. Sprious FISes during NCQ command phase occur on many
drives and some of them seem potentially dangerous (at least to me).
Until we find the solution, spurious messages can give us more info.
Improve and limit them such that more info can be reported while not
disturbing users too much.
Signed-off-by: Tejun Heo <htejun@gmail.com>
---
Fixed as advised. Thanks.
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 5998f74..659949a 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -75,6 +75,7 @@ enum {
AHCI_CMD_CLR_BUSY = (1 << 10),
RX_FIS_D2H_REG = 0x40, /* offset of D2H Register FIS data */
+ RX_FIS_SDB = 0x58, /* offset of SDB FIS data */
RX_FIS_UNK = 0x60, /* offset of Unknown FIS data */
board_ahci = 0,
@@ -202,6 +203,10 @@ struct ahci_port_priv {
dma_addr_t cmd_tbl_dma;
void *rx_fis;
dma_addr_t rx_fis_dma;
+ /* for NCQ spurious interrupt analysis */
+ int ncq_saw_spurious_sdb_cnt;
+ unsigned int ncq_saw_d2h:1;
+ unsigned int ncq_saw_dmas:1;
};
static u32 ahci_scr_read (struct ata_port *ap, unsigned int sc_reg);
@@ -1126,8 +1131,9 @@ static void ahci_host_intr(struct ata_port *ap)
void __iomem *mmio = ap->host->mmio_base;
void __iomem *port_mmio = ahci_port_base(mmio, ap->port_no);
struct ata_eh_info *ehi = &ap->eh_info;
+ struct ahci_port_priv *pp = ap->private_data;
u32 status, qc_active;
- int rc;
+ int rc, known_irq = 0;
status = readl(port_mmio + PORT_IRQ_STAT);
writel(status, port_mmio + PORT_IRQ_STAT);
@@ -1154,17 +1160,52 @@ static void ahci_host_intr(struct ata_port *ap)
/* hmmm... a spurious interupt */
- /* some devices send D2H reg with I bit set during NCQ command phase */
- if (ap->sactive && (status & PORT_IRQ_D2H_REG_FIS))
+ /* if !NCQ, ignore. No modern ATA device has broken HSM
+ * implementation for non-NCQ commands.
+ */
+ if (!ap->sactive)
return;
- /* ignore interim PIO setup fis interrupts */
- if (ata_tag_valid(ap->active_tag) && (status & PORT_IRQ_PIOS_FIS))
- return;
+ if (status & PORT_IRQ_D2H_REG_FIS) {
+ if (!pp->ncq_saw_d2h)
+ ata_port_printk(ap, KERN_INFO,
+ "D2H reg with I during NCQ, "
+ "this message won't be printed again\n");
+ pp->ncq_saw_d2h = 1;
+ known_irq = 1;
+ }
+
+ if (status & PORT_IRQ_DMAS_FIS) {
+ if (!pp->ncq_saw_dmas)
+ ata_port_printk(ap, KERN_INFO,
+ "DMAS FIS during NCQ, "
+ "this message won't be printed again\n");
+ pp->ncq_saw_dmas = 1;
+ known_irq = 1;
+ }
+
+ if (status & PORT_IRQ_SDB_FIS &&
+ pp->ncq_saw_spurious_sdb_cnt < 10) {
+ /* SDB FIS containing spurious completions might be
+ * dangerous, we need to know more about them. Print
+ * more of it.
+ */
+ const u32 *f = pp->rx_fis + RX_FIS_SDB;
+
+ ata_port_printk(ap, KERN_INFO, "Spurious SDB FIS during NCQ "
+ "issue=0x%x SAct=0x%x FIS=%08x:%08x%s\n",
+ readl(port_mmio + PORT_CMD_ISSUE),
+ readl(port_mmio + PORT_SCR_ACT), f[0], f[1],
+ pp->ncq_saw_spurious_sdb_cnt < 10 ?
+ "" : ", shutting up");
+
+ pp->ncq_saw_spurious_sdb_cnt++;
+ known_irq = 1;
+ }
- if (ata_ratelimit())
+ if (!known_irq)
ata_port_printk(ap, KERN_INFO, "spurious interrupt "
- "(irq_stat 0x%x active_tag %d sactive 0x%x)\n",
+ "(irq_stat 0x%x active_tag 0x%x sactive 0x%x)\n",
status, ap->active_tag, ap->sactive);
}
^ permalink raw reply related [flat|nested] 9+ messages in thread* Re: ahci: improve and limit spurious interrupt messages, take#3
2007-01-25 10:16 ` ahci: improve and limit spurious interrupt messages, take#3 Tejun Heo
@ 2007-01-25 22:24 ` Jeff Garzik
2007-01-26 2:04 ` Jeff Garzik
1 sibling, 0 replies; 9+ messages in thread
From: Jeff Garzik @ 2007-01-25 22:24 UTC (permalink / raw)
To: Tejun Heo; +Cc: linux-ide
Tejun Heo wrote:
> We're still seeing a lot of issues with NCQ implementation in drive
> firmwares. Sprious FISes during NCQ command phase occur on many
> drives and some of them seem potentially dangerous (at least to me).
> Until we find the solution, spurious messages can give us more info.
> Improve and limit them such that more info can be reported while not
> disturbing users too much.
>
> Signed-off-by: Tejun Heo <htejun@gmail.com>
> ---
> Fixed as advised. Thanks.
applied to #upstream-fixes
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: ahci: improve and limit spurious interrupt messages, take#3
2007-01-25 10:16 ` ahci: improve and limit spurious interrupt messages, take#3 Tejun Heo
2007-01-25 22:24 ` Jeff Garzik
@ 2007-01-26 2:04 ` Jeff Garzik
2007-01-26 5:47 ` [PATCH] ahci: fix endianness in spurious interrupt message Tejun Heo
1 sibling, 1 reply; 9+ messages in thread
From: Jeff Garzik @ 2007-01-26 2:04 UTC (permalink / raw)
To: Tejun Heo; +Cc: linux-ide
Tejun Heo wrote:
> + const u32 *f = pp->rx_fis + RX_FIS_SDB;
> +
> + ata_port_printk(ap, KERN_INFO, "Spurious SDB FIS during NCQ "
> + "issue=0x%x SAct=0x%x FIS=%08x:%08x%s\n",
> + readl(port_mmio + PORT_CMD_ISSUE),
> + readl(port_mmio + PORT_SCR_ACT), f[0], f[1],
Direct access to f[] would appear to be an endian problem...
Jeff
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH] ahci: fix endianness in spurious interrupt message
2007-01-26 2:04 ` Jeff Garzik
@ 2007-01-26 5:47 ` Tejun Heo
2007-01-26 22:24 ` Jeff Garzik
0 siblings, 1 reply; 9+ messages in thread
From: Tejun Heo @ 2007-01-26 5:47 UTC (permalink / raw)
To: Jeff Garzik; +Cc: linux-ide
Fix endianness in spurious interrupt message.
Signed-off-by: Tejun Heo <htejun@gmail.com>
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 2fe5a58..d8f0ce9 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -1178,7 +1178,8 @@ static void ahci_host_intr(struct ata_port *ap)
ata_port_printk(ap, KERN_INFO, "Spurious SDB FIS during NCQ "
"issue=0x%x SAct=0x%x FIS=%08x:%08x%s\n",
readl(port_mmio + PORT_CMD_ISSUE),
- readl(port_mmio + PORT_SCR_ACT), f[0], f[1],
+ readl(port_mmio + PORT_SCR_ACT),
+ le32_to_cpu(f[0]), le32_to_cpu(f[1]),
pp->ncq_saw_spurious_sdb_cnt < 10 ?
"" : ", shutting up");
^ permalink raw reply related [flat|nested] 9+ messages in thread
end of thread, other threads:[~2007-01-26 22:24 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-01-15 9:01 [PATCH] ahci: improve and limit spurious interrupt messages Tejun Heo
2007-01-16 6:52 ` [PATCH] ahci: improve and limit spurious interrupt messages, take#2 Tejun Heo
2007-01-25 1:24 ` Jeff Garzik
2007-01-25 7:55 ` Tejun Heo
2007-01-25 10:16 ` ahci: improve and limit spurious interrupt messages, take#3 Tejun Heo
2007-01-25 22:24 ` Jeff Garzik
2007-01-26 2:04 ` Jeff Garzik
2007-01-26 5:47 ` [PATCH] ahci: fix endianness in spurious interrupt message Tejun Heo
2007-01-26 22:24 ` Jeff Garzik
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).