[PATCH 1/2 V2] iommu/amd: Add workaround for ERBT1312

iommu.lists.linux-foundation.org archive mirror
 help / color / mirror / Atom feed

* [PATCH 1/2 V2] iommu/amd: Add workaround for ERBT1312
@ 2013-04-15  7:07 suravee.suthikulpanit-5C7GfCeVMHo
  2013-04-18 16:02 ` Joerg Roedel
  0 siblings, 1 reply; 12+ messages in thread
From: suravee.suthikulpanit-5C7GfCeVMHo @ 2013-04-15  7:07 UTC (permalink / raw)
  To: iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	joro-zLv9SwRftAIdnm+yROfE0A
  Cc: linux-kernel-u79uwXL29TY76Z2rM5mHXA

From: Suravee Suthikulpanit <suravee.suthikulpanit-5C7GfCeVMHo@public.gmane.org>

The IOMMU interrupt handling in bottom half must clear the PPR log interrupt
and event log interrupt bits to re-enable the interrupt. This is done by
writing 1 to the memory mapped register to clear the bit. Due to hardware bug,
if the driver tries to clear this bit while the IOMMU hardware also setting
this bit, the conflict will result with the bit being set. If the interrupt
handling code does not make sure to clear this bit, subsequent changes in the
event/PPR logs will no longer generating interrupts, and would result if
buffer overflow. After clearing the bits, the driver must read back
the register to verify.

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit-5C7GfCeVMHo@public.gmane.org>
---
 drivers/iommu/amd_iommu.c |  145 +++++++++++++++++++++++++++++----------------
 1 file changed, 93 insertions(+), 52 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index e5db3e5..419af1d 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -803,22 +803,43 @@ retry:
 static void iommu_poll_events(struct amd_iommu *iommu)
 {
 	u32 head, tail;
+	u32 status;
 	unsigned long flags;
 
-	/* enable event interrupts again */
-	writel(MMIO_STATUS_EVT_INT_MASK, iommu->mmio_base + MMIO_STATUS_OFFSET);
-
 	spin_lock_irqsave(&iommu->lock, flags);
 
-	head = readl(iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
-	tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
+	status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
 
-	while (head != tail) {
-		iommu_handle_event(iommu, iommu->evt_buf + head);
-		head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size;
-	}
+	while (status & MMIO_STATUS_EVT_INT_MASK) {
+
+		/* enable event interrupts again */
+		writel(MMIO_STATUS_EVT_INT_MASK,
+			iommu->mmio_base + MMIO_STATUS_OFFSET);
 
-	writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
+		head = readl(iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
+		tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
+
+		while (head != tail) {
+			iommu_handle_event(iommu, iommu->evt_buf + head);
+			head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size;
+		}
+
+		writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
+
+		/*
+		 * Hardware bug: When re-enabling interrupt (by writing 1
+		 * to clear the bit), the hardware might also try to set
+		 * the interrupt bit in the event status register.
+		 * In this scenario, the bit will be set, and disable
+		 * subsequent interrupts.
+		 *
+		 * Workaround: The IOMMU driver should read back the
+		 * status register and check if the interrupt bits are cleared.
+		 * If not, driver will need to go through the interrupt handler
+		 * again and re-clear the bits
+		 */
+		status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
+	}
 
 	spin_unlock_irqrestore(&iommu->lock, flags);
 }
@@ -846,65 +867,85 @@ static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u64 *raw)
 static void iommu_poll_ppr_log(struct amd_iommu *iommu)
 {
 	unsigned long flags;
+	u32 status;
 	u32 head, tail;
 
 	if (iommu->ppr_log == NULL)
 		return;
 
-	/* enable ppr interrupts again */
-	writel(MMIO_STATUS_PPR_INT_MASK, iommu->mmio_base + MMIO_STATUS_OFFSET);
-
 	spin_lock_irqsave(&iommu->lock, flags);
 
-	head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
-	tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
+	status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
 
-	while (head != tail) {
-		volatile u64 *raw;
-		u64 entry[2];
-		int i;
+	while (status & MMIO_STATUS_PPR_INT_MASK) {
+		/* enable ppr interrupts again */
+		writel(MMIO_STATUS_PPR_INT_MASK,
+			iommu->mmio_base + MMIO_STATUS_OFFSET);
 
-		raw = (u64 *)(iommu->ppr_log + head);
+		head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
+		tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
 
-		/*
-		 * Hardware bug: Interrupt may arrive before the entry is
-		 * written to memory. If this happens we need to wait for the
-		 * entry to arrive.
-		 */
-		for (i = 0; i < LOOP_TIMEOUT; ++i) {
-			if (PPR_REQ_TYPE(raw[0]) != 0)
-				break;
-			udelay(1);
-		}
+		while (head != tail) {
+			volatile u64 *raw;
+			u64 entry[2];
+			int i;
 
-		/* Avoid memcpy function-call overhead */
-		entry[0] = raw[0];
-		entry[1] = raw[1];
+			raw = (u64 *)(iommu->ppr_log + head);
 
-		/*
-		 * To detect the hardware bug we need to clear the entry
-		 * back to zero.
-		 */
-		raw[0] = raw[1] = 0UL;
+			/*
+			 * Hardware bug: Interrupt may arrive before the entry
+			 * is written to memory. If this happens we need to wait
+			 * for the entry to arrive.
+			 */
+			for (i = 0; i < LOOP_TIMEOUT; ++i) {
+				if (PPR_REQ_TYPE(raw[0]) != 0)
+					break;
+				udelay(1);
+			}
 
-		/* Update head pointer of hardware ring-buffer */
-		head = (head + PPR_ENTRY_SIZE) % PPR_LOG_SIZE;
-		writel(head, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
+			/* Avoid memcpy function-call overhead */
+			entry[0] = raw[0];
+			entry[1] = raw[1];
 
-		/*
-		 * Release iommu->lock because ppr-handling might need to
-		 * re-acquire it
-		 */
-		spin_unlock_irqrestore(&iommu->lock, flags);
+			/*
+			 * To detect the hardware bug we need to clear the entry
+			 * back to zero.
+			 */
+			raw[0] = raw[1] = 0UL;
+
+			/* Update head pointer of hardware ring-buffer */
+			head = (head + PPR_ENTRY_SIZE) % PPR_LOG_SIZE;
+			writel(head, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
 
-		/* Handle PPR entry */
-		iommu_handle_ppr_entry(iommu, entry);
+			/*
+			 * Release iommu->lock because ppr-handling might need
+			 * to re-acquire it
+			 */
+			spin_unlock_irqrestore(&iommu->lock, flags);
 
-		spin_lock_irqsave(&iommu->lock, flags);
+			/* Handle PPR entry */
+			iommu_handle_ppr_entry(iommu, entry);
 
-		/* Refresh ring-buffer information */
-		head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
-		tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
+			spin_lock_irqsave(&iommu->lock, flags);
+
+			/* Refresh ring-buffer information */
+			head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
+			tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
+		}
+
+		/*
+		 * Hardware bug: When re-enabling interrupt (by writing 1
+		 * to clear the bit), the hardware might also try to set
+		 * the interrupt bit in the event status register.
+		 * In this scenario, the bit will be set, and disable
+		 * subsequent interrupts.
+		 *
+		 * Workaround: The IOMMU driver should read back the
+		 * status register and check if the interrupt bits are cleared.
+		 * If not, driver will need to go through the interrupt handler
+		 * again and re-clear the bits
+		 */
+		status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
 	}
 
 	spin_unlock_irqrestore(&iommu->lock, flags);
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/2 V2] iommu/amd: Add workaround for ERBT1312
  2013-04-15  7:07 [PATCH 1/2 V2] iommu/amd: Add workaround for ERBT1312 suravee.suthikulpanit-5C7GfCeVMHo
@ 2013-04-18 16:02 ` Joerg Roedel
  2013-04-18 16:13   ` Suravee Suthikulanit
  0 siblings, 1 reply; 12+ messages in thread
From: Joerg Roedel @ 2013-04-18 16:02 UTC (permalink / raw)
  To: suravee.suthikulpanit; +Cc: iommu, linux-kernel

On Mon, Apr 15, 2013 at 02:07:46AM -0500, suravee.suthikulpanit@amd.com wrote:
>  drivers/iommu/amd_iommu.c |  145 +++++++++++++++++++++++++++++----------------

That is way too much for a simple erratum workaround, and too much for a
stable backport. I queued the patch below instead, which has a much
smaller diff and does the same. Please rebase your second patch on-top
of this one and send it again.

>From 4ba052102863da02db79c03d2483b6ad905737ad Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joro@8bytes.org>
Date: Thu, 18 Apr 2013 17:55:04 +0200
Subject: [PATCH] iommu/amd: Workaround for ERBT1312

Work around an IOMMU  hardware bug where clearing the
EVT_INT bit in the status register may race with the
hardware trying to set it again. When not handled the bit
might not be cleared and we lose all future event
interrupts.

Reported-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Cc: stable@vger.kernel.org
Signed-off-by: Joerg Roedel <joro@8bytes.org>
---
 drivers/iommu/amd_iommu.c |   17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index f42793d..de5ae4b 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -700,14 +700,23 @@ retry:
 
 static void iommu_poll_events(struct amd_iommu *iommu)
 {
-	u32 head, tail;
+	u32 head, tail, status;
 	unsigned long flags;
 
-	/* enable event interrupts again */
-	writel(MMIO_STATUS_EVT_INT_MASK, iommu->mmio_base + MMIO_STATUS_OFFSET);
-
 	spin_lock_irqsave(&iommu->lock, flags);
 
+	/* enable event interrupts again */
+	do {
+		/*
+		 * Workaround for Erratum ERBT1312
+		 * Clearing the EVT_INT bit may race in the hardware, so read
+		 * it again and make sure it was really cleared
+		 */
+		status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
+		writel(MMIO_STATUS_EVT_INT_MASK,
+		       iommu->mmio_base + MMIO_STATUS_OFFSET);
+	} while (status & MMIO_STATUS_EVT_INT_MASK);
+
 	head = readl(iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
 	tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
 
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/2 V2] iommu/amd: Add workaround for ERBT1312
  2013-04-18 16:02 ` Joerg Roedel
@ 2013-04-18 16:13   ` Suravee Suthikulanit
       [not found]     ` <51701B9F.10003-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 12+ messages in thread
From: Suravee Suthikulanit @ 2013-04-18 16:13 UTC (permalink / raw)
  To: Joerg Roedel; +Cc: iommu, linux-kernel

Joerg,

This workaround is required for both event log and ppr log.  Your patch 
is only taking care of the event log.

Suravee

On 4/18/2013 11:02 AM, Joerg Roedel wrote:
> On Mon, Apr 15, 2013 at 02:07:46AM -0500, suravee.suthikulpanit@amd.com wrote:
>>   drivers/iommu/amd_iommu.c |  145 +++++++++++++++++++++++++++++----------------
> That is way too much for a simple erratum workaround, and too much for a
> stable backport. I queued the patch below instead, which has a much
> smaller diff and does the same. Please rebase your second patch on-top
> of this one and send it again.
>
>  From 4ba052102863da02db79c03d2483b6ad905737ad Mon Sep 17 00:00:00 2001
> From: Joerg Roedel <joro@8bytes.org>
> Date: Thu, 18 Apr 2013 17:55:04 +0200
> Subject: [PATCH] iommu/amd: Workaround for ERBT1312
>
> Work around an IOMMU  hardware bug where clearing the
> EVT_INT bit in the status register may race with the
> hardware trying to set it again. When not handled the bit
> might not be cleared and we lose all future event
> interrupts.
>
> Reported-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
> Cc: stable@vger.kernel.org
> Signed-off-by: Joerg Roedel <joro@8bytes.org>
> ---
>   drivers/iommu/amd_iommu.c |   17 +++++++++++++----
>   1 file changed, 13 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
> index f42793d..de5ae4b 100644
> --- a/drivers/iommu/amd_iommu.c
> +++ b/drivers/iommu/amd_iommu.c
> @@ -700,14 +700,23 @@ retry:
>   
>   static void iommu_poll_events(struct amd_iommu *iommu)
>   {
> -	u32 head, tail;
> +	u32 head, tail, status;
>   	unsigned long flags;
>   
> -	/* enable event interrupts again */
> -	writel(MMIO_STATUS_EVT_INT_MASK, iommu->mmio_base + MMIO_STATUS_OFFSET);
> -
>   	spin_lock_irqsave(&iommu->lock, flags);
>   
> +	/* enable event interrupts again */
> +	do {
> +		/*
> +		 * Workaround for Erratum ERBT1312
> +		 * Clearing the EVT_INT bit may race in the hardware, so read
> +		 * it again and make sure it was really cleared
> +		 */
> +		status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
> +		writel(MMIO_STATUS_EVT_INT_MASK,
> +		       iommu->mmio_base + MMIO_STATUS_OFFSET);
> +	} while (status & MMIO_STATUS_EVT_INT_MASK);
> +
>   	head = readl(iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
>   	tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
>   

^ permalink raw reply	[flat|nested] 12+ messages in thread

[parent not found: <51701B9F.10003-5C7GfCeVMHo@public.gmane.org>]

* Re: [PATCH 1/2 V2] iommu/amd: Add workaround for ERBT1312
       [not found]     ` <51701B9F.10003-5C7GfCeVMHo@public.gmane.org>
@ 2013-04-18 16:28       ` Joerg Roedel
       [not found]         ` <20130418162856.GA13891-zLv9SwRftAIdnm+yROfE0A@public.gmane.org>
  0 siblings, 1 reply; 12+ messages in thread
From: Joerg Roedel @ 2013-04-18 16:28 UTC (permalink / raw)
  To: Suravee Suthikulanit
  Cc: iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA

On Thu, Apr 18, 2013 at 11:13:19AM -0500, Suravee Suthikulanit wrote:
> This workaround is required for both event log and ppr log.  Your
> patch is only taking care of the event log.

Right, thanks for the notice. Here is the updated patch.

>From cebe04596989c4b9001e2c1571c4fb219ea37b99 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joro-zLv9SwRftAIdnm+yROfE0A@public.gmane.org>
Date: Thu, 18 Apr 2013 17:55:04 +0200
Subject: [PATCH] iommu/amd: Workaround for ERBT1312

Work around an IOMMU  hardware bug where clearing the
EVT_INT or PPR_INT bit in the status register may race with
the hardware trying to set it again. When not handled the
bit might not be cleared and we lose all future event or ppr
interrupts.

Reported-by: Suravee Suthikulpanit <suravee.suthikulpanit-5C7GfCeVMHo@public.gmane.org>
Cc: stable-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Signed-off-by: Joerg Roedel <joro-zLv9SwRftAIdnm+yROfE0A@public.gmane.org>
---
 drivers/iommu/amd_iommu.c |   34 ++++++++++++++++++++++++++--------
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index f42793d..27792f8 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c
@@ -700,14 +700,23 @@ retry:
 
 static void iommu_poll_events(struct amd_iommu *iommu)
 {
-	u32 head, tail;
+	u32 head, tail, status;
 	unsigned long flags;
 
-	/* enable event interrupts again */
-	writel(MMIO_STATUS_EVT_INT_MASK, iommu->mmio_base + MMIO_STATUS_OFFSET);
-
 	spin_lock_irqsave(&iommu->lock, flags);
 
+	/* enable event interrupts again */
+	do {
+		/*
+		 * Workaround for Erratum ERBT1312
+		 * Clearing the EVT_INT bit may race in the hardware, so read
+		 * it again and make sure it was really cleared
+		 */
+		status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
+		writel(MMIO_STATUS_EVT_INT_MASK,
+		       iommu->mmio_base + MMIO_STATUS_OFFSET);
+	} while (status & MMIO_STATUS_EVT_INT_MASK);
+
 	head = readl(iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
 	tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
 
@@ -744,16 +753,25 @@ static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u64 *raw)
 static void iommu_poll_ppr_log(struct amd_iommu *iommu)
 {
 	unsigned long flags;
-	u32 head, tail;
+	u32 head, tail, status;
 
 	if (iommu->ppr_log == NULL)
 		return;
 
-	/* enable ppr interrupts again */
-	writel(MMIO_STATUS_PPR_INT_MASK, iommu->mmio_base + MMIO_STATUS_OFFSET);
-
 	spin_lock_irqsave(&iommu->lock, flags);
 
+	/* enable ppr interrupts again */
+	do {
+		/*
+		 * Workaround for Erratum ERBT1312
+		 * Clearing the PPR_INT bit may race in the hardware, so read
+		 * it again and make sure it was really cleared
+		 */
+		status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
+		writel(MMIO_STATUS_PPR_INT_MASK,
+		       iommu->mmio_base + MMIO_STATUS_OFFSET);
+	} while (status & MMIO_STATUS_PPR_INT_MASK);
+
 	head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
 	tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
 
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 12+ messages in thread

[parent not found: <20130418162856.GA13891-zLv9SwRftAIdnm+yROfE0A@public.gmane.org>]

* Re: [PATCH 1/2 V2] iommu/amd: Add workaround for ERBT1312
       [not found]         ` <20130418162856.GA13891-zLv9SwRftAIdnm+yROfE0A@public.gmane.org>
@ 2013-04-18 16:59           ` Suravee Suthikulanit
       [not found]             ` <5170268E.8080706-5C7GfCeVMHo@public.gmane.org>
  2013-04-23 13:22           ` Don Dutile
  1 sibling, 1 reply; 12+ messages in thread
From: Suravee Suthikulanit @ 2013-04-18 16:59 UTC (permalink / raw)
  To: Joerg Roedel
  Cc: iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA

Joerg,

One last concern I have for this patch is the case when we re-enable the 
interrupt, then another interrupt happens while we processing the log 
and set the bit.  If the interrupt thread doesn't check this right 
before the thread exits the handler.  We could still end up leaving the 
interrupt disabled.

Suravee

On 4/18/2013 11:28 AM, Joerg Roedel wrote:
> On Thu, Apr 18, 2013 at 11:13:19AM -0500, Suravee Suthikulanit wrote:
>> This workaround is required for both event log and ppr log.  Your
>> patch is only taking care of the event log.
> Right, thanks for the notice. Here is the updated patch.
>
>  From cebe04596989c4b9001e2c1571c4fb219ea37b99 Mon Sep 17 00:00:00 2001
> From: Joerg Roedel <joro-zLv9SwRftAIdnm+yROfE0A@public.gmane.org>
> Date: Thu, 18 Apr 2013 17:55:04 +0200
> Subject: [PATCH] iommu/amd: Workaround for ERBT1312
>
> Work around an IOMMU  hardware bug where clearing the
> EVT_INT or PPR_INT bit in the status register may race with
> the hardware trying to set it again. When not handled the
> bit might not be cleared and we lose all future event or ppr
> interrupts.
>
> Reported-by: Suravee Suthikulpanit <suravee.suthikulpanit-5C7GfCeVMHo@public.gmane.org>
> Cc: stable-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> Signed-off-by: Joerg Roedel <joro-zLv9SwRftAIdnm+yROfE0A@public.gmane.org>
> ---
>   drivers/iommu/amd_iommu.c |   34 ++++++++++++++++++++++++++--------
>   1 file changed, 26 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
> index f42793d..27792f8 100644
> --- a/drivers/iommu/amd_iommu.c
> +++ b/drivers/iommu/amd_iommu.c
> @@ -700,14 +700,23 @@ retry:
>   
>   static void iommu_poll_events(struct amd_iommu *iommu)
>   {
> -	u32 head, tail;
> +	u32 head, tail, status;
>   	unsigned long flags;
>   
> -	/* enable event interrupts again */
> -	writel(MMIO_STATUS_EVT_INT_MASK, iommu->mmio_base + MMIO_STATUS_OFFSET);
> -
>   	spin_lock_irqsave(&iommu->lock, flags);
>   
> +	/* enable event interrupts again */
> +	do {
> +		/*
> +		 * Workaround for Erratum ERBT1312
> +		 * Clearing the EVT_INT bit may race in the hardware, so read
> +		 * it again and make sure it was really cleared
> +		 */
> +		status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
> +		writel(MMIO_STATUS_EVT_INT_MASK,
> +		       iommu->mmio_base + MMIO_STATUS_OFFSET);
> +	} while (status & MMIO_STATUS_EVT_INT_MASK);
> +
>   	head = readl(iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
>   	tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
>   
> @@ -744,16 +753,25 @@ static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u64 *raw)
>   static void iommu_poll_ppr_log(struct amd_iommu *iommu)
>   {
>   	unsigned long flags;
> -	u32 head, tail;
> +	u32 head, tail, status;
>   
>   	if (iommu->ppr_log == NULL)
>   		return;
>   
> -	/* enable ppr interrupts again */
> -	writel(MMIO_STATUS_PPR_INT_MASK, iommu->mmio_base + MMIO_STATUS_OFFSET);
> -
>   	spin_lock_irqsave(&iommu->lock, flags);
>   
> +	/* enable ppr interrupts again */
> +	do {
> +		/*
> +		 * Workaround for Erratum ERBT1312
> +		 * Clearing the PPR_INT bit may race in the hardware, so read
> +		 * it again and make sure it was really cleared
> +		 */
> +		status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
> +		writel(MMIO_STATUS_PPR_INT_MASK,
> +		       iommu->mmio_base + MMIO_STATUS_OFFSET);
> +	} while (status & MMIO_STATUS_PPR_INT_MASK);
> +
>   	head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
>   	tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
>   

^ permalink raw reply	[flat|nested] 12+ messages in thread

[parent not found: <5170268E.8080706-5C7GfCeVMHo@public.gmane.org>]

* Re: [PATCH 1/2 V2] iommu/amd: Add workaround for ERBT1312
       [not found]             ` <5170268E.8080706-5C7GfCeVMHo@public.gmane.org>
@ 2013-04-18 18:35               ` Joerg Roedel
       [not found]                 ` <20130418183538.GA17148-zLv9SwRftAIdnm+yROfE0A@public.gmane.org>
  0 siblings, 1 reply; 12+ messages in thread
From: Joerg Roedel @ 2013-04-18 18:35 UTC (permalink / raw)
  To: Suravee Suthikulanit
  Cc: iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA

On Thu, Apr 18, 2013 at 11:59:58AM -0500, Suthikulpanit, Suravee wrote:
> One last concern I have for this patch is the case when we re-enable
> the interrupt, then another interrupt happens while we processing
> the log and set the bit.  If the interrupt thread doesn't check this
> right before the thread exits the handler.  We could still end up
> leaving the interrupt disabled.

That can't happen, the patch checks whether the bit is really 0 and then
it processes the event/ppr-log entries. If any new entry is queued while
we process the logs another interrupt will be fired and the irq-thread
will run again. So we will not miss any log entry.


	Joerg

^ permalink raw reply	[flat|nested] 12+ messages in thread

[parent not found: <20130418183538.GA17148-zLv9SwRftAIdnm+yROfE0A@public.gmane.org>]

* Re: [PATCH 1/2 V2] iommu/amd: Add workaround for ERBT1312
       [not found]                 ` <20130418183538.GA17148-zLv9SwRftAIdnm+yROfE0A@public.gmane.org>
@ 2013-04-18 18:56                   ` Suravee Suthikulpanit
       [not found]                     ` <517041EA.70407-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 12+ messages in thread
From: Suravee Suthikulpanit @ 2013-04-18 18:56 UTC (permalink / raw)
  To: Joerg Roedel
  Cc: iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA

On 4/18/2013 1:35 PM, Joerg Roedel wrote:
> On Thu, Apr 18, 2013 at 11:59:58AM -0500, Suthikulpanit, Suravee wrote:
>> One last concern I have for this patch is the case when we re-enable
>> the interrupt, then another interrupt happens while we processing
>> the log and set the bit.  If the interrupt thread doesn't check this
>> right before the thread exits the handler.  We could still end up
>> leaving the interrupt disabled.
> That can't happen, the patch checks whether the bit is really 0 and then
> it processes the event/ppr-log entries. If any new entry is queued while
> we process the logs another interrupt will be fired and the irq-thread
> will run again. So we will not miss any log entry.
According to the "kernel/irq/handle.c:irq_wake_thread()", I thought that 
for the threaded IRQ, if the system getting a new interrupt from the 
device while the thread is running, it will just return and do nothing.

Suravee

> 	Joerg
>
>
>

^ permalink raw reply	[flat|nested] 12+ messages in thread

[parent not found: <517041EA.70407-5C7GfCeVMHo@public.gmane.org>]

* Re: [PATCH 1/2 V2] iommu/amd: Add workaround for ERBT1312
       [not found]                     ` <517041EA.70407-5C7GfCeVMHo@public.gmane.org>
@ 2013-04-18 20:06                       ` Joerg Roedel
       [not found]                         ` <20130418200613.GB17148-zLv9SwRftAIdnm+yROfE0A@public.gmane.org>
  0 siblings, 1 reply; 12+ messages in thread
From: Joerg Roedel @ 2013-04-18 20:06 UTC (permalink / raw)
  To: Suravee Suthikulpanit
  Cc: iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA

On Thu, Apr 18, 2013 at 01:56:42PM -0500, Suthikulpanit, Suravee wrote:
> On 4/18/2013 1:35 PM, Joerg Roedel wrote:

> According to the "kernel/irq/handle.c:irq_wake_thread()", I thought
> that for the threaded IRQ, if the system getting a new interrupt
> from the device while the thread is running, it will just return and
> do nothing.

Yes, but the irq-thread function itself executes the handler function
repeatedly until the IRQTF_RUNTHREAD bit is cleared. And every new
interrupt will set this bit again. So when there is a new interrupt
while our handler function runs the handler will be called again by the
irq-thread.


	Joerg

^ permalink raw reply	[flat|nested] 12+ messages in thread

[parent not found: <20130418200613.GB17148-zLv9SwRftAIdnm+yROfE0A@public.gmane.org>]

* Re: [PATCH 1/2 V2] iommu/amd: Add workaround for ERBT1312
       [not found]                         ` <20130418200613.GB17148-zLv9SwRftAIdnm+yROfE0A@public.gmane.org>
@ 2013-04-22 21:43                           ` Suravee Suthikulanit
  0 siblings, 0 replies; 12+ messages in thread
From: Suravee Suthikulanit @ 2013-04-22 21:43 UTC (permalink / raw)
  To: Joerg Roedel
  Cc: iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA

On 4/18/2013 3:06 PM, Joerg Roedel wrote:
> Yes, but the irq-thread function itself executes the handler function
> repeatedly until the IRQTF_RUNTHREAD bit is cleared. And every new
> interrupt will set this bit again. So when there is a new interrupt
> while our handler function runs the handler will be called again by the
> irq-thread.
>
>
> 	Joerg
Thank you for the clarification.  I have submitted a new patch based on 
the new workaround code. 
(http://lists.linuxfoundation.org/pipermail/iommu/2013-April/005574.html)

Suravee

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/2 V2] iommu/amd: Add workaround for ERBT1312
       [not found]         ` <20130418162856.GA13891-zLv9SwRftAIdnm+yROfE0A@public.gmane.org>
  2013-04-18 16:59           ` Suravee Suthikulanit
@ 2013-04-23 13:22           ` Don Dutile
       [not found]             ` <51768B25.1060501-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
  1 sibling, 1 reply; 12+ messages in thread
From: Don Dutile @ 2013-04-23 13:22 UTC (permalink / raw)
  To: Joerg Roedel
  Cc: iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA

On 04/18/2013 12:28 PM, Joerg Roedel wrote:
> On Thu, Apr 18, 2013 at 11:13:19AM -0500, Suravee Suthikulanit wrote:
>> This workaround is required for both event log and ppr log.  Your
>> patch is only taking care of the event log.
>
> Right, thanks for the notice. Here is the updated patch.
>
>  From cebe04596989c4b9001e2c1571c4fb219ea37b99 Mon Sep 17 00:00:00 2001
> From: Joerg Roedel<joro-zLv9SwRftAIdnm+yROfE0A@public.gmane.org>
> Date: Thu, 18 Apr 2013 17:55:04 +0200
> Subject: [PATCH] iommu/amd: Workaround for ERBT1312
>
> Work around an IOMMU  hardware bug where clearing the
> EVT_INT or PPR_INT bit in the status register may race with
> the hardware trying to set it again. When not handled the
> bit might not be cleared and we lose all future event or ppr
> interrupts.
>
> Reported-by: Suravee Suthikulpanit<suravee.suthikulpanit-5C7GfCeVMHo@public.gmane.org>
> Cc: stable-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> Signed-off-by: Joerg Roedel<joro-zLv9SwRftAIdnm+yROfE0A@public.gmane.org>
> ---
>   drivers/iommu/amd_iommu.c |   34 ++++++++++++++++++++++++++--------
>   1 file changed, 26 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
> index f42793d..27792f8 100644
> --- a/drivers/iommu/amd_iommu.c
> +++ b/drivers/iommu/amd_iommu.c
> @@ -700,14 +700,23 @@ retry:
>
>   static void iommu_poll_events(struct amd_iommu *iommu)
>   {
> -	u32 head, tail;
> +	u32 head, tail, status;
>   	unsigned long flags;
>
> -	/* enable event interrupts again */
> -	writel(MMIO_STATUS_EVT_INT_MASK, iommu->mmio_base + MMIO_STATUS_OFFSET);
> -
>   	spin_lock_irqsave(&iommu->lock, flags);
>
> +	/* enable event interrupts again */
> +	do {
> +		/*
> +		 * Workaround for Erratum ERBT1312
> +		 * Clearing the EVT_INT bit may race in the hardware, so read
> +		 * it again and make sure it was really cleared
> +		 */
> +		status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
> +		writel(MMIO_STATUS_EVT_INT_MASK,
> +		       iommu->mmio_base + MMIO_STATUS_OFFSET);
> +	} while (status&  MMIO_STATUS_EVT_INT_MASK);
> +
>   	head = readl(iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
>   	tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET);
>
> @@ -744,16 +753,25 @@ static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u64 *raw)
>   static void iommu_poll_ppr_log(struct amd_iommu *iommu)
>   {
>   	unsigned long flags;
> -	u32 head, tail;
> +	u32 head, tail, status;
>
>   	if (iommu->ppr_log == NULL)
>   		return;
>
> -	/* enable ppr interrupts again */
> -	writel(MMIO_STATUS_PPR_INT_MASK, iommu->mmio_base + MMIO_STATUS_OFFSET);
> -
>   	spin_lock_irqsave(&iommu->lock, flags);
>
> +	/* enable ppr interrupts again */
> +	do {
> +		/*
> +		 * Workaround for Erratum ERBT1312
> +		 * Clearing the PPR_INT bit may race in the hardware, so read
> +		 * it again and make sure it was really cleared
> +		 */
> +		status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
> +		writel(MMIO_STATUS_PPR_INT_MASK,
> +		       iommu->mmio_base + MMIO_STATUS_OFFSET);
> +	} while (status&  MMIO_STATUS_PPR_INT_MASK);
> +
>   	head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
>   	tail = readl(iommu->mmio_base + MMIO_PPR_TAIL_OFFSET);
>
Given other threads on this mail list (and I've seen crashes with same problem)
where this type of logging during a flood of IOMMU errors will lock up the machine,
is there something that can be done to break the do-while loop after n iterations
have been exec'd, so the kernel can progress during a crash ?

^ permalink raw reply	[flat|nested] 12+ messages in thread

[parent not found: <51768B25.1060501-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>]

* Re: [PATCH 1/2 V2] iommu/amd: Add workaround for ERBT1312
       [not found]             ` <51768B25.1060501-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
@ 2013-04-24 10:46               ` Joerg Roedel
       [not found]                 ` <20130424104616.GH17148-zLv9SwRftAIdnm+yROfE0A@public.gmane.org>
  0 siblings, 1 reply; 12+ messages in thread
From: Joerg Roedel @ 2013-04-24 10:46 UTC (permalink / raw)
  To: Don Dutile
  Cc: iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA

On Tue, Apr 23, 2013 at 09:22:45AM -0400, Don Dutile wrote:
> Given other threads on this mail list (and I've seen crashes with same problem)
> where this type of logging during a flood of IOMMU errors will lock up the machine,
> is there something that can be done to break the do-while loop after n iterations
> have been exec'd, so the kernel can progress during a crash ?

In the case of an IOMMU error flood this loop will only run until the
event-log/ppr-log overflows. So it should not turn into an endless loop.


	Joerg

^ permalink raw reply	[flat|nested] 12+ messages in thread

[parent not found: <20130424104616.GH17148-zLv9SwRftAIdnm+yROfE0A@public.gmane.org>]

* Re: [PATCH 1/2 V2] iommu/amd: Add workaround for ERBT1312
       [not found]                 ` <20130424104616.GH17148-zLv9SwRftAIdnm+yROfE0A@public.gmane.org>
@ 2013-04-24 13:52                   ` Don Dutile
  0 siblings, 0 replies; 12+ messages in thread
From: Don Dutile @ 2013-04-24 13:52 UTC (permalink / raw)
  To: Joerg Roedel
  Cc: iommu-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA

On 04/24/2013 06:46 AM, Joerg Roedel wrote:
> On Tue, Apr 23, 2013 at 09:22:45AM -0400, Don Dutile wrote:
>> Given other threads on this mail list (and I've seen crashes with same problem)
>> where this type of logging during a flood of IOMMU errors will lock up the machine,
>> is there something that can be done to break the do-while loop after n iterations
>> have been exec'd, so the kernel can progress during a crash ?
>
> In the case of an IOMMU error flood this loop will only run until the
> event-log/ppr-log overflows. So it should not turn into an endless loop.
>
>
> 	Joerg
>
>
Thanks for verification.

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2013-04-24 13:52 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-04-15  7:07 [PATCH 1/2 V2] iommu/amd: Add workaround for ERBT1312 suravee.suthikulpanit-5C7GfCeVMHo
2013-04-18 16:02 ` Joerg Roedel
2013-04-18 16:13   ` Suravee Suthikulanit
     [not found]     ` <51701B9F.10003-5C7GfCeVMHo@public.gmane.org>
2013-04-18 16:28       ` Joerg Roedel
     [not found]         ` <20130418162856.GA13891-zLv9SwRftAIdnm+yROfE0A@public.gmane.org>
2013-04-18 16:59           ` Suravee Suthikulanit
     [not found]             ` <5170268E.8080706-5C7GfCeVMHo@public.gmane.org>
2013-04-18 18:35               ` Joerg Roedel
     [not found]                 ` <20130418183538.GA17148-zLv9SwRftAIdnm+yROfE0A@public.gmane.org>
2013-04-18 18:56                   ` Suravee Suthikulpanit
     [not found]                     ` <517041EA.70407-5C7GfCeVMHo@public.gmane.org>
2013-04-18 20:06                       ` Joerg Roedel
     [not found]                         ` <20130418200613.GB17148-zLv9SwRftAIdnm+yROfE0A@public.gmane.org>
2013-04-22 21:43                           ` Suravee Suthikulanit
2013-04-23 13:22           ` Don Dutile
     [not found]             ` <51768B25.1060501-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2013-04-24 10:46               ` Joerg Roedel
     [not found]                 ` <20130424104616.GH17148-zLv9SwRftAIdnm+yROfE0A@public.gmane.org>
2013-04-24 13:52                   ` Don Dutile

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).