All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1 of 2] x86/x2apic: Sandy-Bridge BT98 Erratum
@ 2013-04-26 18:11 Andrew Cooper
  2013-04-26 18:11 ` [PATCH 2 of 2] x86/VT-d: " Andrew Cooper
  2013-04-29  9:04 ` [PATCH 1 of 2] x86/x2apic: " Jan Beulich
  0 siblings, 2 replies; 6+ messages in thread
From: Andrew Cooper @ 2013-04-26 18:11 UTC (permalink / raw)
  To: xen-devel; +Cc: Keir Fraser, Xiantao Zhang, Jan Beulich

Reference:
http://www.intel.com/content/dam/www/public/us/en/documents/specification-updates/xeon-e5-family-spec-update.pdf

Disable x2apic on affected systems.

This requires exposing apic_boot_mode outside of apic.c

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>

diff -r be00287ccdf0 -r 89ba0b0192c4 xen/arch/x86/apic.c
--- a/xen/arch/x86/apic.c
+++ b/xen/arch/x86/apic.c
@@ -78,7 +78,7 @@ boolean_param("x2apic", opt_x2apic);
  * Bootstrap processor local APIC boot mode - so we can undo our changes
  * to the APIC state.
  */
-static enum apic_mode apic_boot_mode = APIC_MODE_INVALID;
+enum apic_mode apic_boot_mode = APIC_MODE_INVALID;
 
 bool_t __read_mostly x2apic_enabled = 0;
 bool_t __read_mostly directed_eoi_enabled = 0;
diff -r be00287ccdf0 -r 89ba0b0192c4 xen/arch/x86/genapic/probe.c
--- a/xen/arch/x86/genapic/probe.c
+++ b/xen/arch/x86/genapic/probe.c
@@ -56,12 +56,51 @@ static void __init genapic_apic_force(ch
 }
 custom_param("apic", genapic_apic_force);
 
+/* Xeon E5 Family processors (Sandy-Bridge) suffer from erratum BT98, which
+ * affects Stepping C-1, but is reported fixed in Stepping C-2.
+ *
+ * This causes system instability when using x2apic and VT-d queued
+ * invalidation.  The workaround is to disable x2apic and VT-d.
+ */
+static void __init smb_bt98_erratum(void)
+{
+	const struct cpuinfo_x86 *c = &boot_cpu_data;
+
+	if (!(c->x86_vendor == X86_VENDOR_INTEL &&
+	      c->x86 == 6 &&
+	      c->x86_model == 0x2d &&
+	      c->x86_mask == 0x6))
+		return;
+
+	printk(KERN_WARNING
+	       "Disabling x2apic due to Sandy-Bridge BT98 erratum\n");
+
+	clear_bit(X86_FEATURE_X2APIC, boot_cpu_data.x86_capability);
+	x2apic_enabled = 0;
+
+	/* If the BIOS started us in x2apic mode, switch back to xapic. */
+	if (apic_boot_mode == APIC_MODE_X2APIC) {
+		uint64_t msr;
+
+		rdmsrl(MSR_IA32_APICBASE, msr);
+		msr &= ~(MSR_IA32_APICBASE_ENABLE|MSR_IA32_APICBASE_EXTD);
+		wrmsrl(MSR_IA32_APICBASE, msr);
+		msr |= MSR_IA32_APICBASE_ENABLE;
+		wrmsrl(MSR_IA32_APICBASE, msr);
+
+		apic_boot_mode = APIC_MODE_XAPIC;
+	}
+}
+
 void __init generic_apic_probe(void) 
 { 
 	int i, changed;
 
 	record_boot_APIC_mode();
 
+        /* Must be before check_x2apic_preenabled() */
+	smb_bt98_erratum();
+
 	check_x2apic_preenabled();
 	cmdline_apic = changed = (genapic != NULL);
 
diff -r be00287ccdf0 -r 89ba0b0192c4 xen/include/asm-x86/apic.h
--- a/xen/include/asm-x86/apic.h
+++ b/xen/include/asm-x86/apic.h
@@ -28,6 +28,7 @@ enum apic_mode {
     APIC_MODE_XAPIC,    /* xAPIC mode - default upon chipset reset */
     APIC_MODE_X2APIC    /* x2APIC mode - common for large MP machines */
 };
+extern enum apic_mode apic_boot_mode;
 
 extern u8 apic_verbosity;
 extern bool_t x2apic_enabled;

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 2 of 2] x86/VT-d: Sandy-Bridge BT98 Erratum
  2013-04-26 18:11 [PATCH 1 of 2] x86/x2apic: Sandy-Bridge BT98 Erratum Andrew Cooper
@ 2013-04-26 18:11 ` Andrew Cooper
  2013-04-29 15:13   ` Jan Beulich
  2013-04-29  9:04 ` [PATCH 1 of 2] x86/x2apic: " Jan Beulich
  1 sibling, 1 reply; 6+ messages in thread
From: Andrew Cooper @ 2013-04-26 18:11 UTC (permalink / raw)
  To: xen-devel; +Cc: Keir Fraser, Xiantao Zhang, Jan Beulich

Reference:
http://www.intel.com/content/dam/www/public/us/en/documents/specification-updates/xeon-e5-family-spec-update.pdf

Disable the IOMMU on affected systems.

Specifying iommu=no-intremap on the Xen command line will allow use of basic
VT-d functionality without suffering the system instability, albeit with the
security problems associated with disabling interrupt remapping.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>

diff -r 89ba0b0192c4 -r 9c2aed177e25 xen/drivers/passthrough/vtd/quirks.c
--- a/xen/drivers/passthrough/vtd/quirks.c
+++ b/xen/drivers/passthrough/vtd/quirks.c
@@ -267,6 +267,27 @@ static void __init tylersburg_intremap_q
     }
 }
 
+/* Xeon E5 Family processors (Sandy-Bridge) suffer from erratum BT98, which
+ * affects Stepping C-1, but is reported fixed in Stepping C-2.
+ *
+ * This causes system instability when using x2apic and VT-d queued
+ * invalidation.  The workaround is to disable x2apic and VT-d.
+ */
+static void __init snb_bt98_erratum(void)
+{
+    const struct cpuinfo_x86 *c = &boot_cpu_data;
+
+    if ( !( c->x86_vendor == X86_VENDOR_INTEL &&
+            c->x86 == 6 &&
+            c->x86_model == 0x2d &&
+            c->x86_mask == 0x6 ) )
+        return;
+
+    printk(XENLOG_WARNING VTDPREFIX
+           " Disabling IOMMU due to Sandy-Bridge BT98 erratum\n");
+    iommu_enable = 0;
+}
+
 /* initialize platform identification flags */
 void __init platform_quirks_init(void)
 {
@@ -288,9 +309,12 @@ void __init platform_quirks_init(void)
     /* ioremap IGD MMIO+0x2000 page */
     map_igd_reg();
 
-    /* Tylersburg interrupt remap quirk */
+    /* Interrupt remapping quirks */
     if ( iommu_intremap )
+    {
         tylersburg_intremap_quirk();
+        snb_bt98_erratum();
+    }
 }
 
 /*

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 1 of 2] x86/x2apic: Sandy-Bridge BT98 Erratum
  2013-04-26 18:11 [PATCH 1 of 2] x86/x2apic: Sandy-Bridge BT98 Erratum Andrew Cooper
  2013-04-26 18:11 ` [PATCH 2 of 2] x86/VT-d: " Andrew Cooper
@ 2013-04-29  9:04 ` Jan Beulich
  2013-04-29 10:25   ` Andrew Cooper
  1 sibling, 1 reply; 6+ messages in thread
From: Jan Beulich @ 2013-04-29  9:04 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Keir Fraser, Xiantao Zhang, xen-devel

>>> On 26.04.13 at 20:11, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
> +/* Xeon E5 Family processors (Sandy-Bridge) suffer from erratum BT98, which
> + * affects Stepping C-1, but is reported fixed in Stepping C-2.
> + *
> + * This causes system instability when using x2apic and VT-d queued
> + * invalidation.  The workaround is to disable x2apic and VT-d.
> + */
> +static void __init smb_bt98_erratum(void)

snb_bt98_erratum() perhaps? And is this really limited to just one
specific product line (usually the same microarchitecture gets used
for multiple ones, and then the BT98 reference would become
confusing)?

> +{
> +	const struct cpuinfo_x86 *c = &boot_cpu_data;
> +
> +	if (!(c->x86_vendor == X86_VENDOR_INTEL &&
> +	      c->x86 == 6 &&
> +	      c->x86_model == 0x2d &&
> +	      c->x86_mask == 0x6))
> +		return;
> +
> +	printk(KERN_WARNING
> +	       "Disabling x2apic due to Sandy-Bridge BT98 erratum\n");
> +
> +	clear_bit(X86_FEATURE_X2APIC, boot_cpu_data.x86_capability);
> +	x2apic_enabled = 0;
> +

Up here it's fine.

> +	/* If the BIOS started us in x2apic mode, switch back to xapic. */
> +	if (apic_boot_mode == APIC_MODE_X2APIC) {
> +		uint64_t msr;
> +
> +		rdmsrl(MSR_IA32_APICBASE, msr);
> +		msr &= ~(MSR_IA32_APICBASE_ENABLE|MSR_IA32_APICBASE_EXTD);
> +		wrmsrl(MSR_IA32_APICBASE, msr);
> +		msr |= MSR_IA32_APICBASE_ENABLE;
> +		wrmsrl(MSR_IA32_APICBASE, msr);
> +
> +		apic_boot_mode = APIC_MODE_XAPIC;
> +	}

But how good are our chances that this won't cause subsequent
problems? The BIOS may e.g. have assigned APIC IDs wider than
what xAPIC can deal with. I think we ought to consider denying
to boot in that case altogether. How does Linux deal with the
erratum?

> +}
> +
>  void __init generic_apic_probe(void) 
>  { 
>  	int i, changed;
>  
>  	record_boot_APIC_mode();
>  
> +        /* Must be before check_x2apic_preenabled() */

Indentation (in case you need to resubmit anyway).

> +	smb_bt98_erratum();

Perhaps worth moving into check_x2apic_preenabled() instead,
keeping x2apic related code as much as possible in x2apic.c?

> +
>  	check_x2apic_preenabled();
>  	cmdline_apic = changed = (genapic != NULL);
>  

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 1 of 2] x86/x2apic: Sandy-Bridge BT98 Erratum
  2013-04-29  9:04 ` [PATCH 1 of 2] x86/x2apic: " Jan Beulich
@ 2013-04-29 10:25   ` Andrew Cooper
  0 siblings, 0 replies; 6+ messages in thread
From: Andrew Cooper @ 2013-04-29 10:25 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Keir (Xen.org), Xiantao Zhang, xen-devel@lists.xen.org

On 29/04/13 10:04, Jan Beulich wrote:
>>>> On 26.04.13 at 20:11, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
>> +/* Xeon E5 Family processors (Sandy-Bridge) suffer from erratum BT98, which
>> + * affects Stepping C-1, but is reported fixed in Stepping C-2.
>> + *
>> + * This causes system instability when using x2apic and VT-d queued
>> + * invalidation.  The workaround is to disable x2apic and VT-d.
>> + */
>> +static void __init smb_bt98_erratum(void)
> snb_bt98_erratum() perhaps? And is this really limited to just one
> specific product line (usually the same microarchitecture gets used
> for multiple ones, and then the BT98 reference would become
> confusing)?

Oops - I did mean snb.  However, I would request guidance from Intel as
to what the best naming policy would be.

>
>> +{
>> +	const struct cpuinfo_x86 *c = &boot_cpu_data;
>> +
>> +	if (!(c->x86_vendor == X86_VENDOR_INTEL &&
>> +	      c->x86 == 6 &&
>> +	      c->x86_model == 0x2d &&
>> +	      c->x86_mask == 0x6))
>> +		return;
>> +
>> +	printk(KERN_WARNING
>> +	       "Disabling x2apic due to Sandy-Bridge BT98 erratum\n");
>> +
>> +	clear_bit(X86_FEATURE_X2APIC, boot_cpu_data.x86_capability);
>> +	x2apic_enabled = 0;
>> +
> Up here it's fine.
>
>> +	/* If the BIOS started us in x2apic mode, switch back to xapic. */
>> +	if (apic_boot_mode == APIC_MODE_X2APIC) {
>> +		uint64_t msr;
>> +
>> +		rdmsrl(MSR_IA32_APICBASE, msr);
>> +		msr &= ~(MSR_IA32_APICBASE_ENABLE|MSR_IA32_APICBASE_EXTD);
>> +		wrmsrl(MSR_IA32_APICBASE, msr);
>> +		msr |= MSR_IA32_APICBASE_ENABLE;
>> +		wrmsrl(MSR_IA32_APICBASE, msr);
>> +
>> +		apic_boot_mode = APIC_MODE_XAPIC;
>> +	}
> But how good are our chances that this won't cause subsequent
> problems? The BIOS may e.g. have assigned APIC IDs wider than
> what xAPIC can deal with. I think we ought to consider denying
> to boot in that case altogether. How does Linux deal with the
> erratum?

I guess "Works on my affected silicon" is not a good enough indicator,
although in that case it was booting in xapic mode not x2apic mode.

A quick look over linux mainline suggests that there is no workaround
present for this erratum.

As for APIC IDs, the CPU IDs are fixed, and the IO-APIC id on the PCH is
4 bits wide.  Unhelpfully, the SDM states that the BIOS should read all
the LAPIC IDs, and if any single one is greater than 255, the BIOS must
hand over to the OS in x2apic mode.

The code in this patch runs before the APs are booted, so querying their
LAPIC IDs is somewhat hard.  Alternatively, doing a synchronised switch
from x2apic to xapic once the APs are running is likely to also be
problematic, but might be our only option.

>
>> +}
>> +
>>  void __init generic_apic_probe(void) 
>>  { 
>>  	int i, changed;
>>  
>>  	record_boot_APIC_mode();
>>  
>> +        /* Must be before check_x2apic_preenabled() */
> Indentation (in case you need to resubmit anyway).
>
>> +	smb_bt98_erratum();
> Perhaps worth moving into check_x2apic_preenabled() instead,
> keeping x2apic related code as much as possible in x2apic.c?

Ok.

>
>> +
>>  	check_x2apic_preenabled();
>>  	cmdline_apic = changed = (genapic != NULL);
>>  
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 2 of 2] x86/VT-d: Sandy-Bridge BT98 Erratum
  2013-04-26 18:11 ` [PATCH 2 of 2] x86/VT-d: " Andrew Cooper
@ 2013-04-29 15:13   ` Jan Beulich
  2013-04-29 15:23     ` Andrew Cooper
  0 siblings, 1 reply; 6+ messages in thread
From: Jan Beulich @ 2013-04-29 15:13 UTC (permalink / raw)
  To: Andrew Cooper; +Cc: Keir Fraser, Xiantao Zhang, xen-devel

>>> On 26.04.13 at 20:11, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
> @@ -288,9 +309,12 @@ void __init platform_quirks_init(void)
>      /* ioremap IGD MMIO+0x2000 page */
>      map_igd_reg();
>  
> -    /* Tylersburg interrupt remap quirk */
> +    /* Interrupt remapping quirks */
>      if ( iommu_intremap )
> +    {
>          tylersburg_intremap_quirk();
> +        snb_bt98_erratum();
> +    }

Considering the nature of the erratum, keying this off
iommu_intremap seems wrong - iommu_qinval ought to be used
instead - intel_vtd_setup() takes care to disable interrupt
remapping when queued invalidation is not available, yet when
you make the above conditional upon iommu_intremap, queued
invalidation could still get enabled (and used for whatever else
purposes, now or in the future).

Jan

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 2 of 2] x86/VT-d: Sandy-Bridge BT98 Erratum
  2013-04-29 15:13   ` Jan Beulich
@ 2013-04-29 15:23     ` Andrew Cooper
  0 siblings, 0 replies; 6+ messages in thread
From: Andrew Cooper @ 2013-04-29 15:23 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Keir (Xen.org), Xiantao Zhang, xen-devel@lists.xen.org

On 29/04/13 16:13, Jan Beulich wrote:
>>>> On 26.04.13 at 20:11, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
>> @@ -288,9 +309,12 @@ void __init platform_quirks_init(void)
>>      /* ioremap IGD MMIO+0x2000 page */
>>      map_igd_reg();
>>  
>> -    /* Tylersburg interrupt remap quirk */
>> +    /* Interrupt remapping quirks */
>>      if ( iommu_intremap )
>> +    {
>>          tylersburg_intremap_quirk();
>> +        snb_bt98_erratum();
>> +    }
> Considering the nature of the erratum, keying this off
> iommu_intremap seems wrong - iommu_qinval ought to be used
> instead - intel_vtd_setup() takes care to disable interrupt
> remapping when queued invalidation is not available, yet when
> you make the above conditional upon iommu_intremap, queued
> invalidation could still get enabled (and used for whatever else
> purposes, now or in the future).
>
> Jan
>

Yes - I mentally had the erratum the wrong way around, as intremap is
the visible impact of it.

I shall respin once hearing back from Xiantao, especially regarding the
first patch.

~Andrew

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2013-04-29 15:23 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-04-26 18:11 [PATCH 1 of 2] x86/x2apic: Sandy-Bridge BT98 Erratum Andrew Cooper
2013-04-26 18:11 ` [PATCH 2 of 2] x86/VT-d: " Andrew Cooper
2013-04-29 15:13   ` Jan Beulich
2013-04-29 15:23     ` Andrew Cooper
2013-04-29  9:04 ` [PATCH 1 of 2] x86/x2apic: " Jan Beulich
2013-04-29 10:25   ` Andrew Cooper

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.