* [PATCH 1/1] x86, e820: Remove direct mapping of reserved space for HT hole around 1TB
@ 2011-10-11 22:09 Jacob Shin
2011-10-11 23:44 ` H. Peter Anvin
` (2 more replies)
0 siblings, 3 replies; 12+ messages in thread
From: Jacob Shin @ 2011-10-11 22:09 UTC (permalink / raw)
To: Thomas Gleixner, Ingo Molnar, H. Peter Anvin
Cc: linux-kernel, x86, Yinghai Lu, Jacob Shin
The entire HT hole and also the unused address range before that hole
need to be excluded from direct mapping. Otherwise speculative
accesses to that reserved region can happen which cause machine
checks.
Cc: stable@kernel.org # > 2.6.32
Signed-off-by: Jacob Shin <jacob.shin@amd.com>
---
arch/x86/include/asm/e820.h | 1 +
arch/x86/include/asm/hypertransport.h | 7 +++++
arch/x86/include/asm/processor.h | 16 +++++++++++++
arch/x86/kernel/cpu/amd.c | 40 +++++++++++++++++++++++++++++++++
arch/x86/kernel/e820.c | 6 +++++
arch/x86/kernel/setup.c | 8 +++++-
6 files changed, 76 insertions(+), 2 deletions(-)
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index 908b969..7e4d417 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -117,6 +117,7 @@ static inline void early_memtest(unsigned long start, unsigned long end)
extern unsigned long e820_end_of_ram_pfn(void);
extern unsigned long e820_end_of_low_ram_pfn(void);
+extern unsigned long e820_end_of_ram_under_ht_pfn(void);
extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);
void memblock_x86_fill(void);
diff --git a/arch/x86/include/asm/hypertransport.h b/arch/x86/include/asm/hypertransport.h
index 334b1a8..c1d5a08 100644
--- a/arch/x86/include/asm/hypertransport.h
+++ b/arch/x86/include/asm/hypertransport.h
@@ -42,4 +42,11 @@
#define HT_IRQ_HIGH_DEST_ID(v) \
((((v) >> 8) << HT_IRQ_HIGH_DEST_ID_SHIFT) & HT_IRQ_HIGH_DEST_ID_MASK)
+/*
+ * Memory Region Reserved for HyperTransport
+ */
+
+#define HT_RESERVED_MEM_START 0xfd00000000ULL
+#define HT_RESERVED_MEM_END 0x10000000000ULL
+
#endif /* _ASM_X86_HYPERTRANSPORT_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 0d1171c..73ae54f 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -970,6 +970,22 @@ extern int set_tsc_mode(unsigned int val);
extern int amd_get_nb_id(int cpu);
+#if defined(CONFIG_CPU_SUP_AMD) && defined(CONFIG_X86_64)
+extern int amd_with_ram_above_ht(void);
+extern unsigned long amd_init_high_memory_mapping(void);
+#else
+static inline int amd_with_ram_above_ht(void)
+{
+ return 0;
+}
+
+static inline unsigned long amd_init_high_memory_mapping(void)
+{
+ BUG();
+ return 0;
+}
+#endif
+
struct aperfmperf {
u64 aperf, mperf;
};
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index b13ed39..a57b010 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -7,6 +7,7 @@
#include <asm/apic.h>
#include <asm/cpu.h>
#include <asm/pci-direct.h>
+#include <asm/hypertransport.h>
#ifdef CONFIG_X86_64
# include <asm/numa_64.h>
@@ -755,3 +756,42 @@ bool cpu_has_amd_erratum(const int *erratum)
}
EXPORT_SYMBOL_GPL(cpu_has_amd_erratum);
+
+#if defined(CONFIG_CPU_SUP_AMD) && defined(CONFIG_X86_64)
+int __cpuinit amd_with_ram_above_ht(void)
+{
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
+ max_pfn << PAGE_SHIFT >= HT_RESERVED_MEM_START)
+ return 1;
+
+ return 0;
+}
+
+/*
+ * On AMD systems, memory region 0xfd_0000_0000 ~ 0xff_ffff_ffff is reserved by
+ * HyperTransport and cannot be used by the processor. On systems with more than
+ * 1TB of RAM, BIOS may take memory immediately below the HT region and "hoist"
+ * it up above the HT region, leaving a hole.
+ */
+unsigned long __cpuinit amd_init_high_memory_mapping(void)
+{
+ unsigned long ret;
+
+ /* remove HT region from the e820 map, if it's declared as usable */
+ e820_remove_range(HT_RESERVED_MEM_START,
+ HT_RESERVED_MEM_END - HT_RESERVED_MEM_START,
+ E820_RAM, 1);
+ sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+
+ /* direct mapping of high memory under HT */
+ ret = init_memory_mapping(1UL << 32,
+ e820_end_of_ram_under_ht_pfn() << PAGE_SHIFT);
+
+ /* skip HT region, direct mapping of high memory above HT */
+ if (max_pfn << PAGE_SHIFT >= HT_RESERVED_MEM_END)
+ ret = init_memory_mapping(HT_RESERVED_MEM_END,
+ max_pfn << PAGE_SHIFT);
+
+ return ret;
+}
+#endif
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 3e2ef84..c0ba036 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -22,6 +22,7 @@
#include <asm/e820.h>
#include <asm/proto.h>
#include <asm/setup.h>
+#include <asm/hypertransport.h>
/*
* The e820 map is the map that gets modified e.g. with command line parameters
@@ -826,6 +827,11 @@ unsigned long __init e820_end_of_low_ram_pfn(void)
return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM);
}
+unsigned long __init e820_end_of_ram_under_ht_pfn(void)
+{
+ return e820_end_pfn(HT_RESERVED_MEM_START >> PAGE_SHIFT, E820_RAM);
+}
+
static void early_panic(char *msg)
{
early_printk(msg);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index afaf384..84d0968 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -937,8 +937,12 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_X86_64
if (max_pfn > max_low_pfn) {
- max_pfn_mapped = init_memory_mapping(1UL<<32,
- max_pfn<<PAGE_SHIFT);
+ if (amd_with_ram_above_ht())
+ max_pfn_mapped = amd_init_high_memory_mapping();
+ else
+ max_pfn_mapped = init_memory_mapping(1UL << 32,
+ max_pfn << PAGE_SHIFT);
+
/* can we preseve max_low_pfn ?*/
max_low_pfn = max_pfn;
}
--
1.7.1
^ permalink raw reply related [flat|nested] 12+ messages in thread
* Re: [PATCH 1/1] x86, e820: Remove direct mapping of reserved space for HT hole around 1TB
2011-10-11 22:09 [PATCH 1/1] x86, e820: Remove direct mapping of reserved space for HT hole around 1TB Jacob Shin
@ 2011-10-11 23:44 ` H. Peter Anvin
2011-10-13 9:57 ` Andreas Herrmann
2011-10-13 11:04 ` Andreas Herrmann
2012-10-16 16:47 ` Shuah Khan
2 siblings, 1 reply; 12+ messages in thread
From: H. Peter Anvin @ 2011-10-11 23:44 UTC (permalink / raw)
To: Jacob Shin; +Cc: Thomas Gleixner, Ingo Molnar, linux-kernel, x86, Yinghai Lu
On 10/11/2011 03:09 PM, Jacob Shin wrote:
> The entire HT hole and also the unused address range before that hole
> need to be excluded from direct mapping. Otherwise speculative
> accesses to that reserved region can happen which cause machine
> checks.
BARF!
This is completely insane ad hockery when all that really should need to
happen is marking the HT region RESERVED, which should be possible on
any HT-equipped processor.
-hpa
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 1/1] x86, e820: Remove direct mapping of reserved space for HT hole around 1TB
2011-10-11 23:44 ` H. Peter Anvin
@ 2011-10-13 9:57 ` Andreas Herrmann
2011-10-13 15:52 ` H. Peter Anvin
0 siblings, 1 reply; 12+ messages in thread
From: Andreas Herrmann @ 2011-10-13 9:57 UTC (permalink / raw)
To: H. Peter Anvin
Cc: Jacob Shin, Thomas Gleixner, Ingo Molnar, linux-kernel, x86,
Yinghai Lu
On Tue, Oct 11, 2011 at 04:44:39PM -0700, H. Peter Anvin wrote:
> On 10/11/2011 03:09 PM, Jacob Shin wrote:
> > The entire HT hole and also the unused address range before that hole
> > need to be excluded from direct mapping. Otherwise speculative
> > accesses to that reserved region can happen which cause machine
> > checks.
> BARF!
> This is completely insane ad hockery when all that really should
> need to happen is marking the HT region RESERVED, which should be
> possible on any HT-equipped processor.
Great, thanks for this hint, I would never have thought that ... but
wait, guess what, we have tried this already.
Initially we had following situation:
BIOS-e820: 0000000100000000 - 000000e038000000 (usable)
BIOS-e820: 000000e038000000 - 000000fd00000000 (reserved)
BIOS-e820: 0000010000000000 - 0000011fff000000 (usable)
...
init_memory_mapping: 0000000100000000-0000011fff000000
0100000000 - 11fc0000000 page 1G
11fc0000000 - 11fff000000 page 2M
kernel direct mapping tables up to 11fff000000 @ 11ffeffc000-11fff000000
But MCEs due to speculative accesses happened to the reserved region
before the HT hole.
So what is the point in including address space below TOM2 not backed
with memory in kernel's direct mapping? For similar reserved space
before 4GB we don't do this.
Instead of barfing, some more constructive feedback would be
appreciated.
Thanks,
Andreas
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 1/1] x86, e820: Remove direct mapping of reserved space for HT hole around 1TB
2011-10-11 22:09 [PATCH 1/1] x86, e820: Remove direct mapping of reserved space for HT hole around 1TB Jacob Shin
2011-10-11 23:44 ` H. Peter Anvin
@ 2011-10-13 11:04 ` Andreas Herrmann
2011-10-14 5:45 ` Yinghai Lu
2012-10-16 16:47 ` Shuah Khan
2 siblings, 1 reply; 12+ messages in thread
From: Andreas Herrmann @ 2011-10-13 11:04 UTC (permalink / raw)
To: Jacob Shin
Cc: Thomas Gleixner, Ingo Molnar, H. Peter Anvin, linux-kernel, x86,
Yinghai Lu
CC-ing Yinghai Lu <yinghai.lu@oracle.com> as yinghai@kernel.org still
doesn't seem to work.
Andreas
On Tue, Oct 11, 2011 at 05:09:35PM -0500, Jacob Shin wrote:
> The entire HT hole and also the unused address range before that hole
> need to be excluded from direct mapping. Otherwise speculative
> accesses to that reserved region can happen which cause machine
> checks.
>
> Cc: stable@kernel.org # > 2.6.32
> Signed-off-by: Jacob Shin <jacob.shin@amd.com>
> ---
> arch/x86/include/asm/e820.h | 1 +
> arch/x86/include/asm/hypertransport.h | 7 +++++
> arch/x86/include/asm/processor.h | 16 +++++++++++++
> arch/x86/kernel/cpu/amd.c | 40 +++++++++++++++++++++++++++++++++
> arch/x86/kernel/e820.c | 6 +++++
> arch/x86/kernel/setup.c | 8 +++++-
> 6 files changed, 76 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
> index 908b969..7e4d417 100644
> --- a/arch/x86/include/asm/e820.h
> +++ b/arch/x86/include/asm/e820.h
> @@ -117,6 +117,7 @@ static inline void early_memtest(unsigned long start, unsigned long end)
>
> extern unsigned long e820_end_of_ram_pfn(void);
> extern unsigned long e820_end_of_low_ram_pfn(void);
> +extern unsigned long e820_end_of_ram_under_ht_pfn(void);
> extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);
>
> void memblock_x86_fill(void);
> diff --git a/arch/x86/include/asm/hypertransport.h b/arch/x86/include/asm/hypertransport.h
> index 334b1a8..c1d5a08 100644
> --- a/arch/x86/include/asm/hypertransport.h
> +++ b/arch/x86/include/asm/hypertransport.h
> @@ -42,4 +42,11 @@
> #define HT_IRQ_HIGH_DEST_ID(v) \
> ((((v) >> 8) << HT_IRQ_HIGH_DEST_ID_SHIFT) & HT_IRQ_HIGH_DEST_ID_MASK)
>
> +/*
> + * Memory Region Reserved for HyperTransport
> + */
> +
> +#define HT_RESERVED_MEM_START 0xfd00000000ULL
> +#define HT_RESERVED_MEM_END 0x10000000000ULL
> +
> #endif /* _ASM_X86_HYPERTRANSPORT_H */
> diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
> index 0d1171c..73ae54f 100644
> --- a/arch/x86/include/asm/processor.h
> +++ b/arch/x86/include/asm/processor.h
> @@ -970,6 +970,22 @@ extern int set_tsc_mode(unsigned int val);
>
> extern int amd_get_nb_id(int cpu);
>
> +#if defined(CONFIG_CPU_SUP_AMD) && defined(CONFIG_X86_64)
> +extern int amd_with_ram_above_ht(void);
> +extern unsigned long amd_init_high_memory_mapping(void);
> +#else
> +static inline int amd_with_ram_above_ht(void)
> +{
> + return 0;
> +}
> +
> +static inline unsigned long amd_init_high_memory_mapping(void)
> +{
> + BUG();
> + return 0;
> +}
> +#endif
> +
> struct aperfmperf {
> u64 aperf, mperf;
> };
> diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
> index b13ed39..a57b010 100644
> --- a/arch/x86/kernel/cpu/amd.c
> +++ b/arch/x86/kernel/cpu/amd.c
> @@ -7,6 +7,7 @@
> #include <asm/apic.h>
> #include <asm/cpu.h>
> #include <asm/pci-direct.h>
> +#include <asm/hypertransport.h>
>
> #ifdef CONFIG_X86_64
> # include <asm/numa_64.h>
> @@ -755,3 +756,42 @@ bool cpu_has_amd_erratum(const int *erratum)
> }
>
> EXPORT_SYMBOL_GPL(cpu_has_amd_erratum);
> +
> +#if defined(CONFIG_CPU_SUP_AMD) && defined(CONFIG_X86_64)
> +int __cpuinit amd_with_ram_above_ht(void)
> +{
> + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
> + max_pfn << PAGE_SHIFT >= HT_RESERVED_MEM_START)
> + return 1;
> +
> + return 0;
> +}
> +
> +/*
> + * On AMD systems, memory region 0xfd_0000_0000 ~ 0xff_ffff_ffff is reserved by
> + * HyperTransport and cannot be used by the processor. On systems with more than
> + * 1TB of RAM, BIOS may take memory immediately below the HT region and "hoist"
> + * it up above the HT region, leaving a hole.
> + */
> +unsigned long __cpuinit amd_init_high_memory_mapping(void)
> +{
> + unsigned long ret;
> +
> + /* remove HT region from the e820 map, if it's declared as usable */
> + e820_remove_range(HT_RESERVED_MEM_START,
> + HT_RESERVED_MEM_END - HT_RESERVED_MEM_START,
> + E820_RAM, 1);
> + sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
> +
> + /* direct mapping of high memory under HT */
> + ret = init_memory_mapping(1UL << 32,
> + e820_end_of_ram_under_ht_pfn() << PAGE_SHIFT);
> +
> + /* skip HT region, direct mapping of high memory above HT */
> + if (max_pfn << PAGE_SHIFT >= HT_RESERVED_MEM_END)
> + ret = init_memory_mapping(HT_RESERVED_MEM_END,
> + max_pfn << PAGE_SHIFT);
> +
> + return ret;
> +}
> +#endif
> diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
> index 3e2ef84..c0ba036 100644
> --- a/arch/x86/kernel/e820.c
> +++ b/arch/x86/kernel/e820.c
> @@ -22,6 +22,7 @@
> #include <asm/e820.h>
> #include <asm/proto.h>
> #include <asm/setup.h>
> +#include <asm/hypertransport.h>
>
> /*
> * The e820 map is the map that gets modified e.g. with command line parameters
> @@ -826,6 +827,11 @@ unsigned long __init e820_end_of_low_ram_pfn(void)
> return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM);
> }
>
> +unsigned long __init e820_end_of_ram_under_ht_pfn(void)
> +{
> + return e820_end_pfn(HT_RESERVED_MEM_START >> PAGE_SHIFT, E820_RAM);
> +}
> +
> static void early_panic(char *msg)
> {
> early_printk(msg);
> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
> index afaf384..84d0968 100644
> --- a/arch/x86/kernel/setup.c
> +++ b/arch/x86/kernel/setup.c
> @@ -937,8 +937,12 @@ void __init setup_arch(char **cmdline_p)
>
> #ifdef CONFIG_X86_64
> if (max_pfn > max_low_pfn) {
> - max_pfn_mapped = init_memory_mapping(1UL<<32,
> - max_pfn<<PAGE_SHIFT);
> + if (amd_with_ram_above_ht())
> + max_pfn_mapped = amd_init_high_memory_mapping();
> + else
> + max_pfn_mapped = init_memory_mapping(1UL << 32,
> + max_pfn << PAGE_SHIFT);
> +
> /* can we preseve max_low_pfn ?*/
> max_low_pfn = max_pfn;
> }
> --
> 1.7.1
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 1/1] x86, e820: Remove direct mapping of reserved space for HT hole around 1TB
2011-10-13 9:57 ` Andreas Herrmann
@ 2011-10-13 15:52 ` H. Peter Anvin
0 siblings, 0 replies; 12+ messages in thread
From: H. Peter Anvin @ 2011-10-13 15:52 UTC (permalink / raw)
To: Andreas Herrmann
Cc: Jacob Shin, Thomas Gleixner, Ingo Molnar, linux-kernel, x86,
Yinghai Lu
On 10/13/2011 02:57 AM, Andreas Herrmann wrote:
>
> So what is the point in including address space below TOM2 not backed
> with memory in kernel's direct mapping? For similar reserved space
> before 4GB we don't do this.
>
> Instead of barfing, some more constructive feedback would be
> appreciated.
>
Ok, that's a BUG, plain and simple, and a very serious one (for exactly
the reason you just described.) We should NEVER have cachable mappings
for a RESERVED region because it can have arbitrary side effects, so the
fact that that happens at all is the real problem and it MUST be addressed.
So I'm not arguing that it is not a problem for you (it is a real and
serious problem), but I would like to see it addressed correctly, which
is to exclude these memory regions from direct mapping just as we do for
< 4 GB. That way the fix applies to any RESERVED region.
-hpa
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 1/1] x86, e820: Remove direct mapping of reserved space for HT hole around 1TB
2011-10-13 11:04 ` Andreas Herrmann
@ 2011-10-14 5:45 ` Yinghai Lu
0 siblings, 0 replies; 12+ messages in thread
From: Yinghai Lu @ 2011-10-14 5:45 UTC (permalink / raw)
To: Andreas Herrmann
Cc: Jacob Shin, Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
linux-kernel, x86
On 10/13/2011 04:04 AM, Andreas Herrmann wrote:
> CC-ing Yinghai Lu <yinghai.lu@oracle.com> as yinghai@kernel.org still
> doesn't seem to work.
>
>
> Andreas
>
> On Tue, Oct 11, 2011 at 05:09:35PM -0500, Jacob Shin wrote:
>> The entire HT hole and also the unused address range before that hole
>> need to be excluded from direct mapping. Otherwise speculative
>> accesses to that reserved region can happen which cause machine
>> checks.
Great, now AMD platform could support 8T ram?
>>
>> Cc: stable@kernel.org # > 2.6.32
>> Signed-off-by: Jacob Shin <jacob.shin@amd.com>
>> ---
>> arch/x86/include/asm/e820.h | 1 +
>> arch/x86/include/asm/hypertransport.h | 7 +++++
>> arch/x86/include/asm/processor.h | 16 +++++++++++++
>> arch/x86/kernel/cpu/amd.c | 40 +++++++++++++++++++++++++++++++++
>> arch/x86/kernel/e820.c | 6 +++++
>> arch/x86/kernel/setup.c | 8 +++++-
>> 6 files changed, 76 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
>> index 908b969..7e4d417 100644
>> --- a/arch/x86/include/asm/e820.h
>> +++ b/arch/x86/include/asm/e820.h
>> @@ -117,6 +117,7 @@ static inline void early_memtest(unsigned long start, unsigned long end)
>>
>> extern unsigned long e820_end_of_ram_pfn(void);
>> extern unsigned long e820_end_of_low_ram_pfn(void);
>> +extern unsigned long e820_end_of_ram_under_ht_pfn(void);
>> extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);
>>
>> void memblock_x86_fill(void);
>> diff --git a/arch/x86/include/asm/hypertransport.h b/arch/x86/include/asm/hypertransport.h
>> index 334b1a8..c1d5a08 100644
>> --- a/arch/x86/include/asm/hypertransport.h
>> +++ b/arch/x86/include/asm/hypertransport.h
>> @@ -42,4 +42,11 @@
>> #define HT_IRQ_HIGH_DEST_ID(v) \
>> ((((v) >> 8) << HT_IRQ_HIGH_DEST_ID_SHIFT) & HT_IRQ_HIGH_DEST_ID_MASK)
>>
>> +/*
>> + * Memory Region Reserved for HyperTransport
>> + */
>> +
>> +#define HT_RESERVED_MEM_START 0xfd00000000ULL
>> +#define HT_RESERVED_MEM_END 0x10000000000ULL
>> +
>> #endif /* _ASM_X86_HYPERTRANSPORT_H */
>> diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
>> index 0d1171c..73ae54f 100644
>> --- a/arch/x86/include/asm/processor.h
>> +++ b/arch/x86/include/asm/processor.h
>> @@ -970,6 +970,22 @@ extern int set_tsc_mode(unsigned int val);
>>
>> extern int amd_get_nb_id(int cpu);
>>
>> +#if defined(CONFIG_CPU_SUP_AMD) && defined(CONFIG_X86_64)
>> +extern int amd_with_ram_above_ht(void);
>> +extern unsigned long amd_init_high_memory_mapping(void);
>> +#else
>> +static inline int amd_with_ram_above_ht(void)
>> +{
>> + return 0;
>> +}
>> +
>> +static inline unsigned long amd_init_high_memory_mapping(void)
>> +{
>> + BUG();
>> + return 0;
>> +}
>> +#endif
>> +
>> struct aperfmperf {
>> u64 aperf, mperf;
>> };
>> diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
>> index b13ed39..a57b010 100644
>> --- a/arch/x86/kernel/cpu/amd.c
>> +++ b/arch/x86/kernel/cpu/amd.c
>> @@ -7,6 +7,7 @@
>> #include <asm/apic.h>
>> #include <asm/cpu.h>
>> #include <asm/pci-direct.h>
>> +#include <asm/hypertransport.h>
>>
>> #ifdef CONFIG_X86_64
>> # include <asm/numa_64.h>
>> @@ -755,3 +756,42 @@ bool cpu_has_amd_erratum(const int *erratum)
>> }
>>
>> EXPORT_SYMBOL_GPL(cpu_has_amd_erratum);
>> +
>> +#if defined(CONFIG_CPU_SUP_AMD) && defined(CONFIG_X86_64)
>> +int __cpuinit amd_with_ram_above_ht(void)
>> +{
>> + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
>> + max_pfn << PAGE_SHIFT >= HT_RESERVED_MEM_START)
>> + return 1;
>> +
>> + return 0;
>> +}
>> +
>> +/*
>> + * On AMD systems, memory region 0xfd_0000_0000 ~ 0xff_ffff_ffff is reserved by
>> + * HyperTransport and cannot be used by the processor. On systems with more than
>> + * 1TB of RAM, BIOS may take memory immediately below the HT region and "hoist"
>> + * it up above the HT region, leaving a hole.
>> + */
>> +unsigned long __cpuinit amd_init_high_memory_mapping(void)
>> +{
>> + unsigned long ret;
>> +
>> + /* remove HT region from the e820 map, if it's declared as usable */
>> + e820_remove_range(HT_RESERVED_MEM_START,
>> + HT_RESERVED_MEM_END - HT_RESERVED_MEM_START,
>> + E820_RAM, 1);
>> + sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
>> +
>> + /* direct mapping of high memory under HT */
>> + ret = init_memory_mapping(1UL << 32,
>> + e820_end_of_ram_under_ht_pfn() << PAGE_SHIFT);
>> +
>> + /* skip HT region, direct mapping of high memory above HT */
>> + if (max_pfn << PAGE_SHIFT >= HT_RESERVED_MEM_END)
>> + ret = init_memory_mapping(HT_RESERVED_MEM_END,
>> + max_pfn << PAGE_SHIFT);
>> +
>> + return ret;
>> +}
>> +#endif
>> diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
>> index 3e2ef84..c0ba036 100644
>> --- a/arch/x86/kernel/e820.c
>> +++ b/arch/x86/kernel/e820.c
>> @@ -22,6 +22,7 @@
>> #include <asm/e820.h>
>> #include <asm/proto.h>
>> #include <asm/setup.h>
>> +#include <asm/hypertransport.h>
>>
>> /*
>> * The e820 map is the map that gets modified e.g. with command line parameters
>> @@ -826,6 +827,11 @@ unsigned long __init e820_end_of_low_ram_pfn(void)
>> return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM);
>> }
>>
>> +unsigned long __init e820_end_of_ram_under_ht_pfn(void)
>> +{
>> + return e820_end_pfn(HT_RESERVED_MEM_START >> PAGE_SHIFT, E820_RAM);
>> +}
>> +
>> static void early_panic(char *msg)
>> {
>> early_printk(msg);
>> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
>> index afaf384..84d0968 100644
>> --- a/arch/x86/kernel/setup.c
>> +++ b/arch/x86/kernel/setup.c
>> @@ -937,8 +937,12 @@ void __init setup_arch(char **cmdline_p)
>>
>> #ifdef CONFIG_X86_64
>> if (max_pfn > max_low_pfn) {
>> - max_pfn_mapped = init_memory_mapping(1UL<<32,
>> - max_pfn<<PAGE_SHIFT);
>> + if (amd_with_ram_above_ht())
>> + max_pfn_mapped = amd_init_high_memory_mapping();
>> + else
>> + max_pfn_mapped = init_memory_mapping(1UL << 32,
>> + max_pfn << PAGE_SHIFT);
>> +
>> /* can we preseve max_low_pfn ?*/
>> max_low_pfn = max_pfn;
>> }
It is too late to change the e820 map here.
You need to update the e820 map before
memblock_x86_fill(),
e.g. at the place where trim_bios_range() or early_gart_iommu_check() is called.
BTW:
BIOS should put that range in reserved, right?
About the mapping for holes above 4G: if you do think that is a problem, we can unmap them later,
provided that does not cause any TLB stress.
Thanks
Yinghai Lu
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 1/1] x86, e820: Remove direct mapping of reserved space for HT hole around 1TB
2011-10-11 22:09 [PATCH 1/1] x86, e820: Remove direct mapping of reserved space for HT hole around 1TB Jacob Shin
2011-10-11 23:44 ` H. Peter Anvin
2011-10-13 11:04 ` Andreas Herrmann
@ 2012-10-16 16:47 ` Shuah Khan
2 siblings, 0 replies; 12+ messages in thread
From: Shuah Khan @ 2012-10-16 16:47 UTC (permalink / raw)
To: Jacob Shin
Cc: Thomas Gleixner, Ingo Molnar, H. Peter Anvin, linux-kernel, x86,
Yinghai Lu, shuahkhan
Jacob,
On Tue, 2011-10-11 at 17:09 -0500, Jacob Shin wrote:
> The entire HT hole and also the unused address range before that hole
> need to be excluded from direct mapping. Otherwise speculative
> accesses to that reserved region can happen which cause machine
> checks.
Has the HT support even been added? I have a system with HT memory
hoisting feature and I am unable to boot with 1TB. Has this work been
redone as discussed in this thread?
-- Shuah
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 1/1] x86, e820: Remove direct mapping of reserved space for HT hole around 1TB
[not found] ` <8630fe28-0c1c-4f3a-90e1-df2d1b6615a6@blur>
@ 2012-10-16 17:48 ` Shuah Khan
2012-10-16 18:26 ` Jacob Shin
0 siblings, 1 reply; 12+ messages in thread
From: Shuah Khan @ 2012-10-16 17:48 UTC (permalink / raw)
To: Shin, Jacob
Cc: Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
linux-kernel@vger.kernel.org, x86@kernel.org, Yinghai Lu,
shuahkhan
On Tue, 2012-10-16 at 16:55 +0000, Shin, Jacob wrote:
> (Sorry for the top post, on mobile phone..)
>
> You can follow the latest thread here:
> https://lkml.org/lkml/2012/9/30/23
>
> Yinghai's for-x86-mm branch should boot 1TB AMD with hoisting
> enabled.
>
> Thanks!
Jacob,
Thanks. This includes several patches and large change. Good for
upstream, however won't go into stable trees. Any thoughts on a smaller
subset of changes that are suitable for stable kernels to enable HT?
Thanks,
-- Shuah
>
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 1/1] x86, e820: Remove direct mapping of reserved space for HT hole around 1TB
2012-10-16 17:48 ` Shuah Khan
@ 2012-10-16 18:26 ` Jacob Shin
2012-10-16 19:02 ` H. Peter Anvin
2012-10-17 18:25 ` H. Peter Anvin
0 siblings, 2 replies; 12+ messages in thread
From: Jacob Shin @ 2012-10-16 18:26 UTC (permalink / raw)
To: Shuah Khan
Cc: Thomas Gleixner, Ingo Molnar, H. Peter Anvin,
linux-kernel@vger.kernel.org, x86@kernel.org, Yinghai Lu,
shuahkhan, andreas.herrmann3, borislav.petkov
On Tue, Oct 16, 2012 at 11:48:58AM -0600, Shuah Khan wrote:
> On Tue, 2012-10-16 at 16:55 +0000, Shin, Jacob wrote:
> > (Sorry for the top post, on mobile phone..)
> >
> > You can follow the latest thread here:
> > https://lkml.org/lkml/2012/9/30/23
> >
> > Yinghai's for-x86-mm branch should boot 1TB AMD with hoisting
> > enabled.
> >
> > Thanks!
>
> Jacob,
>
> Thanks. This includes several patches and large change. Good for
> upstream, however won't go into stable trees. Any thoughts on a smaller
> subset of changes that are suitable for stable kernels to enable HT?
Right, we are (AMD) thinking about the same thing .. how to backport to
stable kernels of major enterprise OSes
The simplest and the least disruptive solution would be to not map memory
holes that occur above 4GB, it won't affect 32 bit kernels, and it won't
touch legacy (under 4GB) area (ISA, the PCI MMIO region ..):
https://lkml.org/lkml/2011/10/20/323
HPA, it would be great if we can first get this patch upstream, and also
into the stable trees .. and after that, we can work on getting Yinghai's
mm refactoring in ..
Thoughts?
Thanks!
-Jacob
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 1/1] x86, e820: Remove direct mapping of reserved space for HT hole around 1TB
2012-10-16 18:26 ` Jacob Shin
@ 2012-10-16 19:02 ` H. Peter Anvin
2012-10-17 15:30 ` Shuah Khan
2012-10-17 18:25 ` H. Peter Anvin
1 sibling, 1 reply; 12+ messages in thread
From: H. Peter Anvin @ 2012-10-16 19:02 UTC (permalink / raw)
To: Jacob Shin
Cc: Shuah Khan, Thomas Gleixner, Ingo Molnar,
linux-kernel@vger.kernel.org, x86@kernel.org, Yinghai Lu,
shuahkhan, andreas.herrmann3, borislav.petkov
On 10/16/2012 11:26 AM, Jacob Shin wrote:
>
> HPA, it would be great if we can first get this patch upstream, and also
> into the stable trees .. and after that, we can work on getting Yinghai's
> mm refactoring in ..
>
I'll be back from travels tomorrow, I'll look at it then.
-hpa
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 1/1] x86, e820: Remove direct mapping of reserved space for HT hole around 1TB
2012-10-16 19:02 ` H. Peter Anvin
@ 2012-10-17 15:30 ` Shuah Khan
0 siblings, 0 replies; 12+ messages in thread
From: Shuah Khan @ 2012-10-17 15:30 UTC (permalink / raw)
To: H. Peter Anvin
Cc: Jacob Shin, Thomas Gleixner, Ingo Molnar,
linux-kernel@vger.kernel.org, x86@kernel.org, Yinghai Lu,
andreas.herrmann3, borislav.petkov, shuahkhan
On Tue, 2012-10-16 at 12:02 -0700, H. Peter Anvin wrote:
> On 10/16/2012 11:26 AM, Jacob Shin wrote:
> >
> > HPA, it would be great if we can first get this patch upstream, and also
> > into the stable trees .. and after that, we can work on getting Yinghai's
> > mm refactoring in ..
> >
>
> I'll be back from travels tomorrow, I'll look at it then.
For what it's worth, I applied this patch to RHEL 6.3 (based on the 2.6.32
kernel) and my test system came up with 1 terabyte of memory.
https://lkml.org/lkml/2011/10/20/323
Thanks,
-- Shuah
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 1/1] x86, e820: Remove direct mapping of reserved space for HT hole around 1TB
2012-10-16 18:26 ` Jacob Shin
2012-10-16 19:02 ` H. Peter Anvin
@ 2012-10-17 18:25 ` H. Peter Anvin
1 sibling, 0 replies; 12+ messages in thread
From: H. Peter Anvin @ 2012-10-17 18:25 UTC (permalink / raw)
To: Jacob Shin
Cc: Shuah Khan, Thomas Gleixner, Ingo Molnar,
linux-kernel@vger.kernel.org, x86@kernel.org, Yinghai Lu,
shuahkhan, andreas.herrmann3, borislav.petkov
On 10/16/2012 11:26 AM, Jacob Shin wrote:
>
> Right, we are (AMD) thinking about the same thing .. how to backport to
> stable kernels of major enterprise OSes
>
> The simplest and the least disruptive solution would be to not map memory
> holes that occur above 4GB, it won't affect 32 bit kernels, and it won't
> touch legacy (under 4GB) area (ISA, the PCI MMIO region ..):
>
> https://lkml.org/lkml/2011/10/20/323
>
> HPA, it would be great if we can first get this patch upstream, and also
> into the stable trees .. and after that, we can work on getting Yinghai's
> mm refactoring in ..
>
Yes, that makes sense at this point.
-hpa
--
H. Peter Anvin, Intel Open Source Technology Center
I work for Intel. I don't speak on their behalf.
^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2012-10-17 18:26 UTC | newest]
Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-10-11 22:09 [PATCH 1/1] x86, e820: Remove direct mapping of reserved space for HT hole around 1TB Jacob Shin
2011-10-11 23:44 ` H. Peter Anvin
2011-10-13 9:57 ` Andreas Herrmann
2011-10-13 15:52 ` H. Peter Anvin
2011-10-13 11:04 ` Andreas Herrmann
2011-10-14 5:45 ` Yinghai Lu
2012-10-16 16:47 ` Shuah Khan
[not found] <6ec48b96-afab-4c8b-ab74-2c640c2a161b@blur>
[not found] ` <8630fe28-0c1c-4f3a-90e1-df2d1b6615a6@blur>
2012-10-16 17:48 ` Shuah Khan
2012-10-16 18:26 ` Jacob Shin
2012-10-16 19:02 ` H. Peter Anvin
2012-10-17 15:30 ` Shuah Khan
2012-10-17 18:25 ` H. Peter Anvin
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).