linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] x86: introduce page_size_mask for 64bit
@ 2008-07-08  8:41 Yinghai Lu
  2008-07-08  8:43 ` [PATCH] x86: not overmap than end in init_memory_mapping - 64bit Yinghai Lu
  2008-07-09  7:38 ` [PATCH] x86: introduce page_size_mask for 64bit Ingo Molnar
  0 siblings, 2 replies; 84+ messages in thread
From: Yinghai Lu @ 2008-07-08  8:41 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin; +Cc: LKML


Prepare for the overmapping patch.

Also print out last_map_addr together with end.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

---
 arch/x86/mm/init_64.c |   98 ++++++++++++++++++++++++++++++++------------------
 1 file changed, 63 insertions(+), 35 deletions(-)

Index: linux-2.6/arch/x86/mm/init_64.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/init_64.c
+++ linux-2.6/arch/x86/mm/init_64.c
@@ -300,7 +300,8 @@ phys_pte_update(pmd_t *pmd, unsigned lon
 }
 
 static unsigned long __meminit
-phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
+phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
+			 unsigned long page_size_mask)
 {
 	unsigned long pages = 0;
 
@@ -325,7 +326,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned
 			continue;
 		}
 
-		if (cpu_has_pse) {
+		if (page_size_mask & (1<<PG_LEVEL_2M)) {
 			pages++;
 			set_pte((pte_t *)pmd,
 				pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
@@ -343,20 +344,22 @@ phys_pmd_init(pmd_t *pmd_page, unsigned
 }
 
 static unsigned long __meminit
-phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
+phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end,
+			 unsigned long page_size_mask)
 {
 	pmd_t *pmd = pmd_offset(pud, 0);
 	unsigned long last_map_addr;
 
 	spin_lock(&init_mm.page_table_lock);
-	last_map_addr = phys_pmd_init(pmd, address, end);
+	last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask);
 	spin_unlock(&init_mm.page_table_lock);
 	__flush_tlb_all();
 	return last_map_addr;
 }
 
 static unsigned long __meminit
-phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
+phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
+			 unsigned long page_size_mask)
 {
 	unsigned long pages = 0;
 	unsigned long last_map_addr = end;
@@ -378,11 +381,12 @@ phys_pud_init(pud_t *pud_page, unsigned
 
 		if (pud_val(*pud)) {
 			if (!pud_large(*pud))
-				last_map_addr = phys_pmd_update(pud, addr, end);
+				last_map_addr = phys_pmd_update(pud, addr, end,
+							 page_size_mask);
 			continue;
 		}
 
-		if (direct_gbpages) {
+		if (page_size_mask & (1<<PG_LEVEL_1G)) {
 			pages++;
 			set_pte((pte_t *)pud,
 				pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
@@ -393,7 +397,7 @@ phys_pud_init(pud_t *pud_page, unsigned
 		pmd = alloc_low_page(&pmd_phys);
 
 		spin_lock(&init_mm.page_table_lock);
-		last_map_addr = phys_pmd_init(pmd, addr, end);
+		last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask);
 		unmap_low_page(pmd);
 		pud_populate(&init_mm, pud, __va(pmd_phys));
 		spin_unlock(&init_mm.page_table_lock);
@@ -406,13 +410,14 @@ phys_pud_init(pud_t *pud_page, unsigned
 }
 
 static unsigned long __meminit
-phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end)
+phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end,
+		 unsigned long page_size_mask)
 {
 	pud_t *pud;
 
 	pud = (pud_t *)pgd_page_vaddr(*pgd);
 
-	return phys_pud_init(pud, addr, end);
+	return phys_pud_init(pud, addr, end, page_size_mask);
 }
 
 static void __init find_early_table_space(unsigned long end)
@@ -582,29 +587,12 @@ static void __init early_memtest(unsigne
 }
 #endif
 
-/*
- * Setup the direct mapping of the physical memory at PAGE_OFFSET.
- * This runs before bootmem is initialized and gets pages directly from
- * the physical memory. To access them they are temporarily mapped.
- */
-unsigned long __init_refok init_memory_mapping(unsigned long start, unsigned long end)
+static unsigned long __init kernel_physical_mapping_init(unsigned long start,
+						unsigned long end,
+						unsigned long page_size_mask)
 {
-	unsigned long next, last_map_addr = end;
-	unsigned long start_phys = start, end_phys = end;
 
-	printk(KERN_INFO "init_memory_mapping\n");
-
-	/*
-	 * Find space for the kernel direct mapping tables.
-	 *
-	 * Later we should allocate these tables in the local node of the
-	 * memory mapped. Unfortunately this is done currently before the
-	 * nodes are discovered.
-	 */
-	if (!after_bootmem) {
-		init_gbpages();
-		find_early_table_space(end);
-	}
+	unsigned long next, last_map_addr = end;
 
 	start = (unsigned long)__va(start);
 	end = (unsigned long)__va(end);
@@ -619,7 +607,8 @@ unsigned long __init_refok init_memory_m
 			next = end;
 
 		if (pgd_val(*pgd)) {
-			last_map_addr = phys_pud_update(pgd, __pa(start), __pa(end));
+			last_map_addr = phys_pud_update(pgd, __pa(start),
+						 __pa(end), page_size_mask);
 			continue;
 		}
 
@@ -628,22 +617,61 @@ unsigned long __init_refok init_memory_m
 		else
 			pud = alloc_low_page(&pud_phys);
 
-		last_map_addr = phys_pud_init(pud, __pa(start), __pa(next));
+		last_map_addr = phys_pud_init(pud, __pa(start), __pa(next),
+						 page_size_mask);
 		unmap_low_page(pud);
 		pgd_populate(&init_mm, pgd_offset_k(start),
 			     __va(pud_phys));
 	}
 
+	return last_map_addr;
+}
+/*
+ * Setup the direct mapping of the physical memory at PAGE_OFFSET.
+ * This runs before bootmem is initialized and gets pages directly from
+ * the physical memory. To access them they are temporarily mapped.
+ */
+unsigned long __init_refok init_memory_mapping(unsigned long start,
+					       unsigned long end)
+{
+	unsigned long last_map_addr;
+	unsigned long page_size_mask = 0;
+
+	printk(KERN_INFO "init_memory_mapping\n");
+
+	/*
+	 * Find space for the kernel direct mapping tables.
+	 *
+	 * Later we should allocate these tables in the local node of the
+	 * memory mapped. Unfortunately this is done currently before the
+	 * nodes are discovered.
+	 */
+	if (!after_bootmem) {
+		init_gbpages();
+		find_early_table_space(end);
+	}
+
+	if (direct_gbpages)
+		page_size_mask |= 1 << PG_LEVEL_1G;
+	if (cpu_has_pse)
+		page_size_mask |= 1 << PG_LEVEL_2M;
+
+	last_map_addr = kernel_physical_mapping_init(start, end,
+							 page_size_mask);
+
 	if (!after_bootmem)
 		mmu_cr4_features = read_cr4();
 	__flush_tlb_all();
 
-	if (!after_bootmem)
+	if (!after_bootmem && table_end > table_start)
 		reserve_early(table_start << PAGE_SHIFT,
 				 table_end << PAGE_SHIFT, "PGTABLE");
 
+	printk(KERN_INFO "last_map_addr: %lx end: %lx\n",
+			 last_map_addr, end);
+
 	if (!after_bootmem)
-		early_memtest(start_phys, end_phys);
+		early_memtest(start, end);
 
 	return last_map_addr >> PAGE_SHIFT;
 }

^ permalink raw reply	[flat|nested] 84+ messages in thread

* [PATCH] x86: not overmap than end in init_memory_mapping - 64bit
  2008-07-08  8:41 [PATCH] x86: introduce page_size_mask for 64bit Yinghai Lu
@ 2008-07-08  8:43 ` Yinghai Lu
  2008-07-09  7:38   ` Ingo Molnar
  2008-07-10  3:15   ` [PATCh] x86: overmapped fix when 4K pages on tail " Yinghai Lu
  2008-07-09  7:38 ` [PATCH] x86: introduce page_size_mask for 64bit Ingo Molnar
  1 sibling, 2 replies; 84+ messages in thread
From: Yinghai Lu @ 2008-07-08  8:43 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin; +Cc: LKML


Handle the head and tail of the range that cannot be aligned to big pages.

With this patch, on a system that supports gbpages, the log output changes from
last_map_addr: 1080000000 end: 1078000000
to
last_map_addr: 1078000000 end: 1078000000

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

---
 arch/x86/mm/init_64.c |   77 ++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 65 insertions(+), 12 deletions(-)

Index: linux-2.6/arch/x86/mm/init_64.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/init_64.c
+++ linux-2.6/arch/x86/mm/init_64.c
@@ -422,18 +422,25 @@ phys_pud_update(pgd_t *pgd, unsigned lon
 
 static void __init find_early_table_space(unsigned long end)
 {
-	unsigned long puds, tables, start;
+	unsigned long puds, pmds, ptes, tables, start;
 
 	puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
 	tables = round_up(puds * sizeof(pud_t), PAGE_SIZE);
-	if (!direct_gbpages) {
-		unsigned long pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
-		tables += round_up(pmds * sizeof(pmd_t), PAGE_SIZE);
-	}
-	if (!cpu_has_pse) {
-		unsigned long ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
-		tables += round_up(ptes * sizeof(pte_t), PAGE_SIZE);
-	}
+	if (direct_gbpages) {
+		unsigned long extra;
+		extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT);
+		pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT;
+	} else
+		pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
+	tables += round_up(pmds * sizeof(pmd_t), PAGE_SIZE);
+
+	if (cpu_has_pse) {
+		unsigned long extra;
+		extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT);
+		ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	} else
+		ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	tables += round_up(ptes * sizeof(pte_t), PAGE_SIZE);
 
 	/*
 	 * RED-PEN putting page tables only on node 0 could
@@ -634,8 +641,9 @@ static unsigned long __init kernel_physi
 unsigned long __init_refok init_memory_mapping(unsigned long start,
 					       unsigned long end)
 {
-	unsigned long last_map_addr;
+	unsigned long last_map_addr = end;
 	unsigned long page_size_mask = 0;
+	unsigned long start_pfn, end_pfn;
 
 	printk(KERN_INFO "init_memory_mapping\n");
 
@@ -656,8 +664,53 @@ unsigned long __init_refok init_memory_m
 	if (cpu_has_pse)
 		page_size_mask |= 1 << PG_LEVEL_2M;
 
-	last_map_addr = kernel_physical_mapping_init(start, end,
-							 page_size_mask);
+	/* head if not big page aligment ?*/
+	start_pfn = start >> PAGE_SHIFT;
+	end_pfn = ((start + (PMD_SIZE - 1)) >> PMD_SHIFT)
+			<< (PMD_SHIFT - PAGE_SHIFT);
+	if (start_pfn < end_pfn)
+		last_map_addr = kernel_physical_mapping_init(
+					start_pfn<<PAGE_SHIFT,
+					end_pfn<<PAGE_SHIFT, 0);
+
+	/* big page (2M) range*/
+	start_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT)
+			 << (PMD_SHIFT - PAGE_SHIFT);
+	end_pfn = ((start + (PUD_SIZE - 1))>>PUD_SHIFT)
+			 << (PUD_SHIFT - PAGE_SHIFT);
+	if (end_pfn > ((end>>PUD_SHIFT)<<(PUD_SHIFT - PAGE_SHIFT)))
+		end_pfn = ((end>>PUD_SHIFT)<<(PUD_SHIFT - PAGE_SHIFT));
+	if (start_pfn < end_pfn)
+		last_map_addr = kernel_physical_mapping_init(
+					     start_pfn<<PAGE_SHIFT,
+					     end_pfn<<PAGE_SHIFT,
+					     page_size_mask & (1<<PG_LEVEL_2M));
+
+	/* big page (1G) range */
+	start_pfn = end_pfn;
+	end_pfn = (end>>PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT);
+	if (start_pfn < end_pfn)
+		last_map_addr = kernel_physical_mapping_init(
+					     start_pfn<<PAGE_SHIFT,
+					     end_pfn<<PAGE_SHIFT,
+					     page_size_mask & ((1<<PG_LEVEL_2M)
+							 | (1<<PG_LEVEL_1G)));
+
+	/* tail is not big page (1G) alignment */
+	start_pfn = end_pfn;
+	end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
+	if (start_pfn < end_pfn)
+		last_map_addr = kernel_physical_mapping_init(
+					     start_pfn<<PAGE_SHIFT,
+					     end_pfn<<PAGE_SHIFT,
+					     page_size_mask & (1<<PG_LEVEL_2M));
+	/* tail is not big page (2M) alignment */
+	start_pfn = end_pfn;
+	end_pfn = end>>PAGE_SHIFT;
+	if (start_pfn < end_pfn)
+		last_map_addr = kernel_physical_mapping_init(
+					     start_pfn<<PAGE_SHIFT,
+					     end_pfn<<PAGE_SHIFT, 0);
 
 	if (!after_bootmem)
 		mmu_cr4_features = read_cr4();

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: introduce page_size_mask for 64bit
  2008-07-08  8:41 [PATCH] x86: introduce page_size_mask for 64bit Yinghai Lu
  2008-07-08  8:43 ` [PATCH] x86: not overmap than end in init_memory_mapping - 64bit Yinghai Lu
@ 2008-07-09  7:38 ` Ingo Molnar
  1 sibling, 0 replies; 84+ messages in thread
From: Ingo Molnar @ 2008-07-09  7:38 UTC (permalink / raw)
  To: Yinghai Lu; +Cc: Thomas Gleixner, H. Peter Anvin, LKML


* Yinghai Lu <yhlu.kernel@gmail.com> wrote:

> prepare for overmapped patch
> 
> also printout last_map_addr together with end

applied, thanks Yinghai.

	Ingo

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: not overmap than end in init_memory_mapping - 64bit
  2008-07-08  8:43 ` [PATCH] x86: not overmap than end in init_memory_mapping - 64bit Yinghai Lu
@ 2008-07-09  7:38   ` Ingo Molnar
  2008-07-09  8:34     ` Ingo Molnar
  2008-07-10  3:15   ` [PATCh] x86: overmapped fix when 4K pages on tail " Yinghai Lu
  1 sibling, 1 reply; 84+ messages in thread
From: Ingo Molnar @ 2008-07-09  7:38 UTC (permalink / raw)
  To: Yinghai Lu; +Cc: Thomas Gleixner, H. Peter Anvin, LKML


* Yinghai Lu <yhlu.kernel@gmail.com> wrote:

> handle head and tail that can not aligned to big pages.
> 
> with this patch, on system that support gbpages
> change
> last_map_addr: 1080000000 end: 1078000000
> to
> last_map_addr: 1078000000 end: 1078000000
> 
> Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

applied to tip/x86/core, thanks Yinghai.

	Ingo

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: not overmap than end in init_memory_mapping - 64bit
  2008-07-09  7:38   ` Ingo Molnar
@ 2008-07-09  8:34     ` Ingo Molnar
  2008-07-09  8:37       ` Yinghai Lu
  2008-07-09  8:45       ` Ingo Molnar
  0 siblings, 2 replies; 84+ messages in thread
From: Ingo Molnar @ 2008-07-09  8:34 UTC (permalink / raw)
  To: Yinghai Lu; +Cc: Thomas Gleixner, H. Peter Anvin, LKML


* Ingo Molnar <mingo@elte.hu> wrote:

> > handle head and tail that can not aligned to big pages.
> > 
> > with this patch, on system that support gbpages
> > change
> > last_map_addr: 1080000000 end: 1078000000
> > to
> > last_map_addr: 1078000000 end: 1078000000
> > 
> > Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
> 
> applied to tip/x86/core, thanks Yinghai.

found an early boot crash on a testbox:

[    0.000000] last_pfn = 0x3fff0 max_arch_pfn = 0x3ffffffff
[    0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
[    0.000000] init_memory_mapping
[    0.000000] kernel direct mapping tables up to 3fff0000 @ 8000-b000
[    0.000000] last_map_addr: 40000000 end: 3fff0000
[    0.000000] ACPI: RSDP 000F76F0, 0014 (r0 Nvidia)
PANIC: early exception 0e rip 10:ffffffff803d3b8a error 0 cr2 ffff88003fff3040
[    0.000000] Pid: 0, comm: swapper Not tainted 2.6.26-rc9-tip-00118-g95f03c7-dirty #16954
[    0.000000]
[    0.000000] Call Trace:
[    0.000000]  [<ffffffff80c05196>] early_idt_handler+0x56/0x6a
[    0.000000]  [<ffffffff803d3b8a>] ? acpi_tb_print_table_header+0xe/0xd5
[    0.000000]  [<ffffffff803d3bfc>] ? acpi_tb_print_table_header+0x80/0xd5
[    0.000000]  [<ffffffff803d42f4>] ? acpi_tb_scan_memory_for_rsdp+0xc8/0xd3

i bisected it down to:

|  a1007454854803f6fc63f0a881518cea87df6d9a is first bad commit
|  commit a1007454854803f6fc63f0a881518cea87df6d9a
|  Author: Yinghai Lu <yhlu.kernel@gmail.com>
|  Date:   Tue Jul 8 01:43:27 2008 -0700
|
|      x86: not overmap more than the end of RAM in init_memory_mapping - 64bit

with this config:

  http://redhat.com/~mingo/misc/config-Wed_Jul__9_09_43_06_CEST_2008.bad

crashlog:

 http://redhat.com/~mingo/misc/crashlog-Wed_Jul__9_09_43_06_CEST_2008.bad

i've pushed the failing tree out to tip/tmp.x86.Jul__9_09_43

reverting the commit solves the crash. The crash seems to be because 
ACPI is unable to access that memory range. (perhaps early_ioremap 
fails?)

	Ingo

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: not overmap than end in init_memory_mapping - 64bit
  2008-07-09  8:34     ` Ingo Molnar
@ 2008-07-09  8:37       ` Yinghai Lu
  2008-07-09  8:46         ` Ingo Molnar
  2008-07-09  8:45       ` Ingo Molnar
  1 sibling, 1 reply; 84+ messages in thread
From: Yinghai Lu @ 2008-07-09  8:37 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Thomas Gleixner, H. Peter Anvin, LKML

[-- Attachment #1: Type: text/plain, Size: 2227 bytes --]

On Wed, Jul 9, 2008 at 1:34 AM, Ingo Molnar <mingo@elte.hu> wrote:
>
> * Ingo Molnar <mingo@elte.hu> wrote:
>
>> > handle head and tail that can not aligned to big pages.
>> >
>> > with this patch, on system that support gbpages
>> > change
>> > last_map_addr: 1080000000 end: 1078000000
>> > to
>> > last_map_addr: 1078000000 end: 1078000000
>> >
>> > Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
>>
>> applied to tip/x86/core, thanks Yinghai.
>
> found an early boot crash on a testbox:
>
> [    0.000000] last_pfn = 0x3fff0 max_arch_pfn = 0x3ffffffff
> [    0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
> [    0.000000] init_memory_mapping
> [    0.000000] kernel direct mapping tables up to 3fff0000 @ 8000-b000
> [    0.000000] last_map_addr: 40000000 end: 3fff0000
> [    0.000000] ACPI: RSDP 000F76F0, 0014 (r0 Nvidia)
> PANIC: early exception 0e rip 10:ffffffff803d3b8a error 0 cr2 ffff88003fff3040
> [    0.000000] Pid: 0, comm: swapper Not tainted 2.6.26-rc9-tip-00118-g95f03c7-dirty #16954
> [    0.000000]
> [    0.000000] Call Trace:
> [    0.000000]  [<ffffffff80c05196>] early_idt_handler+0x56/0x6a
> [    0.000000]  [<ffffffff803d3b8a>] ? acpi_tb_print_table_header+0xe/0xd5
> [    0.000000]  [<ffffffff803d3bfc>] ? acpi_tb_print_table_header+0x80/0xd5
> [    0.000000]  [<ffffffff803d42f4>] ? acpi_tb_scan_memory_for_rsdp+0xc8/0xd3
>
> i bisected it down to:
>
> |  a1007454854803f6fc63f0a881518cea87df6d9a is first bad commit
> |  commit a1007454854803f6fc63f0a881518cea87df6d9a
> |  Author: Yinghai Lu <yhlu.kernel@gmail.com>
> |  Date:   Tue Jul 8 01:43:27 2008 -0700
> |
> |      x86: not overmap more than the end of RAM in init_memory_mapping - 64bit
>
> with this config:
>
>  http://redhat.com/~mingo/misc/config-Wed_Jul__9_09_43_06_CEST_2008.bad
>
> crashlog:
>
>  http://redhat.com/~mingo/misc/crashlog-Wed_Jul__9_09_43_06_CEST_2008.bad
>
> i've pushed the failing tree out to tip/tmp.x86.Jul__9_09_43
>
> reverting the commit solves the crash. The crash seems to be because
> ACPI is unable to access that memory range. (perhaps early_ioremap
> fails?)

system with less than 4g?

please test attached patch
[PATCH] x86: make max_pfn cover acpi table below 4g

YH

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: e820_end.patch --]
[-- Type: text/x-patch; name=e820_end.patch, Size: 3356 bytes --]

[PATCH] x86: make max_pfn cover acpi table below 4g

When a system has less than 4G of RAM installed and the ACPI tables sit
near the end of RAM, make max_pfn cover them too,
so the 64-bit kernel doesn't need to mess with the fixmap.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

---
 arch/x86/kernel/e820.c  |   18 ++++++++++++------
 arch/x86/kernel/setup.c |   13 +++----------
 include/asm-x86/e820.h  |    2 +-
 3 files changed, 16 insertions(+), 17 deletions(-)

Index: linux-2.6/arch/x86/kernel/e820.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/e820.c
+++ linux-2.6/arch/x86/kernel/e820.c
@@ -1056,12 +1056,20 @@ unsigned long __initdata end_user_pfn =
 /*
  * Find the highest page frame number we have available
  */
-unsigned long __init e820_end_of_ram(void)
+unsigned long __init e820_end(void)
 {
-	unsigned long last_pfn;
+	int i;
+	unsigned long last_pfn = 0;
 	unsigned long max_arch_pfn = MAX_ARCH_PFN;
 
-	last_pfn = find_max_pfn_with_active_regions();
+	for (i = 0; i < e820.nr_map; i++) {
+		struct e820entry *ei = &e820.map[i];
+		unsigned long end_pfn;
+
+		end_pfn = (ei->addr + ei->size) >> PAGE_SHIFT;
+		if (end_pfn > last_pfn)
+			last_pfn = end_pfn;
+	}
 
 	if (last_pfn > max_arch_pfn)
 		last_pfn = max_arch_pfn;
@@ -1192,9 +1200,7 @@ static int __init parse_memmap_opt(char
 		 * the real mem size before original memory map is
 		 * reset.
 		 */
-		e820_register_active_regions(0, 0, -1UL);
-		saved_max_pfn = e820_end_of_ram();
-		remove_all_active_ranges();
+		saved_max_pfn = e820_end();
 #endif
 		e820.nr_map = 0;
 		userdef = 1;
Index: linux-2.6/arch/x86/kernel/setup.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/setup.c
+++ linux-2.6/arch/x86/kernel/setup.c
@@ -714,22 +714,18 @@ void __init setup_arch(char **cmdline_p)
 	early_gart_iommu_check();
 #endif
 
-	e820_register_active_regions(0, 0, -1UL);
 	/*
 	 * partially used pages are not usable - thus
 	 * we are rounding upwards:
 	 */
-	max_pfn = e820_end_of_ram();
+	max_pfn = e820_end();
 
 	/* preallocate 4k for mptable mpc */
 	early_reserve_e820_mpc_new();
 	/* update e820 for memory not covered by WB MTRRs */
 	mtrr_bp_init();
-	if (mtrr_trim_uncached_memory(max_pfn)) {
-		remove_all_active_ranges();
-		e820_register_active_regions(0, 0, -1UL);
-		max_pfn = e820_end_of_ram();
-	}
+	if (mtrr_trim_uncached_memory(max_pfn))
+		max_pfn = e820_end();
 
 #ifdef CONFIG_X86_32
 	/* max_low_pfn get updated here */
@@ -772,9 +768,6 @@ void __init setup_arch(char **cmdline_p)
 	 */
 	acpi_boot_table_init();
 
-	/* Remove active ranges so rediscovery with NUMA-awareness happens */
-	remove_all_active_ranges();
-
 #ifdef CONFIG_ACPI_NUMA
 	/*
 	 * Parse SRAT to discover nodes.
Index: linux-2.6/include/asm-x86/e820.h
===================================================================
--- linux-2.6.orig/include/asm-x86/e820.h
+++ linux-2.6/include/asm-x86/e820.h
@@ -99,7 +99,7 @@ extern void free_early(u64 start, u64 en
 extern void early_res_to_bootmem(u64 start, u64 end);
 extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);
 
-extern unsigned long e820_end_of_ram(void);
+extern unsigned long e820_end(void);
 extern int e820_find_active_region(const struct e820entry *ei,
 				  unsigned long start_pfn,
 				  unsigned long last_pfn,

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: not overmap than end in init_memory_mapping - 64bit
  2008-07-09  8:34     ` Ingo Molnar
  2008-07-09  8:37       ` Yinghai Lu
@ 2008-07-09  8:45       ` Ingo Molnar
  1 sibling, 0 replies; 84+ messages in thread
From: Ingo Molnar @ 2008-07-09  8:45 UTC (permalink / raw)
  To: Yinghai Lu; +Cc: Thomas Gleixner, H. Peter Anvin, LKML


* Ingo Molnar <mingo@elte.hu> wrote:

> > applied to tip/x86/core, thanks Yinghai.
> 
> found an early boot crash on a testbox:

ah - this crash gets fixed by the "x86: make max_pfn cover acpi table 
below 4g" RFC fix you posted in the other thread.

So the overmapping change made the problem more prominent. (But the 
problem was pre-existing with certain memory layouts - as witnessed by 
the other thread.)

I've flipped around these two commits to help bisection.

	Ingo

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: not overmap than end in init_memory_mapping - 64bit
  2008-07-09  8:37       ` Yinghai Lu
@ 2008-07-09  8:46         ` Ingo Molnar
  2008-07-09  8:58           ` Yinghai Lu
  2008-07-09 10:01           ` Yinghai Lu
  0 siblings, 2 replies; 84+ messages in thread
From: Ingo Molnar @ 2008-07-09  8:46 UTC (permalink / raw)
  To: Yinghai Lu; +Cc: Thomas Gleixner, H. Peter Anvin, LKML


* Yinghai Lu <yhlu.kernel@gmail.com> wrote:

> > reverting the commit solves the crash. The crash seems to be because 
> > ACPI is unable to access that memory range. (perhaps early_ioremap 
> > fails?)
> 
> system with less than 4g?

yeah.

> please test attached patch
> [PATCH] x86: make max_pfn cover acpi table below 4g

that fixed it, thanks.

	Ingo

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: not overmap than end in init_memory_mapping - 64bit
  2008-07-09  8:46         ` Ingo Molnar
@ 2008-07-09  8:58           ` Yinghai Lu
  2008-07-09 10:01           ` Yinghai Lu
  1 sibling, 0 replies; 84+ messages in thread
From: Yinghai Lu @ 2008-07-09  8:58 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Thomas Gleixner, H. Peter Anvin, LKML

On Wed, Jul 9, 2008 at 1:46 AM, Ingo Molnar <mingo@elte.hu> wrote:
>
> * Yinghai Lu <yhlu.kernel@gmail.com> wrote:
>
>> > reverting the commit solves the crash. The crash seems to be because
>> > ACPI is unable to access that memory range. (perhaps early_ioremap
>> > fails?)
>>
>> system with less than 4g?
>
> yeah.
>
>> please test attached patch
>> [PATCH] x86: make max_pfn cover acpi table below 4g
>
> that fixed it, thanks.

[    0.000000] kernel direct mapping tables up to 3fff0000 @ 8000-b000
[    0.000000] last_map_addr: 40000000 end: 3fff0000
[    0.000000] ACPI: RSDP 000F76F0, 0014 (r0 Nvidia)

last_map_addr is supposed to be 3fff0000.

So it means that before init_memory_mapping runs, the initial page table already
covers up to 0x40000000 with 2M big pages,
and init_memory_mapping cannot do anything about what is already mapped.

It seems we need to make early_ioremap smarter...

YH

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: not overmap than end in init_memory_mapping - 64bit
  2008-07-09  8:46         ` Ingo Molnar
  2008-07-09  8:58           ` Yinghai Lu
@ 2008-07-09 10:01           ` Yinghai Lu
  2008-07-09 10:30             ` Ingo Molnar
  1 sibling, 1 reply; 84+ messages in thread
From: Yinghai Lu @ 2008-07-09 10:01 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Thomas Gleixner, H. Peter Anvin, LKML

[-- Attachment #1: Type: text/plain, Size: 470 bytes --]

On Wed, Jul 9, 2008 at 1:46 AM, Ingo Molnar <mingo@elte.hu> wrote:
>
> * Yinghai Lu <yhlu.kernel@gmail.com> wrote:
>
>> > reverting the commit solves the crash. The crash seems to be because
>> > ACPI is unable to access that memory range. (perhaps early_ioremap
>> > fails?)
>>
>> system with less than 4g?
>
> yeah.
>
>> please test attached patch
>> [PATCH] x86: make max_pfn cover acpi table below 4g
>
> that fixed it, thanks.

please check fix for 32 bit too.

YH

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: e820_end_32.patch --]
[-- Type: text/x-patch; name=e820_end_32.patch, Size: 746 bytes --]

[PATCH] x86: make e820_end return max ram type only for 32 bit

to avoid warning from find_low_pfn_range for high pages size etc

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

---
 arch/x86/kernel/e820.c |    5 +++++
 1 file changed, 5 insertions(+)

Index: linux-2.6/arch/x86/kernel/e820.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/e820.c
+++ linux-2.6/arch/x86/kernel/e820.c
@@ -1066,6 +1066,11 @@ unsigned long __init e820_end(void)
 		struct e820entry *ei = &e820.map[i];
 		unsigned long end_pfn;
 
+#ifdef CONFIG_X86_32
+		if (ei->type != E820_RAM)
+			continue;
+#endif
+
 		end_pfn = (ei->addr + ei->size) >> PAGE_SHIFT;
 		if (end_pfn > last_pfn)
 			last_pfn = end_pfn;

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: not overmap than end in init_memory_mapping - 64bit
  2008-07-09 10:01           ` Yinghai Lu
@ 2008-07-09 10:30             ` Ingo Molnar
  0 siblings, 0 replies; 84+ messages in thread
From: Ingo Molnar @ 2008-07-09 10:30 UTC (permalink / raw)
  To: Yinghai Lu; +Cc: Thomas Gleixner, H. Peter Anvin, LKML


* Yinghai Lu <yhlu.kernel@gmail.com> wrote:

> >> please test attached patch
> >> [PATCH] x86: make max_pfn cover acpi table below 4g
> >
> > that fixed it, thanks.
> 
> please check fix for 32 bit too.
> 
> YH

> [PATCH] x86: make e820_end return max ram type only for 32 bit
> 
> to avoid warning from find_low_pfn_range for high pages size etc

applied to tip/x86/core - thanks Yinghai.

	Ingo

^ permalink raw reply	[flat|nested] 84+ messages in thread

* [PATCh] x86: overmapped fix when 4K pages on tail - 64bit
  2008-07-08  8:43 ` [PATCH] x86: not overmap than end in init_memory_mapping - 64bit Yinghai Lu
  2008-07-09  7:38   ` Ingo Molnar
@ 2008-07-10  3:15   ` Yinghai Lu
  2008-07-10  3:16     ` [PATCH] x86: merge __acpi_map_table Yinghai Lu
                       ` (2 more replies)
  1 sibling, 3 replies; 84+ messages in thread
From: Yinghai Lu @ 2008-07-10  3:15 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Suresh Siddha; +Cc: LKML



Fix phys_pmd_init to make sure it does not return a value bigger than end.

Also print out the range split: 1G/2M/4K.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

---
 arch/x86/mm/init_64.c |  106 +++++++++++++++++++++++++++++++++-----------------
 1 file changed, 72 insertions(+), 34 deletions(-)

Index: linux-2.6/arch/x86/mm/init_64.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/init_64.c
+++ linux-2.6/arch/x86/mm/init_64.c
@@ -302,11 +302,13 @@ static __meminit void unmap_low_page(voi
 	early_iounmap(adr, PAGE_SIZE);
 }
 
-static void __meminit
+static unsigned long __meminit
 phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end)
 {
 	unsigned pages = 0;
+	unsigned long last_map_addr = end;
 	int i;
+
 	pte_t *pte = pte_page + pte_index(addr);
 
 	for(i = pte_index(addr); i < PTRS_PER_PTE; i++, addr += PAGE_SIZE, pte++) {
@@ -326,17 +328,20 @@ phys_pte_init(pte_t *pte_page, unsigned
 			printk("   pte=%p addr=%lx pte=%016lx\n",
 			       pte, addr, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL).pte);
 		set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL));
+		last_map_addr = (addr & PAGE_MASK) + PAGE_SIZE;
 		pages++;
 	}
 	update_page_count(PG_LEVEL_4K, pages);
+
+	return last_map_addr;
 }
 
-static void __meminit
+static unsigned long __meminit
 phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end)
 {
 	pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd);
 
-	phys_pte_init(pte, address, end);
+	return phys_pte_init(pte, address, end);
 }
 
 static unsigned long __meminit
@@ -344,6 +349,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned
 			 unsigned long page_size_mask)
 {
 	unsigned long pages = 0;
+	unsigned long last_map_addr = end;
 
 	int i = pmd_index(address);
 
@@ -362,7 +368,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned
 
 		if (pmd_val(*pmd)) {
 			if (!pmd_large(*pmd))
-				phys_pte_update(pmd, address, end);
+				last_map_addr = phys_pte_update(pmd, address,
+								 end);
 			continue;
 		}
 
@@ -370,17 +377,18 @@ phys_pmd_init(pmd_t *pmd_page, unsigned
 			pages++;
 			set_pte((pte_t *)pmd,
 				pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
+			last_map_addr = (address & PMD_MASK) + PMD_SIZE;
 			continue;
 		}
 
 		pte = alloc_low_page(&pte_phys);
-		phys_pte_init(pte, address, end);
+		last_map_addr = phys_pte_init(pte, address, end);
 		unmap_low_page(pte);
 
 		pmd_populate_kernel(&init_mm, pmd, __va(pte_phys));
 	}
 	update_page_count(PG_LEVEL_2M, pages);
-	return address;
+	return last_map_addr;
 }
 
 static unsigned long __meminit
@@ -673,6 +681,32 @@ static unsigned long __init kernel_physi
 
 	return last_map_addr;
 }
+
+struct map_range {
+	unsigned long start;
+	unsigned long end;
+	unsigned page_size_mask;
+};
+
+#define NR_RANGE_MR 5
+
+static int save_mr(struct map_range *mr, int nr_range,
+		   unsigned long start_pfn, unsigned long end_pfn,
+		   unsigned long page_size_mask)
+{
+
+	if (start_pfn < end_pfn) {
+		if (nr_range >= NR_RANGE_MR)
+			panic("run out of range for init_memory_mapping\n");
+		mr[nr_range].start = start_pfn<<PAGE_SHIFT;
+		mr[nr_range].end   = end_pfn<<PAGE_SHIFT;
+		mr[nr_range].page_size_mask = page_size_mask;
+		nr_range++;
+	}
+
+	return nr_range;
+}
+
 /*
  * Setup the direct mapping of the physical memory at PAGE_OFFSET.
  * This runs before bootmem is initialized and gets pages directly from
@@ -681,10 +715,13 @@ static unsigned long __init kernel_physi
 unsigned long __init_refok init_memory_mapping(unsigned long start,
 					       unsigned long end)
 {
-	unsigned long last_map_addr = end;
+	unsigned long last_map_addr = 0;
 	unsigned long page_size_mask = 0;
 	unsigned long start_pfn, end_pfn;
 
+	struct map_range mr[NR_RANGE_MR];
+	int nr_range, i;
+
 	printk(KERN_INFO "init_memory_mapping\n");
 
 	/*
@@ -694,24 +731,22 @@ unsigned long __init_refok init_memory_m
 	 * memory mapped. Unfortunately this is done currently before the
 	 * nodes are discovered.
 	 */
-	if (!after_bootmem) {
+	if (!after_bootmem)
 		init_gbpages();
-		find_early_table_space(end);
-	}
 
 	if (direct_gbpages)
 		page_size_mask |= 1 << PG_LEVEL_1G;
 	if (cpu_has_pse)
 		page_size_mask |= 1 << PG_LEVEL_2M;
 
-	/* head if not big page aligment ?*/
+	memset(mr, 0, sizeof(mr));
+	nr_range = 0;
+
+	/* head if not big page alignment ?*/
 	start_pfn = start >> PAGE_SHIFT;
 	end_pfn = ((start + (PMD_SIZE - 1)) >> PMD_SHIFT)
 			<< (PMD_SHIFT - PAGE_SHIFT);
-	if (start_pfn < end_pfn)
-		last_map_addr = kernel_physical_mapping_init(
-					start_pfn<<PAGE_SHIFT,
-					end_pfn<<PAGE_SHIFT, 0);
+	nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
 
 	/* big page (2M) range*/
 	start_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT)
@@ -720,37 +755,40 @@ unsigned long __init_refok init_memory_m
 			 << (PUD_SHIFT - PAGE_SHIFT);
 	if (end_pfn > ((end>>PUD_SHIFT)<<(PUD_SHIFT - PAGE_SHIFT)))
 		end_pfn = ((end>>PUD_SHIFT)<<(PUD_SHIFT - PAGE_SHIFT));
-	if (start_pfn < end_pfn)
-		last_map_addr = kernel_physical_mapping_init(
-					     start_pfn<<PAGE_SHIFT,
-					     end_pfn<<PAGE_SHIFT,
-					     page_size_mask & (1<<PG_LEVEL_2M));
+	nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
+			page_size_mask & (1<<PG_LEVEL_2M));
 
 	/* big page (1G) range */
 	start_pfn = end_pfn;
 	end_pfn = (end>>PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT);
-	if (start_pfn < end_pfn)
-		last_map_addr = kernel_physical_mapping_init(
-					     start_pfn<<PAGE_SHIFT,
-					     end_pfn<<PAGE_SHIFT,
-					     page_size_mask & ((1<<PG_LEVEL_2M)
-							 | (1<<PG_LEVEL_1G)));
+	nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
+				page_size_mask &
+				 ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G)));
 
 	/* tail is not big page (1G) alignment */
 	start_pfn = end_pfn;
 	end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
-	if (start_pfn < end_pfn)
-		last_map_addr = kernel_physical_mapping_init(
-					     start_pfn<<PAGE_SHIFT,
-					     end_pfn<<PAGE_SHIFT,
-					     page_size_mask & (1<<PG_LEVEL_2M));
+	nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
+			page_size_mask & (1<<PG_LEVEL_2M));
+
 	/* tail is not big page (2M) alignment */
 	start_pfn = end_pfn;
 	end_pfn = end>>PAGE_SHIFT;
-	if (start_pfn < end_pfn)
+	nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
+
+	for (i = 0; i < nr_range; i++)
+		printk(KERN_DEBUG " %010lx - %010lx page %s\n",
+				mr[i].start, mr[i].end,
+			(mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":(
+			 (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k"));
+
+	if (!after_bootmem)
+		find_early_table_space(end);
+
+	for (i = 0; i < nr_range; i++)
 		last_map_addr = kernel_physical_mapping_init(
-					     start_pfn<<PAGE_SHIFT,
-					     end_pfn<<PAGE_SHIFT, 0);
+					mr[i].start, mr[i].end,
+					mr[i].page_size_mask);
 
 	if (!after_bootmem)
 		mmu_cr4_features = read_cr4();

^ permalink raw reply	[flat|nested] 84+ messages in thread

* [PATCH] x86: merge __acpi_map_table
  2008-07-10  3:15   ` [PATCh] x86: overmapped fix when 4K pages on tail " Yinghai Lu
@ 2008-07-10  3:16     ` Yinghai Lu
  2008-07-10  3:17       ` [PATCH] x86: make e820_end return end_of_ram again for 64bit Yinghai Lu
  2008-07-10  6:54       ` [PATCH] x86: merge __acpi_map_table Ingo Molnar
  2008-07-10  6:53     ` [PATCh] x86: overmapped fix when 4K pages on tail - 64bit Ingo Molnar
  2008-07-10 14:16     ` Arjan van de Ven
  2 siblings, 2 replies; 84+ messages in thread
From: Yinghai Lu @ 2008-07-10  3:16 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Suresh Siddha; +Cc: LKML


and let 64bit fallback to use fixmap too

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

---
 arch/x86/kernel/acpi/boot.c |   23 ++++++-----------------
 include/asm-x86/fixmap_64.h |    5 +++++
 2 files changed, 11 insertions(+), 17 deletions(-)

Index: linux-2.6/arch/x86/kernel/acpi/boot.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/acpi/boot.c
+++ linux-2.6/arch/x86/kernel/acpi/boot.c
@@ -109,21 +109,6 @@ static u64 acpi_lapic_addr __initdata =
  */
 enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC;
 
-#ifdef	CONFIG_X86_64
-
-/* rely on all ACPI tables being in the direct mapping */
-char *__init __acpi_map_table(unsigned long phys_addr, unsigned long size)
-{
-	if (!phys_addr || !size)
-		return NULL;
-
-	if (phys_addr+size <= (max_pfn_mapped << PAGE_SHIFT) + PAGE_SIZE)
-		return __va(phys_addr);
-
-	return NULL;
-}
-
-#else
 
 /*
  * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END,
@@ -142,11 +127,15 @@ char *__init __acpi_map_table(unsigned l
 	unsigned long base, offset, mapped_size;
 	int idx;
 
-	if (phys + size < 8 * 1024 * 1024)
+	if (!phys || !size)
+		return NULL;
+
+	if (phys+size <= (max_pfn_mapped << PAGE_SHIFT))
 		return __va(phys);
 
 	offset = phys & (PAGE_SIZE - 1);
 	mapped_size = PAGE_SIZE - offset;
+	clear_fixmap(FIX_ACPI_END);
 	set_fixmap(FIX_ACPI_END, phys);
 	base = fix_to_virt(FIX_ACPI_END);
 
@@ -158,13 +147,13 @@ char *__init __acpi_map_table(unsigned l
 		if (--idx < FIX_ACPI_BEGIN)
 			return NULL;	/* cannot handle this */
 		phys += PAGE_SIZE;
+		clear_fixmap(idx);
 		set_fixmap(idx, phys);
 		mapped_size += PAGE_SIZE;
 	}
 
 	return ((unsigned char *)base + offset);
 }
-#endif
 
 #ifdef CONFIG_PCI_MMCONFIG
 /* The physical address of the MMCONFIG aperture.  Set from ACPI tables. */
Index: linux-2.6/include/asm-x86/fixmap_64.h
===================================================================
--- linux-2.6.orig/include/asm-x86/fixmap_64.h
+++ linux-2.6/include/asm-x86/fixmap_64.h
@@ -12,6 +12,7 @@
 #define _ASM_FIXMAP_64_H
 
 #include <linux/kernel.h>
+#include <asm/acpi.h>
 #include <asm/apicdef.h>
 #include <asm/page.h>
 #include <asm/vsyscall.h>
@@ -49,6 +50,10 @@ enum fixed_addresses {
 #ifdef CONFIG_PARAVIRT
 	FIX_PARAVIRT_BOOTMAP,
 #endif
+#ifdef CONFIG_ACPI
+	FIX_ACPI_BEGIN,
+	FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
+#endif
 #ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
 	FIX_OHCI1394_BASE,
 #endif

^ permalink raw reply	[flat|nested] 84+ messages in thread

* [PATCH] x86: make e820_end return end_of_ram again for 64bit
  2008-07-10  3:16     ` [PATCH] x86: merge __acpi_map_table Yinghai Lu
@ 2008-07-10  3:17       ` Yinghai Lu
  2008-07-10  7:00         ` Ingo Molnar
                           ` (3 more replies)
  2008-07-10  6:54       ` [PATCH] x86: merge __acpi_map_table Ingo Molnar
  1 sibling, 4 replies; 84+ messages in thread
From: Yinghai Lu @ 2008-07-10  3:17 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, LKML; +Cc: Suresh Siddha


even for a 64bit system with less than 4G, we can use fixmap to handle
ACPI tables that sit near the end of RAM

change e820_end to e820_end_of_ram again?
or e820_ram_pfn?

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

---
 arch/x86/kernel/e820.c |    2 --
 1 file changed, 2 deletions(-)

Index: linux-2.6/arch/x86/kernel/e820.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/e820.c
+++ linux-2.6/arch/x86/kernel/e820.c
@@ -1066,10 +1066,8 @@ unsigned long __init e820_end(void)
 		struct e820entry *ei = &e820.map[i];
 		unsigned long end_pfn;
 
-#ifdef CONFIG_X86_32
 		if (ei->type != E820_RAM)
 			continue;
-#endif
 
 		end_pfn = (ei->addr + ei->size) >> PAGE_SHIFT;
 		if (end_pfn > last_pfn)

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCh] x86: overmapped fix when 4K pages on tail - 64bit
  2008-07-10  3:15   ` [PATCh] x86: overmapped fix when 4K pages on tail " Yinghai Lu
  2008-07-10  3:16     ` [PATCH] x86: merge __acpi_map_table Yinghai Lu
@ 2008-07-10  6:53     ` Ingo Molnar
  2008-07-10  6:57       ` Yinghai Lu
  2008-07-10 14:16     ` Arjan van de Ven
  2 siblings, 1 reply; 84+ messages in thread
From: Ingo Molnar @ 2008-07-10  6:53 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Thomas Gleixner, H. Peter Anvin, Suresh Siddha, LKML,
	Jeremy Fitzhardinge


* Yinghai Lu <yhlu.kernel@gmail.com> wrote:

> fix phys_pmd_init to make sure not to return big value than end.
> 
> also print out range split:1G/2M/4K

applied to tip/x86/core, thanks Yinghai. Lets hope it all goes well, 
this is a rather scary change - but it's also fairly clean.

perhaps add a comment to the limit of 5 of mapping ranges - to point out 
that the number of mapping ranges depends on our programming, not on any 
external factor. I.e. if anyone adds a new mapping range to the kernel 
for any purpose, it must be extended - but otherwise it cannot run out 
due to new hardware.

	Ingo

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: merge __acpi_map_table
  2008-07-10  3:16     ` [PATCH] x86: merge __acpi_map_table Yinghai Lu
  2008-07-10  3:17       ` [PATCH] x86: make e820_end return end_of_ram again for 64bit Yinghai Lu
@ 2008-07-10  6:54       ` Ingo Molnar
  1 sibling, 0 replies; 84+ messages in thread
From: Ingo Molnar @ 2008-07-10  6:54 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Thomas Gleixner, H. Peter Anvin, Suresh Siddha, LKML,
	Jeremy Fitzhardinge


* Yinghai Lu <yhlu.kernel@gmail.com> wrote:

> and let 64bit fallback to use fixmap too

applied to tip/x86/core, thanks Yinghai.

> -#ifdef	CONFIG_X86_64
> -
> -/* rely on all ACPI tables being in the direct mapping */

nice cleanup.

	Ingo

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCh] x86: overmapped fix when 4K pages on tail - 64bit
  2008-07-10  6:53     ` [PATCh] x86: overmapped fix when 4K pages on tail - 64bit Ingo Molnar
@ 2008-07-10  6:57       ` Yinghai Lu
  2008-07-10  7:20         ` Ingo Molnar
  0 siblings, 1 reply; 84+ messages in thread
From: Yinghai Lu @ 2008-07-10  6:57 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Thomas Gleixner, H. Peter Anvin, Suresh Siddha, LKML,
	Jeremy Fitzhardinge

On Wed, Jul 9, 2008 at 11:53 PM, Ingo Molnar <mingo@elte.hu> wrote:
>
> * Yinghai Lu <yhlu.kernel@gmail.com> wrote:
>
>> fix phys_pmd_init to make sure not to return big value than end.
>>
>> also print out range split:1G/2M/4K
>
> applied to tip/x86/core, thanks Yinghai. Lets hope it all goes well,
> this is a rather scary change - but it's also fairly clean.
>
> perhaps add a comment to the limit of 5 of mapping ranges - to point out
> that the number of mapping ranges depends on our programming, not on any
> external factor. I.e. if anyone adds a new mapping range to the kernel
> for any purpose, it must be extended - but otherwise it cannot run out
> due to new hardware.

4k, 2M, 1G, 2M, 4k

some day will get 512g page?

YH

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: make e820_end return end_of_ram again for 64bit
  2008-07-10  3:17       ` [PATCH] x86: make e820_end return end_of_ram again for 64bit Yinghai Lu
@ 2008-07-10  7:00         ` Ingo Molnar
  2008-07-10 11:17         ` [PATCH] x86: e820 remove the range instead of update it to reserved Yinghai Lu
                           ` (2 subsequent siblings)
  3 siblings, 0 replies; 84+ messages in thread
From: Ingo Molnar @ 2008-07-10  7:00 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Thomas Gleixner, H. Peter Anvin, LKML, Suresh Siddha,
	Jeremy Fitzhardinge


* Yinghai Lu <yhlu.kernel@gmail.com> wrote:

> even for 64bit system with less 4G, we can use fixmap to handle acpi 
> sit near end of ram

ok - applied to tip/x86/core - lets see whether Suresh's system now 
works fine with this change.

> change e820_end to e820_end_of_ram again?
> or e820_ram_pfn?

i'd suggest e820_end_of_ram_pfn().

we now have symmetric behavior on 32-bit and 64-bit, e820_end() returns 
the true end of RAM. (64-bit used to return the last PFN mentioned in 
the e820 map - i.e. ACPI tables and other end of RAM items could have 
been included)

	Ingo

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCh] x86: overmapped fix when 4K pages on tail - 64bit
  2008-07-10  6:57       ` Yinghai Lu
@ 2008-07-10  7:20         ` Ingo Molnar
  2008-07-10  7:32           ` Yinghai Lu
  0 siblings, 1 reply; 84+ messages in thread
From: Ingo Molnar @ 2008-07-10  7:20 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Thomas Gleixner, H. Peter Anvin, Suresh Siddha, LKML,
	Jeremy Fitzhardinge, Arjan van de Ven


* Yinghai Lu <yhlu.kernel@gmail.com> wrote:

> > that the number of mapping ranges depends on our programming, not on 
> > any external factor. I.e. if anyone adds a new mapping range to the 
> > kernel for any purpose, it must be extended - but otherwise it 
> > cannot run out due to new hardware.
> 
> 4k, 2M, 1G, 2M, 4k
> 
> some day will get 512g page?

i'd not be surprised to see that in ~10 years. Then we'll have to extend 
the array to 7 entries ;-)

btw., i have a weird system:

[    0.000000] BIOS-provided physical RAM map:
[    0.000000]  BIOS-e820: 0000000000000000 - 000000000009fc00 (usable)
[    0.000000]  BIOS-e820: 000000000009fc00 - 00000000000a0000 (reserved)
[    0.000000]  BIOS-e820: 00000000000e0000 - 0000000000100000 (reserved)
[    0.000000]  BIOS-e820: 0000000000100000 - 000000003ed93000 (usable)
[    0.000000]  BIOS-e820: 000000003ed93000 - 000000003ee4d000 (ACPI NVS)
[    0.000000]  BIOS-e820: 000000003ee4d000 - 000000003fea2000 (usable)
[    0.000000]  BIOS-e820: 000000003fea2000 - 000000003fee9000 (ACPI NVS)
[    0.000000]  BIOS-e820: 000000003fee9000 - 000000003feed000 (usable)
[    0.000000]  BIOS-e820: 000000003feed000 - 000000003feff000 (ACPI data)
[    0.000000]  BIOS-e820: 000000003feff000 - 000000003ff00000 (usable)

look at the RAM splitup:

  640K + BIOS-hole + ~1GB + acpi + 17MB + acpi + 16K + acpi + 4K

and the end of it is not 1024 MB but 1023 MB.

so the _best_ mapping strategy would probably be to do 2MB granular 
mapping up to 1GB, i.e. to 'overmap' into the end of RAM. But we also 
have to make sure that we have no PCI resources or weird chipset 
resources in the final 1MB that could hurt us with PAT, aliasing-wise. 

Since i'm not sure we can really ensure sanity on that level, i guess 
your solution to precisely map everything without overmapping is our 
best choice. Thus sane hw with such end of RAM mappings:

 BIOS-e820: 0000000100000000 - 0000000120000000 (usable)

and another one with:

 BIOS-e820: 0000000100000000 - 0000000830000000 (usable)

... would be slightly faster (because it would use 2MB TLBs at the end 
of kernel RAM, instead of broken-up 4K TLBs)

perhaps we could also have a config and boot option that would sanitize 
the e820 map to just ignore all non-2MB granular RAM. Losing 1-2MB of 
RAM is not an issue on a 32GB system.

	Ingo

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCh] x86: overmapped fix when 4K pages on tail - 64bit
  2008-07-10  7:20         ` Ingo Molnar
@ 2008-07-10  7:32           ` Yinghai Lu
  0 siblings, 0 replies; 84+ messages in thread
From: Yinghai Lu @ 2008-07-10  7:32 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Thomas Gleixner, H. Peter Anvin, Suresh Siddha, LKML,
	Jeremy Fitzhardinge, Arjan van de Ven

On Thu, Jul 10, 2008 at 12:20 AM, Ingo Molnar <mingo@elte.hu> wrote:
>
> * Yinghai Lu <yhlu.kernel@gmail.com> wrote:
>
>> > that the number of mapping ranges depends on our programming, not on
>> > any external factor. I.e. if anyone adds a new mapping range to the
>> > kernel for any purpose, it must be extended - but otherwise it
>> > cannot run out due to new hardware.
>>
>> 4k, 2M, 1G, 2M, 4k
>>
>> some day will get 512g page?
>
> i'd not be surprised to see that in ~10 years. Then we'll have to extend
> the array to 7 entries ;-)
>
> btw., i have a weird system:
>
> [    0.000000] BIOS-provided physical RAM map:
> [    0.000000]  BIOS-e820: 0000000000000000 - 000000000009fc00 (usable)
> [    0.000000]  BIOS-e820: 000000000009fc00 - 00000000000a0000 (reserved)
> [    0.000000]  BIOS-e820: 00000000000e0000 - 0000000000100000 (reserved)
> [    0.000000]  BIOS-e820: 0000000000100000 - 000000003ed93000 (usable)
> [    0.000000]  BIOS-e820: 000000003ed93000 - 000000003ee4d000 (ACPI NVS)
> [    0.000000]  BIOS-e820: 000000003ee4d000 - 000000003fea2000 (usable)
> [    0.000000]  BIOS-e820: 000000003fea2000 - 000000003fee9000 (ACPI NVS)
> [    0.000000]  BIOS-e820: 000000003fee9000 - 000000003feed000 (usable)
> [    0.000000]  BIOS-e820: 000000003feed000 - 000000003feff000 (ACPI data)
> [    0.000000]  BIOS-e820: 000000003feff000 - 000000003ff00000 (usable)
>
> look at the RAM splitup:
>
>  640K + BIOS-hole + ~1GB + acpi + 17MB + acpi + 16K + acpi + 4K
>
> and the end of it is not 1024 MB but 1023 MB.

what were those BIOS engineers doing?

>
> so the _best_ mapping strategy would probably be to do 2MB granular
> mapping up to 1GB, i.e. to 'overmap' into the end of RAM. But we also
> have to make sure that we have no PCI resources or weird chipset
> resources in the final 1MB that could hurt us with PAT, aliasing-wise.
>
> Since i'm not sure we can really ensure sanity on that level, i guess
> your solution to precisely map everything without overmapping is our
> best choice. Thus sane hw with such end of RAM mappings:
>
>  BIOS-e820: 0000000100000000 - 0000000120000000 (usable)
>
> and another one with:
>
>  BIOS-e820: 0000000100000000 - 0000000830000000 (usable)
>
> ... would be slightly faster (because it would use 2MB TLBs at the end
> of kernel RAM, instead of broken-up 4K TLBs)
>
> perhaps we could also have a config and boot option that would sanitize
> the e820 map to just ignore all non-2MB granular RAM. Losing 1-2MB of
> RAM is not an issue on a 32GB system.

change left over to E820_RESERVED,

YH

^ permalink raw reply	[flat|nested] 84+ messages in thread

* [PATCH] x86: e820 remove the range instead of update it to reserved
  2008-07-10  3:17       ` [PATCH] x86: make e820_end return end_of_ram again for 64bit Yinghai Lu
  2008-07-10  7:00         ` Ingo Molnar
@ 2008-07-10 11:17         ` Yinghai Lu
  2008-07-11  8:20           ` Ingo Molnar
  2008-07-11  3:36         ` [PATCH] x86: save slit Yinghai Lu
  2008-07-11  3:38         ` [PATCH] x86: introduce max_low_pfn_mapped for 64bit Yinghai Lu
  3 siblings, 1 reply; 84+ messages in thread
From: Yinghai Lu @ 2008-07-10 11:17 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin; +Cc: LKML, Bernhard Walle


also let mem= print out the modified e820 map too

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

---
 arch/x86/kernel/e820.c |   11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

Index: linux-2.6/arch/x86/kernel/e820.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/e820.c
+++ linux-2.6/arch/x86/kernel/e820.c
@@ -1165,6 +1165,8 @@ static void early_panic(char *msg)
 	panic(msg);
 }
 
+static int userdef __initdata;
+
 /* "mem=nopentium" disables the 4MB page tables. */
 static int __init parse_memopt(char *p)
 {
@@ -1180,17 +1182,15 @@ static int __init parse_memopt(char *p)
 	}
 #endif
 
+	userdef = 1;
 	mem_size = memparse(p, &p);
 	end_user_pfn = mem_size>>PAGE_SHIFT;
-	e820_update_range(mem_size, ULLONG_MAX - mem_size,
-		E820_RAM, E820_RESERVED);
+	e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
 
 	return 0;
 }
 early_param("mem", parse_memopt);
 
-static int userdef __initdata;
-
 static int __init parse_memmap_opt(char *p)
 {
 	char *oldp;
@@ -1230,8 +1230,7 @@ static int __init parse_memmap_opt(char
 		e820_add_region(start_at, mem_size, E820_RESERVED);
 	} else {
 		end_user_pfn = (mem_size >> PAGE_SHIFT);
-		e820_update_range(mem_size, ULLONG_MAX - mem_size,
-			E820_RAM, E820_RESERVED);
+		e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
 	}
 	return *p == '\0' ? 0 : -EINVAL;
 }

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCh] x86: overmapped fix when 4K pages on tail - 64bit
  2008-07-10  3:15   ` [PATCh] x86: overmapped fix when 4K pages on tail " Yinghai Lu
  2008-07-10  3:16     ` [PATCH] x86: merge __acpi_map_table Yinghai Lu
  2008-07-10  6:53     ` [PATCh] x86: overmapped fix when 4K pages on tail - 64bit Ingo Molnar
@ 2008-07-10 14:16     ` Arjan van de Ven
  2008-07-13 14:57       ` Andi Kleen
  2 siblings, 1 reply; 84+ messages in thread
From: Arjan van de Ven @ 2008-07-10 14:16 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Suresh Siddha, LKML

On Wed, 9 Jul 2008 20:15:02 -0700
Yinghai Lu <yhlu.kernel@gmail.com> wrote:

> 
> 
> fix phys_pmd_init to make sure not to return big value than end.
> 
> also print out range split:1G/2M/4K
> 
> Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>


Making an accurate mapping solves a lot of potentially nasty/tricky
corner cases, so I like the approach

Acked-by: Arjan van de Ven <arjan@linux.intel.com>

-- 
If you want to reach me at my work email, use arjan@linux.intel.com
For development, discussion and tips for power savings, 
visit http://www.lesswatts.org

^ permalink raw reply	[flat|nested] 84+ messages in thread

* [PATCH] x86: save slit
  2008-07-10  3:17       ` [PATCH] x86: make e820_end return end_of_ram again for 64bit Yinghai Lu
  2008-07-10  7:00         ` Ingo Molnar
  2008-07-10 11:17         ` [PATCH] x86: e820 remove the range instead of update it to reserved Yinghai Lu
@ 2008-07-11  3:36         ` Yinghai Lu
  2008-07-11  8:22           ` Ingo Molnar
  2008-07-11  3:38         ` [PATCH] x86: introduce max_low_pfn_mapped for 64bit Yinghai Lu
  3 siblings, 1 reply; 84+ messages in thread
From: Yinghai Lu @ 2008-07-11  3:36 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Suresh Siddha; +Cc: LKML


in case we are using fixmap to read it, and that fixmap could be cleared by others.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

---
 arch/x86/mm/srat_64.c |   14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

Index: linux-2.6/arch/x86/mm/srat_64.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/srat_64.c
+++ linux-2.6/arch/x86/mm/srat_64.c
@@ -100,7 +100,19 @@ static __init inline int srat_disabled(v
 /* Callback for SLIT parsing */
 void __init acpi_numa_slit_init(struct acpi_table_slit *slit)
 {
-	acpi_slit = slit;
+	unsigned length;
+	unsigned long phys;
+
+	length = slit->header.length;
+	phys = find_e820_area(0, max_pfn_mapped<<PAGE_SHIFT, length,
+		 PAGE_SIZE);
+
+	if (phys == -1L)
+		panic(" Can not save slit!\n");
+
+	acpi_slit = __va(phys);
+	memcpy(acpi_slit, slit, length);
+	reserve_early(phys, phys + length, "ACPI SLIT");
 }
 
 /* Callback for Proximity Domain -> LAPIC mapping */

^ permalink raw reply	[flat|nested] 84+ messages in thread

* [PATCH] x86: introduce max_low_pfn_mapped for 64bit
  2008-07-10  3:17       ` [PATCH] x86: make e820_end return end_of_ram again for 64bit Yinghai Lu
                           ` (2 preceding siblings ...)
  2008-07-11  3:36         ` [PATCH] x86: save slit Yinghai Lu
@ 2008-07-11  3:38         ` Yinghai Lu
  2008-07-11  8:26           ` Ingo Molnar
                             ` (4 more replies)
  3 siblings, 5 replies; 84+ messages in thread
From: Yinghai Lu @ 2008-07-11  3:38 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Suresh Siddha; +Cc: LKML


when more than 4g of memory is installed, don't map the big hole below 4g.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

---
 arch/x86/kernel/acpi/boot.c  |    2 +-
 arch/x86/kernel/cpu/amd_64.c |   10 +++++++---
 arch/x86/kernel/e820.c       |   23 ++++++++++++++++++++---
 arch/x86/kernel/efi.c        |    2 +-
 arch/x86/kernel/setup.c      |   22 ++++++++++++++++++----
 arch/x86/mm/init_32.c        |    1 +
 arch/x86/mm/init_64.c        |    1 +
 arch/x86/mm/pageattr.c       |   19 +++++++++++++++++--
 arch/x86/mm/pat.c            |    3 ++-
 arch/x86/pci/i386.c          |    4 +++-
 include/asm-x86/e820.h       |    3 ++-
 include/asm-x86/page.h       |    1 +
 12 files changed, 74 insertions(+), 17 deletions(-)

Index: linux-2.6/arch/x86/kernel/acpi/boot.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/acpi/boot.c
+++ linux-2.6/arch/x86/kernel/acpi/boot.c
@@ -130,7 +130,7 @@ char *__init __acpi_map_table(unsigned l
 	if (!phys || !size)
 		return NULL;
 
-	if (phys+size <= (max_pfn_mapped << PAGE_SHIFT))
+	if (phys+size <= (max_low_pfn_mapped << PAGE_SHIFT))
 		return __va(phys);
 
 	offset = phys & (PAGE_SIZE - 1);
Index: linux-2.6/arch/x86/kernel/cpu/amd_64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/amd_64.c
+++ linux-2.6/arch/x86/kernel/cpu/amd_64.c
@@ -199,10 +199,14 @@ static void __cpuinit init_amd(struct cp
 		 * Don't do it for gbpages because there seems very little
 		 * benefit in doing so.
 		 */
-		if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg) &&
-		    (tseg >> PMD_SHIFT) <
-			(max_pfn_mapped >> (PMD_SHIFT-PAGE_SHIFT)))
+		if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
+		    if ((tseg>>PMD_SHIFT) <
+				(max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) ||
+			((tseg>>PMD_SHIFT) <
+				(max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) &&
+			 (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT))))
 			set_memory_4k((unsigned long)__va(tseg), 1);
+		}
 	}
 }
 
Index: linux-2.6/arch/x86/kernel/e820.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/e820.c
+++ linux-2.6/arch/x86/kernel/e820.c
@@ -1056,7 +1056,7 @@ unsigned long __initdata end_user_pfn =
 /*
  * Find the highest page frame number we have available
  */
-unsigned long __init e820_end(void)
+static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
 {
 	int i;
 	unsigned long last_pfn = 0;
@@ -1064,12 +1064,21 @@ unsigned long __init e820_end(void)
 
 	for (i = 0; i < e820.nr_map; i++) {
 		struct e820entry *ei = &e820.map[i];
+		unsigned long start_pfn;
 		unsigned long end_pfn;
 
-		if (ei->type != E820_RAM)
+		if (ei->type != type)
 			continue;
 
+		start_pfn = ei->addr >> PAGE_SHIFT;
 		end_pfn = (ei->addr + ei->size) >> PAGE_SHIFT;
+
+		if (start_pfn >= limit_pfn)
+			continue;
+		if (end_pfn > limit_pfn) {
+			last_pfn = limit_pfn;
+			break;
+		}
 		if (end_pfn > last_pfn)
 			last_pfn = end_pfn;
 	}
@@ -1083,7 +1092,15 @@ unsigned long __init e820_end(void)
 			 last_pfn, max_arch_pfn);
 	return last_pfn;
 }
+unsigned long __init e820_end_of_ram_pfn(void)
+{
+	return e820_end_pfn(MAX_ARCH_PFN, E820_RAM);
+}
 
+unsigned long __init e820_end_of_low_ram_pfn(void)
+{
+	return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM);
+}
 /*
  * Finds an active region in the address range from start_pfn to last_pfn and
  * returns its range in ei_startpfn and ei_endpfn for the e820 entry.
@@ -1206,7 +1223,7 @@ static int __init parse_memmap_opt(char
 		 * the real mem size before original memory map is
 		 * reset.
 		 */
-		saved_max_pfn = e820_end();
+		saved_max_pfn = e820_end_of_ram_pfn();
 #endif
 		e820.nr_map = 0;
 		userdef = 1;
Index: linux-2.6/arch/x86/kernel/efi.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/efi.c
+++ linux-2.6/arch/x86/kernel/efi.c
@@ -473,7 +473,7 @@ void __init efi_enter_virtual_mode(void)
 		size = md->num_pages << EFI_PAGE_SHIFT;
 		end = md->phys_addr + size;
 
-		if (PFN_UP(end) <= max_pfn_mapped)
+		if (PFN_UP(end) <= max_low_pfn_mapped)
 			va = __va(md->phys_addr);
 		else
 			va = efi_ioremap(md->phys_addr, size);
Index: linux-2.6/arch/x86/kernel/setup.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/setup.c
+++ linux-2.6/arch/x86/kernel/setup.c
@@ -719,14 +719,14 @@ void __init setup_arch(char **cmdline_p)
 	 * partially used pages are not usable - thus
 	 * we are rounding upwards:
 	 */
-	max_pfn = e820_end();
+	max_pfn = e820_end_of_ram_pfn();
 
 	/* preallocate 4k for mptable mpc */
 	early_reserve_e820_mpc_new();
 	/* update e820 for memory not covered by WB MTRRs */
 	mtrr_bp_init();
 	if (mtrr_trim_uncached_memory(max_pfn))
-		max_pfn = e820_end();
+		max_pfn = e820_end_of_ram_pfn();
 
 #ifdef CONFIG_X86_32
 	/* max_low_pfn get updated here */
@@ -738,12 +738,26 @@ void __init setup_arch(char **cmdline_p)
 
 	/* How many end-of-memory variables you have, grandma! */
 	/* need this before calling reserve_initrd */
-	max_low_pfn = max_pfn;
+	if (max_pfn > (1UL<<(32 - PAGE_SHIFT)))
+		max_low_pfn = e820_end_of_low_ram_pfn();
+	else
+		max_low_pfn = max_pfn;
+
 	high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
 #endif
 
 	/* max_pfn_mapped is updated here */
-	max_pfn_mapped = init_memory_mapping(0, (max_low_pfn << PAGE_SHIFT));
+	max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
+	max_pfn_mapped = max_low_pfn_mapped;
+
+#ifdef CONFIG_X86_64
+	if (max_pfn > max_low_pfn) {
+		max_pfn_mapped = init_memory_mapping(1UL<<32,
+						     max_pfn<<PAGE_SHIFT);
+		/* can we preseve max_low_pfn ?*/
+		max_low_pfn = max_pfn;
+	}
+#endif
 
 	/*
 	 * NOTE: On x86-32, only from this point on, fixmaps are ready for use.
Index: linux-2.6/arch/x86/mm/init_32.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/init_32.c
+++ linux-2.6/arch/x86/mm/init_32.c
@@ -50,6 +50,7 @@
 
 unsigned int __VMALLOC_RESERVE = 128 << 20;
 
+unsigned long max_low_pfn_mapped;
 unsigned long max_pfn_mapped;
 
 DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
Index: linux-2.6/arch/x86/mm/init_64.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/init_64.c
+++ linux-2.6/arch/x86/mm/init_64.c
@@ -53,6 +53,7 @@
  * The direct mapping extends to max_pfn_mapped, so that we can directly access
  * apertures, ACPI and other tables without having to play with fixmaps.
  */
+unsigned long max_low_pfn_mapped;
 unsigned long max_pfn_mapped;
 
 static unsigned long dma_reserve __initdata;
Index: linux-2.6/arch/x86/mm/pageattr.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/pageattr.c
+++ linux-2.6/arch/x86/mm/pageattr.c
@@ -537,8 +537,14 @@ static int split_large_page(pte_t *kpte,
 		set_pte(&pbase[i], pfn_pte(pfn, ref_prot));
 
 	if (address >= (unsigned long)__va(0) &&
+		address < (unsigned long)__va(max_low_pfn_mapped << PAGE_SHIFT))
+		split_page_count(level);
+
+#ifdef CONFIG_X86_64
+	if (address >= (unsigned long)__va(1UL<<32) &&
 		address < (unsigned long)__va(max_pfn_mapped << PAGE_SHIFT))
 		split_page_count(level);
+#endif
 
 	/*
 	 * Install the new, split up pagetable. Important details here:
@@ -655,12 +661,21 @@ static int cpa_process_alias(struct cpa_
 	if (cpa->pfn > max_pfn_mapped)
 		return 0;
 
+#ifdef CONFIG_X86_64
+	if (cpa->pfn > max_low_pfn_mapped && cpa->pfn < (1UL<<(32-PAGE_SHIFT)))
+		return 0;
+#endif
 	/*
 	 * No need to redo, when the primary call touched the direct
 	 * mapping already:
 	 */
-	if (!within(cpa->vaddr, PAGE_OFFSET,
-		    PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))) {
+	if (!(within(cpa->vaddr, PAGE_OFFSET,
+		    PAGE_OFFSET + (max_low_pfn_mapped << PAGE_SHIFT))
+#ifdef CONFIG_X86_64
+		|| within(cpa->vaddr, PAGE_OFFSET + (1UL<<32),
+		    PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))
+#endif
+	)) {
 
 		alias_cpa = *cpa;
 		alias_cpa.vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
Index: linux-2.6/arch/x86/mm/pat.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/pat.c
+++ linux-2.6/arch/x86/mm/pat.c
@@ -449,7 +449,8 @@ int phys_mem_access_prot_allowed(struct
 	if (retval < 0)
 		return 0;
 
-	if (pfn <= max_pfn_mapped &&
+	if (((pfn <= max_low_pfn_mapped) ||
+	     (pfn >= (1UL<<(32 - PAGE_SHIFT)) && pfn <= max_pfn_mapped)) &&
 	    ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) {
 		free_memtype(offset, offset + size);
 		printk(KERN_INFO
Index: linux-2.6/arch/x86/pci/i386.c
===================================================================
--- linux-2.6.orig/arch/x86/pci/i386.c
+++ linux-2.6/arch/x86/pci/i386.c
@@ -334,7 +334,9 @@ int pci_mmap_page_range(struct pci_dev *
 		flags = new_flags;
 	}
 
-	if (vma->vm_pgoff <= max_pfn_mapped &&
+	if (((vma->vm_pgoff <= max_low_pfn_mapped) ||
+	     (vma->vm_pgoff >= (1UL<<(32 - PAGE_SHIFT)) &&
+	      vma->vm_pgoff <= max_pfn_mapped)) &&
 	    ioremap_change_attr((unsigned long)__va(addr), len, flags)) {
 		free_memtype(addr, addr + len);
 		return -EINVAL;
Index: linux-2.6/include/asm-x86/e820.h
===================================================================
--- linux-2.6.orig/include/asm-x86/e820.h
+++ linux-2.6/include/asm-x86/e820.h
@@ -99,7 +99,8 @@ extern void free_early(u64 start, u64 en
 extern void early_res_to_bootmem(u64 start, u64 end);
 extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);
 
-extern unsigned long e820_end(void);
+extern unsigned long e820_end_of_ram_pfn(void);
+extern unsigned long e820_end_of_low_ram_pfn(void);
 extern int e820_find_active_region(const struct e820entry *ei,
 				  unsigned long start_pfn,
 				  unsigned long last_pfn,
Index: linux-2.6/include/asm-x86/page.h
===================================================================
--- linux-2.6.orig/include/asm-x86/page.h
+++ linux-2.6/include/asm-x86/page.h
@@ -61,6 +61,7 @@ extern void map_devmem(unsigned long pfn
 extern void unmap_devmem(unsigned long pfn, unsigned long size,
 			 pgprot_t vma_prot);
 
+extern unsigned long max_low_pfn_mapped;
 extern unsigned long max_pfn_mapped;
 
 struct page;

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: e820 remove the range instead of update it to reserved
  2008-07-10 11:17         ` [PATCH] x86: e820 remove the range instead of update it to reserved Yinghai Lu
@ 2008-07-11  8:20           ` Ingo Molnar
  0 siblings, 0 replies; 84+ messages in thread
From: Ingo Molnar @ 2008-07-11  8:20 UTC (permalink / raw)
  To: Yinghai Lu; +Cc: Thomas Gleixner, H. Peter Anvin, LKML, Bernhard Walle


* Yinghai Lu <yhlu.kernel@gmail.com> wrote:

> also let mem= to print out modified e820 map too
> 
> Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

applied to tip/x86/core - thanks Yinghai.

	Ingo

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: save slit
  2008-07-11  3:36         ` [PATCH] x86: save slit Yinghai Lu
@ 2008-07-11  8:22           ` Ingo Molnar
  0 siblings, 0 replies; 84+ messages in thread
From: Ingo Molnar @ 2008-07-11  8:22 UTC (permalink / raw)
  To: Yinghai Lu; +Cc: Thomas Gleixner, H. Peter Anvin, Suresh Siddha, LKML


* Yinghai Lu <yhlu.kernel@gmail.com> wrote:

> in case we are using fixmap to read it, and that fixmap could be
> cleared by others.
> 
> Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

applied to tip/x86/core, thanks Yinghai.

	Ingo

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: introduce max_low_pfn_mapped for 64bit
  2008-07-11  3:38         ` [PATCH] x86: introduce max_low_pfn_mapped for 64bit Yinghai Lu
@ 2008-07-11  8:26           ` Ingo Molnar
  2008-07-11  8:39             ` Yinghai Lu
  2008-07-12  1:41           ` [PATCH] x86: let 32bit use apic_ops too Yinghai Lu
                             ` (3 subsequent siblings)
  4 siblings, 1 reply; 84+ messages in thread
From: Ingo Molnar @ 2008-07-11  8:26 UTC (permalink / raw)
  To: Yinghai Lu; +Cc: Thomas Gleixner, H. Peter Anvin, Suresh Siddha, LKML


* Yinghai Lu <yhlu.kernel@gmail.com> wrote:

> when 4g more memory installed, don't map big hole below 4g.

applied to tip/x86/core, thanks Yinghai.

This is a scarier change - no way to do it in smaller steps?

	Ingo

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: introduce max_low_pfn_mapped for 64bit
  2008-07-11  8:26           ` Ingo Molnar
@ 2008-07-11  8:39             ` Yinghai Lu
  2008-07-11  8:51               ` Ingo Molnar
  0 siblings, 1 reply; 84+ messages in thread
From: Yinghai Lu @ 2008-07-11  8:39 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Thomas Gleixner, H. Peter Anvin, Suresh Siddha, LKML

On Fri, Jul 11, 2008 at 1:26 AM, Ingo Molnar <mingo@elte.hu> wrote:
>
> * Yinghai Lu <yhlu.kernel@gmail.com> wrote:
>
>> when 4g more memory installed, don't map big hole below 4g.
>
> applied to tip/x86/core, thanks Yinghai.
>
> This is a scarier change - no way to do it in smaller steps?

1. e820_end ==> e820_end_of_ram_pfn :: totally safe...
2. others

YH

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: introduce max_low_pfn_mapped for 64bit
  2008-07-11  8:39             ` Yinghai Lu
@ 2008-07-11  8:51               ` Ingo Molnar
  0 siblings, 0 replies; 84+ messages in thread
From: Ingo Molnar @ 2008-07-11  8:51 UTC (permalink / raw)
  To: Yinghai Lu; +Cc: Thomas Gleixner, H. Peter Anvin, Suresh Siddha, LKML


* Yinghai Lu <yhlu.kernel@gmail.com> wrote:

> On Fri, Jul 11, 2008 at 1:26 AM, Ingo Molnar <mingo@elte.hu> wrote:
> >
> > * Yinghai Lu <yhlu.kernel@gmail.com> wrote:
> >
> >> when 4g more memory installed, don't map big hole below 4g.
> >
> > applied to tip/x86/core, thanks Yinghai.
> >
> > This is a scarier change - no way to do it in smaller steps?
> 
> 1. e820_end ==> e820_end_of_ram_pfn :: totally safe...
> 2. others

yeah. I'd also split out the introduction of max_low_pfn_mapped into a 
separate, safe patch as well. I.e. first patch just introduces it, 
second patch makes use of it.

the #ifdef 64-bit asymmetry looks a bit ugly too.

	Ingo

^ permalink raw reply	[flat|nested] 84+ messages in thread

* [PATCH] x86: let 32bit use apic_ops too
  2008-07-11  3:38         ` [PATCH] x86: introduce max_low_pfn_mapped for 64bit Yinghai Lu
  2008-07-11  8:26           ` Ingo Molnar
@ 2008-07-12  1:41           ` Yinghai Lu
  2008-07-12  1:43             ` [PATCH] x86: mach_apicdef.h need to include before smp.h Yinghai Lu
                               ` (3 more replies)
  2008-07-12 21:30           ` [PATCH] x86: max_low_pfn_mapped fix #1 Yinghai Lu
                             ` (2 subsequent siblings)
  4 siblings, 4 replies; 84+ messages in thread
From: Yinghai Lu @ 2008-07-12  1:41 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Suresh Siddha; +Cc: LKML


Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

---
 arch/x86/kernel/apic_32.c |   39 +++++++++++++++++++++++++++++++--------
 include/asm-x86/apic.h    |   13 ++-----------
 2 files changed, 33 insertions(+), 19 deletions(-)

Index: linux-2.6/arch/x86/kernel/apic_32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/apic_32.c
+++ linux-2.6/arch/x86/kernel/apic_32.c
@@ -145,19 +145,13 @@ static int modern_apic(void)
 	return lapic_get_version() >= 0x14;
 }
 
-void apic_icr_write(u32 low, u32 id)
-{
-	apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(id));
-	apic_write_around(APIC_ICR, low);
-}
-
-void apic_wait_icr_idle(void)
+void xapic_wait_icr_idle(void)
 {
 	while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
 		cpu_relax();
 }
 
-u32 safe_apic_wait_icr_idle(void)
+u32 safe_xapic_wait_icr_idle(void)
 {
 	u32 send_status;
 	int timeout;
@@ -173,6 +167,35 @@ u32 safe_apic_wait_icr_idle(void)
 	return send_status;
 }
 
+void xapic_icr_write(u32 low, u32 id)
+{
+	apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(id));
+	apic_write_around(APIC_ICR, low);
+}
+
+u64 xapic_icr_read(void)
+{
+	u32 icr1, icr2;
+
+	icr2 = apic_read(APIC_ICR2);
+	icr1 = apic_read(APIC_ICR);
+
+	return icr1 | ((u64)icr2 << 32);
+}
+
+static struct apic_ops xapic_ops = {
+	.read = native_apic_mem_read,
+	.write = native_apic_mem_write,
+	.write_atomic = native_apic_mem_write_atomic,
+	.icr_read = xapic_icr_read,
+	.icr_write = xapic_icr_write,
+	.wait_icr_idle = xapic_wait_icr_idle,
+	.safe_wait_icr_idle = safe_xapic_wait_icr_idle,
+};
+
+struct apic_ops __read_mostly *apic_ops = &xapic_ops;
+EXPORT_SYMBOL_GPL(apic_ops);
+
 /**
  * enable_NMI_through_LVT0 - enable NMI through local vector table 0
  */
Index: linux-2.6/include/asm-x86/apic.h
===================================================================
--- linux-2.6.orig/include/asm-x86/apic.h
+++ linux-2.6/include/asm-x86/apic.h
@@ -49,11 +49,6 @@ extern int disable_apic;
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #else
-#ifndef CONFIG_X86_64
-#define apic_write native_apic_mem_write
-#define apic_write_atomic native_apic_mem_write_atomic
-#define apic_read native_apic_mem_read
-#endif
 #define setup_boot_clock setup_boot_APIC_clock
 #define setup_secondary_clock setup_secondary_APIC_clock
 #endif
@@ -95,16 +90,13 @@ static inline u32 native_apic_msr_read(u
 	return low;
 }
 
-#ifdef CONFIG_X86_32
-extern void apic_wait_icr_idle(void);
-extern u32 safe_apic_wait_icr_idle(void);
-extern void apic_icr_write(u32 low, u32 id);
-#else
+#ifndef CONFIG_X86_32
 extern int x2apic, x2apic_preenabled;
 extern void check_x2apic(void);
 extern void enable_x2apic(void);
 extern void enable_IR_x2apic(void);
 extern void x2apic_icr_write(u32 low, u32 id);
+#endif
 
 struct apic_ops {
 	u32 (*read)(u32 reg);
@@ -125,7 +117,6 @@ extern struct apic_ops *apic_ops;
 #define apic_icr_write (apic_ops->icr_write)
 #define apic_wait_icr_idle (apic_ops->wait_icr_idle)
 #define safe_apic_wait_icr_idle (apic_ops->safe_wait_icr_idle)
-#endif
 
 extern int get_physical_broadcast(void);

^ permalink raw reply	[flat|nested] 84+ messages in thread

* [PATCH] x86: mach_apicdef.h need to include before smp.h
  2008-07-12  1:41           ` [PATCH] x86: let 32bit use apic_ops too Yinghai Lu
@ 2008-07-12  1:43             ` Yinghai Lu
  2008-07-12  1:44               ` [PATCH] x86: make read_apic_id return final apicid Yinghai Lu
  2008-07-13  1:08             ` [PATCH] x86: let 32bit use apic_ops too Suresh Siddha
                               ` (2 subsequent siblings)
  3 siblings, 1 reply; 84+ messages in thread
From: Yinghai Lu @ 2008-07-12  1:43 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Suresh Siddha; +Cc: LKML


smp.h has an include internally, so mach_apicdef.h needs to be included
first; when the generic arch files use them, they need to have a
different apicdef.h

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

---
 arch/x86/mach-generic/bigsmp.c |    5 ++---
 arch/x86/mach-generic/es7000.c |    3 +--
 arch/x86/mach-generic/numaq.c  |    4 ++--
 arch/x86/mach-generic/summit.c |    5 ++---
 4 files changed, 7 insertions(+), 10 deletions(-)

Index: linux-2.6/arch/x86/mach-generic/bigsmp.c
===================================================================
--- linux-2.6.orig/arch/x86/mach-generic/bigsmp.c
+++ linux-2.6/arch/x86/mach-generic/bigsmp.c
@@ -5,17 +5,16 @@
 #define APIC_DEFINITION 1
 #include <linux/threads.h>
 #include <linux/cpumask.h>
-#include <asm/smp.h>
 #include <asm/mpspec.h>
 #include <asm/genapic.h>
 #include <asm/fixmap.h>
 #include <asm/apicdef.h>
 #include <linux/kernel.h>
-#include <linux/smp.h>
 #include <linux/init.h>
 #include <linux/dmi.h>
-#include <asm/mach-bigsmp/mach_apic.h>
 #include <asm/mach-bigsmp/mach_apicdef.h>
+#include <linux/smp.h>
+#include <asm/mach-bigsmp/mach_apic.h>
 #include <asm/mach-bigsmp/mach_ipi.h>
 #include <asm/mach-default/mach_mpparse.h>
 
Index: linux-2.6/arch/x86/mach-generic/es7000.c
===================================================================
--- linux-2.6.orig/arch/x86/mach-generic/es7000.c
+++ linux-2.6/arch/x86/mach-generic/es7000.c
@@ -4,16 +4,15 @@
 #define APIC_DEFINITION 1
 #include <linux/threads.h>
 #include <linux/cpumask.h>
-#include <asm/smp.h>
 #include <asm/mpspec.h>
 #include <asm/genapic.h>
 #include <asm/fixmap.h>
 #include <asm/apicdef.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
-#include <linux/smp.h>
 #include <linux/init.h>
 #include <asm/mach-es7000/mach_apicdef.h>
+#include <linux/smp.h>
 #include <asm/mach-es7000/mach_apic.h>
 #include <asm/mach-es7000/mach_ipi.h>
 #include <asm/mach-es7000/mach_mpparse.h>
Index: linux-2.6/arch/x86/mach-generic/numaq.c
===================================================================
--- linux-2.6.orig/arch/x86/mach-generic/numaq.c
+++ linux-2.6/arch/x86/mach-generic/numaq.c
@@ -4,7 +4,6 @@
 #define APIC_DEFINITION 1
 #include <linux/threads.h>
 #include <linux/cpumask.h>
-#include <linux/smp.h>
 #include <asm/mpspec.h>
 #include <asm/genapic.h>
 #include <asm/fixmap.h>
@@ -12,8 +11,9 @@
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include <linux/init.h>
-#include <asm/mach-numaq/mach_apic.h>
 #include <asm/mach-numaq/mach_apicdef.h>
+#include <linux/smp.h>
+#include <asm/mach-numaq/mach_apic.h>
 #include <asm/mach-numaq/mach_ipi.h>
 #include <asm/mach-numaq/mach_mpparse.h>
 #include <asm/mach-numaq/mach_wakecpu.h>
Index: linux-2.6/arch/x86/mach-generic/summit.c
===================================================================
--- linux-2.6.orig/arch/x86/mach-generic/summit.c
+++ linux-2.6/arch/x86/mach-generic/summit.c
@@ -4,17 +4,16 @@
 #define APIC_DEFINITION 1
 #include <linux/threads.h>
 #include <linux/cpumask.h>
-#include <asm/smp.h>
 #include <asm/mpspec.h>
 #include <asm/genapic.h>
 #include <asm/fixmap.h>
 #include <asm/apicdef.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
-#include <linux/smp.h>
 #include <linux/init.h>
-#include <asm/mach-summit/mach_apic.h>
 #include <asm/mach-summit/mach_apicdef.h>
+#include <linux/smp.h>
+#include <asm/mach-summit/mach_apic.h>
 #include <asm/mach-summit/mach_ipi.h>
 #include <asm/mach-summit/mach_mpparse.h>
 

^ permalink raw reply	[flat|nested] 84+ messages in thread

* [PATCH] x86: make read_apic_id return final apicid
  2008-07-12  1:43             ` [PATCH] x86: mach_apicdef.h need to include before smp.h Yinghai Lu
@ 2008-07-12  1:44               ` Yinghai Lu
  2008-07-12  8:01                 ` [PATCH] x86: make 64bit have get_apic_id Yinghai Lu
  2008-07-13  1:19                 ` [PATCH] x86: make read_apic_id return final apicid Suresh Siddha
  0 siblings, 2 replies; 84+ messages in thread
From: Yinghai Lu @ 2008-07-12  1:44 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Suresh Siddha; +Cc: LKML


also remove GET_APIC_ID when read_apic_id is used.

need to apply after
	[PATCH] x86: mach_apicdef.h need to include before smp.h


Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

---
 arch/x86/kernel/acpi/boot.c                 |    2 +-
 arch/x86/kernel/apic_32.c                   |    4 ++--
 arch/x86/kernel/apic_64.c                   |    6 +++---
 arch/x86/kernel/genapic_flat_64.c           |    2 +-
 arch/x86/kernel/io_apic_32.c                |    5 ++---
 arch/x86/kernel/io_apic_64.c                |    4 ++--
 arch/x86/kernel/smpboot.c                   |    6 +++---
 include/asm-x86/mach-default/mach_apic.h    |    2 +-
 include/asm-x86/mach-default/mach_apicdef.h |    3 +--
 include/asm-x86/mach-es7000/mach_apic.h     |    2 +-
 include/asm-x86/smp.h                       |   11 ++++++++---
 11 files changed, 25 insertions(+), 22 deletions(-)

Index: linux-2.6/arch/x86/kernel/acpi/boot.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/acpi/boot.c
+++ linux-2.6/arch/x86/kernel/acpi/boot.c
@@ -763,7 +763,7 @@ static void __init acpi_register_lapic_a
 
 	set_fixmap_nocache(FIX_APIC_BASE, address);
 	if (boot_cpu_physical_apicid == -1U) {
-		boot_cpu_physical_apicid  = GET_APIC_ID(read_apic_id());
+		boot_cpu_physical_apicid  = read_apic_id();
 #ifdef CONFIG_X86_32
 		apic_version[boot_cpu_physical_apicid] =
 			 GET_APIC_VERSION(apic_read(APIC_LVR));
Index: linux-2.6/arch/x86/kernel/apic_32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/apic_32.c
+++ linux-2.6/arch/x86/kernel/apic_32.c
@@ -1230,7 +1230,7 @@ void __init init_apic_mappings(void)
 	 * default configuration (or the MP table is broken).
 	 */
 	if (boot_cpu_physical_apicid == -1U)
-		boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+		boot_cpu_physical_apicid = read_apic_id();
 
 }
 
@@ -1270,7 +1270,7 @@ int __init APIC_init_uniprocessor(void)
 	 * might be zero if read from MP tables. Get it from LAPIC.
 	 */
 #ifdef CONFIG_CRASH_DUMP
-	boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+	boot_cpu_physical_apicid = read_apic_id();
 #endif
 	physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
 
Index: linux-2.6/arch/x86/kernel/apic_64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/apic_64.c
+++ linux-2.6/arch/x86/kernel/apic_64.c
@@ -1060,7 +1060,7 @@ void __init early_init_lapic_mapping(voi
 	 * Fetch the APIC ID of the BSP in case we have a
 	 * default configuration (or the MP table is broken).
 	 */
-	boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+	boot_cpu_physical_apicid = read_apic_id();
 }
 
 /**
@@ -1069,7 +1069,7 @@ void __init early_init_lapic_mapping(voi
 void __init init_apic_mappings(void)
 {
 	if (x2apic) {
-		boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+		boot_cpu_physical_apicid = read_apic_id();
 		return;
 	}
 
@@ -1092,7 +1092,7 @@ void __init init_apic_mappings(void)
 	 * Fetch the APIC ID of the BSP in case we have a
 	 * default configuration (or the MP table is broken).
 	 */
-	boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+	boot_cpu_physical_apicid = read_apic_id();
 }
 
 /*
Index: linux-2.6/arch/x86/kernel/genapic_flat_64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/genapic_flat_64.c
+++ linux-2.6/arch/x86/kernel/genapic_flat_64.c
@@ -101,7 +101,7 @@ static unsigned int read_xapic_id(void)
 {
 	unsigned int id;
 
-	id = GET_XAPIC_ID(apic_read(APIC_ID));
+	id = GET_APIC_ID(apic_read(APIC_ID));
 	return id;
 }
 
Index: linux-2.6/arch/x86/kernel/io_apic_32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/io_apic_32.c
+++ linux-2.6/arch/x86/kernel/io_apic_32.c
@@ -1501,7 +1501,7 @@ void /*__init*/ print_local_APIC(void *d
 		smp_processor_id(), hard_smp_processor_id());
 	v = apic_read(APIC_ID);
 	printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v,
-			GET_APIC_ID(read_apic_id()));
+			GET_APIC_ID(v));
 	v = apic_read(APIC_LVR);
 	printk(KERN_INFO "... APIC VERSION: %08x\n", v);
 	ver = GET_APIC_VERSION(v);
@@ -1709,8 +1709,7 @@ void disable_IO_APIC(void)
 		entry.dest_mode       = 0; /* Physical */
 		entry.delivery_mode   = dest_ExtINT; /* ExtInt */
 		entry.vector          = 0;
-		entry.dest.physical.physical_dest =
-					GET_APIC_ID(read_apic_id());
+		entry.dest.physical.physical_dest = read_apic_id();
 
 		/*
 		 * Add it to the IO-APIC irq-routing table:
Index: linux-2.6/arch/x86/kernel/io_apic_64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/io_apic_64.c
+++ linux-2.6/arch/x86/kernel/io_apic_64.c
@@ -1246,7 +1246,7 @@ void __apicdebuginit print_local_APIC(vo
 	printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
 		smp_processor_id(), hard_smp_processor_id());
 	v = apic_read(APIC_ID);
-	printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, GET_APIC_ID(read_apic_id()));
+	printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, read_apic_id());
 	v = apic_read(APIC_LVR);
 	printk(KERN_INFO "... APIC VERSION: %08x\n", v);
 	ver = GET_APIC_VERSION(v);
@@ -1441,7 +1441,7 @@ void disable_IO_APIC(void)
 		entry.dest_mode       = 0; /* Physical */
 		entry.delivery_mode   = dest_ExtINT; /* ExtInt */
 		entry.vector          = 0;
-		entry.dest          = GET_APIC_ID(read_apic_id());
+		entry.dest            = read_apic_id();
 
 		/*
 		 * Add it to the IO-APIC irq-routing table:
Index: linux-2.6/arch/x86/kernel/smpboot.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/smpboot.c
+++ linux-2.6/arch/x86/kernel/smpboot.c
@@ -211,7 +211,7 @@ static void __cpuinit smp_callin(void)
 	/*
 	 * (This works even if the APIC is not enabled.)
 	 */
-	phys_id = GET_APIC_ID(read_apic_id());
+	phys_id = read_apic_id();
 	cpuid = smp_processor_id();
 	if (cpu_isset(cpuid, cpu_callin_map)) {
 		panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__,
@@ -1157,9 +1157,9 @@ void __init native_smp_prepare_cpus(unsi
 	}
 
 	preempt_disable();
-	if (GET_APIC_ID(read_apic_id()) != boot_cpu_physical_apicid) {
+	if (read_apic_id() != boot_cpu_physical_apicid) {
 		panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
-		     GET_APIC_ID(read_apic_id()), boot_cpu_physical_apicid);
+		     read_apic_id(), boot_cpu_physical_apicid);
 		/* Or can we switch back to PIC here? */
 	}
 	preempt_enable();
Index: linux-2.6/include/asm-x86/mach-default/mach_apic.h
===================================================================
--- linux-2.6.orig/include/asm-x86/mach-default/mach_apic.h
+++ linux-2.6/include/asm-x86/mach-default/mach_apic.h
@@ -56,7 +56,7 @@ static inline void init_apic_ldr(void)
 
 static inline int apic_id_registered(void)
 {
-	return physid_isset(GET_APIC_ID(read_apic_id()), phys_cpu_present_map);
+	return physid_isset(read_apic_id(), phys_cpu_present_map);
 }
 
 static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
Index: linux-2.6/include/asm-x86/mach-default/mach_apicdef.h
===================================================================
--- linux-2.6.orig/include/asm-x86/mach-default/mach_apicdef.h
+++ linux-2.6/include/asm-x86/mach-default/mach_apicdef.h
@@ -5,9 +5,8 @@
 
 #ifdef CONFIG_X86_64
 #define	APIC_ID_MASK		(0xFFu<<24)
-#define GET_APIC_ID(x)          (x)
+#define GET_APIC_ID(x)		(((x)>>24) & 0xFFu)
 #define	SET_APIC_ID(x)		(((x)<<24))
-#define GET_XAPIC_ID(x)		(((x) >> 24) & 0xFFu)
 #else
 #define		APIC_ID_MASK		(0xF<<24)
 static inline unsigned get_apic_id(unsigned long x) 
Index: linux-2.6/include/asm-x86/mach-es7000/mach_apic.h
===================================================================
--- linux-2.6.orig/include/asm-x86/mach-es7000/mach_apic.h
+++ linux-2.6/include/asm-x86/mach-es7000/mach_apic.h
@@ -141,7 +141,7 @@ static inline void setup_portio_remap(vo
 extern unsigned int boot_cpu_physical_apicid;
 static inline int check_phys_apicid_present(int cpu_physical_apicid)
 {
-	boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
+	boot_cpu_physical_apicid = read_apic_id();
 	return (1);
 }
 
Index: linux-2.6/include/asm-x86/smp.h
===================================================================
--- linux-2.6.orig/include/asm-x86/smp.h
+++ linux-2.6/include/asm-x86/smp.h
@@ -172,9 +172,14 @@ static inline int logical_smp_processor_
 	return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR));
 }
 
+#include <mach_apicdef.h>
 static inline unsigned int read_apic_id(void)
 {
-	return *(u32 *)(APIC_BASE + APIC_ID);
+	unsigned int reg;
+
+	reg = *(u32 *)(APIC_BASE + APIC_ID);
+
+	return GET_APIC_ID(reg);
 }
 #endif
 
@@ -182,11 +187,11 @@ static inline unsigned int read_apic_id(
 # if defined(APIC_DEFINITION) || defined(CONFIG_X86_64)
 extern int hard_smp_processor_id(void);
 # else
-#  include <mach_apicdef.h>
+#include <mach_apicdef.h>
 static inline int hard_smp_processor_id(void)
 {
 	/* we don't want to mark this access volatile - bad code generation */
-	return GET_APIC_ID(read_apic_id());
+	return read_apic_id();
 }
 # endif /* APIC_DEFINITION */
 

^ permalink raw reply	[flat|nested] 84+ messages in thread

* [PATCH] x86: make 64bit have get_apic_id
  2008-07-12  1:44               ` [PATCH] x86: make read_apic_id return final apicid Yinghai Lu
@ 2008-07-12  8:01                 ` Yinghai Lu
  2008-07-13  6:28                   ` Ingo Molnar
  2008-07-13  1:19                 ` [PATCH] x86: make read_apic_id return final apicid Suresh Siddha
  1 sibling, 1 reply; 84+ messages in thread
From: Yinghai Lu @ 2008-07-12  8:01 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Suresh Siddha; +Cc: LKML


and read_apic_id becomes the macro
GET_APIC_ID(apic_read(APIC_ID))

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

---
 arch/x86/kernel/genapic_flat_64.c           |   26 +++++++++++++++++++++++---
 arch/x86/kernel/genx2apic_cluster.c         |   20 +++++++++++++++++++-
 arch/x86/kernel/genx2apic_phys.c            |   20 +++++++++++++++++++-
 arch/x86/kernel/genx2apic_uv_x.c            |   23 ++++++++++++++++++++---
 include/asm-x86/genapic_64.h                |    4 +++-
 include/asm-x86/mach-default/mach_apic.h    |    2 +-
 include/asm-x86/mach-default/mach_apicdef.h |    6 +++---
 7 files changed, 88 insertions(+), 13 deletions(-)

Index: linux-2.6/arch/x86/kernel/genapic_flat_64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/genapic_flat_64.c
+++ linux-2.6/arch/x86/kernel/genapic_flat_64.c
@@ -97,11 +97,27 @@ static void flat_send_IPI_all(int vector
 		__send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
 }
 
+static unsigned int get_apic_id(unsigned long x)
+{
+	unsigned int id;
+
+	id = (((x)>>24) & 0xFFu);
+	return id;
+}
+
+static unsigned long set_apic_id(unsigned int id)
+{
+	unsigned long x;
+
+	x = ((id & 0xFFu)<<24);
+	return x;
+}
+
 static unsigned int read_xapic_id(void)
 {
 	unsigned int id;
 
-	id = GET_APIC_ID(apic_read(APIC_ID));
+	id = get_apic_id(apic_read(APIC_ID));
 	return id;
 }
 
@@ -134,7 +150,9 @@ struct genapic apic_flat =  {
 	.send_IPI_self = apic_send_IPI_self,
 	.cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
 	.phys_pkg_id = phys_pkg_id,
-	.read_apic_id = read_xapic_id,
+	.get_apic_id = get_apic_id,
+	.set_apic_id = set_apic_id,
+	.apic_id_mask = (0xFFu<<24),
 };
 
 /*
@@ -200,5 +218,7 @@ struct genapic apic_physflat =  {
 	.send_IPI_self = apic_send_IPI_self,
 	.cpu_mask_to_apicid = physflat_cpu_mask_to_apicid,
 	.phys_pkg_id = phys_pkg_id,
-	.read_apic_id = read_xapic_id,
+	.get_apic_id = get_apic_id,
+	.set_apic_id = set_apic_id,
+	.apic_id_mask = (0xFFu<<24),
 };
Index: linux-2.6/arch/x86/kernel/genx2apic_cluster.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/genx2apic_cluster.c
+++ linux-2.6/arch/x86/kernel/genx2apic_cluster.c
@@ -94,6 +94,22 @@ static unsigned int x2apic_cpu_mask_to_a
 		return BAD_APICID;
 }
 
+static unsigned int get_apic_id(unsigned long x)
+{
+	unsigned int id;
+
+	id = x;
+	return id;
+}
+
+static unsigned long set_apic_id(unsigned int id)
+{
+	unsigned long x;
+
+	x = id;
+	return x;
+}
+
 static unsigned int x2apic_read_id(void)
 {
 	return apic_read(APIC_ID);
@@ -131,5 +147,7 @@ struct genapic apic_x2apic_cluster = {
 	.send_IPI_self = x2apic_send_IPI_self,
 	.cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
 	.phys_pkg_id = phys_pkg_id,
-	.read_apic_id = x2apic_read_id,
+	.get_apic_id = get_apic_id,
+	.set_apic_id = set_apic_id,
+	.apic_id_mask = (0xFFFFFFFFu),
 };
Index: linux-2.6/arch/x86/kernel/genx2apic_phys.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/genx2apic_phys.c
+++ linux-2.6/arch/x86/kernel/genx2apic_phys.c
@@ -84,6 +84,22 @@ static unsigned int x2apic_cpu_mask_to_a
 		return BAD_APICID;
 }
 
+static unsigned int get_apic_id(unsigned long x)
+{
+	unsigned int id;
+
+	id = x;
+	return id;
+}
+
+static unsigned long set_apic_id(unsigned int id)
+{
+	unsigned long x;
+
+	x = id;
+	return x;
+}
+
 static unsigned int x2apic_read_id(void)
 {
 	return apic_read(APIC_ID);
@@ -118,5 +134,7 @@ struct genapic apic_x2apic_phys = {
 	.send_IPI_self = x2apic_send_IPI_self,
 	.cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
 	.phys_pkg_id = phys_pkg_id,
-	.read_apic_id = x2apic_read_id,
+	.get_apic_id = get_apic_id,
+	.set_apic_id = set_apic_id,
+	.apic_id_mask = (0xFFFFFFFFu),
 };
Index: linux-2.6/arch/x86/kernel/genx2apic_uv_x.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/genx2apic_uv_x.c
+++ linux-2.6/arch/x86/kernel/genx2apic_uv_x.c
@@ -139,16 +139,31 @@ static unsigned int uv_cpu_mask_to_apici
 		return BAD_APICID;
 }
 
-static unsigned int uv_read_apic_id(void)
+static unsigned int get_apic_id(unsigned long x)
 {
 	unsigned int id;
 
 	WARN_ON(preemptible() && num_online_cpus() > 1);
-	id = apic_read(APIC_ID) | __get_cpu_var(x2apic_extra_bits);
+	id = x | __get_cpu_var(x2apic_extra_bits);
 
 	return id;
 }
 
+static long set_apic_id(unsigned int id)
+{
+	unsigned long x;
+
+	/* maskout x2apic_extra_bits ? */
+	x = id;
+	return x;
+}
+
+static unsigned int uv_read_apic_id(void)
+{
+
+	return get_apic_id(apic_read(APIC_ID));
+}
+
 static unsigned int phys_pkg_id(int index_msb)
 {
 	return uv_read_apic_id() >> index_msb;
@@ -175,7 +190,9 @@ struct genapic apic_x2apic_uv_x = {
 	/* ZZZ.send_IPI_self = uv_send_IPI_self, */
 	.cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
 	.phys_pkg_id = phys_pkg_id,	/* Fixme ZZZ */
-	.read_apic_id = uv_read_apic_id,
+	.get_apic_id = get_apic_id,
+	.set_apic_id = set_apic_id,
+	.apic_id_mask = (0xFFFFFFFFu),
 };
 
 static __cpuinit void set_x2apic_extra_bits(int pnode)
Index: linux-2.6/include/asm-x86/genapic_64.h
===================================================================
--- linux-2.6.orig/include/asm-x86/genapic_64.h
+++ linux-2.6/include/asm-x86/genapic_64.h
@@ -28,7 +28,9 @@ struct genapic {
 	/* */
 	unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask);
 	unsigned int (*phys_pkg_id)(int index_msb);
-	unsigned int (*read_apic_id)(void);
+	unsigned int (*get_apic_id)(unsigned long x);
+	unsigned long (*set_apic_id)(unsigned int id);
+	unsigned long apic_id_mask;
 };
 
 extern struct genapic *genapic;
Index: linux-2.6/include/asm-x86/mach-default/mach_apic.h
===================================================================
--- linux-2.6.orig/include/asm-x86/mach-default/mach_apic.h
+++ linux-2.6/include/asm-x86/mach-default/mach_apic.h
@@ -30,7 +30,7 @@ static inline cpumask_t target_cpus(void
 #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid)
 #define phys_pkg_id	(genapic->phys_pkg_id)
 #define vector_allocation_domain    (genapic->vector_allocation_domain)
-#define read_apic_id  (genapic->read_apic_id)
+#define read_apic_id()  (GET_APIC_ID(apic_read(APIC_ID)))
 #define send_IPI_self (genapic->send_IPI_self)
 extern void setup_apic_routing(void);
 #else
Index: linux-2.6/include/asm-x86/mach-default/mach_apicdef.h
===================================================================
--- linux-2.6.orig/include/asm-x86/mach-default/mach_apicdef.h
+++ linux-2.6/include/asm-x86/mach-default/mach_apicdef.h
@@ -4,9 +4,9 @@
 #include <asm/apic.h>
 
 #ifdef CONFIG_X86_64
-#define	APIC_ID_MASK		(0xFFu<<24)
-#define GET_APIC_ID(x)		(((x)>>24) & 0xFFu)
-#define	SET_APIC_ID(x)		(((x)<<24))
+#define	APIC_ID_MASK		(genapic->apic_id_mask)
+#define GET_APIC_ID(x)		(genapic->get_apic_id(x))
+#define	SET_APIC_ID(x)		(genapic->set_apic_id(x))
 #else
 #define		APIC_ID_MASK		(0xF<<24)
 static inline unsigned get_apic_id(unsigned long x) 

^ permalink raw reply	[flat|nested] 84+ messages in thread

* [PATCH] x86: max_low_pfn_mapped fix #1
  2008-07-11  3:38         ` [PATCH] x86: introduce max_low_pfn_mapped for 64bit Yinghai Lu
  2008-07-11  8:26           ` Ingo Molnar
  2008-07-12  1:41           ` [PATCH] x86: let 32bit use apic_ops too Yinghai Lu
@ 2008-07-12 21:30           ` Yinghai Lu
  2008-07-13  9:45             ` Ingo Molnar
  2008-07-12 21:31           ` [PATCH] x86: max_low_pfn_mapped fix #2 Yinghai Lu
  2008-07-12 21:32           ` [PATCH] x86: max_low_pfn_mapped fix #3 Yinghai Lu
  4 siblings, 1 reply; 84+ messages in thread
From: Yinghai Lu @ 2008-07-12 21:30 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Suresh Siddha; +Cc: LKML


fix crash on Ingo's big box:
calling  pci_iommu_init+0x0/0x17
PCI-DMA: Disabling AGP.
PCI-DMA: aperture base @ d0000000 size 65536 KB
PCI-DMA: using GART IOMMU.
PCI-DMA: Reserving 64MB of IOMMU area in the AGP aperture
BUG: unable to handle kernel paging request at ffff88000003be88
IP: [<ffffffff8026d377>] __alloc_pages_internal+0xc3/0x3f2
PGD 202063 PUD 206063 PMD 22fc00163 PTE 3b162
Oops: 0000 [1] SMP 
CPU 0 
Modules linked in:
Pid: 1, comm: swapper Not tainted 2.6.26-rc9 #18193
RIP: 0010:[<ffffffff8026d377>]  [<ffffffff8026d377>] __alloc_pages_internal+0xc3/0x3f2
RSP: 0018:ffff88042f08fd40  EFLAGS: 00010246
RAX: 0000000000000000 RBX: 00000000000080d0 RCX: 0000000000000000
RDX: ffff88000003be80 RSI: 0000000000000000 RDI: 00000000000080d0
RBP: 0000000000000000 R08: 0000000000000040 R09: ffff88082f075278
R10: 0000000000000246 R11: 0000000000000002 R12: ffff88000003be80
R13: 0000000000000000 R14: ffff88022f0994e0 R15: 0000000100000000
FS:  0000000000000000(0000) GS:ffffffff806dbb00(0000) knlGS:0000000000000000
CS:  0010 DS: 0018 ES: 0018 CR0: 000000008005003b
CR2: ffff88000003be88 CR3: 0000000000201000 CR4: 00000000000006e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000

and e820 is
 BIOS-e820: 0000000000000000 - 000000000009ac00 (usable)
 BIOS-e820: 000000000009ac00 - 00000000000a0000 (reserved)
 BIOS-e820: 00000000000ca000 - 0000000000100000 (reserved)
 BIOS-e820: 0000000000100000 - 000000007ff70000 (usable)
 BIOS-e820: 000000007ff70000 - 000000007ff86000 (ACPI data)
 BIOS-e820: 000000007ff86000 - 0000000080000000 (ACPI NVS)
 BIOS-e820: 0000000080000000 - 00000000cfe00000 (usable)
 BIOS-e820: 00000000cfe00000 - 00000000d0000000 (reserved)
 BIOS-e820: 00000000e0000000 - 00000000f0000000 (reserved)
 BIOS-e820: 00000000fec00000 - 00000000fec10000 (reserved)
 BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved)
 BIOS-e820: 00000000fff80000 - 0000000100000000 (reserved)
 BIOS-e820: 0000000100000000 - 0000000830000000 (usable)

The system has 32G of RAM installed.
max_low_pfn_mapped is 0xcfe00, and the GART aperture is not mapped.

So use init_memory_mapping() to map that area, because the IOMMU code
treats that area as RAM...

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

---
 arch/x86/kernel/pci-gart_64.c |   11 +++++++++++
 1 file changed, 11 insertions(+)

Index: linux-2.6/arch/x86/kernel/pci-gart_64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/pci-gart_64.c
+++ linux-2.6/arch/x86/kernel/pci-gart_64.c
@@ -631,6 +631,7 @@ static __init int init_k8_gatt(struct ag
 	struct pci_dev *dev;
 	void *gatt;
 	int i, error;
+	unsigned long start_pfn, end_pfn;
 
 	printk(KERN_INFO "PCI-DMA: Disabling AGP.\n");
 	aper_size = aper_base = info->aper_size = 0;
@@ -675,6 +676,16 @@ static __init int init_k8_gatt(struct ag
 
 	printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n",
 	       aper_base, aper_size>>10);
+
+	/* need to map that range */
+	end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT);
+	if (end_pfn > max_low_pfn_mapped) {
+		start_pfn = max_low_pfn_mapped;
+		max_low_pfn_mapped = init_memory_mapping(start_pfn<<PAGE_SHIFT,
+							 end_pfn<<PAGE_SHIFT);
+		if (max_pfn_mapped < max_low_pfn_mapped)
+			max_pfn_mapped = max_low_pfn_mapped;
+	}
 	return 0;
 
  nommu:

^ permalink raw reply	[flat|nested] 84+ messages in thread

* [PATCH] x86: max_low_pfn_mapped fix #2
  2008-07-11  3:38         ` [PATCH] x86: introduce max_low_pfn_mapped for 64bit Yinghai Lu
                             ` (2 preceding siblings ...)
  2008-07-12 21:30           ` [PATCH] x86: max_low_pfn_mapped fix #1 Yinghai Lu
@ 2008-07-12 21:31           ` Yinghai Lu
  2008-07-12 21:32           ` [PATCH] x86: max_low_pfn_mapped fix #3 Yinghai Lu
  4 siblings, 0 replies; 84+ messages in thread
From: Yinghai Lu @ 2008-07-12 21:31 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Suresh Siddha; +Cc: LKML


Tighten the boundary checks: compare with < / >= instead of <= / >
against max_low_pfn_mapped and max_pfn_mapped.

Also print out tseg for AMD CPUs.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

---
 arch/x86/kernel/cpu/amd_64.c |    1 +
 arch/x86/mm/pageattr.c       |    4 ++--
 arch/x86/mm/pat.c            |    4 ++--
 arch/x86/pci/i386.c          |    4 ++--
 4 files changed, 7 insertions(+), 6 deletions(-)

Index: linux-2.6/arch/x86/kernel/cpu/amd_64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/amd_64.c
+++ linux-2.6/arch/x86/kernel/cpu/amd_64.c
@@ -200,6 +200,7 @@ static void __cpuinit init_amd(struct cp
 		 * benefit in doing so.
 		 */
 		if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
+		    printk(KERN_DEBUG "tseg: %010llx\n", tseg);
 		    if ((tseg>>PMD_SHIFT) <
 				(max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) ||
 			((tseg>>PMD_SHIFT) <
Index: linux-2.6/arch/x86/mm/pageattr.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/pageattr.c
+++ linux-2.6/arch/x86/mm/pageattr.c
@@ -658,11 +658,11 @@ static int cpa_process_alias(struct cpa_
 	struct cpa_data alias_cpa;
 	int ret = 0;
 
-	if (cpa->pfn > max_pfn_mapped)
+	if (cpa->pfn >= max_pfn_mapped)
 		return 0;
 
 #ifdef CONFIG_X86_64
-	if (cpa->pfn > max_low_pfn_mapped && cpa->pfn < (1UL<<(32-PAGE_SHIFT)))
+	if (cpa->pfn >= max_low_pfn_mapped && cpa->pfn < (1UL<<(32-PAGE_SHIFT)))
 		return 0;
 #endif
 	/*
Index: linux-2.6/arch/x86/mm/pat.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/pat.c
+++ linux-2.6/arch/x86/mm/pat.c
@@ -449,8 +449,8 @@ int phys_mem_access_prot_allowed(struct
 	if (retval < 0)
 		return 0;
 
-	if (((pfn <= max_low_pfn_mapped) ||
-	     (pfn >= (1UL<<(32 - PAGE_SHIFT)) && pfn <= max_pfn_mapped)) &&
+	if (((pfn < max_low_pfn_mapped) ||
+	     (pfn >= (1UL<<(32 - PAGE_SHIFT)) && pfn < max_pfn_mapped)) &&
 	    ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) {
 		free_memtype(offset, offset + size);
 		printk(KERN_INFO
Index: linux-2.6/arch/x86/pci/i386.c
===================================================================
--- linux-2.6.orig/arch/x86/pci/i386.c
+++ linux-2.6/arch/x86/pci/i386.c
@@ -334,9 +334,9 @@ int pci_mmap_page_range(struct pci_dev *
 		flags = new_flags;
 	}
 
-	if (((vma->vm_pgoff <= max_low_pfn_mapped) ||
+	if (((vma->vm_pgoff < max_low_pfn_mapped) ||
 	     (vma->vm_pgoff >= (1UL<<(32 - PAGE_SHIFT)) &&
-	      vma->vm_pgoff <= max_pfn_mapped)) &&
+	      vma->vm_pgoff < max_pfn_mapped)) &&
 	    ioremap_change_attr((unsigned long)__va(addr), len, flags)) {
 		free_memtype(addr, addr + len);
 		return -EINVAL;

^ permalink raw reply	[flat|nested] 84+ messages in thread

* [PATCH] x86: max_low_pfn_mapped fix #3
  2008-07-11  3:38         ` [PATCH] x86: introduce max_low_pfn_mapped for 64bit Yinghai Lu
                             ` (3 preceding siblings ...)
  2008-07-12 21:31           ` [PATCH] x86: max_low_pfn_mapped fix #2 Yinghai Lu
@ 2008-07-12 21:32           ` Yinghai Lu
  2008-07-13 21:29             ` [PATCH] x86: max_low_pfn_mapped fix #4 Yinghai Lu
                               ` (2 more replies)
  4 siblings, 3 replies; 84+ messages in thread
From: Yinghai Lu @ 2008-07-12 21:32 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Suresh Siddha; +Cc: LKML


Try to merge contiguous ranges that have the same page size in
init_memory_mapping,

so that when GB pages are not available, the ranges that use 2M pages can be
mapped together.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

---
 arch/x86/mm/init_64.c |   14 ++++++++++++++
 1 file changed, 14 insertions(+)

Index: linux-2.6/arch/x86/mm/init_64.c
===================================================================
--- linux-2.6.orig/arch/x86/mm/init_64.c
+++ linux-2.6/arch/x86/mm/init_64.c
@@ -763,6 +763,20 @@ unsigned long __init_refok init_memory_m
 	end_pfn = end>>PAGE_SHIFT;
 	nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
 
+	/* try to merge same page size and continuous */
+	for (i = 0; nr_range > 1 && i < nr_range - 1; i++) {
+		unsigned long old_start;
+		if (mr[i].end != mr[i+1].start ||
+		    mr[i].page_size_mask != mr[i+1].page_size_mask)
+			continue;
+		/* move it */
+		old_start = mr[i].start;
+		memmove(&mr[i], &mr[i+1],
+			 (nr_range - 1 - i) * sizeof (struct map_range));
+		mr[i].start = old_start;
+		nr_range--;
+	}
+
 	for (i = 0; i < nr_range; i++)
 		printk(KERN_DEBUG " %010lx - %010lx page %s\n",
 				mr[i].start, mr[i].end,

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: let 32bit use apic_ops too
  2008-07-12  1:41           ` [PATCH] x86: let 32bit use apic_ops too Yinghai Lu
  2008-07-12  1:43             ` [PATCH] x86: mach_apicdef.h need to include before smp.h Yinghai Lu
@ 2008-07-13  1:08             ` Suresh Siddha
  2008-07-13  2:04               ` Yinghai Lu
  2008-07-13  1:43             ` Maciej W. Rozycki
  2008-07-14  5:19             ` [PATCH] x86: let 32bit use apic_ops too - fix Yinghai Lu
  3 siblings, 1 reply; 84+ messages in thread
From: Suresh Siddha @ 2008-07-13  1:08 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Siddha, Suresh B,
	LKML

Yinghai,

We can now clean up ack_APIC_irq() to use native_apic_mem_write()
for both 32bit and 64bit.

Also, arch/x86/kernel/ipi.c can use native_apic_mem_write(),
similar to include/asm-x86/ipi.h.

Then you will have my ACK :)

thanks,
suresh

On Fri, Jul 11, 2008 at 06:41:54PM -0700, Yinghai Lu wrote:
> 
> Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
> 
> ---
>  arch/x86/kernel/apic_32.c |   39 +++++++++++++++++++++++++++++++--------
>  include/asm-x86/apic.h    |   13 ++-----------
>  2 files changed, 33 insertions(+), 19 deletions(-)
> 
> Index: linux-2.6/arch/x86/kernel/apic_32.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/apic_32.c
> +++ linux-2.6/arch/x86/kernel/apic_32.c
> @@ -145,19 +145,13 @@ static int modern_apic(void)
>         return lapic_get_version() >= 0x14;
>  }
> 
> -void apic_icr_write(u32 low, u32 id)
> -{
> -       apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(id));
> -       apic_write_around(APIC_ICR, low);
> -}
> -
> -void apic_wait_icr_idle(void)
> +void xapic_wait_icr_idle(void)
>  {
>         while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
>                 cpu_relax();
>  }
> 
> -u32 safe_apic_wait_icr_idle(void)
> +u32 safe_xapic_wait_icr_idle(void)
>  {
>         u32 send_status;
>         int timeout;
> @@ -173,6 +167,35 @@ u32 safe_apic_wait_icr_idle(void)
>         return send_status;
>  }
> 
> +void xapic_icr_write(u32 low, u32 id)
> +{
> +       apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(id));
> +       apic_write_around(APIC_ICR, low);
> +}
> +
> +u64 xapic_icr_read(void)
> +{
> +       u32 icr1, icr2;
> +
> +       icr2 = apic_read(APIC_ICR2);
> +       icr1 = apic_read(APIC_ICR);
> +
> +       return icr1 | ((u64)icr2 << 32);
> +}
> +
> +static struct apic_ops xapic_ops = {
> +       .read = native_apic_mem_read,
> +       .write = native_apic_mem_write,
> +       .write_atomic = native_apic_mem_write_atomic,
> +       .icr_read = xapic_icr_read,
> +       .icr_write = xapic_icr_write,
> +       .wait_icr_idle = xapic_wait_icr_idle,
> +       .safe_wait_icr_idle = safe_xapic_wait_icr_idle,
> +};
> +
> +struct apic_ops __read_mostly *apic_ops = &xapic_ops;
> +EXPORT_SYMBOL_GPL(apic_ops);
> +
>  /**
>   * enable_NMI_through_LVT0 - enable NMI through local vector table 0
>   */
> Index: linux-2.6/include/asm-x86/apic.h
> ===================================================================
> --- linux-2.6.orig/include/asm-x86/apic.h
> +++ linux-2.6/include/asm-x86/apic.h
> @@ -49,11 +49,6 @@ extern int disable_apic;
>  #ifdef CONFIG_PARAVIRT
>  #include <asm/paravirt.h>
>  #else
> -#ifndef CONFIG_X86_64
> -#define apic_write native_apic_mem_write
> -#define apic_write_atomic native_apic_mem_write_atomic
> -#define apic_read native_apic_mem_read
> -#endif
>  #define setup_boot_clock setup_boot_APIC_clock
>  #define setup_secondary_clock setup_secondary_APIC_clock
>  #endif
> @@ -95,16 +90,13 @@ static inline u32 native_apic_msr_read(u
>         return low;
>  }
> 
> -#ifdef CONFIG_X86_32
> -extern void apic_wait_icr_idle(void);
> -extern u32 safe_apic_wait_icr_idle(void);
> -extern void apic_icr_write(u32 low, u32 id);
> -#else
> +#ifndef CONFIG_X86_32
>  extern int x2apic, x2apic_preenabled;
>  extern void check_x2apic(void);
>  extern void enable_x2apic(void);
>  extern void enable_IR_x2apic(void);
>  extern void x2apic_icr_write(u32 low, u32 id);
> +#endif
> 
>  struct apic_ops {
>         u32 (*read)(u32 reg);
> @@ -125,7 +117,6 @@ extern struct apic_ops *apic_ops;
>  #define apic_icr_write (apic_ops->icr_write)
>  #define apic_wait_icr_idle (apic_ops->wait_icr_idle)
>  #define safe_apic_wait_icr_idle (apic_ops->safe_wait_icr_idle)
> -#endif
> 
>  extern int get_physical_broadcast(void);

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: make read_apic_id return final apicid
  2008-07-12  1:44               ` [PATCH] x86: make read_apic_id return final apicid Yinghai Lu
  2008-07-12  8:01                 ` [PATCH] x86: make 64bit have get_apic_id Yinghai Lu
@ 2008-07-13  1:19                 ` Suresh Siddha
  1 sibling, 0 replies; 84+ messages in thread
From: Suresh Siddha @ 2008-07-13  1:19 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Siddha, Suresh B,
	LKML

On Fri, Jul 11, 2008 at 06:44:16PM -0700, Yinghai Lu wrote:
> 
> also remove GET_APIC_ID when read_apic_id is used.
> 
> need to apply after
>         [PATCH] x86: mach_apicdef.h need to include before smp.h
> 
> 
> Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

Acked-by: Suresh Siddha <suresh.b.siddha@intel.com>

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: let 32bit use apic_ops too
  2008-07-12  1:41           ` [PATCH] x86: let 32bit use apic_ops too Yinghai Lu
  2008-07-12  1:43             ` [PATCH] x86: mach_apicdef.h need to include before smp.h Yinghai Lu
  2008-07-13  1:08             ` [PATCH] x86: let 32bit use apic_ops too Suresh Siddha
@ 2008-07-13  1:43             ` Maciej W. Rozycki
  2008-07-13  1:45               ` Yinghai Lu
  2008-07-14  5:19             ` [PATCH] x86: let 32bit use apic_ops too - fix Yinghai Lu
  3 siblings, 1 reply; 84+ messages in thread
From: Maciej W. Rozycki @ 2008-07-13  1:43 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Suresh Siddha, LKML

On Fri, 11 Jul 2008, Yinghai Lu wrote:

> -void apic_wait_icr_idle(void)
> +void xapic_wait_icr_idle(void)
>  {
>  	while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
>  		cpu_relax();
>  }
>  
> -u32 safe_apic_wait_icr_idle(void)
> +u32 safe_xapic_wait_icr_idle(void)

 Etc...  Why are you changing names of these functions?  Are they meant 
not to apply to older APIC implementations anymore?

  Maciej

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: let 32bit use apic_ops too
  2008-07-13  1:43             ` Maciej W. Rozycki
@ 2008-07-13  1:45               ` Yinghai Lu
  2008-07-13  1:54                 ` Maciej W. Rozycki
  0 siblings, 1 reply; 84+ messages in thread
From: Yinghai Lu @ 2008-07-13  1:45 UTC (permalink / raw)
  To: Maciej W. Rozycki
  Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Suresh Siddha, LKML

On Sat, Jul 12, 2008 at 6:43 PM, Maciej W. Rozycki <macro@linux-mips.org> wrote:
> On Fri, 11 Jul 2008, Yinghai Lu wrote:
>
>> -void apic_wait_icr_idle(void)
>> +void xapic_wait_icr_idle(void)
>>  {
>>       while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
>>               cpu_relax();
>>  }
>>
>> -u32 safe_apic_wait_icr_idle(void)
>> +u32 safe_xapic_wait_icr_idle(void)
>
>  Etc...  Why are you changing names of these functions?  Are they meant
> not to apply to older APIC implementations anymore?

because 64bit has that name.

YH

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: let 32bit use apic_ops too
  2008-07-13  1:45               ` Yinghai Lu
@ 2008-07-13  1:54                 ` Maciej W. Rozycki
  2008-07-13 16:43                   ` Suresh Siddha
  0 siblings, 1 reply; 84+ messages in thread
From: Maciej W. Rozycki @ 2008-07-13  1:54 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Suresh Siddha, LKML

On Sat, 12 Jul 2008, Yinghai Lu wrote:

> >  Etc...  Why are you changing names of these functions?  Are they meant
> > not to apply to older APIC implementations anymore?
> 
> because 64bit has that name.

 Shouldn't it be the other way round then?  Our 32-bit code is the
original one and supports more APIC variations than the 64-bit one.  So at
the point of unification 64-bit xAPIC-only functions will become universal
ones making the naming counter-intuitive: apic_ -> any APIC, xapic_ ->
xAPIC only.  I think only functions specific to the xAPIC should have the
xapic_ prefix.

  Maciej

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: let 32bit use apic_ops too
  2008-07-13  1:08             ` [PATCH] x86: let 32bit use apic_ops too Suresh Siddha
@ 2008-07-13  2:04               ` Yinghai Lu
  2008-07-13 16:28                 ` Suresh Siddha
  0 siblings, 1 reply; 84+ messages in thread
From: Yinghai Lu @ 2008-07-13  2:04 UTC (permalink / raw)
  To: Suresh Siddha; +Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, LKML

On Sat, Jul 12, 2008 at 6:08 PM, Suresh Siddha
<suresh.b.siddha@intel.com> wrote:
> Yinghai,
>
> We can now cleanup ack_APIC_irq() to use the native_apic_mem_write()
> for both 32bit and 64bit

Should it be apic_write_around(), in case 32bit does not have CONFIG_X86_GOOD_APIC?
Or should we let 64bit use apic_write_around() as well?

>
> And also arch/x86/kernel/ipi.c can also use native_apic_mem_write()
> similar to include/asm-x86/ipi.h

should remove duplicated functions in ipi.c

YH

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: make 64bit have get_apic_id
  2008-07-12  8:01                 ` [PATCH] x86: make 64bit have get_apic_id Yinghai Lu
@ 2008-07-13  6:28                   ` Ingo Molnar
  2008-07-13  6:59                     ` Ingo Molnar
  0 siblings, 1 reply; 84+ messages in thread
From: Ingo Molnar @ 2008-07-13  6:28 UTC (permalink / raw)
  To: Yinghai Lu; +Cc: Thomas Gleixner, H. Peter Anvin, Suresh Siddha, LKML


* Yinghai Lu <yhlu.kernel@gmail.com> wrote:

> and read_apic_id become macro
> GET_APIC_ID(apic_read(APIC_ID))

applied to tip/x86/x2apic, thanks Yinghai.

	Ingo

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: make 64bit have get_apic_id
  2008-07-13  6:28                   ` Ingo Molnar
@ 2008-07-13  6:59                     ` Ingo Molnar
  2008-07-13  7:05                       ` Yinghai Lu
  0 siblings, 1 reply; 84+ messages in thread
From: Ingo Molnar @ 2008-07-13  6:59 UTC (permalink / raw)
  To: Yinghai Lu; +Cc: Thomas Gleixner, H. Peter Anvin, Suresh Siddha, LKML


> * Yinghai Lu <yhlu.kernel@gmail.com> wrote:
> 
> > and read_apic_id become macro
> > GET_APIC_ID(apic_read(APIC_ID))

Yinghai, you had that x86/x2apic crash - do you think we can now try it 
again in tip/master?

	Ingo

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: make 64bit have get_apic_id
  2008-07-13  6:59                     ` Ingo Molnar
@ 2008-07-13  7:05                       ` Yinghai Lu
  2008-07-13  9:23                         ` Ingo Molnar
  0 siblings, 1 reply; 84+ messages in thread
From: Yinghai Lu @ 2008-07-13  7:05 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Thomas Gleixner, H. Peter Anvin, Suresh Siddha, LKML

On Sat, Jul 12, 2008 at 11:59 PM, Ingo Molnar <mingo@elte.hu> wrote:
>
>> * Yinghai Lu <yhlu.kernel@gmail.com> wrote:
>>
>> > and read_apic_id become macro
>> > GET_APIC_ID(apic_read(APIC_ID))
>
> Yinghai, you had that x86/x2apic crash - do you think we can now try it
> again in tip/master?

Yes, it looks good now. I have been testing it for two days already; no problems found.

YH

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: make 64bit have get_apic_id
  2008-07-13  7:05                       ` Yinghai Lu
@ 2008-07-13  9:23                         ` Ingo Molnar
  2008-07-13  9:28                           ` Ingo Molnar
  0 siblings, 1 reply; 84+ messages in thread
From: Ingo Molnar @ 2008-07-13  9:23 UTC (permalink / raw)
  To: Yinghai Lu; +Cc: Thomas Gleixner, H. Peter Anvin, Suresh Siddha, LKML


* Yinghai Lu <yhlu.kernel@gmail.com> wrote:

> On Sat, Jul 12, 2008 at 11:59 PM, Ingo Molnar <mingo@elte.hu> wrote:
> >
> >> * Yinghai Lu <yhlu.kernel@gmail.com> wrote:
> >>
> >> > and read_apic_id become macro
> >> > GET_APIC_ID(apic_read(APIC_ID))
> >
> > Yinghai, you had that x86/x2apic crash - do you think we can now try it
> > again in tip/master?
> 
> Yes. it looks good now. I test it for two days already. no problem 
> found.

ok, thanks - i've merged it into tip/master and will do some more 
testing myself as well.

	Ingo

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: make 64bit have get_apic_id
  2008-07-13  9:23                         ` Ingo Molnar
@ 2008-07-13  9:28                           ` Ingo Molnar
  2008-07-13 16:15                             ` Suresh Siddha
  0 siblings, 1 reply; 84+ messages in thread
From: Ingo Molnar @ 2008-07-13  9:28 UTC (permalink / raw)
  To: Yinghai Lu; +Cc: Thomas Gleixner, H. Peter Anvin, Suresh Siddha, LKML


* Ingo Molnar <mingo@elte.hu> wrote:

> > > Yinghai, you had that x86/x2apic crash - do you think we can now 
> > > try it again in tip/master?
> > 
> > Yes. it looks good now. I test it for two days already. no problem 
> > found.
> 
> ok, thanks - i've merged it into tip/master and will do some more 
> testing myself as well.

the paravirt build is still broken:

 arch/x86/kernel/paravirt.c:377: error: expected identifier before ‘(' token
 arch/x86/kernel/paravirt.c:378: error: expected ‘}' before ‘.' token

with:

  http://redhat.com/~mingo/misc/config-Sun_Jul_13_11_25_20_CEST_2008.bad

	Ingo

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: max_low_pfn_mapped fix #1
  2008-07-12 21:30           ` [PATCH] x86: max_low_pfn_mapped fix #1 Yinghai Lu
@ 2008-07-13  9:45             ` Ingo Molnar
  0 siblings, 0 replies; 84+ messages in thread
From: Ingo Molnar @ 2008-07-13  9:45 UTC (permalink / raw)
  To: Yinghai Lu; +Cc: Thomas Gleixner, H. Peter Anvin, Suresh Siddha, LKML


* Yinghai Lu <yhlu.kernel@gmail.com> wrote:

> fix crash on Ingo's big box:
> calling  pci_iommu_init+0x0/0x17
> PCI-DMA: Disabling AGP.
> PCI-DMA: aperture base @ d0000000 size 65536 KB
> PCI-DMA: using GART IOMMU.
> PCI-DMA: Reserving 64MB of IOMMU area in the AGP aperture
> BUG: unable to handle kernel paging request at ffff88000003be88
> IP: [<ffffffff8026d377>] __alloc_pages_internal+0xc3/0x3f2

that indeed solved the crash - thanks Yinghai.

	Ingo

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCh] x86: overmapped fix when 4K pages on tail - 64bit
  2008-07-10 14:16     ` Arjan van de Ven
@ 2008-07-13 14:57       ` Andi Kleen
  2008-07-13 15:33         ` Arjan van de Ven
  2008-07-13 18:17         ` Yinghai Lu
  0 siblings, 2 replies; 84+ messages in thread
From: Andi Kleen @ 2008-07-13 14:57 UTC (permalink / raw)
  To: Arjan van de Ven
  Cc: Yinghai Lu, Ingo Molnar, Thomas Gleixner, H. Peter Anvin,
	Suresh Siddha, LKML

Arjan van de Ven <arjan@infradead.org> writes:

> On Wed, 9 Jul 2008 20:15:02 -0700
> Yinghai Lu <yhlu.kernel@gmail.com> wrote:
>
>> 
>> 
>> fix phys_pmd_init to make sure not to return big value than end.
>> 
>> also print out range split:1G/2M/4K
>> 
>> Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
>
>
> Making an accurate mapping solves a lot of potentially nasty/tricky
> corner cases, so I like the approach

It also means that e.g. on a 1GB system the direct mapping will 
never use 1GB pages.

And the CPU has to handle this anyways because all the old 
kernels overmap and no x86 CPU can drop support for all old
kernels.

In the end it means only large systems will benefit from 1GB
pages, which seems wrong to me.

-Andi

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCh] x86: overmapped fix when 4K pages on tail - 64bit
  2008-07-13 14:57       ` Andi Kleen
@ 2008-07-13 15:33         ` Arjan van de Ven
  2008-07-13 18:25           ` Andi Kleen
  2008-07-13 18:17         ` Yinghai Lu
  1 sibling, 1 reply; 84+ messages in thread
From: Arjan van de Ven @ 2008-07-13 15:33 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Yinghai Lu, Ingo Molnar, Thomas Gleixner, H. Peter Anvin,
	Suresh Siddha, LKML

On Sun, 13 Jul 2008 16:57:50 +0200
Andi Kleen <andi@firstfloor.org> wrote:

> Arjan van de Ven <arjan@infradead.org> writes:
> 
> > On Wed, 9 Jul 2008 20:15:02 -0700
> > Yinghai Lu <yhlu.kernel@gmail.com> wrote:
> >
> >> 
> >> 
> >> fix phys_pmd_init to make sure not to return big value than end.
> >> 
> >> also print out range split:1G/2M/4K
> >> 
> >> Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
> >
> >
> > Making an accurate mapping solves a lot of potentially nasty/tricky
> > corner cases, so I like the approach
> 
> It also means that e.g. on a 1GB system the direct mapping will 
> never use 1GB pages.
> 
> And the CPU has to handle this anyways because all the old 
> kernels overmap and no x86 CPU can drop support for all old
> kernels.

for the 2mb case.. please find me a dimm which isn't a multiple of
2Mb ;-)


^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: make 64bit have get_apic_id
  2008-07-13  9:28                           ` Ingo Molnar
@ 2008-07-13 16:15                             ` Suresh Siddha
  0 siblings, 0 replies; 84+ messages in thread
From: Suresh Siddha @ 2008-07-13 16:15 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Yinghai Lu, Thomas Gleixner, H. Peter Anvin, Siddha, Suresh B,
	LKML

On Sun, Jul 13, 2008 at 02:28:23AM -0700, Ingo Molnar wrote:
> 
> * Ingo Molnar <mingo@elte.hu> wrote:
> 
> > > > Yinghai, you had that x86/x2apic crash - do you think we can now
> > > > try it again in tip/master?
> > >
> > > Yes. it looks good now. I test it for two days already. no problem
> > > found.
> >
> > ok, thanks - i've merged it into tip/master and will do some more
> > testing myself as well.
> 
> the paravirt build is still broken:
> 
>  arch/x86/kernel/paravirt.c:377: error: expected identifier before ‘(' token
>  arch/x86/kernel/paravirt.c:378: error: expected ‘}' before ‘.' token
> 
> with:
> 
>   http://redhat.com/~mingo/misc/config-Sun_Jul_13_11_25_20_CEST_2008.bad

That's because of Yinghai's apic_ops patch for 32bit as well.

Yinghai, we need to fix CONFIG_PARAVIRT as well.

thanks,
suresh

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: let 32bit use apic_ops too
  2008-07-13  2:04               ` Yinghai Lu
@ 2008-07-13 16:28                 ` Suresh Siddha
  2008-07-13 16:51                   ` Maciej W. Rozycki
  0 siblings, 1 reply; 84+ messages in thread
From: Suresh Siddha @ 2008-07-13 16:28 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Siddha, Suresh B, Ingo Molnar, Thomas Gleixner, H. Peter Anvin,
	LKML

On Sat, Jul 12, 2008 at 07:04:44PM -0700, Yinghai Lu wrote:
> On Sat, Jul 12, 2008 at 6:08 PM, Suresh Siddha
> <suresh.b.siddha@intel.com> wrote:
> > Yinghai,
> >
> > We can now cleanup ack_APIC_irq() to use the native_apic_mem_write()
> > for both 32bit and 64bit
> 
> apic_write_around(), in case 32bit not have CONFIG_X86_GOOD_APIC?
> or let 64 bit use that apic_write_around()?

hmm, yes this gets complicated.

Reason why I used native_apic_mem_write() in 64bit is, ack_APIC_irq()
is in the fast path (intr) and we are already sure (through irq_chip's) that
this ack is indeed for xapic. So instead of using function pointer, I directly
used the native access.

As we are now implementing apic ops for 32bit as well, I thought we should
also use the native access for performance reasons.

So we should probably have native_apic_mem_write_around() and use that
in ack_APIC_irq(): for a good APIC it is native_apic_mem_write(),
and for !CONFIG_X86_GOOD_APIC it is native_apic_mem_write_atomic().

> 
> >
> > And also arch/x86/kernel/ipi.c can also use native_apic_mem_write()
> > similar to include/asm-x86/ipi.h
> 
> should remove duplicated functions in ipi.c

yes, if there are no differences.

BTW, as I mentioned in another thread, we need to take care of the paravirt
(lguest, vmi, xen etc) cases as well.

thanks,
suresh

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: let 32bit use apic_ops too
  2008-07-13  1:54                 ` Maciej W. Rozycki
@ 2008-07-13 16:43                   ` Suresh Siddha
  2008-07-13 17:05                     ` Maciej W. Rozycki
  0 siblings, 1 reply; 84+ messages in thread
From: Suresh Siddha @ 2008-07-13 16:43 UTC (permalink / raw)
  To: Maciej W. Rozycki
  Cc: Yinghai Lu, Ingo Molnar, Thomas Gleixner, H. Peter Anvin,
	Siddha, Suresh B, LKML

On Sat, Jul 12, 2008 at 06:54:53PM -0700, Maciej W. Rozycki wrote:
> On Sat, 12 Jul 2008, Yinghai Lu wrote:
> 
> > >  Etc...  Why are you changing names of these functions?  Are they meant
> > > not to apply to older APIC implementations anymore?
> >
> > because 64bit has that name.
> 
>  Shouldn't it be the other way round then?  Our 32-bit code is the
> original one and supports more APIC variations than the 64-bit one.  So at
> the point of unification 64-bit xAPIC-only functions will become universal
> ones making the naming counter-intuitive: apic_ -> any APIC, xapic_ ->
> xAPIC only.  I think only functions specific to the xAPIC should have the
> xapic_ prefix.

Maciej, Yinghai's current 32bit xapic_* supports both 4-bit apic and xapic.

Yinghai, maybe we should name them apic_mem_* instead of xapic_*?
Or are there any better suggestions?

thanks,
suresh

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: let 32bit use apic_ops too
  2008-07-13 16:28                 ` Suresh Siddha
@ 2008-07-13 16:51                   ` Maciej W. Rozycki
  2008-07-13 17:16                     ` Cyrill Gorcunov
  0 siblings, 1 reply; 84+ messages in thread
From: Maciej W. Rozycki @ 2008-07-13 16:51 UTC (permalink / raw)
  To: Suresh Siddha
  Cc: Yinghai Lu, Ingo Molnar, Thomas Gleixner, H. Peter Anvin, LKML

On Sun, 13 Jul 2008, Suresh Siddha wrote:

> So we should probably have native_apic_mem_write_around() and use that
> in ack_APIC_irq() and for good apic it is native_apic_mem_write()
> and for !CONFIG_X86_GOOD_APIC, it is native_apic_mem_write_atomic()

 Why don't you simply define apic_write_around() correctly for your
platform? -- for 64-bit it is always the same as apic_write() as
X86_GOOD_APIC may only be cleared for the original Pentium processor, so
any unnecessary code for the opposite case will be optimised away at the
build time, whether you use a macro or an inline function.  Why do you
think you need to create more variations of this contraption which is a
workaround for a corner-case hw erratum anyway?

  Maciej

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: let 32bit use apic_ops too
  2008-07-13 16:43                   ` Suresh Siddha
@ 2008-07-13 17:05                     ` Maciej W. Rozycki
  0 siblings, 0 replies; 84+ messages in thread
From: Maciej W. Rozycki @ 2008-07-13 17:05 UTC (permalink / raw)
  To: Suresh Siddha
  Cc: Yinghai Lu, Ingo Molnar, Thomas Gleixner, H. Peter Anvin, LKML

On Sun, 13 Jul 2008, Suresh Siddha wrote:

> Maciej, Yinghai's current 32bit xapic_* supports both 4-bit apic and xapic.

 There is also the 82489DX, which our 32-bit code is meant to support, and
which is 8-bit physical, 32-bit logical, and which differs from the other
APICs slightly here and there -- please be careful about that.

> Yinghai, may be we should name them as apic_mem_* instead of xapic_*
> or any better suggestions?

 If you propose to rename the functions from apic_* to apic_mem_* to
signify memory-mapped rather than MSR access to the device (how
interestingly history repeats itself, with a design dated back to the VAX
now reappearing in the x86), then it sounds reasonable to me and you
surely have my support.

  Maciej

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: let 32bit use apic_ops too
  2008-07-13 16:51                   ` Maciej W. Rozycki
@ 2008-07-13 17:16                     ` Cyrill Gorcunov
  2008-07-13 23:46                       ` Maciej W. Rozycki
  0 siblings, 1 reply; 84+ messages in thread
From: Cyrill Gorcunov @ 2008-07-13 17:16 UTC (permalink / raw)
  To: Maciej W. Rozycki
  Cc: Suresh Siddha, Yinghai Lu, Ingo Molnar, Thomas Gleixner,
	H. Peter Anvin, LKML

[Maciej W. Rozycki - Sun, Jul 13, 2008 at 05:51:30PM +0100]
| On Sun, 13 Jul 2008, Suresh Siddha wrote:
| 
| > So we should probably have native_apic_mem_write_around() and use that
| > in ack_APIC_irq() and for good apic it is native_apic_mem_write()
| > and for !CONFIG_X86_GOOD_APIC, it is native_apic_mem_write_atomic()
| 
|  Why don't you simply define apic_write_around() correctly for your
| platform? -- for 64-bit it is always the same as apic_write() as
| X86_GOOD_APIC may only be cleared for the original Pentium processor, so
| any unnecessary code for the opposite case will be optimised away at the
| build time, whether you use a macro or an inline function.  Why do you
| think you need to create more variations of this contraption which is a
| workaround for a corner-case hw erratum anyway?
| 
|   Maciej

Guys, when I was attempting to unify the apic code, the first thing was
renaming apic_write. Here is a patch for this - only the ESR and K8
registers are untouched - it may be useful to apply (actually I am not
sure if it will apply without fuzz now). I wonder if this helps :)
If this attempt is just crap - please ignore it and don't swear at
me :-)

		- Cyrill -

---

x86: apic - unify apic writes to write_around form

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
---

Index: linux-2.6.git/arch/x86/kernel/apic_32.c
===================================================================
--- linux-2.6.git.orig/arch/x86/kernel/apic_32.c	2008-07-10 20:06:16.000000000 +0400
+++ linux-2.6.git/arch/x86/kernel/apic_32.c	2008-07-10 20:06:21.000000000 +0400
@@ -807,7 +807,7 @@ int __init verify_local_APIC(void)
 	 */
 	reg0 = apic_read(APIC_LVR);
 	apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0);
-	apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK);
+	apic_write_around(APIC_LVR, reg0 ^ APIC_LVR_MASK);
 	reg1 = apic_read(APIC_LVR);
 	apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1);
 
@@ -1619,26 +1619,26 @@ static int lapic_resume(struct sys_devic
 	l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
 	wrmsr(MSR_IA32_APICBASE, l, h);
 
-	apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
-	apic_write(APIC_ID, apic_pm_state.apic_id);
-	apic_write(APIC_DFR, apic_pm_state.apic_dfr);
-	apic_write(APIC_LDR, apic_pm_state.apic_ldr);
-	apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri);
-	apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
-	apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
-	apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
+	apic_write_around(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
+	apic_write_around(APIC_ID, apic_pm_state.apic_id);
+	apic_write_around(APIC_DFR, apic_pm_state.apic_dfr);
+	apic_write_around(APIC_LDR, apic_pm_state.apic_ldr);
+	apic_write_around(APIC_TASKPRI, apic_pm_state.apic_taskpri);
+	apic_write_around(APIC_SPIV, apic_pm_state.apic_spiv);
+	apic_write_around(APIC_LVT0, apic_pm_state.apic_lvt0);
+	apic_write_around(APIC_LVT1, apic_pm_state.apic_lvt1);
 #ifdef CONFIG_X86_MCE_P4THERMAL
 	if (maxlvt >= 5)
-		apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
+		apic_write_around(APIC_LVTTHMR, apic_pm_state.apic_thmr);
 #endif
 	if (maxlvt >= 4)
-		apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
-	apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
-	apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
-	apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
+		apic_write_around(APIC_LVTPC, apic_pm_state.apic_lvtpc);
+	apic_write_around(APIC_LVTT, apic_pm_state.apic_lvtt);
+	apic_write_around(APIC_TDCR, apic_pm_state.apic_tdcr);
+	apic_write_around(APIC_TMICT, apic_pm_state.apic_tmict);
 	apic_write(APIC_ESR, 0);
 	apic_read(APIC_ESR);
-	apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
+	apic_write_around(APIC_LVTERR, apic_pm_state.apic_lvterr);
 	apic_write(APIC_ESR, 0);
 	apic_read(APIC_ESR);
 	local_irq_restore(flags);
Index: linux-2.6.git/arch/x86/kernel/apic_64.c
===================================================================
--- linux-2.6.git.orig/arch/x86/kernel/apic_64.c	2008-07-10 20:06:21.000000000 +0400
+++ linux-2.6.git/arch/x86/kernel/apic_64.c	2008-07-10 20:06:21.000000000 +0400
@@ -155,7 +155,7 @@ void __cpuinit enable_NMI_through_LVT0(v
 
 	/* unmask and set to NMI */
 	v = APIC_DM_NMI;
-	apic_write(APIC_LVT0, v);
+	apic_write_around(APIC_LVT0, v);
 }
 
 /**
@@ -191,18 +191,18 @@ static void __setup_APIC_LVTT(unsigned i
 	if (!irqen)
 		lvtt_value |= APIC_LVT_MASKED;
 
-	apic_write(APIC_LVTT, lvtt_value);
+	apic_write_around(APIC_LVTT, lvtt_value);
 
 	/*
 	 * Divide PICLK by 16
 	 */
 	tmp_value = apic_read(APIC_TDCR);
-	apic_write(APIC_TDCR, (tmp_value
+	apic_write_around(APIC_TDCR, (tmp_value
 				& ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE))
 				| APIC_TDR_DIV_16);
 
 	if (!oneshot)
-		apic_write(APIC_TMICT, clocks);
+		apic_write_around(APIC_TMICT, clocks);
 }
 
 /*
@@ -241,7 +241,7 @@ u8 setup_APIC_eilvt_ibs(u8 vector, u8 ms
 static int lapic_next_event(unsigned long delta,
 			    struct clock_event_device *evt)
 {
-	apic_write(APIC_TMICT, delta);
+	apic_write_around(APIC_TMICT, delta);
 	return 0;
 }
 
@@ -270,7 +270,7 @@ static void lapic_timer_setup(enum clock
 	case CLOCK_EVT_MODE_SHUTDOWN:
 		v = apic_read(APIC_LVTT);
 		v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
-		apic_write(APIC_LVTT, v);
+		apic_write_around(APIC_LVTT, v);
 		break;
 	case CLOCK_EVT_MODE_RESUME:
 		/* Nothing to do here */
@@ -529,33 +529,33 @@ void clear_local_APIC(void)
 	 */
 	if (maxlvt >= 3) {
 		v = ERROR_APIC_VECTOR; /* any non-zero vector will do */
-		apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
+		apic_write_around(APIC_LVTERR, v | APIC_LVT_MASKED);
 	}
 	/*
 	 * Careful: we have to set masks only first to deassert
 	 * any level-triggered sources.
 	 */
 	v = apic_read(APIC_LVTT);
-	apic_write(APIC_LVTT, v | APIC_LVT_MASKED);
+	apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED);
 	v = apic_read(APIC_LVT0);
-	apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
+	apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
 	v = apic_read(APIC_LVT1);
-	apic_write(APIC_LVT1, v | APIC_LVT_MASKED);
+	apic_write_around(APIC_LVT1, v | APIC_LVT_MASKED);
 	if (maxlvt >= 4) {
 		v = apic_read(APIC_LVTPC);
-		apic_write(APIC_LVTPC, v | APIC_LVT_MASKED);
+		apic_write_around(APIC_LVTPC, v | APIC_LVT_MASKED);
 	}
 
 	/*
 	 * Clean APIC state for other OSs:
 	 */
-	apic_write(APIC_LVTT, APIC_LVT_MASKED);
-	apic_write(APIC_LVT0, APIC_LVT_MASKED);
-	apic_write(APIC_LVT1, APIC_LVT_MASKED);
+	apic_write_around(APIC_LVTT, APIC_LVT_MASKED);
+	apic_write_around(APIC_LVT0, APIC_LVT_MASKED);
+	apic_write_around(APIC_LVT1, APIC_LVT_MASKED);
 	if (maxlvt >= 3)
-		apic_write(APIC_LVTERR, APIC_LVT_MASKED);
+		apic_write_around(APIC_LVTERR, APIC_LVT_MASKED);
 	if (maxlvt >= 4)
-		apic_write(APIC_LVTPC, APIC_LVT_MASKED);
+		apic_write_around(APIC_LVTPC, APIC_LVT_MASKED);
 	apic_write(APIC_ESR, 0);
 	apic_read(APIC_ESR);
 }
@@ -575,7 +575,7 @@ void disable_local_APIC(void)
 	 */
 	value = apic_read(APIC_SPIV);
 	value &= ~APIC_SPIV_APIC_ENABLED;
-	apic_write(APIC_SPIV, value);
+	apic_write_around(APIC_SPIV, value);
 }
 
 void lapic_shutdown(void)
@@ -606,7 +606,7 @@ int __init verify_local_APIC(void)
 	 */
 	reg0 = apic_read(APIC_LVR);
 	apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0);
-	apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK);
+	apic_write_around(APIC_LVR, reg0 ^ APIC_LVR_MASK);
 	reg1 = apic_read(APIC_LVR);
 	apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1);
 
@@ -633,10 +633,10 @@ int __init verify_local_APIC(void)
 	 */
 	reg0 = read_apic_id();
 	apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
-	apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
+	apic_write_around(APIC_ID, reg0 ^ APIC_ID_MASK);
 	reg1 = read_apic_id();
 	apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
-	apic_write(APIC_ID, reg0);
+	apic_write_around(APIC_ID, reg0);
 	if (reg1 != (reg0 ^ APIC_ID_MASK))
 		return 0;
 
@@ -668,7 +668,7 @@ void __init sync_Arb_IDs(void)
 	apic_wait_icr_idle();
 
 	apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
-	apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG
+	apic_write_around(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG
 				| APIC_DM_INIT);
 }
 
@@ -701,14 +701,14 @@ void __init init_bsp_APIC(void)
 	value |= APIC_SPIV_APIC_ENABLED;
 	value |= APIC_SPIV_FOCUS_DISABLED;
 	value |= SPURIOUS_APIC_VECTOR;
-	apic_write(APIC_SPIV, value);
+	apic_write_around(APIC_SPIV, value);
 
 	/*
 	 * Set up the virtual wire mode.
 	 */
-	apic_write(APIC_LVT0, APIC_DM_EXTINT);
+	apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
 	value = APIC_DM_NMI;
-	apic_write(APIC_LVT1, value);
+	apic_write_around(APIC_LVT1, value);
 }
 
 /**
@@ -744,7 +744,7 @@ void __cpuinit setup_local_APIC(void)
 	 */
 	value = apic_read(APIC_TASKPRI);
 	value &= ~APIC_TPRI_MASK;
-	apic_write(APIC_TASKPRI, value);
+	apic_write_around(APIC_TASKPRI, value);
 
 	/*
 	 * After a crash, we no longer service the interrupts and a pending
@@ -781,7 +781,7 @@ void __cpuinit setup_local_APIC(void)
 	 * Set spurious IRQ vector
 	 */
 	value |= SPURIOUS_APIC_VECTOR;
-	apic_write(APIC_SPIV, value);
+	apic_write_around(APIC_SPIV, value);
 
 	/*
 	 * Set up LVT0, LVT1:
@@ -803,7 +803,7 @@ void __cpuinit setup_local_APIC(void)
 		apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n",
 			    smp_processor_id());
 	}
-	apic_write(APIC_LVT0, value);
+	apic_write_around(APIC_LVT0, value);
 
 	/*
 	 * only the BP should see the LINT1 NMI signal, obviously.
@@ -812,7 +812,7 @@ void __cpuinit setup_local_APIC(void)
 		value = APIC_DM_NMI;
 	else
 		value = APIC_DM_NMI | APIC_LVT_MASKED;
-	apic_write(APIC_LVT1, value);
+	apic_write_around(APIC_LVT1, value);
 	preempt_enable();
 }
 
@@ -820,7 +820,7 @@ static void __cpuinit lapic_setup_esr(vo
 {
 	unsigned maxlvt = lapic_get_maxlvt();
 
-	apic_write(APIC_LVTERR, ERROR_APIC_VECTOR);
+	apic_write_around(APIC_LVTERR, ERROR_APIC_VECTOR);
 	/*
 	 * spec says clear errors after enabling vector.
 	 */
@@ -925,7 +925,7 @@ int __init APIC_init_uniprocessor(void)
 	connect_bsp_APIC();
 
 	physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
-	apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid));
+	apic_write_around(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid));
 
 	setup_local_APIC();
 
@@ -1023,7 +1023,7 @@ void disconnect_bsp_APIC(int virt_wire_s
 	value &= ~APIC_VECTOR_MASK;
 	value |= APIC_SPIV_APIC_ENABLED;
 	value |= 0xf;
-	apic_write(APIC_SPIV, value);
+	apic_write_around(APIC_SPIV, value);
 
 	if (!virt_wire_setup) {
 		/*
@@ -1036,10 +1036,10 @@ void disconnect_bsp_APIC(int virt_wire_s
 			APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
 		value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
 		value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
-		apic_write(APIC_LVT0, value);
+		apic_write_around(APIC_LVT0, value);
 	} else {
 		/* Disable LVT0 */
-		apic_write(APIC_LVT0, APIC_LVT_MASKED);
+		apic_write_around(APIC_LVT0, APIC_LVT_MASKED);
 	}
 
 	/* For LVT1 make it edge triggered, active high, nmi and enabled */
@@ -1049,7 +1049,7 @@ void disconnect_bsp_APIC(int virt_wire_s
 			APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
 	value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
 	value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
-	apic_write(APIC_LVT1, value);
+	apic_write_around(APIC_LVT1, value);
 }
 
 void __cpuinit generic_processor_info(int apicid, int version)
@@ -1176,26 +1176,26 @@ static int lapic_resume(struct sys_devic
 	l &= ~MSR_IA32_APICBASE_BASE;
 	l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
 	wrmsr(MSR_IA32_APICBASE, l, h);
-	apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
-	apic_write(APIC_ID, apic_pm_state.apic_id);
-	apic_write(APIC_DFR, apic_pm_state.apic_dfr);
-	apic_write(APIC_LDR, apic_pm_state.apic_ldr);
-	apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri);
-	apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
-	apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
-	apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
+	apic_write_around(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
+	apic_write_around(APIC_ID, apic_pm_state.apic_id);
+	apic_write_around(APIC_DFR, apic_pm_state.apic_dfr);
+	apic_write_around(APIC_LDR, apic_pm_state.apic_ldr);
+	apic_write_around(APIC_TASKPRI, apic_pm_state.apic_taskpri);
+	apic_write_around(APIC_SPIV, apic_pm_state.apic_spiv);
+	apic_write_around(APIC_LVT0, apic_pm_state.apic_lvt0);
+	apic_write_around(APIC_LVT1, apic_pm_state.apic_lvt1);
 #ifdef CONFIG_X86_MCE_INTEL
 	if (maxlvt >= 5)
-		apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
+		apic_write_around(APIC_LVTTHMR, apic_pm_state.apic_thmr);
 #endif
 	if (maxlvt >= 4)
-		apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
-	apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
-	apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
-	apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
+		apic_write_around(APIC_LVTPC, apic_pm_state.apic_lvtpc);
+	apic_write_around(APIC_LVTT, apic_pm_state.apic_lvtt);
+	apic_write_around(APIC_TDCR, apic_pm_state.apic_tdcr);
+	apic_write_around(APIC_TMICT, apic_pm_state.apic_tmict);
 	apic_write(APIC_ESR, 0);
 	apic_read(APIC_ESR);
-	apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
+	apic_write_around(APIC_LVTERR, apic_pm_state.apic_lvterr);
 	apic_write(APIC_ESR, 0);
 	apic_read(APIC_ESR);
 	local_irq_restore(flags);

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCh] x86: overmapped fix when 4K pages on tail - 64bit
  2008-07-13 14:57       ` Andi Kleen
  2008-07-13 15:33         ` Arjan van de Ven
@ 2008-07-13 18:17         ` Yinghai Lu
  2008-07-13 18:48           ` Andi Kleen
  1 sibling, 1 reply; 84+ messages in thread
From: Yinghai Lu @ 2008-07-13 18:17 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Arjan van de Ven, Ingo Molnar, Thomas Gleixner, H. Peter Anvin,
	Suresh Siddha, LKML

On Sun, Jul 13, 2008 at 7:57 AM, Andi Kleen <andi@firstfloor.org> wrote:
> Arjan van de Ven <arjan@infradead.org> writes:
>
>> On Wed, 9 Jul 2008 20:15:02 -0700
>> Yinghai Lu <yhlu.kernel@gmail.com> wrote:
>>
>>>
>>>
>>> fix phys_pmd_init to make sure not to return big value than end.
>>>
>>> also print out range split:1G/2M/4K
>>>
>>> Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
>>
>>
>> Making an accurate mapping solves a lot of potentially nasty/tricky
>> corner cases, so I like the approach
>
> It also means that e.g. on a 1GB system the direct mapping will
> never use 1GB pages.
>
> And the CPU has to handle this anyways because all the old
> kernels overmap and no x86 CPU can drop support for all old
> kernels.
>
> In the end it means only large systems will benefit from 1GB
> pages, which seems wrong to me.

with this patch, my 256g system still use gbpages for 1g-3g, 4g-256g

YH

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCh] x86: overmapped fix when 4K pages on tail - 64bit
  2008-07-13 15:33         ` Arjan van de Ven
@ 2008-07-13 18:25           ` Andi Kleen
  0 siblings, 0 replies; 84+ messages in thread
From: Andi Kleen @ 2008-07-13 18:25 UTC (permalink / raw)
  To: Arjan van de Ven
  Cc: Yinghai Lu, Ingo Molnar, Thomas Gleixner, H. Peter Anvin,
	Suresh Siddha, LKML

Arjan van de Ven <arjan@infradead.org> writes:
>
> for the 2mb case.. please find me a dimm which isn't a multiple of
> 2Mb ;-)

DIMMs are not necessarily mapped contiguously. Often they are mapped
"around" holes. Or some parts of the DIMM disappear magically for 
some hidden purpose like SMM. The actual DIMM sizes don't matter, but what
matters is what memory map the kernel sees after all translation
done in other layers.  And that end result is not necessarily
aligned to 2MB.
 
In that case pretty much all 64bit kernels will overmap slightly,
32bit kernels sometimes (e.g. under the right PAGE_OFFSET split
or when the <4GB hole is very large)

-Andi

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCh] x86: overmapped fix when 4K pages on tail - 64bit
  2008-07-13 18:17         ` Yinghai Lu
@ 2008-07-13 18:48           ` Andi Kleen
  2008-07-13 19:00             ` Yinghai Lu
  2008-07-13 20:32             ` Ingo Molnar
  0 siblings, 2 replies; 84+ messages in thread
From: Andi Kleen @ 2008-07-13 18:48 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Arjan van de Ven, Ingo Molnar, Thomas Gleixner, H. Peter Anvin,
	Suresh Siddha, LKML


> with this patch, my 256g system still use gbpages for 1g-3g, 4g-256g

256GB certainly qualifies as "large system". But as Linus always says:
Linux is not for servers only. Ignoring the small systems makes you
look bad.

-Andi


^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCh] x86: overmapped fix when 4K pages on tail - 64bit
  2008-07-13 18:48           ` Andi Kleen
@ 2008-07-13 19:00             ` Yinghai Lu
  2008-07-13 20:32             ` Ingo Molnar
  1 sibling, 0 replies; 84+ messages in thread
From: Yinghai Lu @ 2008-07-13 19:00 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Arjan van de Ven, Ingo Molnar, Thomas Gleixner, H. Peter Anvin,
	Suresh Siddha, LKML

On Sun, Jul 13, 2008 at 11:48 AM, Andi Kleen <andi@firstfloor.org> wrote:
>
>> with this patch, my 256g system still use gbpages for 1g-3g, 4g-256g
>
> 256GB certainly qualifies as "large system". But as Linus always says:
> Linux is not for servers only. Ignoring the small systems makes you
> look bad.

Do you want a system with 2g to use gbpages too?
That is because ACPI NVS or SMM code sits near the RAM.... why do we
need to direct map these areas?

YH

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCh] x86: overmapped fix when 4K pages on tail - 64bit
  2008-07-13 18:48           ` Andi Kleen
  2008-07-13 19:00             ` Yinghai Lu
@ 2008-07-13 20:32             ` Ingo Molnar
  2008-07-13 20:51               ` Andi Kleen
  1 sibling, 1 reply; 84+ messages in thread
From: Ingo Molnar @ 2008-07-13 20:32 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Yinghai Lu, Arjan van de Ven, Thomas Gleixner, H. Peter Anvin,
	Suresh Siddha, LKML


* Andi Kleen <andi@firstfloor.org> wrote:

> > with this patch, my 256g system still use gbpages for 1g-3g, 4g-256g
> 
> 256GB certainly qualifies as "large system". But as Linus always says: 
> Linux is not for servers only. Ignoring the small systems makes you 
> look bad.

Yinghai is hard at work fixing long-time crappiness of the x86 memory 
setup code on 32-bit and 64-bit x86 alike. Contrary to your suggestion 
he has not been "ignoring small systems" in any way - he has done the 
exact opposite: Yinghai has fixed a ton of small-system bugs and 
usability annoyances along the way.

Your attempt trying to cast this much-needed cleanup, fixing and 
robustization effort into a negative light is as pitiful as it is wrong.

	Ingo

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCh] x86: overmapped fix when 4K pages on tail - 64bit
  2008-07-13 20:32             ` Ingo Molnar
@ 2008-07-13 20:51               ` Andi Kleen
  2008-07-14  0:04                 ` H. Peter Anvin
  0 siblings, 1 reply; 84+ messages in thread
From: Andi Kleen @ 2008-07-13 20:51 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Yinghai Lu, Arjan van de Ven, Thomas Gleixner, H. Peter Anvin,
	Suresh Siddha, LKML

Ingo Molnar wrote:
> * Andi Kleen <andi@firstfloor.org> wrote:
> 
>>> with this patch, my 256g system still use gbpages for 1g-3g, 4g-256g
>> 256GB certainly qualifies as "large system". But as Linus always says: 
>> Linux is not for servers only. Ignoring the small systems makes you 
>> look bad.
> 
> Yinghai is hard at work fixing long-time crappiness of the x86 memory 
> setup code on 32-bit and 64-bit x86 alike. Contrary to your suggestion 
> he has not been "ignoring small systems" in any way - he has done the 
> exact opposite: Yinghai has fixed a ton of small-system bugs and 
> usability annoyances along the way.

I haven't read them all so I have no real opinion on those.

As a general comment I must admit I am a little uneasy with extensive
changes in early boot up because this code is hard to test completely
and fragile (that is why I was always conservative in this area).

But we'll see how it fares and it's your decision anyways
(but of course people will also blame you, not me, if it goes wrong ;-)

> Your attempt trying to cast this much-needed cleanup, fixing and 
> robustization effort into a negative light is as pitiful as it is wrong.

First I was only commenting on one specific patch, nothing more.

My point is full rounding to 4K on all corners is wasteful because the
CPUs have to handle that case anyways and every split costs precious
TLB entries in direct mapping accesses.

And on small systems this hurts more because a much larger fraction
of their mapped memory will be affected by this. Yes on a large system like his
256GB box it's more a rounding error (although one that can also cause
weird performance hiccups there when suddenly some kernel internal
operation that happens to hit the wrong memory takes much longer).

And I might be old fashioned, but I still think minimizing TLB misses
in the kernel is still quite important since the TLBs of modern x86
CPUs are still comparatively small.

btw that is why I was also quite disappointed that the new cpa eliminated
reassembly. It means that on a long uptime system even with moderate
traffic of CPA page allocation/free eventually the complete direct mapping
will be all 4K. And there will be TLB misses galore on each system call
when user space is TLB intensive.

Ok in that light Yinghai's patch is perhaps not so bad after longer
uptime in that scenario. Still performance directly after boot up is
also something that shouldn't be ignored and I'm still hopeful that
reassembly will be re-added at some point anyways.

-Andi


^ permalink raw reply	[flat|nested] 84+ messages in thread

* [PATCH] x86: max_low_pfn_mapped fix #4
  2008-07-12 21:32           ` [PATCH] x86: max_low_pfn_mapped fix #3 Yinghai Lu
@ 2008-07-13 21:29             ` Yinghai Lu
  2008-07-13 21:30             ` [PATCH] x86: get x86_phys_bits early Yinghai Lu
  2008-07-13 21:32             ` [PATCH] x86: make 64bit hpet_set_mapping to use ioremap too Yinghai Lu
  2 siblings, 0 replies; 84+ messages in thread
From: Yinghai Lu @ 2008-07-13 21:29 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Suresh Siddha; +Cc: LKML


	only add direct mapping for aperture

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

---
 arch/x86/kernel/pci-gart_64.c |    7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

Index: linux-2.6/arch/x86/kernel/pci-gart_64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/pci-gart_64.c
+++ linux-2.6/arch/x86/kernel/pci-gart_64.c
@@ -680,11 +680,8 @@ static __init int init_k8_gatt(struct ag
 	/* need to map that range */
 	end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT);
 	if (end_pfn > max_low_pfn_mapped) {
-		start_pfn = max_low_pfn_mapped;
-		max_low_pfn_mapped = init_memory_mapping(start_pfn<<PAGE_SHIFT,
-							 end_pfn<<PAGE_SHIFT);
-		if (max_pfn_mapped < max_low_pfn_mapped)
-			max_pfn_mapped = max_low_pfn_mapped;
+		start_pfn = (aper_base>>PAGE_SHIFT);
+		init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
 	}
 	return 0;
 

^ permalink raw reply	[flat|nested] 84+ messages in thread

* [PATCH] x86: get x86_phys_bits early
  2008-07-12 21:32           ` [PATCH] x86: max_low_pfn_mapped fix #3 Yinghai Lu
  2008-07-13 21:29             ` [PATCH] x86: max_low_pfn_mapped fix #4 Yinghai Lu
@ 2008-07-13 21:30             ` Yinghai Lu
  2008-07-13 21:32             ` [PATCH] x86: make 64bit hpet_set_mapping to use ioremap too Yinghai Lu
  2 siblings, 0 replies; 84+ messages in thread
From: Yinghai Lu @ 2008-07-13 21:30 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin; +Cc: LKML



when trying to make hpet_enable use ioremap instead of fixmap, I got:

ioremap: invalid physical address fed00000
------------[ cut here ]------------
WARNING: at arch/x86/mm/ioremap.c:161 __ioremap_caller+0x8c/0x2f3()
Modules linked in:
Pid: 0, comm: swapper Not tainted 2.6.26-rc9-tip-01873-ga9827e7-dirty #358

Call Trace:
 [<ffffffff8026615e>] warn_on_slowpath+0x6c/0xa7
 [<ffffffff802e2313>] ? __slab_alloc+0x20a/0x3fb
 [<ffffffff802d85c5>] ? mpol_new+0x88/0x17d
 [<ffffffff8022a4f4>] ? mcount_call+0x5/0x31
 [<ffffffff8022a4f4>] ? mcount_call+0x5/0x31
 [<ffffffff8024b0d2>] __ioremap_caller+0x8c/0x2f3
 [<ffffffff80e86dbd>] ? hpet_enable+0x39/0x241
 [<ffffffff8022a4f4>] ? mcount_call+0x5/0x31
 [<ffffffff8024b466>] ioremap_nocache+0x2a/0x40
 [<ffffffff80e86dbd>] hpet_enable+0x39/0x241
 [<ffffffff80e7a1f6>] hpet_time_init+0x21/0x4e
 [<ffffffff80e730e9>] start_kernel+0x302/0x395
 [<ffffffff80e722aa>] x86_64_start_reservations+0xb9/0xd4
 [<ffffffff80e722fe>] ? x86_64_init_pda+0x39/0x4f
 [<ffffffff80e72400>] x86_64_start_kernel+0xec/0x107

---[ end trace a7919e7f17c0a725 ]---


It seems that on AMD systems x86_phys_bits is set later...
Move the setting earlier, into early_identify_cpu,
and remove the same code for Intel and Centaur.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>


---
 arch/x86/kernel/cpu/centaur_64.c |   10 ----------
 arch/x86/kernel/cpu/common_64.c  |   12 +++++++-----
 arch/x86/kernel/cpu/intel_64.c   |   10 ----------
 3 files changed, 7 insertions(+), 25 deletions(-)

Index: linux-2.6/arch/x86/kernel/cpu/centaur_64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/centaur_64.c
+++ linux-2.6/arch/x86/kernel/cpu/centaur_64.c
@@ -16,16 +16,6 @@ static void __cpuinit early_init_centaur
 
 static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
 {
-	/* Cache sizes */
-	unsigned n;
-
-	n = c->extended_cpuid_level;
-	if (n >= 0x80000008) {
-		unsigned eax = cpuid_eax(0x80000008);
-		c->x86_virt_bits = (eax >> 8) & 0xff;
-		c->x86_phys_bits = eax & 0xff;
-	}
-
 	if (c->x86 == 0x6 && c->x86_model >= 0xf) {
 		c->x86_cache_alignment = c->x86_clflush_size * 2;
 		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
Index: linux-2.6/arch/x86/kernel/cpu/common_64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/common_64.c
+++ linux-2.6/arch/x86/kernel/cpu/common_64.c
@@ -99,7 +99,7 @@ int __cpuinit get_model_name(struct cpui
 
 void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
 {
-	unsigned int n, dummy, eax, ebx, ecx, edx;
+	unsigned int n, dummy, ebx, ecx, edx;
 
 	n = c->extended_cpuid_level;
 
@@ -122,11 +122,6 @@ void __cpuinit display_cacheinfo(struct
 		printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
 		c->x86_cache_size, ecx & 0xFF);
 	}
-	if (n >= 0x80000008) {
-		cpuid(0x80000008, &eax, &dummy, &dummy, &dummy);
-		c->x86_virt_bits = (eax >> 8) & 0xff;
-		c->x86_phys_bits = eax & 0xff;
-	}
 }
 
 void __cpuinit detect_ht(struct cpuinfo_x86 *c)
@@ -315,6 +310,13 @@ static void __cpuinit early_identify_cpu
 	if (c->extended_cpuid_level >= 0x80000007)
 		c->x86_power = cpuid_edx(0x80000007);
 
+	if (c->extended_cpuid_level >= 0x80000008) {
+		u32 eax = cpuid_eax(0x80000008);
+
+		c->x86_virt_bits = (eax >> 8) & 0xff;
+		c->x86_phys_bits = eax & 0xff;
+	}
+
 	/* Assume all 64-bit CPUs support 32-bit syscall */
 	set_cpu_cap(c, X86_FEATURE_SYSCALL32);
 
Index: linux-2.6/arch/x86/kernel/cpu/intel_64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/intel_64.c
+++ linux-2.6/arch/x86/kernel/cpu/intel_64.c
@@ -54,9 +54,6 @@ static void __cpuinit srat_detect_node(v
 
 static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 {
-	/* Cache sizes */
-	unsigned n;
-
 	init_intel_cacheinfo(c);
 	if (c->cpuid_level > 9) {
 		unsigned eax = cpuid_eax(10);
@@ -78,13 +75,6 @@ static void __cpuinit init_intel(struct
 	if (cpu_has_bts)
 		ds_init_intel(c);
 
-	n = c->extended_cpuid_level;
-	if (n >= 0x80000008) {
-		unsigned eax = cpuid_eax(0x80000008);
-		c->x86_virt_bits = (eax >> 8) & 0xff;
-		c->x86_phys_bits = eax & 0xff;
-	}
-
 	if (c->x86 == 15)
 		c->x86_cache_alignment = c->x86_clflush_size * 2;
 	if (c->x86 == 6)

^ permalink raw reply	[flat|nested] 84+ messages in thread

* [PATCH] x86: make 64bit hpet_set_mapping to use ioremap too
  2008-07-12 21:32           ` [PATCH] x86: max_low_pfn_mapped fix #3 Yinghai Lu
  2008-07-13 21:29             ` [PATCH] x86: max_low_pfn_mapped fix #4 Yinghai Lu
  2008-07-13 21:30             ` [PATCH] x86: get x86_phys_bits early Yinghai Lu
@ 2008-07-13 21:32             ` Yinghai Lu
  2008-07-13 21:50               ` [PATCH] x86: make 64bit hpet_set_mapping to use ioremap too v2 Yinghai Lu
  2 siblings, 1 reply; 84+ messages in thread
From: Yinghai Lu @ 2008-07-13 21:32 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin; +Cc: LKML


need to apply after
	[PATCH] x86: get x86_phys_bits early

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index ea230ec..bfef53c 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -35,24 +35,6 @@ static inline void hpet_writel(unsigned long d, unsigned long a)
 	writel(d, hpet_virt_address + a);
 }
 
-#ifdef CONFIG_X86_64
-
-#include <asm/pgtable.h>
-
-static inline void hpet_set_mapping(void)
-{
-	set_fixmap_nocache(FIX_HPET_BASE, hpet_address);
-	__set_fixmap(VSYSCALL_HPET, hpet_address, PAGE_KERNEL_VSYSCALL_NOCACHE);
-	hpet_virt_address = (void __iomem *)fix_to_virt(FIX_HPET_BASE);
-}
-
-static inline void hpet_clear_mapping(void)
-{
-	hpet_virt_address = NULL;
-}
-
-#else
-
 static inline void hpet_set_mapping(void)
 {
 	hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE);
@@ -63,7 +45,6 @@ static inline void hpet_clear_mapping(void)
 	iounmap(hpet_virt_address);
 	hpet_virt_address = NULL;
 }
-#endif
 
 /*
  * HPET command line enable / disable

^ permalink raw reply related	[flat|nested] 84+ messages in thread

* [PATCH] x86: make 64bit hpet_set_mapping to use ioremap too v2
  2008-07-13 21:32             ` [PATCH] x86: make 64bit hpet_set_mapping to use ioremap too Yinghai Lu
@ 2008-07-13 21:50               ` Yinghai Lu
  0 siblings, 0 replies; 84+ messages in thread
From: Yinghai Lu @ 2008-07-13 21:50 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin; +Cc: LKML


need to apply after
	[PATCH] x86: get x86_phys_bits early

keep the one for VSYSCALL_HPET

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

---
 arch/x86/kernel/hpet.c      |   20 ++++----------------
 include/asm-x86/fixmap_64.h |    1 -
 2 files changed, 4 insertions(+), 17 deletions(-)

Index: linux-2.6/arch/x86/kernel/hpet.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/hpet.c
+++ linux-2.6/arch/x86/kernel/hpet.c
@@ -36,26 +36,15 @@ static inline void hpet_writel(unsigned
 }
 
 #ifdef CONFIG_X86_64
-
 #include <asm/pgtable.h>
-
-static inline void hpet_set_mapping(void)
-{
-	set_fixmap_nocache(FIX_HPET_BASE, hpet_address);
-	__set_fixmap(VSYSCALL_HPET, hpet_address, PAGE_KERNEL_VSYSCALL_NOCACHE);
-	hpet_virt_address = (void __iomem *)fix_to_virt(FIX_HPET_BASE);
-}
-
-static inline void hpet_clear_mapping(void)
-{
-	hpet_virt_address = NULL;
-}
-
-#else
+#endif
 
 static inline void hpet_set_mapping(void)
 {
 	hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE);
+#ifdef CONFIG_X86_64
+	__set_fixmap(VSYSCALL_HPET, hpet_address, PAGE_KERNEL_VSYSCALL_NOCACHE);
+#endif
 }
 
 static inline void hpet_clear_mapping(void)
@@ -63,7 +52,6 @@ static inline void hpet_clear_mapping(vo
 	iounmap(hpet_virt_address);
 	hpet_virt_address = NULL;
 }
-#endif
 
 /*
  * HPET command line enable / disable
Index: linux-2.6/include/asm-x86/fixmap_64.h
===================================================================
--- linux-2.6.orig/include/asm-x86/fixmap_64.h
+++ linux-2.6/include/asm-x86/fixmap_64.h
@@ -40,7 +40,6 @@ enum fixed_addresses {
 	VSYSCALL_HPET,
 	FIX_DBGP_BASE,
 	FIX_EARLYCON_MEM_BASE,
-	FIX_HPET_BASE,
 	FIX_APIC_BASE,	/* local (CPU) APIC) -- required for SMP or not */
 	FIX_IO_APIC_BASE_0,
 	FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1,

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: let 32bit use apic_ops too
  2008-07-13 17:16                     ` Cyrill Gorcunov
@ 2008-07-13 23:46                       ` Maciej W. Rozycki
  2008-07-14 16:48                         ` Cyrill Gorcunov
  0 siblings, 1 reply; 84+ messages in thread
From: Maciej W. Rozycki @ 2008-07-13 23:46 UTC (permalink / raw)
  To: Cyrill Gorcunov
  Cc: Suresh Siddha, Yinghai Lu, Ingo Molnar, Thomas Gleixner,
	H. Peter Anvin, LKML

On Sun, 13 Jul 2008, Cyrill Gorcunov wrote:

> Guys, when I was in attempt to unify apic code first thing was -
> renaming apic_write. Here is a patch for this - only ESR and K8
> registers are untouched - may be useful to apply (actually not
> sure if it will apply without fuzz now). Wonder if this help :)

 Confirmed -- with one exception all the generic write accesses to the
APIC absolutely have to use apic_write_around() because of the lethal
implications of the double-write erratum of some local APIC versions
integrated with Pentium CPUs.

 The exception is the ESR register which cannot use the function because
of: 1. its semantics which gives side-effects on a read, 2. another
erratum, which makes the register lose its contents on a write.  
Therefore the approach is to avoid writes, which are architecturally
required, altogether on Pentium CPUs, which ignore them by their
implementation, and then use straight apic_write() on all the newer APIC
versions which would lose some information if a read happened before a
write.

 The K8 does not have to use apic_write_around() for the same reasons
x86-64 does not, as neither are hit by the double-write erratum, so all
their processor-specific write accesses may use apic_write() to avoid a
performance hit when used with a kernel with X86_GOOD_APIC cleared.  
Unfortunately, the LOCK# bus access always implied by the XCHG is quite
expensive, but still less intrusive than a sequence involving masking
interrupts locally beforehand and then restoring the IF flag to the
previous state afterwards.  As the APIC is local to the CPU, the grant
should not extend outside to the external bus though.

 And last, but not least, alternatives can be used these days to patch the
expensive XCHG instructions out with cheap MOV ones -- something that was
not available when the workaround was designed some ten years ago.

  Maciej

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCh] x86: overmapped fix when 4K pages on tail - 64bit
  2008-07-13 20:51               ` Andi Kleen
@ 2008-07-14  0:04                 ` H. Peter Anvin
  2008-07-14  6:39                   ` Andi Kleen
  0 siblings, 1 reply; 84+ messages in thread
From: H. Peter Anvin @ 2008-07-14  0:04 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Ingo Molnar, Yinghai Lu, Arjan van de Ven, Thomas Gleixner,
	Suresh Siddha, LKML

Andi Kleen wrote:
> 
> First I was only commenting on one specific patch, nothing more.
> 
> My point is full rounding to 4K on all corners is wasteful because the
> CPUs have to handle that case anyways and every split costs precious
> TLB entries in direct mapping accesses.
>

Well, the CPU *does* handle them... by splitting the larger pages into 
smaller pages.  They still end up in the small-page TLB, so there is no 
real difference if done in the CPU or in software.

> And I might be old fashioned, but I still think minimizing TLB misses
> in the kernel is still quite important since the TLBs of modern x86
> CPUs are still comparatively small.
> 
> btw that is why I was also quite disappointed that the new cpa eliminated
> reassembly. It means that on a long uptime system even with moderate
> traffic of CPA page allocation/free eventually the complete direct mapping
> will be all 4K. And there will be TLB misses galore on each system call
> when user space is TLB intensive.
> 
> Ok in that light Yinghai's patch is perhaps not so bad after longer
> uptime in that scenario. Still performance directly after boot up is
> also something that shouldn't be ignored and I'm still hopeful that
> reassembly will be re-added at some point anyways.

Memory state transitions are (fortunately) relatively rare and 
long-lived, but of course having reassembly is a nice thing to have in 
the long run.  Such reassembly also would rather naturally handle any 
small-page effects of boundary cases.

	-hpa

^ permalink raw reply	[flat|nested] 84+ messages in thread

* [PATCH] x86: let 32bit use apic_ops too - fix
  2008-07-12  1:41           ` [PATCH] x86: let 32bit use apic_ops too Yinghai Lu
                               ` (2 preceding siblings ...)
  2008-07-13  1:43             ` Maciej W. Rozycki
@ 2008-07-14  5:19             ` Yinghai Lu
  2008-07-14  7:12               ` Ingo Molnar
  2008-07-15 17:33               ` Suresh Siddha
  3 siblings, 2 replies; 84+ messages in thread
From: Yinghai Lu @ 2008-07-14  5:19 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Suresh Siddha; +Cc: LKML


fix for pv.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

---
 arch/x86/kernel/paravirt.c |    5 ----
 arch/x86/kernel/vmi_32.c   |   51 ++++++++++++++++++++++++++++++++++++++++++---
 arch/x86/xen/enlighten.c   |   19 +++++++---------
 include/asm-x86/paravirt.h |   29 -------------------------
 4 files changed, 57 insertions(+), 47 deletions(-)

Index: linux-2.6/arch/x86/kernel/paravirt.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/paravirt.c
+++ linux-2.6/arch/x86/kernel/paravirt.c
@@ -373,11 +373,6 @@ struct pv_cpu_ops pv_cpu_ops = {
 
 struct pv_apic_ops pv_apic_ops = {
 #ifdef CONFIG_X86_LOCAL_APIC
-#ifndef CONFIG_X86_64
-	.apic_write = native_apic_mem_write,
-	.apic_write_atomic = native_apic_mem_write_atomic,
-	.apic_read = native_apic_mem_read,
-#endif
 	.setup_boot_clock = setup_boot_APIC_clock,
 	.setup_secondary_clock = setup_secondary_APIC_clock,
 	.startup_ipi_hook = paravirt_nop,
Index: linux-2.6/arch/x86/kernel/vmi_32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/vmi_32.c
+++ linux-2.6/arch/x86/kernel/vmi_32.c
@@ -676,6 +676,50 @@ static inline int __init probe_vmi_rom(v
 	return 0;
 }
 
+#ifdef CONFIG_X86_LOCAL_APIC
+static u32 vmi_apic_read(u32 reg)
+{
+	return 0;
+}
+
+static void vmi_apic_write(u32 reg, u32 val)
+{
+	/* Warn to see if there's any stray references */
+	WARN_ON(1);
+}
+
+static u64 vmi_apic_icr_read(void)
+{
+	return 0;
+}
+
+static void vmi_apic_icr_write(u32 low, u32 id)
+{
+	/* Warn to see if there's any stray references */
+	WARN_ON(1);
+}
+
+static void vmi_apic_wait_icr_idle(void)
+{
+	return;
+}
+
+static u32 vmi_safe_apic_wait_icr_idle(void)
+{
+	return 0;
+}
+
+static struct apic_ops vmi_basic_apic_ops = {
+        .read = vmi_apic_read,
+        .write = vmi_apic_write,
+        .write_atomic = vmi_apic_write,
+        .icr_read = vmi_apic_icr_read,
+        .icr_write = vmi_apic_icr_write,
+        .wait_icr_idle = vmi_apic_wait_icr_idle,
+        .safe_wait_icr_idle = vmi_safe_apic_wait_icr_idle,
+};
+#endif
+
 /*
  * VMI setup common to all processors
  */
@@ -904,9 +948,10 @@ static inline int __init activate_vmi(vo
 #endif
 
 #ifdef CONFIG_X86_LOCAL_APIC
-	para_fill(pv_apic_ops.apic_read, APICRead);
-	para_fill(pv_apic_ops.apic_write, APICWrite);
-	para_fill(pv_apic_ops.apic_write_atomic, APICWrite);
+	para_fill(vmi_basic_apic_ops.read, APICRead);
+	para_fill(vmi_basic_apic_ops.write, APICWrite);
+	para_fill(vmi_basic_apic_ops.write_atomic, APICWrite);
+	apic_ops = &vmi_basic_apic_ops;
 #endif
 
 	/*
Index: linux-2.6/arch/x86/xen/enlighten.c
===================================================================
--- linux-2.6.orig/arch/x86/xen/enlighten.c
+++ linux-2.6/arch/x86/xen/enlighten.c
@@ -587,7 +587,6 @@ static void xen_apic_write(u32 reg, u32
 	WARN_ON(1);
 }
 
-#ifdef CONFIG_X86_64
 static u64 xen_apic_icr_read(void)
 {
 	return 0;
@@ -604,6 +603,11 @@ static void xen_apic_wait_icr_idle(void)
         return;
 }
 
+static u32 xen_safe_apic_wait_icr_idle(void)
+{
+        return 0;
+}
+
 static struct apic_ops xen_basic_apic_ops = {
 	.read = xen_apic_read,
 	.write = xen_apic_write,
@@ -611,9 +615,8 @@ static struct apic_ops xen_basic_apic_op
 	.icr_read = xen_apic_icr_read,
 	.icr_write = xen_apic_icr_write,
 	.wait_icr_idle = xen_apic_wait_icr_idle,
-	.safe_wait_icr_idle = xen_apic_wait_icr_idle,
+	.safe_wait_icr_idle = xen_safe_apic_wait_icr_idle,
 };
-#endif
 
 #endif
 
@@ -1298,11 +1301,6 @@ static const struct pv_irq_ops xen_irq_o
 
 static const struct pv_apic_ops xen_apic_ops __initdata = {
 #ifdef CONFIG_X86_LOCAL_APIC
-#ifndef CONFIG_X86_64
-	.apic_write = xen_apic_write,
-	.apic_write_atomic = xen_apic_write,
-	.apic_read = xen_apic_read,
-#endif
 	.setup_boot_clock = paravirt_nop,
 	.setup_secondary_clock = paravirt_nop,
 	.startup_ipi_hook = paravirt_nop,
@@ -1704,9 +1702,10 @@ asmlinkage void __init xen_start_kernel(
 	pv_irq_ops = xen_irq_ops;
 	pv_apic_ops = xen_apic_ops;
 	pv_mmu_ops = xen_mmu_ops;
-#ifdef CONFIG_X86_64
+
+#ifdef CONFIG_X86_LOCAL_APIC
 	/*
-	 * for 64bit, set up the basic apic ops aswell.
+	 * set up the basic apic ops.
 	 */
 	apic_ops = &xen_basic_apic_ops;
 #endif
Index: linux-2.6/include/asm-x86/paravirt.h
===================================================================
--- linux-2.6.orig/include/asm-x86/paravirt.h
+++ linux-2.6/include/asm-x86/paravirt.h
@@ -200,15 +200,6 @@ struct pv_irq_ops {
 
 struct pv_apic_ops {
 #ifdef CONFIG_X86_LOCAL_APIC
-#ifndef CONFIG_X86_64
-	/*
-	 * Direct APIC operations, principally for VMI.  Ideally
-	 * these shouldn't be in this interface.
-	 */
-	void (*apic_write)(u32 reg, u32 v);
-	void (*apic_write_atomic)(u32 reg, u32 v);
-	u32 (*apic_read)(u32 reg);
-#endif
 	void (*setup_boot_clock)(void);
 	void (*setup_secondary_clock)(void);
 
@@ -901,26 +892,6 @@ static inline void slow_down_io(void)
 }
 
 #ifdef CONFIG_X86_LOCAL_APIC
-/*
- * Basic functions accessing APICs.
- */
-#ifndef CONFIG_X86_64
-static inline void apic_write(u32 reg, u32 v)
-{
-	PVOP_VCALL2(pv_apic_ops.apic_write, reg, v);
-}
-
-static inline void apic_write_atomic(u32 reg, u32 v)
-{
-	PVOP_VCALL2(pv_apic_ops.apic_write_atomic, reg, v);
-}
-
-static inline u32 apic_read(u32 reg)
-{
-	return PVOP_CALL1(unsigned long, pv_apic_ops.apic_read, reg);
-}
-#endif
-
 static inline void setup_boot_clock(void)
 {
 	PVOP_VCALL0(pv_apic_ops.setup_boot_clock);

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCh] x86: overmapped fix when 4K pages on tail - 64bit
  2008-07-14  0:04                 ` H. Peter Anvin
@ 2008-07-14  6:39                   ` Andi Kleen
  0 siblings, 0 replies; 84+ messages in thread
From: Andi Kleen @ 2008-07-14  6:39 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Ingo Molnar, Yinghai Lu, Arjan van de Ven, Thomas Gleixner,
	Suresh Siddha, LKML

H. Peter Anvin wrote:
> Andi Kleen wrote:
>>
>> First I was only commenting on one specific patch, nothing more.
>>
>> My point is full rounding to 4K on all corners is wasteful because the
>> CPUs have to handle that case anyways and every split costs precious
>> TLB entries in direct mapping accesses.
>>
> 
> Well, the CPU *does* handle them... by splitting the larger pages into
> smaller pages.  They still end up in the small-page TLB, so there is no
> real difference if done in the CPU or in software.

There's actually a difference in some cases, but that's a different
issue.

Only when the hole is a real hole. But when it's just some firmware
area or similar that's not needed.

> Memory state transitions are (fortunately) relatively rare and
> long-lived,

That's not true today with several 3d driver setups. Also in general
I would expect more PAT use in the future and that is usually a split.

-Andi

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: let 32bit use apic_ops too - fix
  2008-07-14  5:19             ` [PATCH] x86: let 32bit use apic_ops too - fix Yinghai Lu
@ 2008-07-14  7:12               ` Ingo Molnar
  2008-07-14 16:49                 ` Suresh Siddha
  2008-07-15 17:33               ` Suresh Siddha
  1 sibling, 1 reply; 84+ messages in thread
From: Ingo Molnar @ 2008-07-14  7:12 UTC (permalink / raw)
  To: Yinghai Lu; +Cc: Thomas Gleixner, H. Peter Anvin, Suresh Siddha, LKML


* Yinghai Lu <yhlu.kernel@gmail.com> wrote:

> fix for pv.
> 
> Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>

applied to tip/x86/x2apic - thanks Yinghai.

	Ingo

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: let 32bit use apic_ops too
  2008-07-13 23:46                       ` Maciej W. Rozycki
@ 2008-07-14 16:48                         ` Cyrill Gorcunov
  2008-07-14 17:20                           ` Maciej W. Rozycki
  0 siblings, 1 reply; 84+ messages in thread
From: Cyrill Gorcunov @ 2008-07-14 16:48 UTC (permalink / raw)
  To: Maciej W. Rozycki
  Cc: Suresh Siddha, Yinghai Lu, Ingo Molnar, Thomas Gleixner,
	H. Peter Anvin, LKML

[Maciej W. Rozycki - Mon, Jul 14, 2008 at 12:46:11AM +0100]
| On Sun, 13 Jul 2008, Cyrill Gorcunov wrote:
| 
| > Guys, when I was in attempt to unify apic code first thing was -
| > renaming apic_write. Here is a patch for this - only ESR and K8
| > registers are untouched - may be useful to apply (actually not
| > sure if it will apply without fuzz now). Wonder if this help :)
| 
|  Confirmed -- with one exception all the generic write accesses to the
| APIC absolutely have to use apic_write_around() because of the lethal
| implications of the double-write erratum of some local APIC versions
| integrated with Pentium CPUs.
| 
|  The exception is the ESR register which cannot use the function because
| of: 1. its semantics which gives side-effects on a read, 2. another
| erratum, which makes the register lose its contents on a write.  
| Therefore the approach is to avoid writes, which are architecturally
| required, altogether on Pentium CPUs, which ignore them by their
| implementation, and then use straight apic_write() on all the newer APIC
| versions which would lose some information if a read happened before a
| write.
| 
|  The K8 does not have to use apic_write_around() for the same reasons
| x86-64 does not, as neither are hit by the double-write erratum, so all
| their processor-specific write accesses may use apic_write() to avoid a
| performance hit when used with a kernel with X86_GOOD_APIC cleared.  
| Unfortunately, the LOCK# bus access always implied by the XCHG is quite
| expensive, but still less intrusive than a sequence involving masking
| interrupts locally beforehand and then restoring the IF flag to the
| previous state afterwards.  As the APIC is local to the CPU, the grant
| should not extend outside to the external bus though.
| 
|  And last, but not least, alternatives can be used these days to patch the
| expensive XCHG instructions out with cheap MOV ones -- something that was
| not available when the workaround was designed some ten years ago.
| 
|   Maciej
| 

Maciej, but if we eliminate LOCK# by using a simple MOV there will be no
guarantee of atomicity. Am I wrong?

		- Cyrill -

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: let 32bit use apic_ops too - fix
  2008-07-14  7:12               ` Ingo Molnar
@ 2008-07-14 16:49                 ` Suresh Siddha
  2008-07-14 17:00                   ` Yinghai Lu
  2008-07-18 17:06                   ` Ingo Molnar
  0 siblings, 2 replies; 84+ messages in thread
From: Suresh Siddha @ 2008-07-14 16:49 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Yinghai Lu, Thomas Gleixner, H. Peter Anvin, Siddha, Suresh B,
	LKML

On Mon, Jul 14, 2008 at 12:12:07AM -0700, Ingo Molnar wrote:
> 
> * Yinghai Lu <yhlu.kernel@gmail.com> wrote:
> 
> > fix for pv.
> >
> > Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
> 
> applied to tip/x86/x2apic - thanks Yinghai.

Ingo, before you try for the third attempt ;) we need one more lguest apic_ops
fix. Patch appended. Thanks.

---
[patch] x86: apic_ops for lguest

apic_ops for lguest.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
---

Index: x86.git/arch/x86/lguest/boot.c
===================================================================
--- x86.git.orig/arch/x86/lguest/boot.c	2008-07-14 09:44:31.000000000 -0700
+++ x86.git/arch/x86/lguest/boot.c	2008-07-14 09:45:12.000000000 -0700
@@ -791,6 +791,37 @@
 {
 	return 0;
 }
+
+static u64 lguest_apic_icr_read(void)
+{
+	return 0;
+}
+
+static void lguest_apic_icr_write(u32 low, u32 id)
+{
+	/* Warn to see if there's any stray references */
+	WARN_ON(1);
+}
+
+static void lguest_apic_wait_icr_idle(void)
+{
+	return;
+}
+
+static u32 lguest_apic_safe_wait_icr_idle(void)
+{
+	return 0;
+}
+
+static struct apic_ops lguest_basic_apic_ops = {
+	.read = lguest_apic_read,
+	.write = lguest_apic_write,
+	.write_atomic = lguest_apic_write,
+	.icr_read = lguest_apic_icr_read,
+	.icr_write = lguest_apic_icr_write,
+	.wait_icr_idle = lguest_apic_wait_icr_idle,
+	.safe_wait_icr_idle = lguest_apic_safe_wait_icr_idle,
+};
 #endif
 
 /* STOP!  Until an interrupt comes in. */
@@ -990,9 +1021,7 @@
 
 #ifdef CONFIG_X86_LOCAL_APIC
 	/* apic read/write intercepts */
-	pv_apic_ops.apic_write = lguest_apic_write;
-	pv_apic_ops.apic_write_atomic = lguest_apic_write;
-	pv_apic_ops.apic_read = lguest_apic_read;
+	apic_ops = &lguest_basic_apic_ops;
 #endif
 
 	/* time operations */

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: let 32bit use apic_ops too - fix
  2008-07-14 16:49                 ` Suresh Siddha
@ 2008-07-14 17:00                   ` Yinghai Lu
  2008-07-14 18:03                     ` Suresh Siddha
  2008-07-18 17:06                   ` Ingo Molnar
  1 sibling, 1 reply; 84+ messages in thread
From: Yinghai Lu @ 2008-07-14 17:00 UTC (permalink / raw)
  To: Suresh Siddha; +Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, LKML

On Mon, Jul 14, 2008 at 9:49 AM, Suresh Siddha
<suresh.b.siddha@intel.com> wrote:
> On Mon, Jul 14, 2008 at 12:12:07AM -0700, Ingo Molnar wrote:
>>
>> * Yinghai Lu <yhlu.kernel@gmail.com> wrote:
>>
>> > fix for pv.
>> >
>> > Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
>>
>> applied to tip/x86/x2apic - thanks Yinghai.
>
> Ingo, before you try for the third attempt ;) we need one more lguest apic_ops
> fix. Patch appended. Thanks.
>
> ---
> [patch] x86: apic_ops for lguest
>
> apic_ops for lguest.
>
> Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
> Cc: Yinghai Lu <yhlu.kernel@gmail.com>
> ---
>
> Index: x86.git/arch/x86/lguest/boot.c
> ===================================================================
> --- x86.git.orig/arch/x86/lguest/boot.c 2008-07-14 09:44:31.000000000 -0700
> +++ x86.git/arch/x86/lguest/boot.c      2008-07-14 09:45:12.000000000 -0700
> @@ -791,6 +791,37 @@
>  {
>        return 0;
>  }
> +
> +static u64 lguest_apic_icr_read(void)
> +{
> +       return 0;
> +}
> +
> +static void lguest_apic_icr_write(u32 low, u32 id)
> +{
> +       /* Warn to see if there's any stray references */
> +       WARN_ON(1);
> +}
> +
> +static void lguest_apic_wait_icr_idle(void)
> +{
> +       return;
> +}
> +
> +static u32 lguest_apic_safe_wait_icr_idle(void)
> +{
> +       return 0;
> +}
> +
> +static struct apic_ops lguest_basic_apic_ops = {
> +       .read = lguest_apic_read,
> +       .write = lguest_apic_write,
> +       .write_atomic = lguest_apic_write,
> +       .icr_read = lguest_apic_icr_read,
> +       .icr_write = lguest_apic_icr_write,
> +       .wait_icr_idle = lguest_apic_wait_icr_idle,
> +       .safe_wait_icr_idle = lguest_apic_safe_wait_icr_idle,
> +};
>  #endif
>
>  /* STOP!  Until an interrupt comes in. */
> @@ -990,9 +1021,7 @@
>
>  #ifdef CONFIG_X86_LOCAL_APIC
>        /* apic read/write intercepts */
> -       pv_apic_ops.apic_write = lguest_apic_write;
> -       pv_apic_ops.apic_write_atomic = lguest_apic_write;
> -       pv_apic_ops.apic_read = lguest_apic_read;
> +       apic_ops = &lguest_basic_apic_ops;
>  #endif
>
>        /* time operations */

do we need one for KVM pv?

YH

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: let 32bit use apic_ops too
  2008-07-14 16:48                         ` Cyrill Gorcunov
@ 2008-07-14 17:20                           ` Maciej W. Rozycki
  2008-07-14 18:09                             ` Cyrill Gorcunov
  0 siblings, 1 reply; 84+ messages in thread
From: Maciej W. Rozycki @ 2008-07-14 17:20 UTC (permalink / raw)
  To: Cyrill Gorcunov
  Cc: Suresh Siddha, Yinghai Lu, Ingo Molnar, Thomas Gleixner,
	H. Peter Anvin, LKML

On Mon, 14 Jul 2008, Cyrill Gorcunov wrote:

> Maciej, but if we eliminate LOCK# by using simple MOV there will not
> be guarantee for atomicity. Am I wrong?

 You are right, but we do not care about atomicity.  We only care about
interrupts.  This is because the local APIC is private to its associated
CPU and inaccessible from the outside, at least for writes (mind the
Remote Read command), so as long as the local CPU does not issue
consecutive write cycles, there is no problem with another CPU getting in
the way.  Which means any RMW instruction would suffice here, with the R
part of the cycle separating any possible preceding write from one
immediately following, but unfortunately the only one we can use is the
XCHG and it has always implied the LOCK#, since the 8086, which at that
point was considered a microoptimization (the LOCK# was cheap and an extra
memory byte, otherwise needed for the LOCK prefix, expensive back then).  
So atomicity is an unfortunate side effect rather than a part of the
design here.

 Now if we know the APIC does not suffer from the double-write erratum,
then we can use a straight MOV as consecutive writes are not a concern
anymore.

  Maciej

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: let 32bit use apic_ops too - fix
  2008-07-14 17:00                   ` Yinghai Lu
@ 2008-07-14 18:03                     ` Suresh Siddha
  0 siblings, 0 replies; 84+ messages in thread
From: Suresh Siddha @ 2008-07-14 18:03 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Siddha, Suresh B, Ingo Molnar, Thomas Gleixner, H. Peter Anvin,
	LKML

On Mon, Jul 14, 2008 at 10:00:28AM -0700, Yinghai Lu wrote:
> On Mon, Jul 14, 2008 at 9:49 AM, Suresh Siddha
> <suresh.b.siddha@intel.com> wrote:
> > [patch] x86: apic_ops for lguest
> >
> > apic_ops for lguest.
> >
> > Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
> > Cc: Yinghai Lu <yhlu.kernel@gmail.com>
> 
> do we need one for KVM pv?

No. They use different op's for complete apic virt.

thanks,
suresh

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: let 32bit use apic_ops too
  2008-07-14 17:20                           ` Maciej W. Rozycki
@ 2008-07-14 18:09                             ` Cyrill Gorcunov
  2008-07-14 18:24                               ` Maciej W. Rozycki
  0 siblings, 1 reply; 84+ messages in thread
From: Cyrill Gorcunov @ 2008-07-14 18:09 UTC (permalink / raw)
  To: Maciej W. Rozycki
  Cc: Suresh Siddha, Yinghai Lu, Ingo Molnar, Thomas Gleixner,
	H. Peter Anvin, LKML

[Maciej W. Rozycki - Mon, Jul 14, 2008 at 06:20:26PM +0100]
| On Mon, 14 Jul 2008, Cyrill Gorcunov wrote:
| 
| > Maciej, but if we eliminate LOCK# by using simple MOV there will not
| > be guarantee for atomicity. Am I wrong?
| 
|  You are right, but we do not care about atomicity.  We only care about
| interrupts.  This is because the local APIC is private to its associated
| CPU and inaccessible from the outside, at least for writes (mind the
| Remote Read command), so as long as the local CPU does not issue
| consecutive write cycles, there is no problem with another CPU getting in
| the way.  Which means any RMW instruction would suffice here, with the R
| part of the cycle separating any possible preceding write from one
| immediately following, but unfortunately the only one we can use is the
| XCHG and it has always implied the LOCK#, since the 8086, which at that
| point was considered a microoptimization (the LOCK# was cheap and an extra
| memory byte, otherwise needed for the LOCK prefix, expensive back then).  
| So atomicity is an unfortunate side effect rather than a part of the
| design here.
| 
|  Now if we know the APIC does not suffer from the double-write erratum,
| then we can use a straight MOV as consecutive writes are not a concern
| anymore.
| 
|   Maciej
| 

  Maciej, check me please (it's a bit of a shame but I don't understand the
problem that deeply) - we have only two errata here, 3AP and 11AP. 3AP says -
"Writes to error register clears register" so we don't care about locking there
since our main task is to read the error number or clear it (well we're
recommended to write before read - but that is different and not related to
the hw error).

  The second problem - 11AP says the following: "Back to back assertions of
HOLD or BOFF# may cause lost APIC write cycle". For this case we use LOCK#
since HOLD is not recognized during LOCK cycles (as the Intel docs say).

  Did I miss something? Or maybe it's completely out-of-topic? :)

		- Cyrill -

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: let 32bit use apic_ops too
  2008-07-14 18:09                             ` Cyrill Gorcunov
@ 2008-07-14 18:24                               ` Maciej W. Rozycki
  2008-07-14 18:32                                 ` Cyrill Gorcunov
  0 siblings, 1 reply; 84+ messages in thread
From: Maciej W. Rozycki @ 2008-07-14 18:24 UTC (permalink / raw)
  To: Cyrill Gorcunov
  Cc: Suresh Siddha, Yinghai Lu, Ingo Molnar, Thomas Gleixner,
	H. Peter Anvin, LKML

On Mon, 14 Jul 2008, Cyrill Gorcunov wrote:

>   Maciej, check me please (it's a bit shame but I don't understand the problem
> that deep) - we have only two errata here 3AP and 11AP. 3AP says - "Writes to
> error register clears register" so we don't care about locking there since
> our main task is to read the error number or clear it (well we're recommended
> to write before read - but that is different and not related to the hw
> error).
> 
>   The second problem - 11AP says the following: "Back to back assertions of
> HOLD or BOFF# may cause lost APIC write cycle". For this case we use LOCK#
> since - HOLD is not recognized during LOCK cycles (as Intel docs says).
> 
>   Did I miss something? Or maybe it's completely out-of-topic? :)

 Check the text of the 11AP erratum -- we simply use one of the Intel's
recommended workarounds, which says that an APIC read instruction before
every APIC write instruction will avoid the problem.

  Maciej

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: let 32bit use apic_ops too
  2008-07-14 18:24                               ` Maciej W. Rozycki
@ 2008-07-14 18:32                                 ` Cyrill Gorcunov
  0 siblings, 0 replies; 84+ messages in thread
From: Cyrill Gorcunov @ 2008-07-14 18:32 UTC (permalink / raw)
  To: Maciej W. Rozycki
  Cc: Suresh Siddha, Yinghai Lu, Ingo Molnar, Thomas Gleixner,
	H. Peter Anvin, LKML

[Maciej W. Rozycki - Mon, Jul 14, 2008 at 07:24:15PM +0100]
| On Mon, 14 Jul 2008, Cyrill Gorcunov wrote:
| 
| >   Maciej, check me please (it's a bit shame but I don't understand the problem
| > that deep) - we have only two errata here 3AP and 11AP. 3AP says - "Writes to
| > error register clears register" so we don't care about locking there since
| > our main task is to read the error number or clear it (well we're recommended
| > to write before read - but that is different and not related to the hw
| > error).
| > 
| >   The second problem - 11AP says the following: "Back to back assertions of
| > HOLD or BOFF# may cause lost APIC write cycle". For this case we use LOCK#
| > since - HOLD is not recognized during LOCK cycles (as Intel docs says).
| > 
| >   Did I miss something? Or maybe it's completely out-of-topic? :)
| 
|  Check the text of the 11AP erratum -- we simply use one of the Intel's
| recommended workarounds, which says that an APIC read instruction before
| every APIC write instruction will avoid the problem.
| 
|   Maciej
| 

ok, thanks!

		- Cyrill -

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: let 32bit use apic_ops too - fix
  2008-07-14  5:19             ` [PATCH] x86: let 32bit use apic_ops too - fix Yinghai Lu
  2008-07-14  7:12               ` Ingo Molnar
@ 2008-07-15 17:33               ` Suresh Siddha
  2008-07-15 18:10                 ` Yinghai Lu
  2008-07-18 17:07                 ` Ingo Molnar
  1 sibling, 2 replies; 84+ messages in thread
From: Suresh Siddha @ 2008-07-15 17:33 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, Siddha, Suresh B,
	LKML

On Sun, Jul 13, 2008 at 10:19:35PM -0700, Yinghai Lu wrote:
> 
> fix for pv.
> 
> Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
> 
> ---
>  arch/x86/kernel/paravirt.c |    5 ----
>  arch/x86/kernel/vmi_32.c   |   51 ++++++++++++++++++++++++++++++++++++++++++---
>  arch/x86/xen/enlighten.c   |   19 +++++++---------
>  include/asm-x86/paravirt.h |   29 -------------------------
>  4 files changed, 57 insertions(+), 47 deletions(-)
> 
> Index: linux-2.6/arch/x86/kernel/paravirt.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/paravirt.c
> +++ linux-2.6/arch/x86/kernel/paravirt.c
> @@ -373,11 +373,6 @@ struct pv_cpu_ops pv_cpu_ops = {
> 
>  struct pv_apic_ops pv_apic_ops = {
>  #ifdef CONFIG_X86_LOCAL_APIC
> -#ifndef CONFIG_X86_64
> -       .apic_write = native_apic_mem_write,
> -       .apic_write_atomic = native_apic_mem_write_atomic,
> -       .apic_read = native_apic_mem_read,
> -#endif
>         .setup_boot_clock = setup_boot_APIC_clock,
>         .setup_secondary_clock = setup_secondary_APIC_clock,
>         .startup_ipi_hook = paravirt_nop,
> Index: linux-2.6/arch/x86/kernel/vmi_32.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/vmi_32.c
> +++ linux-2.6/arch/x86/kernel/vmi_32.c
> @@ -676,6 +676,50 @@ static inline int __init probe_vmi_rom(v
>         return 0;
>  }
> 
> +#ifdef CONFIG_X86_LOCAL_APIC
> +static u32 vmi_apic_read(u32 reg)
> +{
> +       return 0;
> +}
> +
> +static void vmi_apic_write(u32 reg, u32 val)
> +{
> +       /* Warn to see if there's any stray references */
> +       WARN_ON(1);
> +}
> +
> +static u64 vmi_apic_icr_read(void)
> +{
> +       return 0;
> +}
> +
> +static void vmi_apic_icr_write(u32 low, u32 id)
> +{
> +       /* Warn to see if there's any stray references */
> +       WARN_ON(1);
> +}
> +
> +static void vmi_apic_wait_icr_idle(void)
> +{
> +       return;
> +}
> +
> +static u32 vmi_safe_apic_wait_icr_idle(void)
> +{
> +       return 0;
> +}
> +
> +static struct apic_ops vmi_basic_apic_ops = {
> +        .read = vmi_apic_read,
> +        .write = vmi_apic_write,
> +        .write_atomic = vmi_apic_write,
> +        .icr_read = vmi_apic_icr_read,
> +        .icr_write = vmi_apic_icr_write,
> +        .wait_icr_idle = vmi_apic_wait_icr_idle,
> +        .safe_wait_icr_idle = vmi_safe_apic_wait_icr_idle,
> +};
> +#endif
> +
>  /*
>   * VMI setup common to all processors
>   */
> @@ -904,9 +948,10 @@ static inline int __init activate_vmi(vo
>  #endif
> 
>  #ifdef CONFIG_X86_LOCAL_APIC
> -       para_fill(pv_apic_ops.apic_read, APICRead);
> -       para_fill(pv_apic_ops.apic_write, APICWrite);
> -       para_fill(pv_apic_ops.apic_write_atomic, APICWrite);
> +       para_fill(vmi_basic_apic_ops.read, APICRead);
> +       para_fill(vmi_basic_apic_ops.write, APICWrite);
> +       para_fill(vmi_basic_apic_ops.write_atomic, APICWrite);
> +       apic_ops = &vmi_basic_apic_ops;

Yinghai, looking more closely at this, based on my understanding this might be
wrong for VMI. The correct patch should be as follows. Any comments?

thanks,
suresh
---
Fix VMI apic_ops.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
---

diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index b1375fa..3410196 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -145,6 +145,11 @@ static int modern_apic(void)
 	return lapic_get_version() >= 0x14;
 }
 
+/*
+ * Paravirt kernels might also be using the ops below. So we still
+ * use the generic apic_read()/apic_write(), which might point to different
+ * ops in the PARAVIRT case.
+ */
 void xapic_wait_icr_idle(void)
 {
 	while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index cf30743..d6897e4 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -676,50 +676,6 @@ static inline int __init probe_vmi_rom(void)
 	return 0;
 }
 
-#ifdef CONFIG_X86_LOCAL_APIC
-static u32 vmi_apic_read(u32 reg)
-{
-	return 0;
-}
-
-static void vmi_apic_write(u32 reg, u32 val)
-{
-	/* Warn to see if there's any stray references */
-	WARN_ON(1);
-}
-
-static u64 vmi_apic_icr_read(void)
-{
-	return 0;
-}
-
-static void vmi_apic_icr_write(u32 low, u32 id)
-{
-	/* Warn to see if there's any stray references */
-	WARN_ON(1);
-}
-
-static void vmi_apic_wait_icr_idle(void)
-{
-	return;
-}
-
-static u32 vmi_safe_apic_wait_icr_idle(void)
-{
-	return 0;
-}
-
-static struct apic_ops vmi_basic_apic_ops = {
-        .read = vmi_apic_read,
-        .write = vmi_apic_write,
-        .write_atomic = vmi_apic_write,
-        .icr_read = vmi_apic_icr_read,
-        .icr_write = vmi_apic_icr_write,
-        .wait_icr_idle = vmi_apic_wait_icr_idle,
-        .safe_wait_icr_idle = vmi_safe_apic_wait_icr_idle,
-};
-#endif
-
 /*
  * VMI setup common to all processors
  */
@@ -948,10 +904,9 @@ static inline int __init activate_vmi(void)
 #endif
 
 #ifdef CONFIG_X86_LOCAL_APIC
-	para_fill(vmi_basic_apic_ops.read, APICRead);
-	para_fill(vmi_basic_apic_ops.write, APICWrite);
-	para_fill(vmi_basic_apic_ops.write_atomic, APICWrite);
-	apic_ops = &vmi_basic_apic_ops;
+	para_fill(apic_ops->read, APICRead);
+	para_fill(apic_ops->write, APICWrite);
+	para_fill(apic_ops->write_atomic, APICWrite);
 #endif
 
 	/*

^ permalink raw reply related	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: let 32bit use apic_ops too - fix
  2008-07-15 17:33               ` Suresh Siddha
@ 2008-07-15 18:10                 ` Yinghai Lu
  2008-07-15 18:27                   ` Suresh Siddha
  2008-07-18 17:07                 ` Ingo Molnar
  1 sibling, 1 reply; 84+ messages in thread
From: Yinghai Lu @ 2008-07-15 18:10 UTC (permalink / raw)
  To: Suresh Siddha; +Cc: Ingo Molnar, Thomas Gleixner, H. Peter Anvin, LKML

On Tue, Jul 15, 2008 at 10:33 AM, Suresh Siddha
<suresh.b.siddha@intel.com> wrote:
> On Sun, Jul 13, 2008 at 10:19:35PM -0700, Yinghai Lu wrote:
>>
>> fix for pv.
>>
>> Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
>>
>> ---
>>  arch/x86/kernel/paravirt.c |    5 ----
>>  arch/x86/kernel/vmi_32.c   |   51 ++++++++++++++++++++++++++++++++++++++++++---
>>  arch/x86/xen/enlighten.c   |   19 +++++++---------
>>  include/asm-x86/paravirt.h |   29 -------------------------
>>  4 files changed, 57 insertions(+), 47 deletions(-)
>>
>> Index: linux-2.6/arch/x86/kernel/paravirt.c
>> ===================================================================
>> --- linux-2.6.orig/arch/x86/kernel/paravirt.c
>> +++ linux-2.6/arch/x86/kernel/paravirt.c
>> @@ -373,11 +373,6 @@ struct pv_cpu_ops pv_cpu_ops = {
>>
>>  struct pv_apic_ops pv_apic_ops = {
>>  #ifdef CONFIG_X86_LOCAL_APIC
>> -#ifndef CONFIG_X86_64
>> -       .apic_write = native_apic_mem_write,
>> -       .apic_write_atomic = native_apic_mem_write_atomic,
>> -       .apic_read = native_apic_mem_read,
>> -#endif
>>         .setup_boot_clock = setup_boot_APIC_clock,
>>         .setup_secondary_clock = setup_secondary_APIC_clock,
>>         .startup_ipi_hook = paravirt_nop,
>> Index: linux-2.6/arch/x86/kernel/vmi_32.c
>> ===================================================================
>> --- linux-2.6.orig/arch/x86/kernel/vmi_32.c
>> +++ linux-2.6/arch/x86/kernel/vmi_32.c
>> @@ -676,6 +676,50 @@ static inline int __init probe_vmi_rom(v
>>         return 0;
>>  }
>>
>> +#ifdef CONFIG_X86_LOCAL_APIC
>> +static u32 vmi_apic_read(u32 reg)
>> +{
>> +       return 0;
>> +}
>> +
>> +static void vmi_apic_write(u32 reg, u32 val)
>> +{
>> +       /* Warn to see if there's any stray references */
>> +       WARN_ON(1);
>> +}
>> +
>> +static u64 vmi_apic_icr_read(void)
>> +{
>> +       return 0;
>> +}
>> +
>> +static void vmi_apic_icr_write(u32 low, u32 id)
>> +{
>> +       /* Warn to see if there's any stray references */
>> +       WARN_ON(1);
>> +}
>> +
>> +static void vmi_apic_wait_icr_idle(void)
>> +{
>> +       return;
>> +}
>> +
>> +static u32 vmi_safe_apic_wait_icr_idle(void)
>> +{
>> +       return 0;
>> +}
>> +
>> +static struct apic_ops vmi_basic_apic_ops = {
>> +        .read = vmi_apic_read,
>> +        .write = vmi_apic_write,
>> +        .write_atomic = vmi_apic_write,
>> +        .icr_read = vmi_apic_icr_read,
>> +        .icr_write = vmi_apic_icr_write,
>> +        .wait_icr_idle = vmi_apic_wait_icr_idle,
>> +        .safe_wait_icr_idle = vmi_safe_apic_wait_icr_idle,
>> +};
>> +#endif
>> +
>>  /*
>>   * VMI setup common to all processors
>>   */
>> @@ -904,9 +948,10 @@ static inline int __init activate_vmi(vo
>>  #endif
>>
>>  #ifdef CONFIG_X86_LOCAL_APIC
>> -       para_fill(pv_apic_ops.apic_read, APICRead);
>> -       para_fill(pv_apic_ops.apic_write, APICWrite);
>> -       para_fill(pv_apic_ops.apic_write_atomic, APICWrite);
>> +       para_fill(vmi_basic_apic_ops.read, APICRead);
>> +       para_fill(vmi_basic_apic_ops.write, APICWrite);
>> +       para_fill(vmi_basic_apic_ops.write_atomic, APICWrite);
>> +       apic_ops = &vmi_basic_apic_ops;
>
> Yinghai, Looking more closely at this, based on my understanding this might be
> wrong for VMI. Correct patch should be as follows. Any comments?

So you mean the ICR-related ops will still use the default native members?

YH

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: let 32bit use apic_ops too - fix
  2008-07-15 18:10                 ` Yinghai Lu
@ 2008-07-15 18:27                   ` Suresh Siddha
  0 siblings, 0 replies; 84+ messages in thread
From: Suresh Siddha @ 2008-07-15 18:27 UTC (permalink / raw)
  To: Yinghai Lu
  Cc: Siddha, Suresh B, Ingo Molnar, Thomas Gleixner, H. Peter Anvin,
	LKML

On Tue, Jul 15, 2008 at 11:10:37AM -0700, Yinghai Lu wrote:
> On Tue, Jul 15, 2008 at 10:33 AM, Suresh Siddha
> <suresh.b.siddha@intel.com> wrote:
> > Yinghai, Looking more closely at this, based on my understanding this might be
> > wrong for VMI. Correct patch should be as follows. Any comments?
> 
> so you mean icr related will still use default native member?

Yes. This is similar to how it worked before apic_ops was introduced.

I think VMI uses apic operations in the paravirt case. For example,
please refer to vmi_time_bsp_init/vmi_time_ap_init.

thanks,
suresh

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: let 32bit use apic_ops too - fix
  2008-07-14 16:49                 ` Suresh Siddha
  2008-07-14 17:00                   ` Yinghai Lu
@ 2008-07-18 17:06                   ` Ingo Molnar
  1 sibling, 0 replies; 84+ messages in thread
From: Ingo Molnar @ 2008-07-18 17:06 UTC (permalink / raw)
  To: Suresh Siddha; +Cc: Yinghai Lu, Thomas Gleixner, H. Peter Anvin, LKML


* Suresh Siddha <suresh.b.siddha@intel.com> wrote:

> Ingo, before you try for the third attempt ;) we need one more lguest 
> apic_ops fix. Patch appended. Thanks.
> 
> ---
> [patch] x86: apic_ops for lguest
> 
> apic_ops for lguest.

applied to tip/x86/x2apic, thanks Suresh.

	Ingo

^ permalink raw reply	[flat|nested] 84+ messages in thread

* Re: [PATCH] x86: let 32bit use apic_ops too - fix
  2008-07-15 17:33               ` Suresh Siddha
  2008-07-15 18:10                 ` Yinghai Lu
@ 2008-07-18 17:07                 ` Ingo Molnar
  1 sibling, 0 replies; 84+ messages in thread
From: Ingo Molnar @ 2008-07-18 17:07 UTC (permalink / raw)
  To: Suresh Siddha; +Cc: Yinghai Lu, Thomas Gleixner, H. Peter Anvin, LKML


* Suresh Siddha <suresh.b.siddha@intel.com> wrote:

> Yinghai, Looking more closely at this, based on my understanding this 
> might be wrong for VMI. Correct patch should be as follows. Any 
> comments?

applied to tip/x86/x2apic (with some fixups), thanks Suresh.

	Ingo

^ permalink raw reply	[flat|nested] 84+ messages in thread

end of thread, other threads:[~2008-07-18 17:08 UTC | newest]

Thread overview: 84+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-07-08  8:41 [PATCH] x86: introduce page_size_mask for 64bit Yinghai Lu
2008-07-08  8:43 ` [PATCH] x86: not overmap than end in init_memory_mapping - 64bit Yinghai Lu
2008-07-09  7:38   ` Ingo Molnar
2008-07-09  8:34     ` Ingo Molnar
2008-07-09  8:37       ` Yinghai Lu
2008-07-09  8:46         ` Ingo Molnar
2008-07-09  8:58           ` Yinghai Lu
2008-07-09 10:01           ` Yinghai Lu
2008-07-09 10:30             ` Ingo Molnar
2008-07-09  8:45       ` Ingo Molnar
2008-07-10  3:15   ` [PATCh] x86: overmapped fix when 4K pages on tail " Yinghai Lu
2008-07-10  3:16     ` [PATCH] x86: merge __acpi_map_table Yinghai Lu
2008-07-10  3:17       ` [PATCH] x86: make e820_end return end_of_ram again for 64bit Yinghai Lu
2008-07-10  7:00         ` Ingo Molnar
2008-07-10 11:17         ` [PATCH] x86: e820 remove the range instead of update it to reserved Yinghai Lu
2008-07-11  8:20           ` Ingo Molnar
2008-07-11  3:36         ` [PATCH] x86: save slit Yinghai Lu
2008-07-11  8:22           ` Ingo Molnar
2008-07-11  3:38         ` [PATCH] x86: introduce max_low_pfn_mapped for 64bit Yinghai Lu
2008-07-11  8:26           ` Ingo Molnar
2008-07-11  8:39             ` Yinghai Lu
2008-07-11  8:51               ` Ingo Molnar
2008-07-12  1:41           ` [PATCH] x86: let 32bit use apic_ops too Yinghai Lu
2008-07-12  1:43             ` [PATCH] x86: mach_apicdef.h need to include before smp.h Yinghai Lu
2008-07-12  1:44               ` [PATCH] x86: make read_apic_id return final apicid Yinghai Lu
2008-07-12  8:01                 ` [PATCH] x86: make 64bit have get_apic_id Yinghai Lu
2008-07-13  6:28                   ` Ingo Molnar
2008-07-13  6:59                     ` Ingo Molnar
2008-07-13  7:05                       ` Yinghai Lu
2008-07-13  9:23                         ` Ingo Molnar
2008-07-13  9:28                           ` Ingo Molnar
2008-07-13 16:15                             ` Suresh Siddha
2008-07-13  1:19                 ` [PATCH] x86: make read_apic_id return final apicid Suresh Siddha
2008-07-13  1:08             ` [PATCH] x86: let 32bit use apic_ops too Suresh Siddha
2008-07-13  2:04               ` Yinghai Lu
2008-07-13 16:28                 ` Suresh Siddha
2008-07-13 16:51                   ` Maciej W. Rozycki
2008-07-13 17:16                     ` Cyrill Gorcunov
2008-07-13 23:46                       ` Maciej W. Rozycki
2008-07-14 16:48                         ` Cyrill Gorcunov
2008-07-14 17:20                           ` Maciej W. Rozycki
2008-07-14 18:09                             ` Cyrill Gorcunov
2008-07-14 18:24                               ` Maciej W. Rozycki
2008-07-14 18:32                                 ` Cyrill Gorcunov
2008-07-13  1:43             ` Maciej W. Rozycki
2008-07-13  1:45               ` Yinghai Lu
2008-07-13  1:54                 ` Maciej W. Rozycki
2008-07-13 16:43                   ` Suresh Siddha
2008-07-13 17:05                     ` Maciej W. Rozycki
2008-07-14  5:19             ` [PATCH] x86: let 32bit use apic_ops too - fix Yinghai Lu
2008-07-14  7:12               ` Ingo Molnar
2008-07-14 16:49                 ` Suresh Siddha
2008-07-14 17:00                   ` Yinghai Lu
2008-07-14 18:03                     ` Suresh Siddha
2008-07-18 17:06                   ` Ingo Molnar
2008-07-15 17:33               ` Suresh Siddha
2008-07-15 18:10                 ` Yinghai Lu
2008-07-15 18:27                   ` Suresh Siddha
2008-07-18 17:07                 ` Ingo Molnar
2008-07-12 21:30           ` [PATCH] x86: max_low_pfn_mapped fix #1 Yinghai Lu
2008-07-13  9:45             ` Ingo Molnar
2008-07-12 21:31           ` [PATCH] x86: max_low_pfn_mapped fix #2 Yinghai Lu
2008-07-12 21:32           ` [PATCH] x86: max_low_pfn_mapped fix #3 Yinghai Lu
2008-07-13 21:29             ` [PATCH] x86: max_low_pfn_mapped fix #4 Yinghai Lu
2008-07-13 21:30             ` [PATCH] x86: get x86_phys_bits early Yinghai Lu
2008-07-13 21:32             ` [PATCH] x86: make 64bit hpet_set_mapping to use ioremap too Yinghai Lu
2008-07-13 21:50               ` [PATCH] x86: make 64bit hpet_set_mapping to use ioremap too v2 Yinghai Lu
2008-07-10  6:54       ` [PATCH] x86: merge __acpi_map_table Ingo Molnar
2008-07-10  6:53     ` [PATCh] x86: overmapped fix when 4K pages on tail - 64bit Ingo Molnar
2008-07-10  6:57       ` Yinghai Lu
2008-07-10  7:20         ` Ingo Molnar
2008-07-10  7:32           ` Yinghai Lu
2008-07-10 14:16     ` Arjan van de Ven
2008-07-13 14:57       ` Andi Kleen
2008-07-13 15:33         ` Arjan van de Ven
2008-07-13 18:25           ` Andi Kleen
2008-07-13 18:17         ` Yinghai Lu
2008-07-13 18:48           ` Andi Kleen
2008-07-13 19:00             ` Yinghai Lu
2008-07-13 20:32             ` Ingo Molnar
2008-07-13 20:51               ` Andi Kleen
2008-07-14  0:04                 ` H. Peter Anvin
2008-07-14  6:39                   ` Andi Kleen
2008-07-09  7:38 ` [PATCH] x86: introduce page_size_mask for 64bit Ingo Molnar

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).