All of lore.kernel.org
 help / color / mirror / Atom feed
From: venkatesh.pallipadi@intel.com
To: ak@muc.de, ebiederm@xmission.com, rdreier@cisco.com,
	torvalds@linux-foundation.org, gregkh@suse.de, airlied@skynet.ie,
	davej@redhat.com, mingo@elte.hu, tglx@linutronix.de,
	hpa@zytor.com, akpm@linux-foundation.org, arjan@infradead.org,
	jesse.barnes@intel.com
Cc: linux-kernel@vger.kernel.org,
	Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>,
	Suresh Siddha <suresh.b.siddha@intel.com>
Subject: [RFC PATCH 09/12] PAT 64b: map only usable memory in identity mapping
Date: Thu, 13 Dec 2007 15:55:52 -0800	[thread overview]
Message-ID: <20071213235713.061041000@intel.com> (raw)
In-Reply-To: 20071213235543.568682000@intel.com

[-- Attachment #1: usable_only_map.patch --]
[-- Type: text/plain, Size: 9105 bytes --]

Map only the usable memory, i.e., memory mapped in e820 and not marked as
reserved, in the identity mapping. This includes 'usable' and 'ACPI *' regions.

Mapping reserved regions in the identity map, even though it has worked in
practice, is potentially problematic. With the identity map in place, the CPU
can speculatively access these reserved regions, and the result is undefined.

The caveat is that the legacy ISA address range (0xa0000 - 0x100000) is always
mapped, even when it is reserved in e820. VGA seems to depend on this.

TODO:
* Clean up early table space allocation, avoiding overallocation there.
* Avoid mapping 0 - 1M physical addresses in kernel text mapping.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
---

Index: linux-2.6.24-rc/arch/x86/kernel/e820_64.c
===================================================================
--- linux-2.6.24-rc.orig/arch/x86/kernel/e820_64.c
+++ linux-2.6.24-rc/arch/x86/kernel/e820_64.c
@@ -121,6 +121,35 @@ e820_any_mapped(unsigned long start, uns
 }
 EXPORT_SYMBOL_GPL(e820_any_mapped);
 
+/* Return 1 if any non-reserved e820 entry overlaps [start, end), else 0. */
+int e820_any_non_reserved(unsigned long start, unsigned long end)
+{
+	struct e820entry *entry = e820.map;
+	struct e820entry *last = e820.map + e820.nr_map;
+
+	for (; entry < last; entry++) {
+		if (entry->type != E820_RESERVED &&
+		    entry->addr < end && entry->addr + entry->size > start)
+			return 1;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(e820_any_non_reserved);
+
+int is_memory_any_valid(unsigned long start, unsigned long end)
+{
+	/*
+	 * Keep low PCI/ISA area always mapped: any overlap of [start, end)
+	 * (start inclusive, end exclusive) with the ISA window is valid.
+	 */
+	if (start < ISA_END_ADDRESS && end > ISA_START_ADDRESS)
+		return 1;
+
+	/* Switch to efi or e820 in future here */
+	return e820_any_non_reserved(start, end);
+}
+EXPORT_SYMBOL_GPL(is_memory_any_valid);
+
 /*
  * This function checks if the entire range <start,end> is mapped with type.
  *
@@ -150,6 +179,47 @@ int __init e820_all_mapped(unsigned long
 	return 0;
 }
 
+/*
+ * Return 1 once [start, end) has been covered by non-reserved e820
+ * entries, 0 otherwise. Entries are visited in map order, and an entry
+ * extends the coverage only if it begins at or before the current start.
+ */
+int e820_all_non_reserved(unsigned long start, unsigned long end)
+{
+	int idx;
+
+	for (idx = 0; idx < e820.nr_map; idx++) {
+		struct e820entry *entry = &e820.map[idx];
+
+		/* skip reserved entries and entries outside <start,end> */
+		if (entry->type == E820_RESERVED ||
+		    entry->addr >= end || entry->addr + entry->size <= start)
+			continue;
+
+		/* covered up to the end of this entry: advance start */
+		if (entry->addr <= start)
+			start = entry->addr + entry->size;
+		if (start >= end)
+			return 1;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(e820_all_non_reserved);
+
+int is_memory_all_valid(unsigned long start, unsigned long end)
+{
+	/*
+	 * Ranges not fully inside the low PCI/ISA window are judged by e820.
+	 * Note: end address is exclusive and start is inclusive here
+	 */
+	if (start < ISA_START_ADDRESS || end > ISA_END_ADDRESS)
+		return e820_all_non_reserved(start, end);
+
+	/* Keep low PCI/ISA area always mapped. */
+	return 1;
+}
+EXPORT_SYMBOL_GPL(is_memory_all_valid);
+
 /* 
  * Find a free area in a specific range. 
  */ 
Index: linux-2.6.24-rc/arch/x86/mm/init_64.c
===================================================================
--- linux-2.6.24-rc.orig/arch/x86/mm/init_64.c
+++ linux-2.6.24-rc/arch/x86/mm/init_64.c
@@ -250,13 +250,46 @@ __meminit void early_iounmap(void *addr,
 }
 
 static void __meminit
+phys_pte_init(pte_t *pte_page, unsigned long address, unsigned long end)
+{
+	/* Fill the slots of pte_page, one PAGE_SIZE step per slot. */
+	int idx;
+
+	for (idx = pte_index(address); idx < PTRS_PER_PTE;
+	     idx++, address += PAGE_SIZE) {
+		pte_t *pte = pte_page + idx;
+		unsigned long entry = 0; /* stays 0 when nothing to map */
+
+		if (address >= end) {
+			/* past the end: at boot, clear the remaining slots */
+			if (!after_bootmem)
+				for (; idx < PTRS_PER_PTE; idx++, pte++)
+					set_pte(pte, __pte(0));
+			break;
+		}
+
+		/* leave already-populated entries alone */
+		if (pte_val(*pte))
+			continue;
+
+		if (is_memory_any_valid(address, address + PAGE_SIZE)) {
+			entry = _PAGE_NX|_KERNPG_TABLE|_PAGE_GLOBAL|address;
+			entry &= __supported_pte_mask;
+		}
+		set_pte(pte, __pte(entry));
+	}
+}
+
+static void __meminit
 phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
 {
 	int i = pmd_index(address);
 
 	for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
 		unsigned long entry;
-		pmd_t *pmd = pmd_page + pmd_index(address);
+		pmd_t *pmd = pmd_page + i; /* i tracks pmd_index(address) */
+		pte_t *pte;
+		unsigned long pte_phys;
 
 		if (address >= end) {
 			if (!after_bootmem)
@@ -268,9 +301,27 @@ phys_pmd_init(pmd_t *pmd_page, unsigned 
 		if (pmd_val(*pmd))
 			continue;
 
-		entry = _PAGE_NX|_PAGE_PSE|_KERNPG_TABLE|_PAGE_GLOBAL|address;
-		entry &= __supported_pte_mask;
-		set_pmd(pmd, __pmd(entry));
+		/* Nothing to map: no valid memory in this PMD_SIZE range */
+		if (!is_memory_any_valid(address, address + PMD_SIZE)) {
+			set_pmd(pmd, __pmd(0));
+			continue;
+		}
+
+		/* Map with one 2M page only if the whole PMD_SIZE range is valid */
+		if (is_memory_all_valid(address, address + PMD_SIZE)) {
+			entry = _PAGE_NX|_PAGE_PSE|_KERNPG_TABLE|
+				_PAGE_GLOBAL|address;
+			entry &= __supported_pte_mask;
+			set_pmd(pmd, __pmd(entry));
+			continue;
+		}
+
+		/* Partially valid range: map it with 4k pages */
+		pte = alloc_low_page(&pte_phys);
+		set_pmd(pmd, __pmd(pte_phys | _KERNPG_TABLE));
+		phys_pte_init(pte, address, address + PMD_SIZE);
+		unmap_low_page(pte);
+
 	}
 }
 
@@ -291,14 +342,15 @@ static void __meminit phys_pud_init(pud_
 
 	for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE ) {
 		unsigned long pmd_phys;
-		pud_t *pud = pud_page + pud_index(addr);
+		pud_t *pud = pud_page + i; // pud_index(addr);
 		pmd_t *pmd;
 
 		if (addr >= end)
 			break;
 
-		if (!after_bootmem && !e820_any_mapped(addr,addr+PUD_SIZE,0)) {
-			set_pud(pud, __pud(0)); 
+		if (!after_bootmem &&
+		    !is_memory_any_valid(addr, addr+PUD_SIZE)) {
+			set_pud(pud, __pud(0));
 			continue;
 		} 
 
@@ -310,7 +362,7 @@ static void __meminit phys_pud_init(pud_
 		pmd = alloc_low_page(&pmd_phys);
 		spin_lock(&init_mm.page_table_lock);
 		set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE));
-		phys_pmd_init(pmd, addr, end);
+		phys_pmd_init(pmd, addr, addr + PUD_SIZE);
 		spin_unlock(&init_mm.page_table_lock);
 		unmap_low_page(pmd);
 	}
@@ -319,12 +371,14 @@ static void __meminit phys_pud_init(pud_
 
 static void __init find_early_table_space(unsigned long end)
 {
-	unsigned long puds, pmds, tables, start;
+	unsigned long puds, pmds, ptes, tables, start;
 
 	puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
 	pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
+	ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) +
-		 round_up(pmds * sizeof(pmd_t), PAGE_SIZE);
+		 round_up(pmds * sizeof(pmd_t), PAGE_SIZE) +
+		 round_up(ptes * sizeof(pte_t), PAGE_SIZE);
 
  	/* RED-PEN putting page tables only on node 0 could
  	   cause a hotspot and fill up ZONE_DMA. The page tables
Index: linux-2.6.24-rc/include/asm-x86/e820_64.h
===================================================================
--- linux-2.6.24-rc.orig/include/asm-x86/e820_64.h
+++ linux-2.6.24-rc/include/asm-x86/e820_64.h
@@ -24,6 +24,10 @@ extern void e820_mark_nosave_regions(voi
 extern void e820_print_map(char *who);
 extern int e820_any_mapped(unsigned long start, unsigned long end, unsigned type);
 extern int e820_all_mapped(unsigned long start, unsigned long end, unsigned type);
+extern int e820_any_non_reserved(unsigned long start, unsigned long end);
+extern int is_memory_any_valid(unsigned long start, unsigned long end);
+extern int e820_all_non_reserved(unsigned long start, unsigned long end);
+extern int is_memory_all_valid(unsigned long start, unsigned long end);
 extern unsigned long e820_hole_size(unsigned long start, unsigned long end);
 
 extern void e820_setup_gap(void);
@@ -36,6 +40,10 @@ extern struct e820map e820;
 
 extern unsigned ebda_addr, ebda_size;
 extern unsigned long nodemap_addr, nodemap_size;
+
+#define ISA_START_ADDRESS	0xa0000
+#define ISA_END_ADDRESS		0x100000
+
 #endif/*!__ASSEMBLY__*/
 
 #endif/*__E820_HEADER*/
Index: linux-2.6.24-rc/arch/x86/mm/pageattr_64.c
===================================================================
--- linux-2.6.24-rc.orig/arch/x86/mm/pageattr_64.c
+++ linux-2.6.24-rc/arch/x86/mm/pageattr_64.c
@@ -160,9 +160,6 @@ __change_page_attr(unsigned long address
 	} else
 		BUG();
 
-	/* on x86-64 the direct mapping set at boot is not using 4k pages */
- 	BUG_ON(PageReserved(kpte_page));
-
 	save_page(kpte_page);
 	if (page_private(kpte_page) == 0)
 		revert_page(address, ref_prot);
Index: linux-2.6.24-rc/arch/x86/mm/ioremap_64.c
===================================================================
--- linux-2.6.24-rc.orig/arch/x86/mm/ioremap_64.c
+++ linux-2.6.24-rc/arch/x86/mm/ioremap_64.c
@@ -28,9 +29,6 @@ unsigned long __phys_addr(unsigned long 
 }
 EXPORT_SYMBOL(__phys_addr);
 
-#define ISA_START_ADDRESS      0xa0000
-#define ISA_END_ADDRESS                0x100000
-
 /*
  * Fix up the linear direct mapping of the kernel to avoid cache attribute
  * conflicts.

-- 

  parent reply	other threads:[~2007-12-14  0:00 UTC|newest]

Thread overview: 52+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-12-13 23:55 [RFC PATCH 00/12] PAT 64b: PAT support for X86_64 venkatesh.pallipadi
2007-12-13 23:55 ` [RFC PATCH 01/12] PAT 64b: Add cpu_shutdown() support venkatesh.pallipadi
2007-12-13 23:55 ` [RFC PATCH 02/12] PAT 64b: Basic PAT implementation venkatesh.pallipadi
2007-12-14  0:42   ` Andi Kleen
2007-12-14 18:31     ` Venki Pallipadi
2007-12-18  4:50       ` Eric W. Biederman
2007-12-14  3:48   ` Eric W. Biederman
2007-12-14  4:23     ` Eric W. Biederman
2007-12-14 21:10       ` Siddha, Suresh B
2007-12-14 23:34         ` Siddha, Suresh B
2007-12-15  7:55           ` Ingo Molnar
2007-12-14 10:25     ` Andi Kleen
2007-12-14 19:45       ` H. Peter Anvin
2007-12-18  4:42       ` Eric W. Biederman
2007-12-14 21:06     ` Siddha, Suresh B
2007-12-13 23:55 ` [RFC PATCH 03/12] PAT 64b: drm driver changes for PAT venkatesh.pallipadi
2007-12-13 23:55 ` [RFC PATCH 04/12] PAT 64b: reserve_mattr and free_mattr " venkatesh.pallipadi
2007-12-13 23:55 ` [RFC PATCH 05/12] PAT 64b: pci mmap conlfict patch venkatesh.pallipadi
2007-12-13 23:55 ` [RFC PATCH 06/12] PAT 64b: Add ioremap_wc support venkatesh.pallipadi
2007-12-14  4:17   ` Roland Dreier
2007-12-14  4:28     ` Eric W. Biederman
2007-12-14  4:32       ` Roland Dreier
2007-12-14  4:48         ` Eric W. Biederman
2007-12-14 21:40           ` Siddha, Suresh B
2007-12-14 23:19             ` Andi Kleen
2007-12-18  8:29             ` Eric W. Biederman
2007-12-13 23:55 ` [RFC PATCH 07/12] PAT 64b: dev mem chanegs for pat venkatesh.pallipadi
2007-12-13 23:55 ` [RFC PATCH 08/12] PAT 64b: coherent mmap and sysfs bin ioctl venkatesh.pallipadi
2007-12-14  0:19   ` Greg KH
2007-12-14  0:35     ` David Miller
2007-12-14  6:34       ` Greg KH
2007-12-16 21:57         ` Paul Mackerras
2007-12-17 12:41           ` Andi Kleen
2007-12-18  4:30             ` Eric W. Biederman
2007-12-18  4:51               ` H. Peter Anvin
2007-12-18  9:35               ` Andi Kleen
2007-12-18 13:48                 ` Eric W. Biederman
2007-12-14  0:43     ` Andi Kleen
2007-12-14  0:54   ` Jesse Barnes
2007-12-14  3:59   ` Eric W. Biederman
2007-12-14  6:02     ` Greg KH
2007-12-14  6:04       ` Eric W. Biederman
2007-12-14 10:19         ` Andi Kleen
2007-12-13 23:55 ` venkatesh.pallipadi [this message]
2007-12-13 23:55 ` [RFC PATCH 10/12] PAT 64b: Make acpi use early map instead of assuming identity map venkatesh.pallipadi
2007-12-13 23:55 ` [RFC PATCH 11/12] PAT 64b: devmem do not read pages not mapped in " venkatesh.pallipadi
2007-12-13 23:55 ` [RFC PATCH 12/12] PAT 64b: skip attr tracking for RAM venkatesh.pallipadi
2007-12-14  0:28 ` [RFC PATCH 00/12] PAT 64b: PAT support for X86_64 Dave Airlie
2007-12-14 22:00   ` Siddha, Suresh B
2007-12-14 22:27     ` Dave Airlie
2007-12-14 22:32       ` H. Peter Anvin
2007-12-14 22:37         ` Dave Airlie

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20071213235713.061041000@intel.com \
    --to=venkatesh.pallipadi@intel.com \
    --cc=airlied@skynet.ie \
    --cc=ak@muc.de \
    --cc=akpm@linux-foundation.org \
    --cc=arjan@infradead.org \
    --cc=davej@redhat.com \
    --cc=ebiederm@xmission.com \
    --cc=gregkh@suse.de \
    --cc=hpa@zytor.com \
    --cc=jesse.barnes@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=rdreier@cisco.com \
    --cc=suresh.b.siddha@intel.com \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.