All of lore.kernel.org
 help / color / mirror / Atom feed
From: venkatesh.pallipadi@intel.com
To: ak@muc.de, ebiederm@xmission.com, rdreier@cisco.com,
	torvalds@linux-foundation.org, gregkh@suse.de, airlied@skynet.ie,
	davej@redhat.com, mingo@elte.hu, tglx@linutronix.de,
	hpa@zytor.com, akpm@linux-foundation.org, arjan@infradead.org,
	jesse.barnes@intel.com, davem@davemloft.net
Cc: linux-kernel@vger.kernel.org,
	Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>,
	Suresh Siddha <suresh.b.siddha@intel.com>
Subject: [patch 02/11] PAT x86: Map only usable memory in x86_64 identity map and kernel text
Date: Thu, 10 Jan 2008 10:48:42 -0800	[thread overview]
Message-ID: <20080110184854.787474000@intel.com> (raw)
In-Reply-To: 20080110184840.927409000@intel.com

[-- Attachment #1: usable_only_map.patch --]
[-- Type: text/plain, Size: 12057 bytes --]

x86_64: Map only usable memory in the identity map. All reserved memory maps
to a zero page. This is done later in the boot process, by pruning the page
tables set up earlier to remove the mappings for reserved regions. The prune is
done after mem_init (so pages can be allocated as needed) and before APs start.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>

Index: linux-2.6.git/arch/x86/kernel/e820_64.c
===================================================================
--- linux-2.6.git.orig/arch/x86/kernel/e820_64.c	2008-01-08 03:41:30.000000000 -0800
+++ linux-2.6.git/arch/x86/kernel/e820_64.c	2008-01-08 04:00:59.000000000 -0800
@@ -121,6 +121,35 @@
 }
 EXPORT_SYMBOL_GPL(e820_any_mapped);
 
+int e820_any_non_reserved(unsigned long start, unsigned long end)
+{
+	int i;
+	for (i = 0; i < e820.nr_map; i++) {
+		struct e820entry *ei = &e820.map[i];
+		if (ei->type == E820_RESERVED)
+			continue;
+		if (ei->addr >= end || ei->addr + ei->size <= start)
+			continue;
+		return 1;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(e820_any_non_reserved);
+
+int is_memory_any_valid(unsigned long start, unsigned long end)
+{
+	/*
+	 * Keep low PCI/ISA area always mapped.
+	 * Note: end address is exclusive and start is inclusive here
+	 */
+	if (start >= ISA_START_ADDRESS && end <= ISA_END_ADDRESS)
+		return 1;
+
+	/* Switch to efi or e820 in future here */
+	return e820_any_non_reserved(start, end);
+}
+EXPORT_SYMBOL_GPL(is_memory_any_valid);
+
 /*
  * This function checks if the entire range <start,end> is mapped with type.
  *
@@ -156,6 +185,47 @@
 	return 0;
 }
 
+int e820_all_non_reserved(unsigned long start, unsigned long end)
+{
+	int i;
+	for (i = 0; i < e820.nr_map; i++) {
+		struct e820entry *ei = &e820.map[i];
+		if (ei->type == E820_RESERVED)
+			continue;
+
+		/* is the region (part) in overlap with the current region ?*/
+		if (ei->addr >= end || ei->addr + ei->size <= start)
+			continue;
+
+		/*
+		 * if the region is at the beginning of <start,end> we move
+		 * start to the end of the region since it's ok until there
+		 */
+		if (ei->addr <= start)
+			start = ei->addr + ei->size;
+
+		/* if start is at or beyond end, we're done, full coverage */
+		if (start >= end)
+			return 1; /* we're done */
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(e820_all_non_reserved);
+
+int is_memory_all_valid(unsigned long start, unsigned long end)
+{
+	/*
+	 * Keep low PCI/ISA area always mapped.
+	 * Note: end address is exclusive and start is inclusive here
+	 */
+	if (start >= ISA_START_ADDRESS && end <= ISA_END_ADDRESS)
+		return 1;
+
+	/* Switch to efi or e820 in future here */
+	return e820_all_non_reserved(start, end);
+}
+EXPORT_SYMBOL_GPL(is_memory_all_valid);
+
 /*
  * Find a free area in a specific range.
  */
Index: linux-2.6.git/arch/x86/mm/init_64.c
===================================================================
--- linux-2.6.git.orig/arch/x86/mm/init_64.c	2008-01-08 03:43:46.000000000 -0800
+++ linux-2.6.git/arch/x86/mm/init_64.c	2008-01-08 03:59:28.000000000 -0800
@@ -215,8 +215,9 @@
 	int i, pmds;
 
 	pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
-	vaddr = __START_KERNEL_map;
-	pmd = level2_kernel_pgt;
+	/* Skip PMDs meant for kernel text */
+	vaddr = __START_KERNEL_map + KERNEL_TEXT_SIZE;
+	pmd = level2_kernel_pgt + (KERNEL_TEXT_SIZE / PMD_SIZE);
 	last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1;
 	for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) {
 		for (i = 0; i < pmds; i++) {
@@ -299,11 +300,6 @@
 		if (addr >= end)
 			break;
 
-		if (!after_bootmem && !e820_any_mapped(addr,addr+PUD_SIZE,0)) {
-			set_pud(pud, __pud(0)); 
-			continue;
-		} 
-
 		if (pud_val(*pud)) {
 			phys_pmd_update(pud, addr, end);
 			continue;
@@ -344,6 +340,8 @@
 		(table_start << PAGE_SHIFT) + tables);
 }
 
+static unsigned long max_addr;
+
 /* Setup the direct mapping of the physical memory at PAGE_OFFSET.
    This runs before bootmem is initialized and gets pages directly from the 
    physical memory. To access them they are temporarily mapped. */
@@ -370,10 +368,13 @@
 		pgd_t *pgd = pgd_offset_k(start);
 		pud_t *pud;
 
-		if (after_bootmem)
+		if (after_bootmem) {
 			pud = pud_offset(pgd, start & PGDIR_MASK);
-		else
+		} else {
 			pud = alloc_low_page(&pud_phys);
+			if (end > max_addr)
+				max_addr = end;
+		}
 
 		next = start + PGDIR_SIZE;
 		if (next > end) 
@@ -489,6 +490,187 @@
 static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
 			 kcore_vsyscall;
 
+
+static unsigned long __init get_res_page(void)
+{
+	static unsigned long res_phys_page;
+	if (!res_phys_page) {
+		pte_t *pte;
+		pte = alloc_low_page(&res_phys_page);
+		unmap_low_page(pte);
+	}
+	return res_phys_page;
+}
+
+static unsigned long __init get_res_ptepage(void)
+{
+	static unsigned long res_phys_ptepage;
+	if (!res_phys_ptepage) {
+		pte_t *pte_page;
+		unsigned long page_phys;
+		unsigned long entry;
+		int i;
+
+		pte_page = alloc_low_page(&res_phys_ptepage);
+
+		page_phys = get_res_page();
+		entry = _PAGE_NX | _KERNPG_TABLE | _PAGE_GLOBAL | page_phys;
+		entry &= __supported_pte_mask;
+		for (i = 0; i < PTRS_PER_PTE; i++) {
+			pte_t *pte = pte_page + i;
+			set_pte(pte, __pte(entry));
+		}
+
+		unmap_low_page(pte_page);
+	}
+	return res_phys_ptepage;
+}
+
+static void __init phys_pte_prune(pte_t *pte_page, unsigned long address,
+		unsigned long end, unsigned long vaddr, unsigned int exec)
+{
+	int i = pte_index(vaddr);
+	/* Walk the PTE slots from vaddr on, one 4k page of [address, end) each */
+	for (; i < PTRS_PER_PTE; i++, address = (address & PAGE_MASK) + PAGE_SIZE, vaddr = (vaddr & PAGE_MASK) + PAGE_SIZE) {
+		unsigned long entry;
+		pte_t *pte = pte_page + i;
+
+		if (address >= end)
+			break;
+		/* Leave entries that are already populated untouched */
+		if (pte_val(*pte))
+			continue;
+
+		/* Nothing to map. Map the null page */
+		if (!(address & (~PAGE_MASK)) &&
+		    (address + PAGE_SIZE <= end) &&
+		    !is_memory_any_valid(address, address + PAGE_SIZE)) {
+			unsigned long phys_page;
+
+			phys_page = get_res_page();
+			entry = _PAGE_NX | _KERNPG_TABLE | _PAGE_GLOBAL |
+				phys_page;
+
+			entry &= __supported_pte_mask;
+			set_pte(pte, __pte(entry));
+
+			continue;
+		}
+		/* exec is set for the kernel text range: only non-exec gets NX */
+		if (exec)
+			entry = _KERNPG_TABLE|_PAGE_GLOBAL|address;
+		else
+			entry = _PAGE_NX|_KERNPG_TABLE|_PAGE_GLOBAL|address;
+		entry &= __supported_pte_mask;
+		set_pte(pte, __pte(entry));
+	}
+}
+
+static void __init phys_pmd_prune(pmd_t *pmd_page, unsigned long address,
+		unsigned long end, unsigned long vaddr, unsigned int exec)
+{
+	int i = pmd_index(vaddr);
+	/* Walk the PMD slots from vaddr on, one 2M chunk of [address, end) each */
+	for (; i < PTRS_PER_PMD; i++, address = (address & PMD_MASK) + PMD_SIZE,
+			vaddr = (vaddr & PMD_MASK) + PMD_SIZE) {
+		pmd_t *pmd = pmd_page + i;
+		pte_t *pte;
+		unsigned long pte_phys;
+
+		if (address >= end)
+			break;
+		/* Only prune what was mapped earlier; empty slots stay empty */
+		if (!pmd_val(*pmd))
+			continue;
+
+		/* Nothing to map. Map the null page */
+		if (!(address & (~PMD_MASK)) &&
+		    (address + PMD_SIZE <= end) &&
+		    !is_memory_any_valid(address, address + PMD_SIZE)) {
+			/* Shared PTE page whose entries all map the null page */
+			pte_phys = get_res_ptepage();
+			set_pmd(pmd, __pmd(pte_phys | _KERNPG_TABLE));
+
+			continue;
+		}
+
+		/* Whole 2M range is valid: keep the existing 2M mapping */
+		if (is_memory_all_valid(address, address + PMD_SIZE)) {
+			/* Init already done */
+			continue;
+		}
+
+		/* Partly valid: replace the 2M mapping with 4k pages */
+		pte = alloc_low_page(&pte_phys);
+		phys_pte_prune(pte, address, address + PMD_SIZE, vaddr, exec);
+		set_pmd(pmd, __pmd(pte_phys | _KERNPG_TABLE));
+		unmap_low_page(pte);
+
+	}
+}
+
+static void __init phys_pud_prune(pud_t *pud_page, unsigned long addr,
+	       unsigned long end, unsigned long vaddr, unsigned int exec)
+{
+	int i = pud_index(vaddr);
+
+	for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE,
+			vaddr = (vaddr & PUD_MASK) + PUD_SIZE) {
+		pud_t *pud = pud_page + i;
+
+		if (addr >= end)
+			break;
+
+		if (pud_val(*pud)) {
+			pmd_t *pmd = pmd_offset(pud,0);
+			phys_pmd_prune(pmd, addr, end, vaddr, exec);
+		}
+	}
+}
+
+void __init prune_reserved_region_maps(void)
+{
+	unsigned long start, end, next;
+
+	/* Prune physical memory identity map */
+	start = (unsigned long)__va(0);
+	end = max_addr;
+	for (; start < end; start = next) {
+		pgd_t *pgd = pgd_offset_k(start);
+		pud_t *pud;
+
+		pud = pud_offset(pgd, start & PGDIR_MASK);
+
+		next = start + PGDIR_SIZE;
+		if (next > end)
+			next = end;
+
+		phys_pud_prune(pud, __pa(start), __pa(next), start, 0);
+	}
+
+	/* Prune kernel text region */
+	start = (unsigned long)KERNEL_TEXT_START;
+	end = start + (unsigned long)KERNEL_TEXT_SIZE;
+	for (; start < end; start = next) {
+		pgd_t *pgd = pgd_offset_k(start);
+		pud_t *pud;
+
+		pud = pud_offset(pgd, start & PGDIR_MASK);
+
+		next = (start & PGDIR_MASK) + (unsigned long)PGDIR_SIZE;
+		if (!next || next > end)
+			next = end;
+
+		phys_pud_prune(pud,
+		               start - (unsigned long)KERNEL_TEXT_START,
+		               next - (unsigned long)KERNEL_TEXT_START,
+			       start,
+			       1);
+	}
+
+	__flush_tlb();
+}
+
 void __init mem_init(void)
 {
 	long codesize, reservedpages, datasize, initsize;
@@ -538,6 +720,8 @@
 		reservedpages << (PAGE_SHIFT-10),
 		datasize >> 10,
 		initsize >> 10);
+
+	prune_reserved_region_maps();
 }
 
 void free_init_pages(char *what, unsigned long begin, unsigned long end)
Index: linux-2.6.git/arch/x86/mm/ioremap_64.c
===================================================================
--- linux-2.6.git.orig/arch/x86/mm/ioremap_64.c	2008-01-08 03:41:30.000000000 -0800
+++ linux-2.6.git/arch/x86/mm/ioremap_64.c	2008-01-08 03:59:28.000000000 -0800
@@ -19,6 +19,7 @@
 #include <asm/tlbflush.h>
 #include <asm/cacheflush.h>
 #include <asm/proto.h>
+#include <asm/e820.h>
 
 unsigned long __phys_addr(unsigned long x)
 {
@@ -28,9 +29,6 @@
 }
 EXPORT_SYMBOL(__phys_addr);
 
-#define ISA_START_ADDRESS      0xa0000
-#define ISA_END_ADDRESS                0x100000
-
 /*
  * Fix up the linear direct mapping of the kernel to avoid cache attribute
  * conflicts.
Index: linux-2.6.git/arch/x86/mm/pageattr_64.c
===================================================================
--- linux-2.6.git.orig/arch/x86/mm/pageattr_64.c	2008-01-08 03:41:30.000000000 -0800
+++ linux-2.6.git/arch/x86/mm/pageattr_64.c	2008-01-08 04:03:33.000000000 -0800
@@ -53,9 +53,11 @@
 	/*
 	 * page_private is used to track the number of entries in
 	 * the page table page have non standard attributes.
+	 * Count of 1 indicates page split by split_large_page(),
+	 * additional count indicates the number of pages with non-std attr.
 	 */
 	SetPagePrivate(base);
-	page_private(base) = 0;
+	page_private(base) = 1;
 
 	address = __pa(address);
 	addr = address & LARGE_PAGE_MASK;
@@ -176,11 +178,8 @@
 			BUG();
 	}
 
-	/* on x86-64 the direct mapping set at boot is not using 4k pages */
-	BUG_ON(PageReserved(kpte_page));
-
 	save_page(kpte_page);
-	if (page_private(kpte_page) == 0)
+	if (page_private(kpte_page) == 1)
 		revert_page(address, ref_prot);
 	return 0;
 }
Index: linux-2.6.git/include/asm-x86/e820_64.h
===================================================================
--- linux-2.6.git.orig/include/asm-x86/e820_64.h	2008-01-08 03:41:30.000000000 -0800
+++ linux-2.6.git/include/asm-x86/e820_64.h	2008-01-08 03:59:28.000000000 -0800
@@ -26,6 +26,10 @@
 extern void e820_mark_nosave_regions(void);
 extern int e820_any_mapped(unsigned long start, unsigned long end, unsigned type);
 extern int e820_all_mapped(unsigned long start, unsigned long end, unsigned type);
+extern int e820_any_non_reserved(unsigned long start, unsigned long end);
+extern int is_memory_any_valid(unsigned long start, unsigned long end);
+extern int e820_all_non_reserved(unsigned long start, unsigned long end);
+extern int is_memory_all_valid(unsigned long start, unsigned long end);
 extern unsigned long e820_hole_size(unsigned long start, unsigned long end);
 
 extern void e820_setup_gap(void);
@@ -38,6 +42,10 @@
 
 extern unsigned ebda_addr, ebda_size;
 extern unsigned long nodemap_addr, nodemap_size;
+
+#define ISA_START_ADDRESS	0xa0000
+#define ISA_END_ADDRESS		0x100000
+
 #endif/*!__ASSEMBLY__*/
 
 #endif/*__E820_HEADER*/

-- 

  parent reply	other threads:[~2008-01-10 18:50 UTC|newest]

Thread overview: 48+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-01-10 18:48 [patch 00/11] PAT x86: PAT support for x86 venkatesh.pallipadi
2008-01-10 18:48 ` [patch 01/11] PAT x86: Make acpi/other drivers map memory instead of assuming identity map venkatesh.pallipadi
2008-01-10 18:48 ` venkatesh.pallipadi [this message]
2008-01-10 19:06   ` [patch 02/11] PAT x86: Map only usable memory in x86_64 identity map and kernel text Andi Kleen
2008-01-10 19:17     ` Pallipadi, Venkatesh
2008-01-10 19:28       ` Andi Kleen
2008-01-10 20:50         ` Pallipadi, Venkatesh
2008-01-10 21:16           ` Andi Kleen
2008-01-10 22:25             ` Pallipadi, Venkatesh
2008-01-10 22:35               ` Andi Kleen
2008-01-14 16:43           ` Ingo Molnar
2008-01-14 21:21             ` Siddha, Suresh B
2008-01-14 21:28               ` Andi Kleen
2008-01-15 22:17               ` Ingo Molnar
2008-01-15 23:11                 ` Andi Kleen
2008-01-15 23:21                 ` Siddha, Suresh B
2008-01-18 12:01                   ` Ingo Molnar
2008-01-18 13:12                     ` Andi Kleen
2008-01-18 16:46                       ` Jesse Barnes
2008-01-18 18:12                         ` Andi Kleen
2008-01-18 19:02                           ` Jesse Barnes
2008-01-19  2:42                             ` Andi Kleen
2008-01-10 21:05   ` Linus Torvalds
2008-01-10 21:57     ` Pallipadi, Venkatesh
2008-01-10 22:15       ` Linus Torvalds
2008-01-10 22:27         ` Pallipadi, Venkatesh
2008-01-10 22:50         ` Valdis.Kletnieks
2008-01-18 18:27           ` Dave Jones
2008-01-18 20:54             ` Ingo Molnar
2008-01-10 18:48 ` [patch 03/11] PAT x86: Map only usable memory in i386 identity map venkatesh.pallipadi
2008-01-10 19:10   ` Andi Kleen
2008-01-10 18:48 ` [patch 04/11] PAT x86: Basic PAT implementation venkatesh.pallipadi
2008-01-10 18:48 ` [patch 05/11] PAT x86: drm driver changes for PAT venkatesh.pallipadi
2008-01-10 18:48 ` [patch 06/11] PAT x86: Refactoring i386 cpa venkatesh.pallipadi
2008-01-10 19:00   ` Andi Kleen
2008-01-14 16:47     ` Ingo Molnar
2008-01-10 18:48 ` [patch 07/11] PAT x86: pat-conflict resolution using linear list venkatesh.pallipadi
2008-01-10 19:13   ` Andi Kleen
2008-01-10 20:08     ` Pallipadi, Venkatesh
2008-01-10 18:48 ` [patch 08/11] PAT x86: pci mmap conlfict patch venkatesh.pallipadi
2008-01-10 18:48 ` [patch 09/11] PAT x86: Add ioremap_wc support venkatesh.pallipadi
2008-01-10 19:08   ` Andi Kleen
2008-01-10 19:25     ` Pallipadi, Venkatesh
2008-01-12  0:18       ` Roland Dreier
2008-01-10 18:48 ` [patch 10/11] PAT x86: Handle /dev/mem mappings venkatesh.pallipadi
2008-01-10 18:48 ` [patch 11/11] PAT x86: Expose uc and wc interfaces in /sysfs vor pci_mmap_resource venkatesh.pallipadi
2008-01-10 19:43   ` Greg KH
2008-01-10 20:54     ` [patch 11/11] PAT x86: Expose uc and wc interfaces in /sysfsvor pci_mmap_resource Pallipadi, Venkatesh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080110184854.787474000@intel.com \
    --to=venkatesh.pallipadi@intel.com \
    --cc=airlied@skynet.ie \
    --cc=ak@muc.de \
    --cc=akpm@linux-foundation.org \
    --cc=arjan@infradead.org \
    --cc=davej@redhat.com \
    --cc=davem@davemloft.net \
    --cc=ebiederm@xmission.com \
    --cc=gregkh@suse.de \
    --cc=hpa@zytor.com \
    --cc=jesse.barnes@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=rdreier@cisco.com \
    --cc=suresh.b.siddha@intel.com \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.