public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Jeremy Fitzhardinge <jeremy@goop.org>
To: Ingo Molnar <mingo@elte.hu>
Cc: LKML <linux-kernel@vger.kernel.org>,
	x86@kernel.org, xen-devel <xen-devel@lists.xensource.com>,
	Stephen Tweedie <sct@redhat.com>,
	Eduardo Habkost <ehabkost@redhat.com>,
	Mark McLoughlin <markmc@redhat.com>,
	x86@kernel.org
Subject: [PATCH 23 of 36] x86_64: adjust mapping of physical pagetables to work with Xen
Date: Wed, 25 Jun 2008 00:19:19 -0400	[thread overview]
Message-ID: <aa722b5444fdde7ffa52.1214367559@localhost> (raw)
In-Reply-To: <patchbomb.1214367536@localhost>

This makes a few changes to the construction of the initial
pagetables to work better with paravirt_ops/Xen.  The main areas
are:

 1. Support non-PSE mapping of memory, since Xen doesn't currently
    allow 2M pages to be mapped in guests.

 2. Make sure that the ioremap alias of each page is dropped before
    attaching the new page to the pagetable.  This avoids having
    writable aliases of pagetable pages.

 3. Preserve existing pagetable entries, rather than overwriting.  It's
    possible that a fair amount of pagetable has already been constructed,
    so reuse what's already in place rather than ignoring and overwriting it.

The algorithm relies on the invariant that any page which is part of
the kernel pagetable is itself mapped in the linear memory area.  This
way, it can avoid using ioremap on a pagetable page.

The invariant holds because it maps memory from low to high addresses,
and also allocates memory from low to high.  Each allocated page can
map at least 2M of address space, so the mapped area will always
progress much faster than the allocated area.  It relies on the early
boot code mapping enough pages to get started.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
---
 arch/x86/mm/init_64.c |   94 ++++++++++++++++++++++++++++++++++++++++++-------
 arch/x86/mm/ioremap.c |    2 -
 2 files changed, 83 insertions(+), 13 deletions(-)

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -257,6 +257,43 @@
 	early_iounmap(adr, PAGE_SIZE);
 }
 
+static void __meminit
+phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end)
+{
+	unsigned pages = 0;
+	int i;
+	pte_t *pte = pte_page + pte_index(addr);
+
+	for(i = pte_index(addr); i < PTRS_PER_PTE; i++, addr += PAGE_SIZE, pte++) {
+
+		if (addr >= end) {
+			if (!after_bootmem) {
+				for(; i < PTRS_PER_PTE; i++, pte++)
+					set_pte(pte, __pte(0));
+			}
+			break;
+		}
+
+		if (pte_val(*pte))
+			continue;
+
+		if (0)
+			printk("   pte=%p addr=%lx pte=%016lx\n",
+			       pte, addr, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL).pte);
+		set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL));
+		pages++;
+	}
+	update_page_count(PG_LEVEL_4K, pages);
+}
+
+static void __meminit
+phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end)
+{
+	pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd);
+
+	phys_pte_init(pte, address, end);
+}
+
 static unsigned long __meminit
 phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
 {
@@ -265,7 +302,9 @@
 	int i = pmd_index(address);
 
 	for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
+		unsigned long pte_phys;
 		pmd_t *pmd = pmd_page + pmd_index(address);
+		pte_t *pte;
 
 		if (address >= end) {
 			if (!after_bootmem) {
@@ -275,12 +314,23 @@
 			break;
 		}
 
-		if (pmd_val(*pmd))
+		if (pmd_val(*pmd)) {
+			phys_pte_update(pmd, address, end);
 			continue;
+		}
 
-		pages++;
-		set_pte((pte_t *)pmd,
-			pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
+		if (cpu_has_pse) {
+			pages++;
+			set_pte((pte_t *)pmd,
+				pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
+			continue;
+		}
+
+		pte = alloc_low_page(&pte_phys);
+		phys_pte_init(pte, address, end);
+		unmap_low_page(pte);
+
+		pmd_populate_kernel(&init_mm, pmd, __va(pte_phys));
 	}
 	update_page_count(PG_LEVEL_2M, pages);
 	return address;
@@ -337,11 +387,11 @@
 		pmd = alloc_low_page(&pmd_phys);
 
 		spin_lock(&init_mm.page_table_lock);
+		last_map_addr = phys_pmd_init(pmd, addr, end);
+		unmap_low_page(pmd);
 		pud_populate(&init_mm, pud, __va(pmd_phys));
-		last_map_addr = phys_pmd_init(pmd, addr, end);
 		spin_unlock(&init_mm.page_table_lock);
 
-		unmap_low_page(pmd);
 	}
 	__flush_tlb_all();
 	update_page_count(PG_LEVEL_1G, pages);
@@ -349,15 +399,29 @@
 	return last_map_addr >> PAGE_SHIFT;
 }
 
+static unsigned long __meminit
+phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end)
+{
+	pud_t *pud;
+
+	pud = (pud_t *)pgd_page_vaddr(*pgd);
+
+	return phys_pud_init(pud, addr, end);
+}
+
 static void __init find_early_table_space(unsigned long end)
 {
-	unsigned long puds, pmds, tables, start;
+	unsigned long puds, tables, start;
 
 	puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
 	tables = round_up(puds * sizeof(pud_t), PAGE_SIZE);
 	if (!direct_gbpages) {
-		pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
+		unsigned long pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
 		tables += round_up(pmds * sizeof(pmd_t), PAGE_SIZE);
+	}
+	if (!cpu_has_pse) {
+		unsigned long ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
+		tables += round_up(ptes * sizeof(pte_t), PAGE_SIZE);
 	}
 
 	/*
@@ -529,19 +593,25 @@
 		unsigned long pud_phys;
 		pud_t *pud;
 
+		next = start + PGDIR_SIZE;
+		if (next > end)
+			next = end;
+
+		if (pgd_val(*pgd)) {
+			last_map_addr = phys_pud_update(pgd, __pa(start), __pa(end));
+			continue;
+		}
+
 		if (after_bootmem)
 			pud = pud_offset(pgd, start & PGDIR_MASK);
 		else
 			pud = alloc_low_page(&pud_phys);
 
-		next = start + PGDIR_SIZE;
-		if (next > end)
-			next = end;
 		last_map_addr = phys_pud_init(pud, __pa(start), __pa(next));
+		unmap_low_page(pud);
 		if (!after_bootmem)
 			pgd_populate(&init_mm, pgd_offset_k(start),
 				     __va(pud_phys));
-		unmap_low_page(pud);
 	}
 
 	if (!after_bootmem)
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -513,7 +513,7 @@
 	if (pgprot_val(flags))
 		set_pte(pte, pfn_pte(phys >> PAGE_SHIFT, flags));
 	else
-		pte_clear(NULL, addr, pte);
+		pte_clear(&init_mm, addr, pte);
 	__flush_tlb_one(addr);
 }
 



  parent reply	other threads:[~2008-06-25  4:40 UTC|newest]

Thread overview: 96+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-06-25  4:18 [PATCH 00 of 36] x86/paravirt: groundwork for 64-bit Xen support Jeremy Fitzhardinge
2008-06-25  4:18 ` [PATCH 01 of 36] x86: asm-x86/pgtable.h: fix compiler warning Jeremy Fitzhardinge
2008-06-25  4:18 ` [PATCH 02 of 36] x86: add memory clobber to save/loadsegment Jeremy Fitzhardinge
2008-06-25  4:18 ` [PATCH 03 of 36] x86: add memory barriers to wrmsr Jeremy Fitzhardinge
2008-06-25  4:44   ` Arjan van de Ven
2008-06-25 21:08     ` Jeremy Fitzhardinge
2008-06-25 22:31       ` Arjan van de Ven
2008-06-25 23:05         ` Jeremy Fitzhardinge
2008-06-25 23:18         ` H. Peter Anvin
2008-06-25 23:37           ` Jeremy Fitzhardinge
2008-06-25 23:42             ` H. Peter Anvin
2008-06-25  4:19 ` [PATCH 04 of 36] x86: remove open-coded save/load segment operations Jeremy Fitzhardinge
2008-06-25  4:19 ` [PATCH 05 of 36] x86_64: use write_gdt_entry in vsyscall_set_cpu Jeremy Fitzhardinge
2008-06-25  4:19 ` [PATCH 06 of 36] x86_64: use p??_populate() to attach pages to pagetable Jeremy Fitzhardinge
2008-06-25  4:19 ` [PATCH 07 of 36] x86_64: unify early_ioremap Jeremy Fitzhardinge
2008-06-25  4:19 ` [PATCH 08 of 36] x86_64: Add gate_offset() and gate_segment() macros Jeremy Fitzhardinge
2008-06-25  4:19 ` [PATCH 09 of 36] x86_64: Use __pgd() on mk_kernel_pgd() Jeremy Fitzhardinge
2008-06-25  4:19 ` [PATCH 10 of 36] x86: unify pgd_index Jeremy Fitzhardinge
2008-06-25  4:19 ` [PATCH 11 of 36] x86: unify mmu_context.h Jeremy Fitzhardinge
2008-06-25  4:19 ` [PATCH 12 of 36] x86_64: replace end_pfn with num_physpages Jeremy Fitzhardinge
2008-06-25  4:19 ` [PATCH 13 of 36] x86_64: add prototype for x86_64_start_kernel() Jeremy Fitzhardinge
2008-06-25  4:19 ` [PATCH 14 of 36] x86_64: add sync_cmpxchg Jeremy Fitzhardinge
2008-06-25  4:19 ` [PATCH 15 of 36] x86: simplify vmalloc_sync_all Jeremy Fitzhardinge
2008-06-25  4:19 ` [PATCH 16 of 36] x86/paravirt: add a pgd_alloc/free hooks Jeremy Fitzhardinge
2008-06-25  4:19 ` [PATCH 17 of 36] x86: preallocate and prepopulate separately Jeremy Fitzhardinge
2008-06-25  4:19 ` [PATCH 18 of 36] x86/paravirt: add debugging for missing operations Jeremy Fitzhardinge
2008-06-25  4:19 ` [PATCH 19 of 36] paravirt_ops: define PARA_INDIRECT for indirect asm calls Jeremy Fitzhardinge
2008-06-25  4:19 ` [PATCH 20 of 36] paravirt/x86_64: move __PAGE_OFFSET to leave a space for hypervisor Jeremy Fitzhardinge
2008-06-25  4:19 ` [PATCH 21 of 36] x86-64: add FIX_PARAVIRT_BOOTMAP fixmap slot Jeremy Fitzhardinge
2008-06-25  4:19 ` [PATCH 22 of 36] x86_64: split x86_64_start_kernel Jeremy Fitzhardinge
2008-06-25  4:19 ` Jeremy Fitzhardinge [this message]
2008-06-25  4:19 ` [PATCH 24 of 36] x86_64: create small vmemmap mappings if PSE not available Jeremy Fitzhardinge
2008-06-25  4:19 ` [PATCH 25 of 36] x86_64: PSE no longer a hard requirement Jeremy Fitzhardinge
2008-06-25  4:19 ` [PATCH 26 of 36] x86_64: Split set_pte_vaddr() Jeremy Fitzhardinge
2008-06-25  4:19 ` [PATCH 27 of 36] x86_64: __switch_to(): Move arch_leave_lazy_cpu_mode() to the right place Jeremy Fitzhardinge
2008-06-25  4:19 ` [PATCH 28 of 36] Save %fs and %gs before load_TLS() and arch_leave_lazy_cpu_mode() Jeremy Fitzhardinge
2008-06-25  4:19 ` [PATCH 29 of 36] Use __KERNEL_DS as SS when returning to a kernel thread (VERIFY) Jeremy Fitzhardinge
2008-06-25  4:19 ` [PATCH 30 of 36] x86/paravirt_ops: split sysret and sysexit Jeremy Fitzhardinge
2008-06-25  4:19 ` [PATCH 31 of 36] x86_64 pvops: don't restore user rsp within sysret Jeremy Fitzhardinge
2008-06-25  4:19 ` [PATCH 32 of 36] Add sysret/sysexit pvops for returning to 32-bit compatibility userspace Jeremy Fitzhardinge
2008-06-25  4:19 ` [PATCH 33 of 36] x86_64: ia32entry: replace privileged instructions with pvops Jeremy Fitzhardinge
2008-06-25  4:19 ` [PATCH 34 of 36] x86_64: swapgs pvop with a user-stack can never be called Jeremy Fitzhardinge
2008-06-25  4:19 ` [PATCH 35 of 36] x86_64/paravirt: add adjust_exception_frame Jeremy Fitzhardinge
2008-06-25  4:19 ` [PATCH 36 of 36] x86_64/paravirt: Make load_gs_index() a paravirt operation Jeremy Fitzhardinge
2008-06-25  8:47   ` Ingo Molnar
2008-06-25 11:48     ` Jeremy Fitzhardinge
2008-06-25  8:42 ` [PATCH 00 of 36] x86/paravirt: groundwork for 64-bit Xen support Ingo Molnar
2008-06-25 11:46   ` Jeremy Fitzhardinge
2008-06-25 15:22   ` Ingo Molnar
2008-06-25 20:12     ` Jeremy Fitzhardinge
2008-06-26 10:57       ` Ingo Molnar
2008-06-26 10:58         ` Ingo Molnar
2008-06-26 14:34           ` [Xen-devel] " Jeremy Fitzhardinge
2008-06-27 15:56             ` Ingo Molnar
2008-06-27 16:02               ` Jeremy Fitzhardinge
2008-06-27 16:06                 ` Ingo Molnar
2008-06-27 16:25                   ` Jeremy Fitzhardinge
2008-06-27 16:03             ` Ingo Molnar
2008-06-27 19:04               ` Jeremy Fitzhardinge
2008-06-29  8:43                 ` Ingo Molnar
2008-06-30  3:02                   ` Jeremy Fitzhardinge
2008-06-30  4:35                     ` Yinghai Lu
2008-06-30  5:32                       ` Jeremy Fitzhardinge
2008-06-30  8:21                     ` Ingo Molnar
2008-06-30  9:22                       ` Ingo Molnar
2008-06-30 17:17                         ` Jeremy Fitzhardinge
2008-06-30 18:12                           ` Ingo Molnar
2008-06-30 18:36                             ` Jeremy Fitzhardinge
2008-06-30 18:44                               ` Ingo Molnar
2008-06-30 17:57                         ` Jeremy Fitzhardinge
2008-06-30 18:03                           ` Ingo Molnar
2008-06-30 23:04                         ` Jeremy Fitzhardinge
2008-07-01  8:52                           ` Ingo Molnar
2008-07-01  9:21                             ` Ingo Molnar
2008-07-01 16:10                               ` Jeremy Fitzhardinge
2008-07-01 16:14                               ` Jeremy Fitzhardinge
2008-07-01 20:31                                 ` Ingo Molnar
2008-07-03  9:10                                   ` Ingo Molnar
2008-07-03 15:47                                     ` Jeremy Fitzhardinge
2008-07-03 18:20                                     ` Yinghai Lu
2008-07-03 18:25                                       ` Jeremy Fitzhardinge
2008-07-03 18:30                                         ` Yinghai Lu
2008-07-03 18:41                                           ` Jeremy Fitzhardinge
2008-07-03 18:51                                             ` Yinghai Lu
2008-07-03 19:19                                               ` Yinghai Lu
2008-07-03 19:29                                                 ` Yinghai Lu
2008-07-09  7:42                                                   ` Ingo Molnar
2008-06-26 14:28         ` Jeremy Fitzhardinge
2008-06-26 18:25         ` Jeremy Fitzhardinge
2008-06-26 19:02         ` Jeremy Fitzhardinge
2008-06-25 12:40 ` Andi Kleen
2008-06-25 18:45   ` [Xen-devel] " Keir Fraser
2008-06-25 19:13     ` Andi Kleen
2008-06-25 19:22       ` Keir Fraser
2008-06-25 20:14         ` Andi Kleen
2008-06-25 20:03   ` Jeremy Fitzhardinge

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=aa722b5444fdde7ffa52.1214367559@localhost \
    --to=jeremy@goop.org \
    --cc=ehabkost@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=markmc@redhat.com \
    --cc=mingo@elte.hu \
    --cc=sct@redhat.com \
    --cc=x86@kernel.org \
    --cc=xen-devel@lists.xensource.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox