From: Julien Grall <julien.grall@citrix.com>
To: Ian Campbell <ian.campbell@citrix.com>
Cc: stefano.stabellini@eu.citrix.com, tim@xen.org, xen-devel@lists.xen.org
Subject: Re: [PATCH 10/10] xen: arm: Use a direct mapping of RAM on arm64
Date: Tue, 25 Jun 2013 16:39:56 +0100 [thread overview]
Message-ID: <51C9B9CC.3090405@citrix.com> (raw)
In-Reply-To: <1371562017-5379-10-git-send-email-ian.campbell@citrix.com>
On 06/18/2013 02:26 PM, Ian Campbell wrote:
> We have plenty of virtual address space so we can avoid needing to map and
> unmap pages all the time.
>
> A totally arbitrarily chosen 32GB frame table leads to support for 5TB of RAM.
> I haven't tested with anything near that amount of RAM though. There is plenty
> of room to expand further when that becomes necessary.
>
> Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
> ---
> xen/arch/arm/mm.c | 155 ++++++++++++++++++++++++++++++++++--------
> xen/arch/arm/setup.c | 92 +++++++++++++++++++++++++
> xen/include/asm-arm/config.h | 96 +++++++++++++++++++-------
> xen/include/asm-arm/mm.h | 16 ++++
> 4 files changed, 307 insertions(+), 52 deletions(-)
>
> diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c
> index 5ad687f..35ef56d 100644
> --- a/xen/arch/arm/mm.c
> +++ b/xen/arch/arm/mm.c
> @@ -37,6 +37,7 @@
> #include <public/memory.h>
> #include <xen/sched.h>
> #include <xen/vmap.h>
> +#include <asm/early_printk.h>
> #include <xsm/xsm.h>
> #include <xen/pfn.h>
>
> @@ -48,6 +49,14 @@ struct domain *dom_xen, *dom_io, *dom_cow;
> lpae_t boot_pgtable[LPAE_ENTRIES] __attribute__((__aligned__(4096)));
> #ifdef CONFIG_ARM_64
> lpae_t boot_first[LPAE_ENTRIES] __attribute__((__aligned__(4096)));
> +/* The first page of the first level mapping of the xenheap. The
> + * subsequent xenheap first level pages are dynamically allocated, but
> + * we need this one to bootstrap ourselves. */
> +lpae_t xenheap_first_first[LPAE_ENTRIES] __attribute__((__aligned__(4096)));
> +/* The zeroeth level slot which uses xenheap_first_first. Used because
> + * setup_xenheap_mappings otherwise relies on mfn_to_virt which isn't
> + * valid for a non-xenheap mapping. */
> +static __initdata int xenheap_first_first_slot = -1;
> #endif
>
> /*
> @@ -57,6 +66,9 @@ lpae_t boot_first[LPAE_ENTRIES] __attribute__((__aligned__(4096)));
> * xen_second, xen_fixmap and xen_xenmap are shared between all PCPUs.
> */
>
> +#ifdef CONFIG_ARM_64
> +#define THIS_CPU_PGTABLE boot_pgtable
> +#else
> /* Per-CPU pagetable pages */
> /* xen_pgtable == root of the trie (zeroeth level on 64-bit, first on 32-bit) */
> static DEFINE_PER_CPU(lpae_t *, xen_pgtable);
> @@ -65,6 +77,7 @@ static DEFINE_PER_CPU(lpae_t *, xen_pgtable);
> * the second level pagetables which mapp the domheap region
> * DOMHEAP_VIRT_START...DOMHEAP_VIRT_END in 2MB chunks. */
> static DEFINE_PER_CPU(lpae_t *, xen_dommap);
> +#endif
>
> /* Common pagetable leaves */
> /* Second level page tables.
> @@ -73,10 +86,16 @@ static DEFINE_PER_CPU(lpae_t *, xen_dommap);
> * addresses from 0 to 0x7fffffff. Offsets into it are calculated
> * with second_linear_offset(), not second_table_offset().
> *
> - * Addresses 0x80000000 to 0xffffffff are covered by the per-cpu
> - * xen_domheap mappings described above. However we allocate 4 pages
> + * On 32bit addresses 0x80000000 to 0xffffffff are covered by the
> + * per-cpu xen_domheap mappings described above. We allocate 4 pages
> * here for use in the boot page tables and the second two pages
> * become the boot CPUs xen_dommap pages.
> + *
> + * On 64bit addresses 0x80000000 to 0xffffffff are unused. However we
> + * allocate 4 pages here for use while relocating Xen, which currently
> + * expects a second level page to exist for all addresses in the first
> + * 4GB. We need to keep these extra mappings in place for secondary CPU
> + * bring up too. For now we just leave them forever.
> */
> lpae_t xen_second[LPAE_ENTRIES*4] __attribute__((__aligned__(4096*4)));
> /* First level page table used for fixmap */
> @@ -92,7 +111,7 @@ uint64_t boot_ttbr;
> static paddr_t phys_offset;
>
> /* Limits of the Xen heap */
> -unsigned long xenheap_mfn_start __read_mostly;
> +unsigned long xenheap_mfn_start __read_mostly = ~0UL;
> unsigned long xenheap_mfn_end __read_mostly;
> unsigned long xenheap_virt_end __read_mostly;
>
> @@ -112,7 +131,9 @@ static inline void check_memory_layout_alignment_constraints(void) {
> BUILD_BUG_ON(BOOT_MISC_VIRT_START & ~SECOND_MASK);
> /* 1GB aligned regions */
> BUILD_BUG_ON(XENHEAP_VIRT_START & ~FIRST_MASK);
> +#ifdef CONFIG_DOMAIN_PAGE
> BUILD_BUG_ON(DOMHEAP_VIRT_START & ~FIRST_MASK);
> +#endif
> }
>
> void dump_pt_walk(lpae_t *first, paddr_t addr)
> @@ -180,6 +201,7 @@ void clear_fixmap(unsigned map)
> flush_xen_data_tlb_range_va(FIXMAP_ADDR(map), PAGE_SIZE);
> }
>
> +#ifdef CONFIG_DOMAIN_PAGE
> void *map_domain_page_global(unsigned long mfn)
> {
> return vmap(&mfn, 1);
> @@ -284,6 +306,7 @@ unsigned long domain_page_map_to_mfn(const void *va)
>
> return map[slot].pt.base + offset;
> }
> +#endif
>
> void __init arch_init_memory(void)
> {
> @@ -431,6 +454,7 @@ void __init setup_pagetables(unsigned long boot_phys_offset, paddr_t xen_paddr)
> /* Flush everything after setting WXN bit. */
> flush_xen_text_tlb();
>
> +#ifdef CONFIG_ARM_32
> per_cpu(xen_pgtable, 0) = boot_pgtable;
> per_cpu(xen_dommap, 0) = xen_second +
> second_linear_offset(DOMHEAP_VIRT_START);
> @@ -440,43 +464,31 @@ void __init setup_pagetables(unsigned long boot_phys_offset, paddr_t xen_paddr)
> memset(this_cpu(xen_dommap), 0, DOMHEAP_SECOND_PAGES*PAGE_SIZE);
> flush_xen_dcache_va_range(this_cpu(xen_dommap),
> DOMHEAP_SECOND_PAGES*PAGE_SIZE);
> +#endif
> }
>
> int init_secondary_pagetables(int cpu)
> {
> - lpae_t *root, *first, *domheap, pte;
> - int i;
> -
> - root = alloc_xenheap_page();
> #ifdef CONFIG_ARM_64
> - first = alloc_xenheap_page();
> + /* All CPUs share a single page table on 64 bit */
> + return 0;
> #else
> - first = root; /* root == first level on 32-bit 3-level trie */
> -#endif
> + lpae_t *first, *domheap, pte;
> + int i;
> +
> + first = alloc_xenheap_page(); /* root == first level on 32-bit 3-level trie */
> domheap = alloc_xenheap_pages(get_order_from_pages(DOMHEAP_SECOND_PAGES), 0);
>
> - if ( root == NULL || domheap == NULL || first == NULL )
> + if ( domheap == NULL || first == NULL )
> {
> printk("Not enough free memory for secondary CPU%d pagetables\n", cpu);
> free_xenheap_pages(domheap, get_order_from_pages(DOMHEAP_SECOND_PAGES));
> -#ifdef CONFIG_ARM_64
> free_xenheap_page(first);
> -#endif
> - free_xenheap_page(root);
> return -ENOMEM;
> }
>
> /* Initialise root pagetable from root of boot tables */
> - memcpy(root, boot_pgtable, PAGE_SIZE);
> -
> -#ifdef CONFIG_ARM_64
> - /* Initialise first pagetable from first level of boot tables, and
> - * hook into the new root. */
> - memcpy(first, boot_first, PAGE_SIZE);
> - pte = mfn_to_xen_entry(virt_to_mfn(first));
> - pte.pt.table = 1;
> - write_pte(root, pte);
> -#endif
> + memcpy(first, boot_pgtable, PAGE_SIZE);
>
> /* Ensure the domheap has no stray mappings */
> memset(domheap, 0, DOMHEAP_SECOND_PAGES*PAGE_SIZE);
> @@ -490,16 +502,14 @@ int init_secondary_pagetables(int cpu)
> write_pte(&first[first_table_offset(DOMHEAP_VIRT_START+i*FIRST_SIZE)], pte);
> }
>
> - flush_xen_dcache_va_range(root, PAGE_SIZE);
> -#ifdef CONFIG_ARM_64
> flush_xen_dcache_va_range(first, PAGE_SIZE);
> -#endif
> flush_xen_dcache_va_range(domheap, DOMHEAP_SECOND_PAGES*PAGE_SIZE);
>
> - per_cpu(xen_pgtable, cpu) = root;
> + per_cpu(xen_pgtable, cpu) = first;
> per_cpu(xen_dommap, cpu) = domheap;
>
> return 0;
> +#endif
> }
>
> /* MMU setup for secondary CPUS (which already have paging enabled) */
> @@ -544,6 +554,7 @@ static void __init create_32mb_mappings(lpae_t *second,
> flush_xen_data_tlb();
> }
>
> +#ifdef CONFIG_ARM_32
> /* Set up the xenheap: up to 1GB of contiguous, always-mapped memory. */
> void __init setup_xenheap_mappings(unsigned long base_mfn,
> unsigned long nr_mfns)
> @@ -556,19 +567,107 @@ void __init setup_xenheap_mappings(unsigned long base_mfn,
> xenheap_mfn_end = base_mfn + nr_mfns;
> }
>
> +#else /* CONFIG_ARM_64 */
> +
> +void __init setup_xenheap_mappings(unsigned long base_mfn,
> + unsigned long nr_mfns)
> +{
> + lpae_t *first, pte;
> + unsigned long offset, end_mfn;
> + vaddr_t vaddr;
> +
> + /* First call sets the xenheap physical offset. */
> + if ( xenheap_mfn_start == ~0UL )
> + xenheap_mfn_start = base_mfn;
> +
> + if ( base_mfn < xenheap_mfn_start )
> + early_panic("cannot add xenheap mapping at %lx below heap start %lx\n",
> + base_mfn, xenheap_mfn_start);
> +
> + end_mfn = base_mfn + nr_mfns;
> +
> + /* Align to previous 1GB boundary */
> + base_mfn &= ~FIRST_MASK;
> +
> + offset = base_mfn - xenheap_mfn_start;
> + vaddr = DIRECTMAP_VIRT_START + offset*PAGE_SIZE;
> +
> + while ( base_mfn < end_mfn )
> + {
> + int slot = zeroeth_table_offset(vaddr);
> + lpae_t *p = &boot_pgtable[slot];
> +
> + if ( p->pt.valid )
> + {
> + /* mfn_to_virt is not valid on the 1st 1st mfn, since it
> + * is not within the xenheap. */
> + first = slot == xenheap_first_first_slot ?
> + xenheap_first_first : mfn_to_virt(p->pt.base);
> + }
> + else if ( xenheap_first_first_slot == -1)
> + {
> + /* Use xenheap_first_first to bootstrap the mappings */
> + first = xenheap_first_first;
> +
> + pte = pte_of_xenaddr((vaddr_t)xenheap_first_first);
> + pte.pt.table = 1;
> + write_pte(p, pte);
> +
> + xenheap_first_first_slot = slot;
> + }
> + else
> + {
> + unsigned long first_mfn = alloc_boot_pages(1, 1);
> + pte = mfn_to_xen_entry(first_mfn);
> + pte.pt.table = 1;
> + write_pte(p, pte);
> + first = mfn_to_virt(first_mfn);
> + }
> +
> + pte = mfn_to_xen_entry(base_mfn);
> + //pte.pt.contig = /* XXX need to think about the logic */
> + write_pte(&first[first_table_offset(vaddr)], pte);
> +
> + base_mfn += FIRST_SIZE>>PAGE_SHIFT;
> + vaddr += FIRST_SIZE;
> + }
> +
> + flush_xen_data_tlb();
> +}
> +#endif
> +
> /* Map a frame table to cover physical addresses ps through pe */
> void __init setup_frametable_mappings(paddr_t ps, paddr_t pe)
> {
> unsigned long nr_pages = (pe - ps) >> PAGE_SHIFT;
> unsigned long frametable_size = nr_pages * sizeof(struct page_info);
> unsigned long base_mfn;
> +#ifdef CONFIG_ARM_64
> + lpae_t *second, pte;
> + unsigned long nr_second, second_base;
> + int i;
> +#endif
>
> frametable_base_mfn = ps >> PAGE_SHIFT;
>
> /* Round up to 32M boundary */
> frametable_size = (frametable_size + 0x1ffffff) & ~0x1ffffff;
> base_mfn = alloc_boot_pages(frametable_size >> PAGE_SHIFT, 32<<(20-12));
> +
> +#ifdef CONFIG_ARM_64
> + nr_second = frametable_size >> SECOND_SHIFT;
> + second_base = alloc_boot_pages(nr_second, 1);
> + second = mfn_to_virt(second_base);
> + for ( i = 0; i < nr_second; i++ )
> + {
> + pte = mfn_to_xen_entry(second_base + i);
> + pte.pt.table = 1;
> + write_pte(&boot_first[first_table_offset(FRAMETABLE_VIRT_START)+i], pte);
> + }
> + create_32mb_mappings(second, 0, base_mfn, frametable_size >> PAGE_SHIFT);
> +#else
> create_32mb_mappings(xen_second, FRAMETABLE_VIRT_START, base_mfn, frametable_size >> PAGE_SHIFT);
> +#endif
>
> memset(&frame_table[0], 0, nr_pages * sizeof(struct page_info));
> memset(&frame_table[nr_pages], -1,
> diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c
> index da2a734..3efa063 100644
> --- a/xen/arch/arm/setup.c
> +++ b/xen/arch/arm/setup.c
> @@ -279,6 +279,7 @@ static paddr_t __init get_xen_paddr(void)
> return paddr;
> }
>
> +#ifdef CONFIG_ARM_32
> static void __init setup_mm(unsigned long dtb_paddr, size_t dtb_size)
> {
> paddr_t ram_start;
> @@ -392,6 +393,97 @@ static void __init setup_mm(unsigned long dtb_paddr, size_t dtb_size)
>
> end_boot_allocator();
> }
> +#else /* CONFIG_ARM_64 */
> +static void __init setup_mm(unsigned long dtb_paddr, size_t dtb_size)
> +{
> + paddr_t ram_start = ~0;
> + paddr_t ram_end = 0;
> + int bank;
> + unsigned long xenheap_pages = 0;
> + unsigned long dtb_pages;
> +
> + /*
> + * TODO: only using the first RAM bank for now. The heaps and the
> + * frame table assume RAM is physically contiguous.
> + */
I think this comment can be removed: the loop below now iterates over
every RAM bank rather than only the first one.
> + total_pages = 0;
> + for ( bank = 0 ; bank < early_info.mem.nr_banks; bank++ )
> + {
> + paddr_t bank_start = early_info.mem.bank[bank].start;
> + paddr_t bank_size = early_info.mem.bank[bank].size;
> + paddr_t bank_end = bank_start + bank_size;
> + unsigned long bank_pages = bank_size >> PAGE_SHIFT;
> + paddr_t s, e;
> +
> + total_pages += bank_pages;
> +
> + if ( bank_start < ram_start )
> + ram_start = bank_start;
> + if ( bank_end > ram_end )
> + ram_end = bank_end;
> +
> + /*
> + * Locate the xenheap using these constraints:
> + *
> + * - must be 32 MiB aligned
> + * - must not include Xen itself or the boot modules
> + * - must be at most 1 GiB
> + * - must be at least 128M
> + *
> + * We try to allocate the largest xenheap possible within these
> + * constraints.
> + */
> + xenheap_pages += (bank_size >> PAGE_SHIFT);
> +
> + /* XXX we assume that the ram regions are ordered */
> + s = bank_start;
> + while ( s < bank_end )
> + {
> + paddr_t n = bank_end;
> +
> + e = next_module(s, &n);
> +
> + if ( e == ~(paddr_t)0 )
> + {
> + e = n = bank_end;
> + }
> +
> + setup_xenheap_mappings(s>>PAGE_SHIFT, (e-s)>>PAGE_SHIFT);
> +
> + xenheap_mfn_end = e;
> +
> + init_boot_pages(s, e);
> + s = n;
> + }
> + }
> +
> + xenheap_virt_end = XENHEAP_VIRT_START + ram_end - ram_start;
> + xenheap_mfn_start = ram_start >> PAGE_SHIFT;
> + xenheap_mfn_end = ram_end >> PAGE_SHIFT;
> + xenheap_max_mfn(xenheap_mfn_end);
> +
> + /*
> + * Need enough mapped pages for copying the DTB.
> + *
> + * TODO: The DTB (and other payloads) are assumed to be towards
> + * the start of RAM.
> + */
> + dtb_pages = (dtb_size + PAGE_SIZE-1) >> PAGE_SHIFT;
> +
> + /*
> + * Copy the DTB.
> + *
> + * TODO: handle other payloads too.
> + */
> + device_tree_flattened = mfn_to_virt(alloc_boot_pages(dtb_pages, 1));
> + copy_from_paddr(device_tree_flattened, dtb_paddr, dtb_size, BUFFERABLE);
> +
> + setup_frametable_mappings(ram_start, ram_end);
What happens if the RAM is not contiguous? Do we have a hole in the frametable?
BTW, I was looking at the mfn_valid function, and I think it can
possibly consider an MFN in a device memory region to be valid. Is that
the intended behaviour?
--
Julien
next prev parent reply other threads:[~2013-06-25 15:39 UTC|newest]
Thread overview: 33+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-06-18 13:26 [PATCH 00/10] xen: arm: direct 1:1 map on 64 bit Ian Campbell
2013-06-18 13:26 ` [PATCH 01/10] xen: arm: Use _AC macros in config.h Ian Campbell
2013-06-20 16:29 ` Stefano Stabellini
2013-06-18 13:26 ` [PATCH 02/10] xen: arm: Add zeroeth level page table macros and defines Ian Campbell
2013-06-18 13:26 ` [PATCH 03/10] xen: arm: Rename page table "hint" field to slightly more descriptive "contig" Ian Campbell
2013-06-20 16:29 ` Stefano Stabellini
2013-06-18 13:26 ` [PATCH 04/10] xen: arm: refactor create_mappings Ian Campbell
2013-06-20 16:40 ` Stefano Stabellini
2013-06-18 13:26 ` [PATCH 05/10] xen: arm: define a macro to get this CPUs page table root Ian Campbell
2013-06-20 16:30 ` Stefano Stabellini
2013-06-25 10:49 ` Ian Campbell
2013-06-18 13:26 ` [PATCH 06/10] xen: arm: Add a pte_of_xenaddr helper Ian Campbell
2013-06-20 16:32 ` Stefano Stabellini
2013-06-25 10:50 ` Ian Campbell
2013-06-26 18:21 ` Stefano Stabellini
2013-06-18 13:26 ` [PATCH 07/10] xen: arm: fix is_xen_fixed_mfn Ian Campbell
2013-06-20 16:33 ` Stefano Stabellini
2013-06-18 13:26 ` [PATCH 08/10] xen: arm: allow virt_to_maddr to take either a pointer or an integer Ian Campbell
2013-06-20 16:34 ` Stefano Stabellini
2013-06-18 13:26 ` [PATCH 09/10] xen: gate split heap code on its own config option rather than !X86 Ian Campbell
2013-06-18 16:01 ` Jan Beulich
2013-06-20 16:35 ` Stefano Stabellini
2013-06-18 13:26 ` [PATCH 10/10] xen: arm: Use a direct mapping of RAM on arm64 Ian Campbell
2013-06-18 14:05 ` Julien Grall
2013-06-18 14:13 ` Ian Campbell
2013-06-20 16:43 ` Stefano Stabellini
2013-07-22 22:23 ` Ian Campbell
2013-06-21 14:10 ` Stefano Stabellini
2013-06-25 11:04 ` Ian Campbell
2013-06-26 18:32 ` Stefano Stabellini
2013-06-27 8:41 ` Ian Campbell
2013-06-25 15:39 ` Julien Grall [this message]
2013-06-26 11:22 ` Ian Campbell
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=51C9B9CC.3090405@citrix.com \
--to=julien.grall@citrix.com \
--cc=ian.campbell@citrix.com \
--cc=stefano.stabellini@eu.citrix.com \
--cc=tim@xen.org \
--cc=xen-devel@lists.xen.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).