From: Baoquan He <bhe@redhat.com>
To: Thomas Garnier <thgarnie@google.com>
Cc: "Herbert Xu" <herbert@gondor.apana.org.au>,
"David S . Miller" <davem@davemloft.net>,
"Thomas Gleixner" <tglx@linutronix.de>,
"Ingo Molnar" <mingo@redhat.com>,
"H . Peter Anvin" <hpa@zytor.com>,
"Peter Zijlstra" <peterz@infradead.org>,
"Josh Poimboeuf" <jpoimboe@redhat.com>,
"Arnd Bergmann" <arnd@arndb.de>,
"Matthias Kaehlcke" <mka@chromium.org>,
"Boris Ostrovsky" <boris.ostrovsky@oracle.com>,
"Juergen Gross" <jgross@suse.com>,
"Paolo Bonzini" <pbonzini@redhat.com>,
"Radim Krčmář" <rkrcmar@redhat.com>,
"Joerg Roedel" <joro@8bytes.org>,
"Andy Lutomirski" <luto@kernel.org>,
"Borislav Petkov" <bp@alien8.de>,
"Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>,
"Brian Gerst" <brgerst@gmail.com>, "Borislav Petkov" <bp@suse.de>,
"Christian Borntraeger" <borntraeger@de.ibm.com>,
"Rafael J . Wysocki" <rjw@rjwysocki.net>,
"Len Brown" <len.brown@intel.com>, "Pavel Machek" <pavel@ucw.cz>
Subject: Re: [RFC 22/22] x86/kaslr: Add option to extend KASLR range from 1GB to 3GB
Date: Wed, 19 Jul 2017 21:49:32 +0800 [thread overview]
Message-ID: <20170719134932.GF2344@x1> (raw)
In-Reply-To: <20170719121021.GE2344@x1>
On 07/19/17 at 08:10pm, Baoquan He wrote:
> On 07/18/17 at 03:33pm, Thomas Garnier wrote:
>
> > quiet_cmd_relocs = RELOCS $@
> > cmd_relocs = $(CMD_RELOCS) $< > $@;$(CMD_RELOCS) --abs-relocs $<
> > $(obj)/vmlinux.relocs: vmlinux FORCE
> > diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
> > index a0838ab929f2..0a0c80ab1842 100644
> > --- a/arch/x86/boot/compressed/misc.c
> > +++ b/arch/x86/boot/compressed/misc.c
> > @@ -170,10 +170,18 @@ void __puthex(unsigned long value)
> > }
> >
> > #if CONFIG_X86_NEED_RELOCS
> > +
> > +/* Large randomization goes lower than -2G and uses a large relocation table */
> > +#ifdef CONFIG_RANDOMIZE_BASE_LARGE
> > +typedef long rel_t;
> > +#else
> > +typedef int rel_t;
> > +#endif
> > +
> > static void handle_relocations(void *output, unsigned long output_len,
> > unsigned long virt_addr)
> > {
> > - int *reloc;
> > + rel_t *reloc;
> > unsigned long delta, map, ptr;
> > unsigned long min_addr = (unsigned long)output;
> > unsigned long max_addr = min_addr + (VO___bss_start - VO__text);
> > diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
> > index 3f5f08b010d0..6b65f846dd64 100644
> > --- a/arch/x86/include/asm/page_64_types.h
> > +++ b/arch/x86/include/asm/page_64_types.h
> > @@ -48,7 +48,11 @@
> > #define __PAGE_OFFSET __PAGE_OFFSET_BASE
> > #endif /* CONFIG_RANDOMIZE_MEMORY */
> >
> > +#ifdef CONFIG_RANDOMIZE_BASE_LARGE
> > +#define __START_KERNEL_map _AC(0xffffffff00000000, UL)
> > +#else
> > #define __START_KERNEL_map _AC(0xffffffff80000000, UL)
> > +#endif /* CONFIG_RANDOMIZE_BASE_LARGE */
> >
> > /* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
> > #ifdef CONFIG_X86_5LEVEL
> > @@ -65,9 +69,14 @@
> > * 512MiB by default, leaving 1.5GiB for modules once the page tables
> > * are fully set up. If kernel ASLR is configured, it can extend the
> > * kernel page table mapping, reducing the size of the modules area.
> > + * On PIE, we relocate the binary 2G lower so add this extra space.
> > */
> > #if defined(CONFIG_RANDOMIZE_BASE)
> > +#ifdef CONFIG_RANDOMIZE_BASE_LARGE
> > +#define KERNEL_IMAGE_SIZE (_AC(3, UL) * 1024 * 1024 * 1024)
> > +#else
> > #define KERNEL_IMAGE_SIZE (1024 * 1024 * 1024)
> > +#endif
> > #else
> > #define KERNEL_IMAGE_SIZE (512 * 1024 * 1024)
> > #endif
> > diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
> > index 4103e90ff128..235c3f7b46c7 100644
> > --- a/arch/x86/kernel/head64.c
> > +++ b/arch/x86/kernel/head64.c
> > @@ -39,6 +39,7 @@ static unsigned int __initdata next_early_pgt;
> > pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);
> >
> > #define __head __section(.head.text)
> > +#define pud_count(x) (((x + (PUD_SIZE - 1)) & ~(PUD_SIZE - 1)) >> PUD_SHIFT)
> >
> > static void __head *fixup_pointer(void *ptr, unsigned long physaddr)
> > {
> > @@ -54,6 +55,8 @@ unsigned long _text_offset = (unsigned long)(_text - __START_KERNEL_map);
> > void __head notrace __startup_64(unsigned long physaddr)
> > {
> > unsigned long load_delta, *p;
> > + unsigned long level3_kernel_start, level3_kernel_count;
> > + unsigned long level3_fixmap_start;
> > pgdval_t *pgd;
> > p4dval_t *p4d;
> > pudval_t *pud;
> > @@ -74,6 +77,11 @@ void __head notrace __startup_64(unsigned long physaddr)
> > if (load_delta & ~PMD_PAGE_MASK)
> > for (;;);
> >
> > + /* Look at the randomization spread to adapt page table used */
> > + level3_kernel_start = pud_index(__START_KERNEL_map);
> > + level3_kernel_count = pud_count(KERNEL_IMAGE_SIZE);
> > + level3_fixmap_start = level3_kernel_start + level3_kernel_count;
> > +
> > /* Fixup the physical addresses in the page table */
> >
> > pgd = fixup_pointer(&early_top_pgt, physaddr);
> > @@ -85,8 +93,9 @@ void __head notrace __startup_64(unsigned long physaddr)
> > }
> >
> > pud = fixup_pointer(&level3_kernel_pgt, physaddr);
> > - pud[510] += load_delta;
> > - pud[511] += load_delta;
> > + for (i = 0; i < level3_kernel_count; i++)
> > + pud[level3_kernel_start + i] += load_delta;
> > + pud[level3_fixmap_start] += load_delta;
> >
> > pmd = fixup_pointer(level2_fixmap_pgt, physaddr);
> > pmd[506] += load_delta;
> > @@ -137,7 +146,7 @@ void __head notrace __startup_64(unsigned long physaddr)
> > */
> >
> > pmd = fixup_pointer(level2_kernel_pgt, physaddr);
> > - for (i = 0; i < PTRS_PER_PMD; i++) {
> > + for (i = 0; i < PTRS_PER_PMD * level3_kernel_count; i++) {
> > if (pmd[i] & _PAGE_PRESENT)
> > pmd[i] += load_delta;
>
> Wow, this is dangerous. Three pud entries of level3_kernel_pgt all point
> to level2_kernel_pgt, so this goes out of bounds of level2_kernel_pgt and
> overwrites the data that follows it.
>
> And if only one page is used for level2_kernel_pgt, and the kernel is
> randomized to cross a pud entry in the -4G to -1G range, it won't work well.
Sorry, I was wrong: the size of level2_kernel_pgt is determined by
KERNEL_IMAGE_SIZE, so this is not a problem. Please ignore this comment.
>
> > }
> > @@ -268,7 +277,8 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
> > */
> > BUILD_BUG_ON(MODULES_VADDR < __START_KERNEL_map);
> > BUILD_BUG_ON(MODULES_VADDR - __START_KERNEL_map < KERNEL_IMAGE_SIZE);
> > - BUILD_BUG_ON(MODULES_LEN + KERNEL_IMAGE_SIZE > 2*PUD_SIZE);
> > + BUILD_BUG_ON(!IS_ENABLED(CONFIG_RANDOMIZE_BASE_LARGE) &&
> > + MODULES_LEN + KERNEL_IMAGE_SIZE > 2*PUD_SIZE);
> > BUILD_BUG_ON((__START_KERNEL_map & ~PMD_MASK) != 0);
> > BUILD_BUG_ON((MODULES_VADDR & ~PMD_MASK) != 0);
> > BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
> > diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
> > index 4d0a7e68bfe8..e8b2d6706eca 100644
> > --- a/arch/x86/kernel/head_64.S
> > +++ b/arch/x86/kernel/head_64.S
> > @@ -39,11 +39,15 @@
> >
> > #define p4d_index(x) (((x) >> P4D_SHIFT) & (PTRS_PER_P4D-1))
> > #define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
> > +#define pud_count(x) (((x + (PUD_SIZE - 1)) & ~(PUD_SIZE - 1)) >> PUD_SHIFT)
> >
> > PGD_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE)
> > PGD_START_KERNEL = pgd_index(__START_KERNEL_map)
> > L3_START_KERNEL = pud_index(__START_KERNEL_map)
> >
> > +/* Adapt page table L3 space based on range of randomization */
> > +L3_KERNEL_ENTRY_COUNT = pud_count(KERNEL_IMAGE_SIZE)
> > +
> > .text
> > __HEAD
> > .code64
> > @@ -396,7 +400,12 @@ NEXT_PAGE(level4_kernel_pgt)
> > NEXT_PAGE(level3_kernel_pgt)
> > .fill L3_START_KERNEL,8,0
> > /* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
> > - .quad level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE
> > + i = 0
> > + .rept L3_KERNEL_ENTRY_COUNT
> > + .quad level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE \
> > + + PAGE_SIZE*i
> > + i = i + 1
> > + .endr
> > .quad level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE
> >
> > NEXT_PAGE(level2_kernel_pgt)
> > --
> > 2.13.2.932.g7449e964c-goog
> >
next prev parent reply other threads:[~2017-07-19 13:49 UTC|newest]
Thread overview: 57+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-07-18 22:33 x86: PIE support and option to extend KASLR randomization Thomas Garnier
2017-07-18 22:33 ` [RFC 01/22] x86/crypto: Adapt assembly for PIE support Thomas Garnier
2017-07-18 22:33 ` [RFC 02/22] x86: Use symbol name on bug table " Thomas Garnier
2017-07-18 22:33 ` [RFC 03/22] x86: Use symbol name in jump " Thomas Garnier
2017-07-18 22:33 ` [RFC 04/22] x86: Add macro to get symbol address " Thomas Garnier
2017-07-18 22:33 ` [RFC 05/22] xen: Adapt assembly " Thomas Garnier
2017-07-18 22:33 ` [RFC 06/22] kvm: " Thomas Garnier
2017-07-19 2:49 ` Brian Gerst
2017-07-19 15:40 ` Thomas Garnier
2017-07-19 22:27 ` H. Peter Anvin
2017-07-19 22:44 ` Thomas Garnier
2017-07-19 22:58 ` Ard Biesheuvel
2017-07-19 23:47 ` H. Peter Anvin
2017-07-19 23:47 ` H. Peter Anvin
2017-07-18 22:33 ` [RFC 07/22] x86: relocate_kernel - " Thomas Garnier
2017-07-19 22:58 ` H. Peter Anvin
2017-07-19 23:23 ` Thomas Garnier
2017-07-18 22:33 ` [RFC 08/22] x86/entry/64: " Thomas Garnier
2017-07-18 22:33 ` [RFC 09/22] x86: pm-trace - " Thomas Garnier
2017-07-18 22:33 ` [RFC 10/22] x86/CPU: " Thomas Garnier
2017-07-18 22:33 ` [RFC 11/22] x86/acpi: " Thomas Garnier
2017-07-18 22:33 ` [RFC 12/22] x86/boot/64: " Thomas Garnier
2017-07-18 22:33 ` [RFC 13/22] x86/power/64: " Thomas Garnier
2017-07-19 18:41 ` Pavel Machek
2017-07-18 22:33 ` [RFC 14/22] x86/paravirt: " Thomas Garnier
2017-07-18 22:33 ` [RFC 15/22] x86/boot/64: Use _text in a global " Thomas Garnier
2017-07-18 22:33 ` [RFC 16/22] x86/percpu: Adapt percpu " Thomas Garnier
2017-07-19 3:08 ` Brian Gerst
2017-07-19 18:26 ` Thomas Garnier
2017-07-19 23:33 ` H. Peter Anvin
2017-07-20 2:21 ` H. Peter Anvin
2017-07-20 3:03 ` H. Peter Anvin
2017-07-20 14:26 ` Thomas Garnier
2017-08-02 16:42 ` Thomas Garnier
2017-08-02 16:56 ` Kees Cook
2017-08-02 18:05 ` Thomas Garnier
2017-07-18 22:33 ` [RFC 17/22] compiler: Option to default to hidden symbols Thomas Garnier
2017-07-18 22:33 ` [RFC 18/22] x86/relocs: Handle DYN relocations for PIE support Thomas Garnier
2017-07-18 22:33 ` [RFC 19/22] x86/pie: Add option to build the kernel as PIE for x86_64 Thomas Garnier
2017-07-18 22:33 ` [RFC 20/22] x86/relocs: Add option to generate 64-bit relocations Thomas Garnier
2017-07-19 22:33 ` H. Peter Anvin
2017-07-19 22:47 ` Thomas Garnier
2017-07-19 23:08 ` H. Peter Anvin
2017-07-19 23:25 ` Thomas Garnier
2017-07-19 23:45 ` H. Peter Anvin
2017-07-19 23:45 ` H. Peter Anvin
2017-07-18 22:33 ` [RFC 21/22] x86/module: Add support for mcmodel large and PLTs Thomas Garnier
2017-07-19 1:35 ` H. Peter Anvin
2017-07-19 3:59 ` Brian Gerst
2017-07-19 15:58 ` Thomas Garnier
2017-07-19 17:34 ` Brian Gerst
2017-07-24 16:32 ` Thomas Garnier
2017-07-18 22:33 ` [RFC 22/22] x86/kaslr: Add option to extend KASLR range from 1GB to 3GB Thomas Garnier
2017-07-19 12:10 ` Baoquan He
2017-07-19 13:49 ` Baoquan He [this message]
2017-07-19 14:08 ` x86: PIE support and option to extend KASLR randomization Christopher Lameter
2017-07-19 19:21 ` Kees Cook
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170719134932.GF2344@x1 \
--to=bhe@redhat.com \
--cc=arnd@arndb.de \
--cc=boris.ostrovsky@oracle.com \
--cc=borntraeger@de.ibm.com \
--cc=bp@alien8.de \
--cc=bp@suse.de \
--cc=brgerst@gmail.com \
--cc=davem@davemloft.net \
--cc=herbert@gondor.apana.org.au \
--cc=hpa@zytor.com \
--cc=jgross@suse.com \
--cc=joro@8bytes.org \
--cc=jpoimboe@redhat.com \
--cc=kirill.shutemov@linux.intel.com \
--cc=len.brown@intel.com \
--cc=luto@kernel.org \
--cc=mingo@redhat.com \
--cc=mka@chromium.org \
--cc=pavel@ucw.cz \
--cc=pbonzini@redhat.com \
--cc=peterz@infradead.org \
--cc=rjw@rjwysocki.net \
--cc=rkrcmar@redhat.com \
--cc=tglx@linutronix.de \
--cc=thgarnie@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).