* [PATCH v3 11/27] x86/power/64: Adapt assembly for PIE support
From: Thomas Garnier via Virtualization @ 2018-05-23 19:54 UTC (permalink / raw)
To: Herbert Xu, David S . Miller, Thomas Gleixner, Ingo Molnar,
H . Peter Anvin, Peter Zijlstra, Josh Poimboeuf,
Greg Kroah-Hartman, Thomas Garnier, Philippe Ombredanne,
Kate Stewart, Arnaldo Carvalho de Melo, Yonghong Song,
Andrey Ryabinin, Kees Cook, Tom Lendacky, Kirill A . Shutemov,
Andy Lutomirski, Dominik Brodowski, Borislav Petkov,
Borislav Petkov, Rafael J . Wysocki
Cc: linux-arch, kvm, linux-pm, x86, linux-doc, linux-kernel,
virtualization, linux-sparse, linux-crypto, kernel-hardening,
xen-devel
In-Reply-To: <20180523195421.180248-1-thgarnie@google.com>
Change the assembly code to use only relative references of symbols for the
kernel to be PIE compatible.
Position Independent Executable (PIE) support will allow extending the
KASLR randomization range below the -2G memory limit.
Signed-off-by: Thomas Garnier <thgarnie@google.com>
---
arch/x86/power/hibernate_asm_64.S | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S
index ce8da3a0412c..6fdd7bbc3c33 100644
--- a/arch/x86/power/hibernate_asm_64.S
+++ b/arch/x86/power/hibernate_asm_64.S
@@ -24,7 +24,7 @@
#include <asm/frame.h>
ENTRY(swsusp_arch_suspend)
- movq $saved_context, %rax
+ leaq saved_context(%rip), %rax
movq %rsp, pt_regs_sp(%rax)
movq %rbp, pt_regs_bp(%rax)
movq %rsi, pt_regs_si(%rax)
@@ -115,7 +115,7 @@ ENTRY(restore_registers)
movq %rax, %cr4; # turn PGE back on
/* We don't restore %rax, it must be 0 anyway */
- movq $saved_context, %rax
+ leaq saved_context(%rip), %rax
movq pt_regs_sp(%rax), %rsp
movq pt_regs_bp(%rax), %rbp
movq pt_regs_si(%rax), %rsi
--
2.17.0.441.gb46fe60e1d-goog
^ permalink raw reply related
* [PATCH v3 12/27] x86/paravirt: Adapt assembly for PIE support
From: Thomas Garnier via Virtualization @ 2018-05-23 19:54 UTC (permalink / raw)
To: Herbert Xu, David S . Miller, Thomas Gleixner, Ingo Molnar,
H . Peter Anvin, Peter Zijlstra, Josh Poimboeuf,
Greg Kroah-Hartman, Thomas Garnier, Philippe Ombredanne,
Kate Stewart, Arnaldo Carvalho de Melo, Yonghong Song,
Andrey Ryabinin, Kees Cook, Tom Lendacky, Kirill A . Shutemov,
Andy Lutomirski, Dominik Brodowski, Borislav Petkov,
Borislav Petkov, Rafael J . Wysocki
Cc: linux-arch, kvm, linux-pm, x86, linux-doc, linux-kernel,
virtualization, linux-sparse, linux-crypto, kernel-hardening,
xen-devel
In-Reply-To: <20180523195421.180248-1-thgarnie@google.com>
If PIE is enabled, switch the paravirt assembly constraints to be
compatible. The %c/i constraints generate smaller code, so they are kept
by default.
Position Independent Executable (PIE) support will allow extending the
KASLR randomization range below the -2G memory limit.
Signed-off-by: Thomas Garnier <thgarnie@google.com>
---
arch/x86/include/asm/paravirt_types.h | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 180bc0bff0fb..140747a98d94 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -337,9 +337,17 @@ extern struct pv_lock_ops pv_lock_ops;
#define PARAVIRT_PATCH(x) \
(offsetof(struct paravirt_patch_template, x) / sizeof(void *))
+#ifdef CONFIG_X86_PIE
+#define paravirt_opptr_call "a"
+#define paravirt_opptr_type "p"
+#else
+#define paravirt_opptr_call "c"
+#define paravirt_opptr_type "i"
+#endif
+
#define paravirt_type(op) \
[paravirt_typenum] "i" (PARAVIRT_PATCH(op)), \
- [paravirt_opptr] "i" (&(op))
+ [paravirt_opptr] paravirt_opptr_type (&(op))
#define paravirt_clobber(clobber) \
[paravirt_clobber] "i" (clobber)
@@ -395,7 +403,7 @@ int paravirt_disable_iospace(void);
*/
#define PARAVIRT_CALL \
ANNOTATE_RETPOLINE_SAFE \
- "call *%c[paravirt_opptr];"
+ "call *%" paravirt_opptr_call "[paravirt_opptr];"
/*
* These macros are intended to wrap calls through one of the paravirt
--
2.17.0.441.gb46fe60e1d-goog
^ permalink raw reply related
* [PATCH v3 13/27] x86/boot/64: Build head64.c as mcmodel large when PIE is enabled
From: Thomas Garnier via Virtualization @ 2018-05-23 19:54 UTC (permalink / raw)
To: Herbert Xu, David S . Miller, Thomas Gleixner, Ingo Molnar,
H . Peter Anvin, Peter Zijlstra, Josh Poimboeuf,
Greg Kroah-Hartman, Thomas Garnier, Philippe Ombredanne,
Kate Stewart, Arnaldo Carvalho de Melo, Yonghong Song,
Andrey Ryabinin, Kees Cook, Tom Lendacky, Kirill A . Shutemov,
Andy Lutomirski, Dominik Brodowski, Borislav Petkov,
Borislav Petkov, Rafael J . Wysocki
Cc: linux-arch, kvm, linux-pm, x86, linux-doc, linux-kernel,
virtualization, linux-sparse, linux-crypto, kernel-hardening,
xen-devel
In-Reply-To: <20180523195421.180248-1-thgarnie@google.com>
The __startup_64 function assumes all symbols have relocated addresses
instead of the current boot virtual address. PIE-generated code favors
relative addresses, making all virtual and physical address math incorrect.
If PIE is enabled, build head64.c as mcmodel large instead to ensure absolute
references on all memory access. Add a global __force_order variable required
when using a large model with read_cr* functions.
To build head64.c as mcmodel=large, disable the retpoline gcc flags.
This code is used at early boot and removed later, so it doesn't need
retpoline mitigation.
Position Independent Executable (PIE) support will allow extending the
KASLR randomization range below the -2G memory limit.
Signed-off-by: Thomas Garnier <thgarnie@google.com>
---
arch/x86/kernel/Makefile | 6 ++++++
arch/x86/kernel/head64.c | 3 +++
2 files changed, 9 insertions(+)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 02d6f5cf4e70..0f6da4b216e0 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -22,6 +22,12 @@ CFLAGS_REMOVE_early_printk.o = -pg
CFLAGS_REMOVE_head64.o = -pg
endif
+ifdef CONFIG_X86_PIE
+# Remove PIE and retpoline flags that are incompatible with mcmodel=large
+CFLAGS_REMOVE_head64.o += -fPIE -mindirect-branch=thunk-extern -mindirect-branch-register
+CFLAGS_head64.o = -mcmodel=large
+endif
+
KASAN_SANITIZE_head$(BITS).o := n
KASAN_SANITIZE_dumpstack.o := n
KASAN_SANITIZE_dumpstack_$(BITS).o := n
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 2d29e47c056e..fa661fb97127 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -64,6 +64,9 @@ EXPORT_SYMBOL(vmemmap_base);
#define __head __section(.head.text)
+/* Required for read_cr3 when building as PIE */
+unsigned long __force_order;
+
static void __head *fixup_pointer(void *ptr, unsigned long physaddr)
{
return ptr - (void *)_text + (void *)physaddr;
--
2.17.0.441.gb46fe60e1d-goog
^ permalink raw reply related
* [PATCH v3 14/27] x86/percpu: Adapt percpu for PIE support
From: Thomas Garnier via Virtualization @ 2018-05-23 19:54 UTC (permalink / raw)
To: Herbert Xu, David S . Miller, Thomas Gleixner, Ingo Molnar,
H . Peter Anvin, Peter Zijlstra, Josh Poimboeuf,
Greg Kroah-Hartman, Thomas Garnier, Philippe Ombredanne,
Kate Stewart, Arnaldo Carvalho de Melo, Yonghong Song,
Andrey Ryabinin, Kees Cook, Tom Lendacky, Kirill A . Shutemov,
Andy Lutomirski, Dominik Brodowski, Borislav Petkov,
Borislav Petkov, Rafael J . Wysocki
Cc: linux-arch, kvm, linux-pm, x86, linux-doc, linux-kernel,
virtualization, linux-sparse, linux-crypto, kernel-hardening,
xen-devel
In-Reply-To: <20180523195421.180248-1-thgarnie@google.com>
Percpu uses a clever design where the .percpu ELF section has a virtual
address of zero and the relocation code avoids relocating specific
symbols. It makes the code simple and easily adaptable with or without
SMP support.
This design is incompatible with PIE because generated code always tries to
access the zero virtual address relative to the default mapping address.
It becomes impossible when KASLR is configured to go below -2G. This
patch solves this problem by removing the zero mapping and adapting the GS
base to be relative to the expected address. These changes are done only
when PIE is enabled. The original implementation is kept as-is
by default.
The assembly and PER_CPU macros are changed to use relative references
when PIE is enabled.
The KALLSYMS_ABSOLUTE_PERCPU configuration is disabled with PIE given
percpu symbols are not absolute in this case.
Position Independent Executable (PIE) support will allow extending the
KASLR randomization range below the -2G memory limit.
Signed-off-by: Thomas Garnier <thgarnie@google.com>
---
arch/x86/entry/calling.h | 2 +-
arch/x86/entry/entry_64.S | 4 ++--
arch/x86/include/asm/percpu.h | 25 +++++++++++++++++++------
arch/x86/include/asm/processor.h | 4 +++-
arch/x86/kernel/head_64.S | 4 ++++
arch/x86/kernel/setup_percpu.c | 5 ++++-
arch/x86/kernel/vmlinux.lds.S | 13 +++++++++++--
arch/x86/lib/cmpxchg16b_emu.S | 8 ++++----
arch/x86/xen/xen-asm.S | 12 ++++++------
init/Kconfig | 2 +-
10 files changed, 55 insertions(+), 24 deletions(-)
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 43c79e78770c..56d403366c5e 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -218,7 +218,7 @@ For 32-bit we have the following conventions - kernel is built with
.endm
#define THIS_CPU_user_pcid_flush_mask \
- PER_CPU_VAR(cpu_tlbstate) + TLB_STATE_user_pcid_flush_mask
+ PER_CPU_VAR(cpu_tlbstate + TLB_STATE_user_pcid_flush_mask)
.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 8638dca78191..c1700b00b1b6 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -361,7 +361,7 @@ ENTRY(__switch_to_asm)
#ifdef CONFIG_CC_STACKPROTECTOR
movq TASK_stack_canary(%rsi), %rbx
- movq %rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
+ movq %rbx, PER_CPU_VAR(irq_stack_union + stack_canary_offset)
#endif
#ifdef CONFIG_RETPOLINE
@@ -900,7 +900,7 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
/*
* Exception entry points.
*/
-#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
+#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw + (TSS_ist + ((x) - 1) * 8))
.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
ENTRY(\sym)
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index a06b07399d17..7d1271b536ea 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -5,9 +5,11 @@
#ifdef CONFIG_X86_64
#define __percpu_seg gs
#define __percpu_mov_op movq
+#define __percpu_rel (%rip)
#else
#define __percpu_seg fs
#define __percpu_mov_op movl
+#define __percpu_rel
#endif
#ifdef __ASSEMBLY__
@@ -28,10 +30,14 @@
#define PER_CPU(var, reg) \
__percpu_mov_op %__percpu_seg:this_cpu_off, reg; \
lea var(reg), reg
-#define PER_CPU_VAR(var) %__percpu_seg:var
+/* Compatible with Position Independent Code */
+#define PER_CPU_VAR(var) %__percpu_seg:(var)##__percpu_rel
+/* Rare absolute reference */
+#define PER_CPU_VAR_ABS(var) %__percpu_seg:var
#else /* ! SMP */
#define PER_CPU(var, reg) __percpu_mov_op $var, reg
-#define PER_CPU_VAR(var) var
+#define PER_CPU_VAR(var) (var)##__percpu_rel
+#define PER_CPU_VAR_ABS(var) var
#endif /* SMP */
#ifdef CONFIG_X86_64_SMP
@@ -209,27 +215,34 @@ do { \
pfo_ret__; \
})
+/* Position Independent code uses relative addresses only */
+#ifdef CONFIG_X86_PIE
+#define __percpu_stable_arg __percpu_arg(a1)
+#else
+#define __percpu_stable_arg __percpu_arg(P1)
+#endif
+
#define percpu_stable_op(op, var) \
({ \
typeof(var) pfo_ret__; \
switch (sizeof(var)) { \
case 1: \
- asm(op "b "__percpu_arg(P1)",%0" \
+ asm(op "b "__percpu_stable_arg ",%0" \
: "=q" (pfo_ret__) \
: "p" (&(var))); \
break; \
case 2: \
- asm(op "w "__percpu_arg(P1)",%0" \
+ asm(op "w "__percpu_stable_arg ",%0" \
: "=r" (pfo_ret__) \
: "p" (&(var))); \
break; \
case 4: \
- asm(op "l "__percpu_arg(P1)",%0" \
+ asm(op "l "__percpu_stable_arg ",%0" \
: "=r" (pfo_ret__) \
: "p" (&(var))); \
break; \
case 8: \
- asm(op "q "__percpu_arg(P1)",%0" \
+ asm(op "q "__percpu_stable_arg ",%0" \
: "=r" (pfo_ret__) \
: "p" (&(var))); \
break; \
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 81ae6877df29..5cf36fa30254 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -24,6 +24,7 @@ struct vm86;
#include <asm/special_insns.h>
#include <asm/fpu/types.h>
#include <asm/unwind_hints.h>
+#include <asm/sections.h>
#include <linux/personality.h>
#include <linux/cache.h>
@@ -400,7 +401,8 @@ DECLARE_INIT_PER_CPU(irq_stack_union);
static inline unsigned long cpu_kernelmode_gs_base(int cpu)
{
- return (unsigned long)per_cpu(irq_stack_union.gs_base, cpu);
+ return (unsigned long)per_cpu(irq_stack_union.gs_base, cpu) -
+ (unsigned long)__per_cpu_start;
}
DECLARE_PER_CPU(char *, irq_stack_ptr);
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 7c8f7ce93b9e..f44b259b26d3 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -266,7 +266,11 @@ ENDPROC(start_cpu0)
GLOBAL(initial_code)
.quad x86_64_start_kernel
GLOBAL(initial_gs)
+#ifdef CONFIG_X86_PIE
+ .quad 0
+#else
.quad INIT_PER_CPU_VAR(irq_stack_union)
+#endif
GLOBAL(initial_stack)
/*
* The SIZEOF_PTREGS gap is a convention which helps the in-kernel
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index ea554f812ee1..d61ecc3d2b6f 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -26,7 +26,7 @@
DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number);
EXPORT_PER_CPU_SYMBOL(cpu_number);
-#ifdef CONFIG_X86_64
+#if defined(CONFIG_X86_64) && !defined(CONFIG_X86_PIE)
#define BOOT_PERCPU_OFFSET ((unsigned long)__per_cpu_load)
#else
#define BOOT_PERCPU_OFFSET 0
@@ -40,6 +40,9 @@ unsigned long __per_cpu_offset[NR_CPUS] __ro_after_init = {
};
EXPORT_SYMBOL(__per_cpu_offset);
+/* Used to calculate gs_base for each CPU */
+EXPORT_SYMBOL(__per_cpu_start);
+
/*
* On x86_64 symbols referenced from code should be reachable using
* 32bit relocations. Reserve space for static percpu variables in
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 5e1458f609a1..f582fc4776dd 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -211,9 +211,14 @@ SECTIONS
/*
* percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the
* output PHDR, so the next output section - .init.text - should
- * start another segment - init.
+ * start another segment - init. For Position Independent Code, the
+ * per-cpu section cannot be zero-based because everything is relative.
*/
+#ifdef CONFIG_X86_PIE
+ PERCPU_SECTION(INTERNODE_CACHE_BYTES)
+#else
PERCPU_VADDR(INTERNODE_CACHE_BYTES, 0, :percpu)
+#endif
ASSERT(SIZEOF(.data..percpu) < CONFIG_PHYSICAL_START,
"per-CPU data too large - increase CONFIG_PHYSICAL_START")
#endif
@@ -389,7 +394,11 @@ SECTIONS
* Per-cpu symbols which need to be offset from __per_cpu_load
* for the boot processor.
*/
+#ifdef CONFIG_X86_PIE
+#define INIT_PER_CPU(x) init_per_cpu__##x = x
+#else
#define INIT_PER_CPU(x) init_per_cpu__##x = x + __per_cpu_load
+#endif
INIT_PER_CPU(gdt_page);
INIT_PER_CPU(irq_stack_union);
@@ -399,7 +408,7 @@ INIT_PER_CPU(irq_stack_union);
. = ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
"kernel image bigger than KERNEL_IMAGE_SIZE");
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) && !defined(CONFIG_X86_PIE)
. = ASSERT((irq_stack_union == 0),
"irq_stack_union is not at start of per-cpu area");
#endif
diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S
index 9b330242e740..254950604ae4 100644
--- a/arch/x86/lib/cmpxchg16b_emu.S
+++ b/arch/x86/lib/cmpxchg16b_emu.S
@@ -33,13 +33,13 @@ ENTRY(this_cpu_cmpxchg16b_emu)
pushfq
cli
- cmpq PER_CPU_VAR((%rsi)), %rax
+ cmpq PER_CPU_VAR_ABS((%rsi)), %rax
jne .Lnot_same
- cmpq PER_CPU_VAR(8(%rsi)), %rdx
+ cmpq PER_CPU_VAR_ABS(8(%rsi)), %rdx
jne .Lnot_same
- movq %rbx, PER_CPU_VAR((%rsi))
- movq %rcx, PER_CPU_VAR(8(%rsi))
+ movq %rbx, PER_CPU_VAR_ABS((%rsi))
+ movq %rcx, PER_CPU_VAR_ABS(8(%rsi))
popfq
mov $1, %al
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index 8019edd0125c..a5d73d3218be 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -21,7 +21,7 @@
ENTRY(xen_irq_enable_direct)
FRAME_BEGIN
/* Unmask events */
- movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
+ movb $0, PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_mask)
/*
* Preempt here doesn't matter because that will deal with any
@@ -30,7 +30,7 @@ ENTRY(xen_irq_enable_direct)
*/
/* Test for pending */
- testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
+ testb $0xff, PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_pending)
jz 1f
call check_events
@@ -45,7 +45,7 @@ ENTRY(xen_irq_enable_direct)
* non-zero.
*/
ENTRY(xen_irq_disable_direct)
- movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
+ movb $1, PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_mask)
ret
ENDPROC(xen_irq_disable_direct)
@@ -59,7 +59,7 @@ ENDPROC(xen_irq_disable_direct)
* x86 use opposite senses (mask vs enable).
*/
ENTRY(xen_save_fl_direct)
- testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
+ testb $0xff, PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_mask)
setz %ah
addb %ah, %ah
ret
@@ -80,7 +80,7 @@ ENTRY(xen_restore_fl_direct)
#else
testb $X86_EFLAGS_IF>>8, %ah
#endif
- setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
+ setz PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_mask)
/*
* Preempt here doesn't matter because that will deal with any
* pending interrupts. The pending check may end up being run
@@ -88,7 +88,7 @@ ENTRY(xen_restore_fl_direct)
*/
/* check for unmasked and pending */
- cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
+ cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info + XEN_vcpu_info_pending)
jnz 1f
call check_events
1:
diff --git a/init/Kconfig b/init/Kconfig
index 44e62e0dc51f..8915a3ce5f0c 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1391,7 +1391,7 @@ config KALLSYMS_ALL
config KALLSYMS_ABSOLUTE_PERCPU
bool
depends on KALLSYMS
- default X86_64 && SMP
+ default X86_64 && SMP && !X86_PIE
config KALLSYMS_BASE_RELATIVE
bool
--
2.17.0.441.gb46fe60e1d-goog
^ permalink raw reply related
* [PATCH v3 15/27] compiler: Option to default to hidden symbols
From: Thomas Garnier via Virtualization @ 2018-05-23 19:54 UTC (permalink / raw)
To: Herbert Xu, David S . Miller, Thomas Gleixner, Ingo Molnar,
H . Peter Anvin, Peter Zijlstra, Josh Poimboeuf,
Greg Kroah-Hartman, Thomas Garnier, Philippe Ombredanne,
Kate Stewart, Arnaldo Carvalho de Melo, Yonghong Song,
Andrey Ryabinin, Kees Cook, Tom Lendacky, Kirill A . Shutemov,
Andy Lutomirski, Dominik Brodowski, Borislav Petkov,
Borislav Petkov, Rafael J . Wysocki
Cc: linux-arch, kvm, linux-pm, x86, linux-doc, linux-kernel,
virtualization, linux-sparse, linux-crypto, kernel-hardening,
xen-devel
In-Reply-To: <20180523195421.180248-1-thgarnie@google.com>
Provide an option to default visibility to hidden except for key
symbols. This option is disabled by default and will be used by x86_64
PIE support to remove errors between compilation units.
The default visibility is also enabled for external symbols that are
compared, as they may be equal (start/end of sections). In this case,
older versions of GCC will remove the comparison if the symbols are
hidden. This issue exists at least on gcc 4.9 and before.
Signed-off-by: Thomas Garnier <thgarnie@google.com>
---
arch/x86/boot/boot.h | 2 +-
arch/x86/include/asm/setup.h | 2 +-
arch/x86/kernel/cpu/microcode/core.c | 4 ++--
drivers/base/firmware_loader/main.c | 4 ++--
include/asm-generic/sections.h | 6 ++++++
include/linux/compiler.h | 7 +++++++
init/Kconfig | 7 +++++++
kernel/kallsyms.c | 16 ++++++++--------
kernel/trace/trace.h | 4 ++--
lib/dynamic_debug.c | 4 ++--
10 files changed, 38 insertions(+), 18 deletions(-)
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index ef5a9cc66fb8..d726c35bdd96 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -193,7 +193,7 @@ static inline bool memcmp_gs(const void *s1, addr_t s2, size_t len)
}
/* Heap -- available for dynamic lists. */
-extern char _end[];
+extern char _end[] __default_visibility;
extern char *HEAP;
extern char *heap_end;
#define RESET_HEAP() ((void *)( HEAP = _end ))
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index ae13bc974416..083a6e99b884 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -68,7 +68,7 @@ static inline void x86_ce4100_early_setup(void) { }
* This is set up by the setup-routine at boot-time
*/
extern struct boot_params boot_params;
-extern char _text[];
+extern char _text[] __default_visibility;
static inline bool kaslr_enabled(void)
{
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 77e201301528..6a4f5d9d7eb6 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -149,8 +149,8 @@ static bool __init check_loader_disabled_bsp(void)
return *res;
}
-extern struct builtin_fw __start_builtin_fw[];
-extern struct builtin_fw __end_builtin_fw[];
+extern struct builtin_fw __start_builtin_fw[] __default_visibility;
+extern struct builtin_fw __end_builtin_fw[] __default_visibility;
bool get_builtin_firmware(struct cpio_data *cd, const char *name)
{
diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c
index 0943e7065e0e..2ffd019af2d4 100644
--- a/drivers/base/firmware_loader/main.c
+++ b/drivers/base/firmware_loader/main.c
@@ -94,8 +94,8 @@ static struct firmware_cache fw_cache;
#ifdef CONFIG_FW_LOADER
-extern struct builtin_fw __start_builtin_fw[];
-extern struct builtin_fw __end_builtin_fw[];
+extern struct builtin_fw __start_builtin_fw[] __default_visibility;
+extern struct builtin_fw __end_builtin_fw[] __default_visibility;
static void fw_copy_to_prealloc_buf(struct firmware *fw,
void *buf, size_t size)
diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h
index 849cd8eb5ca0..0a0e23405ddd 100644
--- a/include/asm-generic/sections.h
+++ b/include/asm-generic/sections.h
@@ -32,6 +32,9 @@
* __softirqentry_text_start, __softirqentry_text_end
* __start_opd, __end_opd
*/
+#ifdef CONFIG_DEFAULT_HIDDEN
+#pragma GCC visibility push(default)
+#endif
extern char _text[], _stext[], _etext[];
extern char _data[], _sdata[], _edata[];
extern char __bss_start[], __bss_stop[];
@@ -49,6 +52,9 @@ extern char __start_once[], __end_once[];
/* Start and end of .ctors section - used for constructor calls. */
extern char __ctors_start[], __ctors_end[];
+#ifdef CONFIG_DEFAULT_HIDDEN
+#pragma GCC visibility pop
+#endif
/* Start and end of .opd section - used for function descriptors. */
extern char __start_opd[], __end_opd[];
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 341b6cf8c029..81a9986cad78 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -278,6 +278,13 @@ unsigned long read_word_at_a_time(const void *addr)
__u.__val; \
})
+#ifdef CONFIG_DEFAULT_HIDDEN
+#pragma GCC visibility push(hidden)
+#define __default_visibility __attribute__((visibility ("default")))
+#else
+#define __default_visibility
+#endif
+
#endif /* __KERNEL__ */
#endif /* __ASSEMBLY__ */
diff --git a/init/Kconfig b/init/Kconfig
index 8915a3ce5f0c..0fc3a58d9f2f 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1694,6 +1694,13 @@ config PROFILING
config TRACEPOINTS
bool
+#
+# Default to hidden visibility for all symbols.
+# Useful for Position Independent Code to reduce global references.
+#
+config DEFAULT_HIDDEN
+ bool
+
source "arch/Kconfig"
endmenu # General setup
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index a23e21ada81b..f4e58b7a6daf 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -29,24 +29,24 @@
* These will be re-linked against their real values
* during the second link stage.
*/
-extern const unsigned long kallsyms_addresses[] __weak;
-extern const int kallsyms_offsets[] __weak;
-extern const u8 kallsyms_names[] __weak;
+extern const unsigned long kallsyms_addresses[] __weak __default_visibility;
+extern const int kallsyms_offsets[] __weak __default_visibility;
+extern const u8 kallsyms_names[] __weak __default_visibility;
/*
* Tell the compiler that the count isn't in the small data section if the arch
* has one (eg: FRV).
*/
extern const unsigned long kallsyms_num_syms
-__attribute__((weak, section(".rodata")));
+__attribute__((weak, section(".rodata"))) __default_visibility;
extern const unsigned long kallsyms_relative_base
-__attribute__((weak, section(".rodata")));
+__attribute__((weak, section(".rodata"))) __default_visibility;
-extern const u8 kallsyms_token_table[] __weak;
-extern const u16 kallsyms_token_index[] __weak;
+extern const u8 kallsyms_token_table[] __weak __default_visibility;
+extern const u16 kallsyms_token_index[] __weak __default_visibility;
-extern const unsigned long kallsyms_markers[] __weak;
+extern const unsigned long kallsyms_markers[] __weak __default_visibility;
/*
* Expand a compressed symbol data into the resulting uncompressed string,
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 6fb46a06c9dc..e659f452cf8c 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1746,8 +1746,8 @@ extern int trace_event_enable_disable(struct trace_event_file *file,
int enable, int soft_disable);
extern int tracing_alloc_snapshot(void);
-extern const char *__start___trace_bprintk_fmt[];
-extern const char *__stop___trace_bprintk_fmt[];
+extern const char *__start___trace_bprintk_fmt[] __default_visibility;
+extern const char *__stop___trace_bprintk_fmt[] __default_visibility;
extern const char *__start___tracepoint_str[];
extern const char *__stop___tracepoint_str[];
diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index c7c96bc7654a..40b752b53627 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -37,8 +37,8 @@
#include <linux/device.h>
#include <linux/netdevice.h>
-extern struct _ddebug __start___verbose[];
-extern struct _ddebug __stop___verbose[];
+extern struct _ddebug __start___verbose[] __default_visibility;
+extern struct _ddebug __stop___verbose[] __default_visibility;
struct ddebug_table {
struct list_head link;
--
2.17.0.441.gb46fe60e1d-goog
^ permalink raw reply related
* [PATCH v3 16/27] compiler: Option to add PROVIDE_HIDDEN replacement for weak symbols
From: Thomas Garnier via Virtualization @ 2018-05-23 19:54 UTC (permalink / raw)
To: Herbert Xu, David S . Miller, Thomas Gleixner, Ingo Molnar,
H . Peter Anvin, Peter Zijlstra, Josh Poimboeuf,
Greg Kroah-Hartman, Thomas Garnier, Philippe Ombredanne,
Kate Stewart, Arnaldo Carvalho de Melo, Yonghong Song,
Andrey Ryabinin, Kees Cook, Tom Lendacky, Kirill A . Shutemov,
Andy Lutomirski, Dominik Brodowski, Borislav Petkov,
Borislav Petkov, Rafael J . Wysocki
Cc: linux-arch, kvm, linux-pm, x86, linux-doc, linux-kernel,
virtualization, linux-sparse, linux-crypto, kernel-hardening,
xen-devel
In-Reply-To: <20180523195421.180248-1-thgarnie@google.com>
Provide an option to have a PROVIDE_HIDDEN (linker script) entry for
each weak symbol. This option solves an error on x86_64 where the linker
optimizes PIE-generated code to be non-PIE because --emit-relocs was used
instead of -pie (to reduce dynamic relocations).
Signed-off-by: Thomas Garnier <thgarnie@google.com>
---
init/Kconfig | 7 +++++++
scripts/link-vmlinux.sh | 14 ++++++++++++++
2 files changed, 21 insertions(+)
diff --git a/init/Kconfig b/init/Kconfig
index 0fc3a58d9f2f..2866cca86b4a 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1954,6 +1954,13 @@ config ASN1
inform it as to what tags are to be expected in a stream and what
functions to call on what tags.
+config WEAK_PROVIDE_HIDDEN
+ bool
+ help
+ Generate linker script PROVIDE_HIDDEN entries for all weak symbols. It
+ allows to prevent non-pie code being replaced by the linker if the
+ emit-relocs option is used instead of pie (useful for x86_64 pie).
+
source "kernel/Kconfig.locks"
config ARCH_HAS_SYNC_CORE_BEFORE_USERMODE
diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index 4bf811c09f59..f5d31119b9d7 100755
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -142,6 +142,17 @@ kallsyms()
${CC} ${aflags} -c -o ${2} ${afile}
}
+gen_weak_provide_hidden()
+{
+ if [ -n "${CONFIG_WEAK_PROVIDE_HIDDEN}" ]; then
+ local pattern="s/^\s\+ w \(\w\+\)$/PROVIDE_HIDDEN(\1 = .);/gp"
+ echo -e "SECTIONS {\n. = _end;" > .tmp_vmlinux_hiddenld
+ ${NM} ${1} | sed -n "${pattern}" >> .tmp_vmlinux_hiddenld
+ echo "}" >> .tmp_vmlinux_hiddenld
+ LDFLAGS_vmlinux="${LDFLAGS_vmlinux} -T .tmp_vmlinux_hiddenld"
+ fi
+}
+
# Create map file with all symbols from ${1}
# See mksymap for additional details
mksysmap()
@@ -226,6 +237,9 @@ modpost_link vmlinux.o
# modpost vmlinux.o to check for section mismatches
${MAKE} -f "${srctree}/scripts/Makefile.modpost" vmlinux.o
+# Generate weak linker script
+gen_weak_provide_hidden vmlinux.o
+
kallsymso=""
kallsyms_vmlinux=""
if [ -n "${CONFIG_KALLSYMS}" ]; then
--
2.17.0.441.gb46fe60e1d-goog
^ permalink raw reply related
* [PATCH v3 17/27] x86/relocs: Handle PIE relocations
From: Thomas Garnier via Virtualization @ 2018-05-23 19:54 UTC (permalink / raw)
To: Herbert Xu, David S . Miller, Thomas Gleixner, Ingo Molnar,
H . Peter Anvin, Peter Zijlstra, Josh Poimboeuf,
Greg Kroah-Hartman, Thomas Garnier, Philippe Ombredanne,
Kate Stewart, Arnaldo Carvalho de Melo, Yonghong Song,
Andrey Ryabinin, Kees Cook, Tom Lendacky, Kirill A . Shutemov,
Andy Lutomirski, Dominik Brodowski, Borislav Petkov,
Borislav Petkov, Rafael J . Wysocki
Cc: linux-arch, kvm, linux-pm, x86, linux-doc, linux-kernel,
virtualization, linux-sparse, linux-crypto, kernel-hardening,
xen-devel
In-Reply-To: <20180523195421.180248-1-thgarnie@google.com>
Change the relocation tool to correctly handle relocations generated by
-fPIE option:
- Add relocation for each entry of the .got section given the linker does not
generate R_X86_64_GLOB_DAT on a simple link.
- Ignore R_X86_64_GOTPCREL.
Signed-off-by: Thomas Garnier <thgarnie@google.com>
---
arch/x86/tools/relocs.c | 93 ++++++++++++++++++++++++++++++++++++++++-
1 file changed, 92 insertions(+), 1 deletion(-)
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index 220e97841e49..a35cc337f883 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -32,6 +32,7 @@ struct section {
Elf_Sym *symtab;
Elf_Rel *reltab;
char *strtab;
+ Elf_Addr *got;
};
static struct section *secs;
@@ -293,6 +294,35 @@ static Elf_Sym *sym_lookup(const char *symname)
return 0;
}
+static Elf_Sym *sym_lookup_addr(Elf_Addr addr, const char **name)
+{
+ int i;
+ for (i = 0; i < ehdr.e_shnum; i++) {
+ struct section *sec = &secs[i];
+ long nsyms;
+ Elf_Sym *symtab;
+ Elf_Sym *sym;
+
+ if (sec->shdr.sh_type != SHT_SYMTAB)
+ continue;
+
+ nsyms = sec->shdr.sh_size/sizeof(Elf_Sym);
+ symtab = sec->symtab;
+
+ for (sym = symtab; --nsyms >= 0; sym++) {
+ if (sym->st_value == addr) {
+ if (name) {
+ *name = sym_name(sec->link->strtab,
+ sym);
+ }
+ return sym;
+ }
+ }
+ }
+ return 0;
+}
+
+
#if BYTE_ORDER == LITTLE_ENDIAN
#define le16_to_cpu(val) (val)
#define le32_to_cpu(val) (val)
@@ -513,6 +543,33 @@ static void read_relocs(FILE *fp)
}
}
+static void read_got(FILE *fp)
+{
+ int i;
+ for (i = 0; i < ehdr.e_shnum; i++) {
+ struct section *sec = &secs[i];
+ sec->got = NULL;
+ if (sec->shdr.sh_type != SHT_PROGBITS ||
+ strcmp(sec_name(i), ".got")) {
+ continue;
+ }
+ sec->got = malloc(sec->shdr.sh_size);
+ if (!sec->got) {
+ die("malloc of %d bytes for got failed\n",
+ sec->shdr.sh_size);
+ }
+ if (fseek(fp, sec->shdr.sh_offset, SEEK_SET) < 0) {
+ die("Seek to %d failed: %s\n",
+ sec->shdr.sh_offset, strerror(errno));
+ }
+ if (fread(sec->got, 1, sec->shdr.sh_size, fp)
+ != sec->shdr.sh_size) {
+ die("Cannot read got: %s\n",
+ strerror(errno));
+ }
+ }
+}
+
static void print_absolute_symbols(void)
{
@@ -643,6 +700,32 @@ static void add_reloc(struct relocs *r, uint32_t offset)
r->offset[r->count++] = offset;
}
+/*
+ * The linker does not generate relocations for the GOT for the kernel.
+ * If a GOT is found, simulate the relocations that should have been included.
+ */
+static void walk_got_table(int (*process)(struct section *sec, Elf_Rel *rel,
+ Elf_Sym *sym, const char *symname),
+ struct section *sec)
+{
+ int i;
+ Elf_Addr entry;
+ Elf_Sym *sym;
+ const char *symname;
+ Elf_Rel rel;
+
+ for (i = 0; i < sec->shdr.sh_size/sizeof(Elf_Addr); i++) {
+ entry = sec->got[i];
+ sym = sym_lookup_addr(entry, &symname);
+ if (!sym)
+ die("Could not found got symbol for entry %d\n", i);
+ rel.r_offset = sec->shdr.sh_addr + i * sizeof(Elf_Addr);
+ rel.r_info = ELF_BITS == 64 ? R_X86_64_GLOB_DAT
+ : R_386_GLOB_DAT;
+ process(sec, &rel, sym, symname);
+ }
+}
+
static void walk_relocs(int (*process)(struct section *sec, Elf_Rel *rel,
Elf_Sym *sym, const char *symname))
{
@@ -656,6 +739,8 @@ static void walk_relocs(int (*process)(struct section *sec, Elf_Rel *rel,
struct section *sec = &secs[i];
if (sec->shdr.sh_type != SHT_REL_TYPE) {
+ if (sec->got)
+ walk_got_table(process, sec);
continue;
}
sec_symtab = sec->link;
@@ -765,6 +850,7 @@ static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym,
offset += per_cpu_load_addr;
switch (r_type) {
+ case R_X86_64_GOTPCREL:
case R_X86_64_NONE:
/* NONE can be ignored. */
break;
@@ -809,7 +895,7 @@ static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym,
* the relocations are processed.
* Make sure that the offset will fit.
*/
- if ((int32_t)offset != (int64_t)offset)
+ if (r_type != R_X86_64_64 && (int32_t)offset != (int64_t)offset)
die("Relocation offset doesn't fit in 32 bits\n");
if (r_type == R_X86_64_64)
@@ -818,6 +904,10 @@ static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym,
add_reloc(&relocs32, offset);
break;
+ case R_X86_64_GLOB_DAT:
+ add_reloc(&relocs64, offset);
+ break;
+
default:
die("Unsupported relocation type: %s (%d)\n",
rel_type(r_type), r_type);
@@ -1087,6 +1177,7 @@ void process(FILE *fp, int use_real_mode, int as_text,
read_strtabs(fp);
read_symtabs(fp);
read_relocs(fp);
+ read_got(fp);
if (ELF_BITS == 64)
percpu_init();
if (show_absolute_syms) {
--
2.17.0.441.gb46fe60e1d-goog
^ permalink raw reply related
* [PATCH v3 18/27] xen: Adapt assembly for PIE support
From: Thomas Garnier via Virtualization @ 2018-05-23 19:54 UTC (permalink / raw)
To: Herbert Xu, David S . Miller, Thomas Gleixner, Ingo Molnar,
H . Peter Anvin, Peter Zijlstra, Josh Poimboeuf,
Greg Kroah-Hartman, Thomas Garnier, Philippe Ombredanne,
Kate Stewart, Arnaldo Carvalho de Melo, Yonghong Song,
Andrey Ryabinin, Kees Cook, Tom Lendacky, Kirill A . Shutemov,
Andy Lutomirski, Dominik Brodowski, Borislav Petkov,
Borislav Petkov, Rafael J . Wysocki
Cc: linux-arch, kvm, linux-pm, x86, linux-doc, linux-kernel,
virtualization, linux-sparse, linux-crypto, kernel-hardening,
xen-devel
In-Reply-To: <20180523195421.180248-1-thgarnie@google.com>
Change the assembly code to use the new _ASM_MOVABS macro which gets a
symbol reference while being PIE compatible. Adapt the relocation tool
to ignore 32-bit Xen code.
Position Independent Executable (PIE) support will allow extending the
KASLR randomization range below the -2G memory limit.
Signed-off-by: Thomas Garnier <thgarnie@google.com>
---
arch/x86/tools/relocs.c | 16 +++++++++++++++-
arch/x86/xen/xen-head.S | 11 ++++++-----
arch/x86/xen/xen-pvh.S | 13 +++++++++----
3 files changed, 30 insertions(+), 10 deletions(-)
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index a35cc337f883..29283ad3950f 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -832,6 +832,16 @@ static int is_percpu_sym(ElfW(Sym) *sym, const char *symname)
strncmp(symname, "init_per_cpu_", 13);
}
+/*
+ * Check if the 32-bit relocation is within the xenpvh 32-bit code.
+ * If so, ignores it.
+ */
+static int is_in_xenpvh_assembly(ElfW(Addr) offset)
+{
+ ElfW(Sym) *sym = sym_lookup("pvh_start_xen");
+ return sym && (offset >= sym->st_value) &&
+ (offset < (sym->st_value + sym->st_size));
+}
static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym,
const char *symname)
@@ -895,8 +905,12 @@ static int do_reloc64(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym,
* the relocations are processed.
* Make sure that the offset will fit.
*/
- if (r_type != R_X86_64_64 && (int32_t)offset != (int64_t)offset)
+ if (r_type != R_X86_64_64 &&
+ (int32_t)offset != (int64_t)offset) {
+ if (is_in_xenpvh_assembly(offset))
+ break;
die("Relocation offset doesn't fit in 32 bits\n");
+ }
if (r_type == R_X86_64_64)
add_reloc(&relocs64, offset);
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 5077ead5e59c..4418ff0a1d96 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -28,14 +28,15 @@ ENTRY(startup_xen)
/* Clear .bss */
xor %eax,%eax
- mov $__bss_start, %_ASM_DI
- mov $__bss_stop, %_ASM_CX
+ _ASM_MOVABS $__bss_start, %_ASM_DI
+ _ASM_MOVABS $__bss_stop, %_ASM_CX
sub %_ASM_DI, %_ASM_CX
shr $__ASM_SEL(2, 3), %_ASM_CX
rep __ASM_SIZE(stos)
- mov %_ASM_SI, xen_start_info
- mov $init_thread_union+THREAD_SIZE, %_ASM_SP
+ _ASM_MOVABS $xen_start_info, %_ASM_AX
+ _ASM_MOV %_ASM_SI, (%_ASM_AX)
+ _ASM_MOVABS $init_thread_union+THREAD_SIZE, %_ASM_SP
#ifdef CONFIG_X86_64
/* Set up %gs.
@@ -46,7 +47,7 @@ ENTRY(startup_xen)
* init data section till per cpu areas are set up.
*/
movl $MSR_GS_BASE,%ecx
- movq $INIT_PER_CPU_VAR(irq_stack_union),%rax
+ movabsq $INIT_PER_CPU_VAR(irq_stack_union),%rax
cdq
wrmsr
#endif
diff --git a/arch/x86/xen/xen-pvh.S b/arch/x86/xen/xen-pvh.S
index e1a5fbeae08d..43e234c7c2de 100644
--- a/arch/x86/xen/xen-pvh.S
+++ b/arch/x86/xen/xen-pvh.S
@@ -101,8 +101,8 @@ ENTRY(pvh_start_xen)
call xen_prepare_pvh
/* startup_64 expects boot_params in %rsi. */
- mov $_pa(pvh_bootparams), %rsi
- mov $_pa(startup_64), %rax
+ movabs $_pa(pvh_bootparams), %rsi
+ movabs $_pa(startup_64), %rax
jmp *%rax
#else /* CONFIG_X86_64 */
@@ -137,10 +137,15 @@ END(pvh_start_xen)
.section ".init.data","aw"
.balign 8
+ /*
+ * Use a quad for _pa(gdt_start) because PIE does not understand a
+ * long is enough. The resulting value will still be in the lower long
+ * part.
+ */
gdt:
.word gdt_end - gdt_start
- .long _pa(gdt_start)
- .word 0
+ .quad _pa(gdt_start)
+ .balign 8
gdt_start:
.quad 0x0000000000000000 /* NULL descriptor */
.quad 0x0000000000000000 /* reserved */
--
2.17.0.441.gb46fe60e1d-goog
^ permalink raw reply related
* [PATCH v3 19/27] kvm: Adapt assembly for PIE support
From: Thomas Garnier via Virtualization @ 2018-05-23 19:54 UTC (permalink / raw)
To: Herbert Xu, David S . Miller, Thomas Gleixner, Ingo Molnar,
H . Peter Anvin, Peter Zijlstra, Josh Poimboeuf,
Greg Kroah-Hartman, Thomas Garnier, Philippe Ombredanne,
Kate Stewart, Arnaldo Carvalho de Melo, Yonghong Song,
Andrey Ryabinin, Kees Cook, Tom Lendacky, Kirill A . Shutemov,
Andy Lutomirski, Dominik Brodowski, Borislav Petkov,
Borislav Petkov, Rafael J . Wysocki
Cc: linux-arch, kvm, linux-pm, x86, linux-doc, linux-kernel,
virtualization, linux-sparse, linux-crypto, kernel-hardening,
xen-devel
In-Reply-To: <20180523195421.180248-1-thgarnie@google.com>
Change the assembly code to use only relative references of symbols for the
kernel to be PIE compatible. The new _ASM_MOVABS macro is used to
get the address of a symbol on both 32 and 64-bit with PIE support.
Position Independent Executable (PIE) support will allow extending the
KASLR randomization range below the -2G memory limit.
Signed-off-by: Thomas Garnier <thgarnie@google.com>
---
arch/x86/include/asm/kvm_host.h | 8 ++++++--
arch/x86/kernel/kvm.c | 6 ++++--
arch/x86/kvm/svm.c | 4 ++--
3 files changed, 12 insertions(+), 6 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b27de80f5870..312a398465e8 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1389,9 +1389,13 @@ asmlinkage void kvm_spurious_fault(void);
".pushsection .fixup, \"ax\" \n" \
"667: \n\t" \
cleanup_insn "\n\t" \
- "cmpb $0, kvm_rebooting \n\t" \
+ "cmpb $0, kvm_rebooting" __ASM_SEL(,(%%rip)) " \n\t" \
"jne 668b \n\t" \
- __ASM_SIZE(push) " $666b \n\t" \
+ __ASM_SIZE(push) "$0 \n\t" \
+ __ASM_SIZE(push) "%%" _ASM_AX " \n\t" \
+ _ASM_MOVABS " $666b, %%" _ASM_AX "\n\t" \
+ _ASM_MOV " %%" _ASM_AX ", " __ASM_SEL(4,8) "(%%" _ASM_SP ") \n\t" \
+ __ASM_SIZE(pop) "%%" _ASM_AX " \n\t" \
"call kvm_spurious_fault \n\t" \
".popsection \n\t" \
_ASM_EXTABLE(666b, 667b)
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 7867417cfaff..394c00f21f05 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -726,8 +726,10 @@ asm(
".global __raw_callee_save___kvm_vcpu_is_preempted;"
".type __raw_callee_save___kvm_vcpu_is_preempted, @function;"
"__raw_callee_save___kvm_vcpu_is_preempted:"
-"movq __per_cpu_offset(,%rdi,8), %rax;"
-"cmpb $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax);"
+"leaq __per_cpu_offset(%rip), %rax;"
+"movq (%rax,%rdi,8), %rax;"
+"addq " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rip), %rax;"
+"cmpb $0, (%rax);"
"setne %al;"
"ret;"
".popsection");
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 220e5a89465a..2b0b25be5236 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -701,12 +701,12 @@ static u32 svm_msrpm_offset(u32 msr)
static inline void clgi(void)
{
- asm volatile (__ex(SVM_CLGI));
+ asm volatile (__ex(SVM_CLGI) : :);
}
static inline void stgi(void)
{
- asm volatile (__ex(SVM_STGI));
+ asm volatile (__ex(SVM_STGI) : :);
}
static inline void invlpga(unsigned long addr, u32 asid)
--
2.17.0.441.gb46fe60e1d-goog
^ permalink raw reply related
* [PATCH v3 20/27] x86: Support global stack cookie
From: Thomas Garnier via Virtualization @ 2018-05-23 19:54 UTC (permalink / raw)
To: Herbert Xu, David S . Miller, Thomas Gleixner, Ingo Molnar,
H . Peter Anvin, Peter Zijlstra, Josh Poimboeuf,
Greg Kroah-Hartman, Thomas Garnier, Philippe Ombredanne,
Kate Stewart, Arnaldo Carvalho de Melo, Yonghong Song,
Andrey Ryabinin, Kees Cook, Tom Lendacky, Kirill A . Shutemov,
Andy Lutomirski, Dominik Brodowski, Borislav Petkov,
Borislav Petkov, Rafael J . Wysocki
Cc: linux-arch, kvm, linux-pm, x86, linux-doc, linux-kernel,
virtualization, linux-sparse, linux-crypto, kernel-hardening,
xen-devel
In-Reply-To: <20180523195421.180248-1-thgarnie@google.com>
Add an off-by-default configuration option to use a global stack cookie
instead of the default TLS. This configuration option will only be used
with PIE binaries.
For the kernel stack cookie, the compiler uses mcmodel=kernel to switch
from the fs segment to the gs segment. A PIE binary does not use
mcmodel=kernel because it can be relocated anywhere, therefore the
compiler will default to the fs segment register. This is fixed on the
latest version of gcc.
If the segment selector is available, it will be automatically added. If
the automatic configuration was selected, a warning is written and the
global variable stack cookie is used. If a specific stack mode was
selected (regular or strong) and the compiler does not support selecting
the segment register, an error is emitted.
Signed-off-by: Thomas Garnier <thgarnie@google.com>
---
arch/x86/Kconfig | 12 ++++++++++++
arch/x86/Makefile | 9 +++++++++
arch/x86/entry/entry_32.S | 3 ++-
arch/x86/entry/entry_64.S | 3 ++-
arch/x86/include/asm/processor.h | 3 ++-
arch/x86/include/asm/stackprotector.h | 19 ++++++++++++++-----
arch/x86/kernel/asm-offsets.c | 3 ++-
arch/x86/kernel/asm-offsets_32.c | 3 ++-
arch/x86/kernel/asm-offsets_64.c | 3 ++-
arch/x86/kernel/cpu/common.c | 3 ++-
arch/x86/kernel/head_32.S | 3 ++-
arch/x86/kernel/process.c | 5 +++++
12 files changed, 56 insertions(+), 13 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index dda87a331a7e..0fc2e981458d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2199,6 +2199,18 @@ config RANDOMIZE_MEMORY_PHYSICAL_PADDING
If unsure, leave at the default value.
+config X86_GLOBAL_STACKPROTECTOR
+ bool "Stack cookie using a global variable"
+ depends on CC_STACKPROTECTOR_AUTO
+ default n
+ ---help---
+ This option turns on the "stack-protector" GCC feature using a global
+ variable instead of a segment register. It is useful when the
+ compiler does not support custom segment registers when building a
+ position independent (PIE) binary.
+
+ If unsure, say N
+
config HOTPLUG_CPU
bool "Support for hot-pluggable CPUs"
depends on SMP
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 60135cbd905c..277ffc57ae13 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -141,6 +141,15 @@ else
KBUILD_CFLAGS += $(call cc-option,-funit-at-a-time)
endif
+ifdef CONFIG_X86_GLOBAL_STACKPROTECTOR
+ ifeq ($(call cc-option, -mstack-protector-guard=global),)
+ $(error Cannot use CONFIG_X86_GLOBAL_STACKPROTECTOR: \
+ -mstack-protector-guard=global not supported \
+ by compiler)
+ endif
+ KBUILD_CFLAGS += -mstack-protector-guard=global
+endif
+
ifdef CONFIG_X86_X32
x32_ld_ok := $(call try-run,\
/bin/echo -e '1: .quad 1b' | \
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index bb4f540be234..2f9bdbc6be6d 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -241,7 +241,8 @@ ENTRY(__switch_to_asm)
movl %esp, TASK_threadsp(%eax)
movl TASK_threadsp(%edx), %esp
-#ifdef CONFIG_CC_STACKPROTECTOR
+#if defined(CONFIG_CC_STACKPROTECTOR) && \
+ !defined(CONFIG_X86_GLOBAL_STACKPROTECTOR)
movl TASK_stack_canary(%edx), %ebx
movl %ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
#endif
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index c1700b00b1b6..c8b4e8a7d1e1 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -359,7 +359,8 @@ ENTRY(__switch_to_asm)
movq %rsp, TASK_threadsp(%rdi)
movq TASK_threadsp(%rsi), %rsp
-#ifdef CONFIG_CC_STACKPROTECTOR
+#if defined(CONFIG_CC_STACKPROTECTOR) && \
+ !defined(CONFIG_X86_GLOBAL_STACKPROTECTOR)
movq TASK_stack_canary(%rsi), %rbx
movq %rbx, PER_CPU_VAR(irq_stack_union + stack_canary_offset)
#endif
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 5cf36fa30254..6e5d9ac3bf17 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -414,7 +414,8 @@ extern asmlinkage void ignore_sysret(void);
void save_fsgs_for_kvm(void);
#endif
#else /* X86_64 */
-#ifdef CONFIG_CC_STACKPROTECTOR
+#if defined(CONFIG_CC_STACKPROTECTOR) && \
+ !defined(CONFIG_X86_GLOBAL_STACKPROTECTOR)
/*
* Make sure stack canary segment base is cached-aligned:
* "For Intel Atom processors, avoid non zero segment base address
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index 371b3a4af000..5063f57d99f5 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -52,6 +52,10 @@
#define GDT_STACK_CANARY_INIT \
[GDT_ENTRY_STACK_CANARY] = GDT_ENTRY_INIT(0x4090, 0, 0x18),
+#ifdef CONFIG_X86_GLOBAL_STACKPROTECTOR
+extern unsigned long __stack_chk_guard;
+#endif
+
/*
* Initialize the stackprotector canary value.
*
@@ -63,7 +67,7 @@ static __always_inline void boot_init_stack_canary(void)
u64 canary;
u64 tsc;
-#ifdef CONFIG_X86_64
+#if defined(CONFIG_X86_64) && !defined(CONFIG_X86_GLOBAL_STACKPROTECTOR)
BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40);
#endif
/*
@@ -77,17 +81,22 @@ static __always_inline void boot_init_stack_canary(void)
canary += tsc + (tsc << 32UL);
canary &= CANARY_MASK;
+#ifdef CONFIG_X86_GLOBAL_STACKPROTECTOR
+ if (__stack_chk_guard == 0)
+ __stack_chk_guard = canary ?: 1;
+#else /* !CONFIG_X86_GLOBAL_STACKPROTECTOR */
current->stack_canary = canary;
#ifdef CONFIG_X86_64
this_cpu_write(irq_stack_union.stack_canary, canary);
-#else
+#else /* CONFIG_X86_32 */
this_cpu_write(stack_canary.canary, canary);
#endif
+#endif
}
static inline void setup_stack_canary_segment(int cpu)
{
-#ifdef CONFIG_X86_32
+#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_GLOBAL_STACKPROTECTOR)
unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu);
struct desc_struct *gdt_table = get_cpu_gdt_rw(cpu);
struct desc_struct desc;
@@ -100,7 +109,7 @@ static inline void setup_stack_canary_segment(int cpu)
static inline void load_stack_canary_segment(void)
{
-#ifdef CONFIG_X86_32
+#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_GLOBAL_STACKPROTECTOR)
asm("mov %0, %%gs" : : "r" (__KERNEL_STACK_CANARY) : "memory");
#endif
}
@@ -116,7 +125,7 @@ static inline void setup_stack_canary_segment(int cpu)
static inline void load_stack_canary_segment(void)
{
-#ifdef CONFIG_X86_32
+#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_GLOBAL_STACKPROTECTOR)
asm volatile ("mov %0, %%gs" : : "r" (0));
#endif
}
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 76417a9aab73..4c9e1b667bda 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -32,7 +32,8 @@
void common(void) {
BLANK();
OFFSET(TASK_threadsp, task_struct, thread.sp);
-#ifdef CONFIG_CC_STACKPROTECTOR
+#if defined(CONFIG_CC_STACKPROTECTOR) && \
+ !defined(CONFIG_X86_GLOBAL_STACKPROTECTOR)
OFFSET(TASK_stack_canary, task_struct, stack_canary);
#endif
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index f91ba53e06c8..cf8ef55a8b82 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -50,7 +50,8 @@ void foo(void)
DEFINE(TSS_sysenter_sp0, offsetof(struct cpu_entry_area, tss.x86_tss.sp0) -
offsetofend(struct cpu_entry_area, entry_stack_page.stack));
-#ifdef CONFIG_CC_STACKPROTECTOR
+#if defined(CONFIG_CC_STACKPROTECTOR) && \
+ !defined(CONFIG_X86_GLOBAL_STACKPROTECTOR)
BLANK();
OFFSET(stack_canary_offset, stack_canary, canary);
#endif
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index bf51e51d808d..a3c7e14f6434 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -69,7 +69,8 @@ int main(void)
OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
BLANK();
-#ifdef CONFIG_CC_STACKPROTECTOR
+#if defined(CONFIG_CC_STACKPROTECTOR) && \
+ !defined(CONFIG_X86_GLOBAL_STACKPROTECTOR)
DEFINE(stack_canary_offset, offsetof(union irq_stack_union, stack_canary));
BLANK();
#endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 39ed2e6ff8a0..d279a7df5018 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1540,7 +1540,8 @@ DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) =
(unsigned long)&init_thread_union + THREAD_SIZE;
EXPORT_PER_CPU_SYMBOL(cpu_current_top_of_stack);
-#ifdef CONFIG_CC_STACKPROTECTOR
+#if defined(CONFIG_CC_STACKPROTECTOR) && \
+ !defined(CONFIG_X86_GLOBAL_STACKPROTECTOR)
DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
#endif
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index b59e4fb40fd9..0e849242de91 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -375,7 +375,8 @@ ENDPROC(startup_32_smp)
*/
__INIT
setup_once:
-#ifdef CONFIG_CC_STACKPROTECTOR
+#if defined(CONFIG_CC_STACKPROTECTOR) && \
+ !defined(CONFIG_X86_GLOBAL_STACKPROTECTOR)
/*
* Configure the stack canary. The linker can't handle this by
* relocation. Manually set base address in stack canary
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 03408b942adb..ebe21d258a82 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -86,6 +86,11 @@ EXPORT_PER_CPU_SYMBOL(cpu_tss_rw);
DEFINE_PER_CPU(bool, __tss_limit_invalid);
EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid);
+#ifdef CONFIG_X86_GLOBAL_STACKPROTECTOR
+unsigned long __stack_chk_guard __read_mostly;
+EXPORT_SYMBOL(__stack_chk_guard);
+#endif
+
/*
* this gets called so that we can store lazy state into memory and copy the
* current task into the new thread.
--
2.17.0.441.gb46fe60e1d-goog
^ permalink raw reply related
* [PATCH v3 21/27] x86/ftrace: Adapt function tracing for PIE support
From: Thomas Garnier via Virtualization @ 2018-05-23 19:54 UTC (permalink / raw)
To: Herbert Xu, David S . Miller, Thomas Gleixner, Ingo Molnar,
H . Peter Anvin, Peter Zijlstra, Josh Poimboeuf,
Greg Kroah-Hartman, Thomas Garnier, Philippe Ombredanne,
Kate Stewart, Arnaldo Carvalho de Melo, Yonghong Song,
Andrey Ryabinin, Kees Cook, Tom Lendacky, Kirill A . Shutemov,
Andy Lutomirski, Dominik Brodowski, Borislav Petkov,
Borislav Petkov, Rafael J . Wysocki
Cc: linux-arch, kvm, linux-pm, x86, linux-doc, linux-kernel,
virtualization, linux-sparse, linux-crypto, kernel-hardening,
xen-devel
In-Reply-To: <20180523195421.180248-1-thgarnie@google.com>
When using -fPIE/PIC with function tracing, the compiler generates a
call through the GOT (call *__fentry__@GOTPCREL). This instruction
takes 6 bytes instead of 5 on the usual relative call.
If PIE is enabled, replace the 6th byte of the GOT call by a 1-byte nop
so ftrace can handle the previous 5-bytes as before.
Position Independent Executable (PIE) support will allow extending the
KASLR randomization range below the -2G memory limit.
Signed-off-by: Thomas Garnier <thgarnie@google.com>
---
arch/x86/include/asm/ftrace.h | 6 +++--
arch/x86/include/asm/sections.h | 4 ++++
arch/x86/kernel/ftrace.c | 42 +++++++++++++++++++++++++++++++--
3 files changed, 48 insertions(+), 4 deletions(-)
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index c18ed65287d5..8f2decce38d8 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -25,9 +25,11 @@ extern void __fentry__(void);
static inline unsigned long ftrace_call_adjust(unsigned long addr)
{
/*
- * addr is the address of the mcount call instruction.
- * recordmcount does the necessary offset calculation.
+ * addr is the address of the mcount call instruction. PIE has always a
+ * byte added to the start of the function.
*/
+ if (IS_ENABLED(CONFIG_X86_PIE))
+ addr -= 1;
return addr;
}
diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h
index 5c019d23d06b..da3d98bb2bcb 100644
--- a/arch/x86/include/asm/sections.h
+++ b/arch/x86/include/asm/sections.h
@@ -13,4 +13,8 @@ extern char __end_rodata_hpage_align[];
extern char __entry_trampoline_start[], __entry_trampoline_end[];
#endif
+#if defined(CONFIG_X86_PIE)
+extern char __start_got[], __end_got[];
+#endif
+
#endif /* _ASM_X86_SECTIONS_H */
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 01ebcb6f263e..73b3c30cb7a3 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -102,7 +102,7 @@ static const unsigned char *ftrace_nop_replace(void)
static int
ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code,
- unsigned const char *new_code)
+ unsigned const char *new_code)
{
unsigned char replaced[MCOUNT_INSN_SIZE];
@@ -135,6 +135,44 @@ ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code,
return 0;
}
+/* Bytes before call GOT offset */
+const unsigned char got_call_preinsn[] = { 0xff, 0x15 };
+
+static int
+ftrace_modify_initial_code(unsigned long ip, unsigned const char *old_code,
+ unsigned const char *new_code)
+{
+ unsigned char replaced[MCOUNT_INSN_SIZE + 1];
+
+ ftrace_expected = old_code;
+
+ /*
+ * If PIE is not enabled or no GOT call was found, default to the
+ * original approach to code modification.
+ */
+ if (!IS_ENABLED(CONFIG_X86_PIE) ||
+ probe_kernel_read(replaced, (void *)ip, sizeof(replaced)) ||
+ memcmp(replaced, got_call_preinsn, sizeof(got_call_preinsn)))
+ return ftrace_modify_code_direct(ip, old_code, new_code);
+
+ /*
+ * Build a nop slide with a 5-byte nop and 1-byte nop to keep the ftrace
+ * hooking algorithm working with the expected 5 bytes instruction.
+ */
+ memcpy(replaced, new_code, MCOUNT_INSN_SIZE);
+ replaced[MCOUNT_INSN_SIZE] = ideal_nops[1][0];
+
+ ip = text_ip_addr(ip);
+
+ if (probe_kernel_write((void *)ip, replaced, sizeof(replaced)))
+ return -EPERM;
+
+ sync_core();
+
+ return 0;
+
+}
+
int ftrace_make_nop(struct module *mod,
struct dyn_ftrace *rec, unsigned long addr)
{
@@ -153,7 +191,7 @@ int ftrace_make_nop(struct module *mod,
* just modify the code directly.
*/
if (addr == MCOUNT_ADDR)
- return ftrace_modify_code_direct(rec->ip, old, new);
+ return ftrace_modify_initial_code(rec->ip, old, new);
ftrace_expected = NULL;
--
2.17.0.441.gb46fe60e1d-goog
^ permalink raw reply related
* [PATCH v3 22/27] x86/modules: Add option to start module section after kernel
From: Thomas Garnier via Virtualization @ 2018-05-23 19:54 UTC (permalink / raw)
To: Herbert Xu, David S . Miller, Thomas Gleixner, Ingo Molnar,
H . Peter Anvin, Peter Zijlstra, Josh Poimboeuf,
Greg Kroah-Hartman, Thomas Garnier, Philippe Ombredanne,
Kate Stewart, Arnaldo Carvalho de Melo, Yonghong Song,
Andrey Ryabinin, Kees Cook, Tom Lendacky, Kirill A . Shutemov,
Andy Lutomirski, Dominik Brodowski, Borislav Petkov,
Borislav Petkov, Rafael J . Wysocki
Cc: linux-arch, kvm, linux-pm, x86, linux-doc, linux-kernel,
virtualization, linux-sparse, linux-crypto, kernel-hardening,
xen-devel
In-Reply-To: <20180523195421.180248-1-thgarnie@google.com>
Add an option so the module section is just after the mapped kernel. It
will ensure position independent modules are always at the right
distance from the kernel and do not require mcmodel=large. It also
optimizes the available size for modules by getting rid of the empty
space on kernel randomization range.
Signed-off-by: Thomas Garnier <thgarnie@google.com>
---
Documentation/x86/x86_64/mm.txt | 3 +++
arch/x86/Kconfig | 4 ++++
arch/x86/include/asm/pgtable_64_types.h | 6 ++++++
arch/x86/kernel/head64.c | 5 ++++-
arch/x86/mm/dump_pagetables.c | 3 ++-
5 files changed, 19 insertions(+), 2 deletions(-)
diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
index 600bc2afa27d..e3810a1db74b 100644
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -79,3 +79,6 @@ Their order is preserved but their base will be offset early at boot time.
Be very careful vs. KASLR when changing anything here. The KASLR address
range must not overlap with anything except the KASAN shadow area, which is
correct as KASAN disables KASLR.
+
+If CONFIG_DYNAMIC_MODULE_BASE is enabled, the module section follows the end of
+the mapped kernel.
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0fc2e981458d..28eb2b3757bf 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2199,6 +2199,10 @@ config RANDOMIZE_MEMORY_PHYSICAL_PADDING
If unsure, leave at the default value.
+# Module section starts just after the end of the kernel module
+config DYNAMIC_MODULE_BASE
+ bool
+
config X86_GLOBAL_STACKPROTECTOR
bool "Stack cookie using a global variable"
depends on CC_STACKPROTECTOR_AUTO
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index adb47552e6bb..3ab25b908879 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -7,6 +7,7 @@
#ifndef __ASSEMBLY__
#include <linux/types.h>
#include <asm/kaslr.h>
+#include <asm/sections.h>
/*
* These are used to make use of C type-checking..
@@ -126,7 +127,12 @@ extern unsigned int ptrs_per_p4d;
#define VMALLOC_END (VMALLOC_START + (VMALLOC_SIZE_TB << 40) - 1)
+#ifdef CONFIG_DYNAMIC_MODULE_BASE
+#define MODULES_VADDR ALIGN(((unsigned long)_end + PAGE_SIZE), PMD_SIZE)
+#else
#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
+#endif
+
/* The module sections ends with the start of the fixmap */
#define MODULES_END _AC(0xffffffffff000000, UL)
#define MODULES_LEN (MODULES_END - MODULES_VADDR)
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index fa661fb97127..3a1ce822e1c0 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -394,12 +394,15 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
* Build-time sanity checks on the kernel image and module
* area mappings. (these are purely build-time and produce no code)
*/
+#ifndef CONFIG_DYNAMIC_MODULE_BASE
BUILD_BUG_ON(MODULES_VADDR < __START_KERNEL_map);
BUILD_BUG_ON(MODULES_VADDR - __START_KERNEL_map < KERNEL_IMAGE_SIZE);
- BUILD_BUG_ON(MODULES_LEN + KERNEL_IMAGE_SIZE > 2*PUD_SIZE);
+ BUILD_BUG_ON(!IS_ENABLED(CONFIG_RANDOMIZE_BASE_LARGE) &&
+ MODULES_LEN + KERNEL_IMAGE_SIZE > 2*PUD_SIZE);
BUILD_BUG_ON((__START_KERNEL_map & ~PMD_MASK) != 0);
BUILD_BUG_ON((MODULES_VADDR & ~PMD_MASK) != 0);
BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
+#endif
MAYBE_BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
(__START_KERNEL & PGDIR_MASK)));
BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END);
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index cc7ff5957194..dca4098ce4fd 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -105,7 +105,7 @@ static struct addr_marker address_markers[] = {
[EFI_END_NR] = { EFI_VA_END, "EFI Runtime Services" },
#endif
[HIGH_KERNEL_NR] = { __START_KERNEL_map, "High Kernel Mapping" },
- [MODULES_VADDR_NR] = { MODULES_VADDR, "Modules" },
+ [MODULES_VADDR_NR] = { 0/*MODULES_VADDR*/, "Modules" },
[MODULES_END_NR] = { MODULES_END, "End Modules" },
[FIXADDR_START_NR] = { FIXADDR_START, "Fixmap Area" },
[END_OF_SPACE_NR] = { -1, NULL }
@@ -600,6 +600,7 @@ static int __init pt_dump_init(void)
address_markers[KASAN_SHADOW_START_NR].start_address = KASAN_SHADOW_START;
address_markers[KASAN_SHADOW_END_NR].start_address = KASAN_SHADOW_END;
#endif
+ address_markers[MODULES_VADDR_NR].start_address = MODULES_VADDR;
#endif
#ifdef CONFIG_X86_32
address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
--
2.17.0.441.gb46fe60e1d-goog
^ permalink raw reply related
* [PATCH v3 23/27] x86/modules: Adapt module loading for PIE support
From: Thomas Garnier via Virtualization @ 2018-05-23 19:54 UTC (permalink / raw)
To: Herbert Xu, David S . Miller, Thomas Gleixner, Ingo Molnar,
H . Peter Anvin, Peter Zijlstra, Josh Poimboeuf,
Greg Kroah-Hartman, Thomas Garnier, Philippe Ombredanne,
Kate Stewart, Arnaldo Carvalho de Melo, Yonghong Song,
Andrey Ryabinin, Kees Cook, Tom Lendacky, Kirill A . Shutemov,
Andy Lutomirski, Dominik Brodowski, Borislav Petkov,
Borislav Petkov, Rafael J . Wysocki
Cc: linux-arch, kvm, linux-pm, x86, linux-doc, linux-kernel,
virtualization, linux-sparse, linux-crypto, kernel-hardening,
xen-devel
In-Reply-To: <20180523195421.180248-1-thgarnie@google.com>
Adapt module loading to support PIE relocations. Generate dynamic GOT if
a symbol requires it but no entry exists in the kernel GOT.
Position Independent Executable (PIE) support will allow extending the
KASLR randomization range below the -2G memory limit.
Signed-off-by: Thomas Garnier <thgarnie@google.com>
---
arch/x86/Makefile | 4 +
arch/x86/include/asm/module.h | 11 ++
arch/x86/include/asm/sections.h | 4 +
arch/x86/kernel/module.c | 181 +++++++++++++++++++++++++++++++-
arch/x86/kernel/module.lds | 3 +
5 files changed, 198 insertions(+), 5 deletions(-)
create mode 100644 arch/x86/kernel/module.lds
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 277ffc57ae13..20bb6cbd8938 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -134,7 +134,11 @@ else
KBUILD_CFLAGS += $(cflags-y)
KBUILD_CFLAGS += -mno-red-zone
+ifdef CONFIG_X86_PIE
+ KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/x86/kernel/module.lds
+else
KBUILD_CFLAGS += -mcmodel=kernel
+endif
# -funit-at-a-time shrinks the kernel .text considerably
# unfortunately it makes reading oopses harder.
diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h
index 7948a17febb4..68ff05e14288 100644
--- a/arch/x86/include/asm/module.h
+++ b/arch/x86/include/asm/module.h
@@ -5,12 +5,23 @@
#include <asm-generic/module.h>
#include <asm/orc_types.h>
+#ifdef CONFIG_X86_PIE
+struct mod_got_sec {
+ struct elf64_shdr *got;
+ int got_num_entries;
+ int got_max_entries;
+};
+#endif
+
struct mod_arch_specific {
#ifdef CONFIG_UNWINDER_ORC
unsigned int num_orcs;
int *orc_unwind_ip;
struct orc_entry *orc_unwind;
#endif
+#ifdef CONFIG_X86_PIE
+ struct mod_got_sec core;
+#endif
};
#ifdef CONFIG_X86_64
diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h
index da3d98bb2bcb..89b3a95c8d11 100644
--- a/arch/x86/include/asm/sections.h
+++ b/arch/x86/include/asm/sections.h
@@ -17,4 +17,8 @@ extern char __entry_trampoline_start[], __entry_trampoline_end[];
extern char __start_got[], __end_got[];
#endif
+#if defined(CONFIG_X86_PIE)
+extern char __start_got[], __end_got[];
+#endif
+
#endif /* _ASM_X86_SECTIONS_H */
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index f58336af095c..88895f3d474b 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -30,6 +30,7 @@
#include <linux/gfp.h>
#include <linux/jump_label.h>
#include <linux/random.h>
+#include <linux/sort.h>
#include <asm/text-patching.h>
#include <asm/page.h>
@@ -77,6 +78,173 @@ static unsigned long int get_module_load_offset(void)
}
#endif
+#ifdef CONFIG_X86_PIE
+static u64 find_got_kernel_entry(Elf64_Sym *sym, const Elf64_Rela *rela)
+{
+ u64 *pos;
+
+ for (pos = (u64*)__start_got; pos < (u64*)__end_got; pos++) {
+ if (*pos == sym->st_value)
+ return (u64)pos + rela->r_addend;
+ }
+
+ return 0;
+}
+
+static u64 module_emit_got_entry(struct module *mod, void *loc,
+ const Elf64_Rela *rela, Elf64_Sym *sym)
+{
+ struct mod_got_sec *gotsec = &mod->arch.core;
+ u64 *got = (u64*)gotsec->got->sh_addr;
+ int i = gotsec->got_num_entries;
+ u64 ret;
+
+ /* Check if we can use the kernel GOT */
+ ret = find_got_kernel_entry(sym, rela);
+ if (ret)
+ return ret;
+
+ got[i] = sym->st_value;
+
+ /*
+ * Check if the entry we just created is a duplicate. Given that the
+ * relocations are sorted, a duplicate can only be the last entry we
+ * allocated, got[i - 1] (if one exists), so reuse that slot instead.
+ */
+ if (i > 0 && got[i] == got[i - 1]) {
+ ret = (u64)&got[i - 1];
+ } else {
+ gotsec->got_num_entries++;
+ BUG_ON(gotsec->got_num_entries > gotsec->got_max_entries);
+ ret = (u64)&got[i];
+ }
+
+ return ret + rela->r_addend;
+}
+
+#define cmp_3way(a,b) ((a) < (b) ? -1 : (a) > (b))
+
+static int cmp_rela(const void *a, const void *b)
+{
+ const Elf64_Rela *x = a, *y = b;
+ int i;
+
+ /* sort by type, symbol index and addend */
+ i = cmp_3way(ELF64_R_TYPE(x->r_info), ELF64_R_TYPE(y->r_info));
+ if (i == 0)
+ i = cmp_3way(ELF64_R_SYM(x->r_info), ELF64_R_SYM(y->r_info));
+ if (i == 0)
+ i = cmp_3way(x->r_addend, y->r_addend);
+ return i;
+}
+
+static bool duplicate_rel(const Elf64_Rela *rela, int num)
+{
+ /*
+ * Entries are sorted by type, symbol index and addend. That means
+ * that, if a duplicate entry exists, it must be in the preceding
+ * slot.
+ */
+ return num > 0 && cmp_rela(rela + num, rela + num - 1) == 0;
+}
+
+static unsigned int count_gots(Elf64_Sym *syms, Elf64_Rela *rela, int num)
+{
+ unsigned int ret = 0;
+ Elf64_Sym *s;
+ int i;
+
+ for (i = 0; i < num; i++) {
+ switch (ELF64_R_TYPE(rela[i].r_info)) {
+ case R_X86_64_GOTPCREL:
+ s = syms + ELF64_R_SYM(rela[i].r_info);
+
+ /*
+ * Use the kernel GOT when possible, else reserve a
+ * custom one for this module.
+ */
+ if (!duplicate_rel(rela, i) &&
+ !find_got_kernel_entry(s, rela + i))
+ ret++;
+ break;
+ }
+ }
+ return ret;
+}
+
+/*
+ * Generate GOT entries for GOTPCREL relocations that do not exist in the
+ * kernel GOT. Based on the arm64 module-plts implementation.
+ */
+int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
+ char *secstrings, struct module *mod)
+{
+ unsigned long gots = 0;
+ Elf_Shdr *symtab = NULL;
+ Elf64_Sym *syms = NULL;
+ char *strings, *name;
+ int i;
+
+ /*
+ * Find the empty .got section so we can expand it to store the GOT
+ * entries. Record the symtab address as well.
+ */
+ for (i = 0; i < ehdr->e_shnum; i++) {
+ if (!strcmp(secstrings + sechdrs[i].sh_name, ".got")) {
+ mod->arch.core.got = sechdrs + i;
+ } else if (sechdrs[i].sh_type == SHT_SYMTAB) {
+ symtab = sechdrs + i;
+ syms = (Elf64_Sym *)symtab->sh_addr;
+ }
+ }
+
+ if (!mod->arch.core.got) {
+ pr_err("%s: module GOT section missing\n", mod->name);
+ return -ENOEXEC;
+ }
+ if (!syms) {
+ pr_err("%s: module symtab section missing\n", mod->name);
+ return -ENOEXEC;
+ }
+
+ for (i = 0; i < ehdr->e_shnum; i++) {
+ Elf64_Rela *rels = (void *)ehdr + sechdrs[i].sh_offset;
+ int numrels = sechdrs[i].sh_size / sizeof(Elf64_Rela);
+
+ if (sechdrs[i].sh_type != SHT_RELA)
+ continue;
+
+ /* sort by type, symbol index and addend */
+ sort(rels, numrels, sizeof(Elf64_Rela), cmp_rela, NULL);
+
+ gots += count_gots(syms, rels, numrels);
+ }
+
+ mod->arch.core.got->sh_type = SHT_NOBITS;
+ mod->arch.core.got->sh_flags = SHF_ALLOC;
+ mod->arch.core.got->sh_addralign = L1_CACHE_BYTES;
+ mod->arch.core.got->sh_size = (gots + 1) * sizeof(u64);
+ mod->arch.core.got_num_entries = 0;
+ mod->arch.core.got_max_entries = gots;
+
+ /*
+ * If a _GLOBAL_OFFSET_TABLE_ symbol exists, make it absolute for
+ * modules to correctly reference it. Similar to s390 implementation.
+ */
+ strings = (void *) ehdr + sechdrs[symtab->sh_link].sh_offset;
+ for (i = 0; i < symtab->sh_size/sizeof(Elf_Sym); i++) {
+ if (syms[i].st_shndx != SHN_UNDEF)
+ continue;
+ name = strings + syms[i].st_name;
+ if (!strcmp(name, "_GLOBAL_OFFSET_TABLE_")) {
+ syms[i].st_shndx = SHN_ABS;
+ break;
+ }
+ }
+ return 0;
+}
+#endif
+
void *module_alloc(unsigned long size)
{
void *p;
@@ -190,16 +358,20 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
if ((s64)val != *(s32 *)loc)
goto overflow;
break;
+#ifdef CONFIG_X86_PIE
+ case R_X86_64_GOTPCREL:
+ val = module_emit_got_entry(me, loc, rel + i, sym);
+ /* fallthrough */
+#endif
case R_X86_64_PC32:
case R_X86_64_PLT32:
if (*(u32 *)loc != 0)
goto invalid_relocation;
val -= (u64)loc;
*(u32 *)loc = val;
-#if 0
- if ((s64)val != *(s32 *)loc)
+ if (IS_ENABLED(CONFIG_X86_PIE) &&
+ (s64)val != *(s32 *)loc)
goto overflow;
-#endif
break;
default:
pr_err("%s: Unknown rela relocation: %llu\n",
@@ -217,8 +389,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
overflow:
pr_err("overflow in relocation type %d val %Lx\n",
(int)ELF64_R_TYPE(rel[i].r_info), val);
- pr_err("`%s' likely not compiled with -mcmodel=kernel\n",
- me->name);
+ pr_err("`%s' likely too far from the kernel\n", me->name);
return -ENOEXEC;
}
#endif
diff --git a/arch/x86/kernel/module.lds b/arch/x86/kernel/module.lds
new file mode 100644
index 000000000000..fd6e95a4b454
--- /dev/null
+++ b/arch/x86/kernel/module.lds
@@ -0,0 +1,3 @@
+SECTIONS {
+ .got (NOLOAD) : { BYTE(0) }
+}
--
2.17.0.441.gb46fe60e1d-goog
^ permalink raw reply related
* [PATCH v3 24/27] x86/mm: Make the x86 GOT read-only
From: Thomas Garnier via Virtualization @ 2018-05-23 19:54 UTC (permalink / raw)
To: Herbert Xu, David S . Miller, Thomas Gleixner, Ingo Molnar,
H . Peter Anvin, Peter Zijlstra, Josh Poimboeuf,
Greg Kroah-Hartman, Thomas Garnier, Philippe Ombredanne,
Kate Stewart, Arnaldo Carvalho de Melo, Yonghong Song,
Andrey Ryabinin, Kees Cook, Tom Lendacky, Kirill A . Shutemov,
Andy Lutomirski, Dominik Brodowski, Borislav Petkov,
Borislav Petkov, Rafael J . Wysocki
Cc: linux-arch, kvm, linux-pm, x86, linux-doc, linux-kernel,
virtualization, linux-sparse, linux-crypto, kernel-hardening,
xen-devel
In-Reply-To: <20180523195421.180248-1-thgarnie@google.com>
The GOT is changed during early boot when relocations are applied. Make
it read-only directly. This table exists only for PIE binaries.
Position Independent Executable (PIE) support will allow extending the
KASLR randomization range below the -2G memory limit.
Signed-off-by: Thomas Garnier <thgarnie@google.com>
---
include/asm-generic/vmlinux.lds.h | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index e373e2e10f6a..e5b0710fe693 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -314,6 +314,17 @@
__end_ro_after_init = .;
#endif
+#ifdef CONFIG_X86_PIE
+#define RO_GOT_X86 \
+ .got : AT(ADDR(.got) - LOAD_OFFSET) { \
+ VMLINUX_SYMBOL(__start_got) = .; \
+ *(.got); \
+ VMLINUX_SYMBOL(__end_got) = .; \
+ }
+#else
+#define RO_GOT_X86
+#endif
+
/*
* Read only Data
*/
@@ -370,6 +381,7 @@
__end_builtin_fw = .; \
} \
\
+ RO_GOT_X86 \
TRACEDATA \
\
/* Kernel symbol table: Normal symbols */ \
--
2.17.0.441.gb46fe60e1d-goog
^ permalink raw reply related
* [PATCH v3 25/27] x86/pie: Add option to build the kernel as PIE
From: Thomas Garnier via Virtualization @ 2018-05-23 19:54 UTC (permalink / raw)
To: Herbert Xu, David S . Miller, Thomas Gleixner, Ingo Molnar,
H . Peter Anvin, Peter Zijlstra, Josh Poimboeuf,
Greg Kroah-Hartman, Thomas Garnier, Philippe Ombredanne,
Kate Stewart, Arnaldo Carvalho de Melo, Yonghong Song,
Andrey Ryabinin, Kees Cook, Tom Lendacky, Kirill A . Shutemov,
Andy Lutomirski, Dominik Brodowski, Borislav Petkov,
Borislav Petkov, Rafael J . Wysocki
Cc: linux-arch, kvm, linux-pm, x86, linux-doc, linux-kernel,
virtualization, linux-sparse, linux-crypto, kernel-hardening,
xen-devel
In-Reply-To: <20180523195421.180248-1-thgarnie@google.com>
Add the CONFIG_X86_PIE option which builds the kernel as a Position
Independent Executable (PIE). The kernel is currently built with the
mcmodel=kernel option which forces it to stay on the top 2G of the
virtual address space. With PIE, the kernel will be able to move below
the current limit.
The --emit-relocs linker option was kept instead of using -pie to limit
the impact on mapped sections. Any incompatible relocation will be
caught by the arch/x86/tools/relocs binary at compile time.
If segment based stack cookies are enabled, try to use the compiler
option to select the segment register. If not available, automatically
enable the global stack cookie in auto mode. Otherwise, recommend
a compiler update or the global stack cookie option.
Performance/Size impact:
Size of vmlinux (Default configuration):
File size:
- PIE disabled: +0.18%
- PIE enabled: -1.977% (less relocations)
.text section:
- PIE disabled: same
- PIE enabled: same
Size of vmlinux (Ubuntu configuration):
File size:
- PIE disabled: +0.21%
- PIE enabled: +10%
.text section:
- PIE disabled: same
- PIE enabled: +0.001%
The size increase is mainly due to not having access to the 32-bit signed
relocation that can be used with mcmodel=kernel. A small part is due to reduced
optimization for PIE code. This bug [1] was opened with gcc to provide a better
code generation for kernel PIE.
Hackbench (50% and 1600% on thread/process for pipe/sockets):
- PIE disabled: no significant change (avg -/+ 0.5% on latest test).
- PIE enabled: between -1% to +1% in average (default and Ubuntu config).
Kernbench (average of 10 Half and Optimal runs):
Elapsed Time:
- PIE disabled: no significant change (avg -0.5%)
- PIE enabled: average -0.5% to +0.5%
System Time:
- PIE disabled: no significant change (avg -0.1%)
- PIE enabled: average -0.4% to +0.4%.
[1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82303
Signed-off-by: Thomas Garnier <thgarnie@google.com>
merge pie
---
arch/x86/Kconfig | 8 ++++++++
arch/x86/Makefile | 45 ++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 52 insertions(+), 1 deletion(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 28eb2b3757bf..26d5d4942777 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2215,6 +2215,14 @@ config X86_GLOBAL_STACKPROTECTOR
If unsure, say N
+config X86_PIE
+ bool
+ depends on X86_64
+ select DEFAULT_HIDDEN
+ select WEAK_PROVIDE_HIDDEN
+ select DYNAMIC_MODULE_BASE
+ select MODULE_REL_CRCS if MODVERSIONS
+
config HOTPLUG_CPU
bool "Support for hot-pluggable CPUs"
depends on SMP
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 20bb6cbd8938..c92bcca4400c 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -60,6 +60,8 @@ endif
KBUILD_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow
KBUILD_CFLAGS += $(call cc-option,-mno-avx,)
+stackglobal := $(call cc-option-yn, -mstack-protector-guard=global)
+
ifeq ($(CONFIG_X86_32),y)
BITS := 32
UTS_MACHINE := i386
@@ -135,7 +137,48 @@ else
KBUILD_CFLAGS += -mno-red-zone
ifdef CONFIG_X86_PIE
+ KBUILD_CFLAGS += -fPIE
KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/x86/kernel/module.lds
+
+ # Relax relocation in both CFLAGS and LDFLAGS to support older compilers
+ KBUILD_CFLAGS += $(call cc-option,-Wa$(comma)-mrelax-relocations=no)
+ LDFLAGS_vmlinux += $(call ld-option,--no-relax)
+ KBUILD_LDFLAGS_MODULE += $(call ld-option,--no-relax)
+
+ # Stack validation is not yet supported due to self-referenced switches
+ifdef CONFIG_STACK_VALIDATION
+ $(warning CONFIG_STACK_VALIDATION is not yet supported for x86_64 pie \
+ build.)
+ SKIP_STACK_VALIDATION := 1
+ export SKIP_STACK_VALIDATION
+endif
+
+ifndef CONFIG_CC_STACKPROTECTOR_NONE
+ifndef CONFIG_X86_GLOBAL_STACKPROTECTOR
+ stackseg-flag := -mstack-protector-guard-reg=%gs
+ ifeq ($(call cc-option-yn,$(stackseg-flag)),n)
+ # Try to enable global stack cookie if possible
+ ifeq ($(stackglobal), y)
+ $(warning Cannot use CONFIG_CC_STACKPROTECTOR_* while \
+ building a position independent kernel. \
+ Default to global stack protector \
+ (CONFIG_X86_GLOBAL_STACKPROTECTOR).)
+ CONFIG_X86_GLOBAL_STACKPROTECTOR := y
+ KBUILD_CFLAGS += -DCONFIG_X86_GLOBAL_STACKPROTECTOR
+ KBUILD_AFLAGS += -DCONFIG_X86_GLOBAL_STACKPROTECTOR
+ else
+ $(error echo Cannot use \
+ CONFIG_CC_STACKPROTECTOR_(REGULAR|STRONG|AUTO) \
+ while building a position independent binary. \
+ Update your compiler or use \
+ CONFIG_X86_GLOBAL_STACKPROTECTOR)
+ endif
+ else
+ KBUILD_CFLAGS += $(stackseg-flag)
+ endif
+endif
+endif
+
else
KBUILD_CFLAGS += -mcmodel=kernel
endif
@@ -146,7 +189,7 @@ endif
endif
ifdef CONFIG_X86_GLOBAL_STACKPROTECTOR
- ifeq ($(call cc-option, -mstack-protector-guard=global),)
+ ifeq ($(stackglobal), n)
$(error Cannot use CONFIG_X86_GLOBAL_STACKPROTECTOR: \
-mstack-protector-guard=global not supported \
by compiler)
--
2.17.0.441.gb46fe60e1d-goog
^ permalink raw reply related
* [PATCH v3 26/27] x86/relocs: Add option to generate 64-bit relocations
From: Thomas Garnier via Virtualization @ 2018-05-23 19:54 UTC (permalink / raw)
To: Herbert Xu, David S . Miller, Thomas Gleixner, Ingo Molnar,
H . Peter Anvin, Peter Zijlstra, Josh Poimboeuf,
Greg Kroah-Hartman, Thomas Garnier, Philippe Ombredanne,
Kate Stewart, Arnaldo Carvalho de Melo, Yonghong Song,
Andrey Ryabinin, Kees Cook, Tom Lendacky, Kirill A . Shutemov,
Andy Lutomirski, Dominik Brodowski, Borislav Petkov,
Borislav Petkov, Rafael J . Wysocki
Cc: linux-arch, kvm, linux-pm, x86, linux-doc, linux-kernel,
virtualization, linux-sparse, linux-crypto, kernel-hardening,
xen-devel
In-Reply-To: <20180523195421.180248-1-thgarnie@google.com>
The x86 relocation tool generates a list of 32-bit signed integers. There
was no need to use 64-bit integers because all addresses were within the
top 2G of the address space.
This change adds a large-reloc option to generate 64-bit unsigned integers.
It can be used when the kernel plans to go below the top 2G and 32-bit
integers are not enough.
Signed-off-by: Thomas Garnier <thgarnie@google.com>
---
arch/x86/tools/relocs.c | 60 +++++++++++++++++++++++++++-------
arch/x86/tools/relocs.h | 4 +--
arch/x86/tools/relocs_common.c | 15 ++++++---
3 files changed, 60 insertions(+), 19 deletions(-)
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index 29283ad3950f..a29cccceaac6 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -13,8 +13,14 @@
static Elf_Ehdr ehdr;
+#if ELF_BITS == 64
+typedef uint64_t rel_off_t;
+#else
+typedef uint32_t rel_off_t;
+#endif
+
struct relocs {
- uint32_t *offset;
+ rel_off_t *offset;
unsigned long count;
unsigned long size;
};
@@ -685,7 +691,7 @@ static void print_absolute_relocs(void)
printf("\n");
}
-static void add_reloc(struct relocs *r, uint32_t offset)
+static void add_reloc(struct relocs *r, rel_off_t offset)
{
if (r->count == r->size) {
unsigned long newsize = r->size + 50000;
@@ -1061,26 +1067,48 @@ static void sort_relocs(struct relocs *r)
qsort(r->offset, r->count, sizeof(r->offset[0]), cmp_relocs);
}
-static int write32(uint32_t v, FILE *f)
+static int write32(rel_off_t rel, FILE *f)
{
- unsigned char buf[4];
+ unsigned char buf[sizeof(uint32_t)];
+ uint32_t v = (uint32_t)rel;
put_unaligned_le32(v, buf);
- return fwrite(buf, 1, 4, f) == 4 ? 0 : -1;
+ return fwrite(buf, 1, sizeof(buf), f) == sizeof(buf) ? 0 : -1;
}
-static int write32_as_text(uint32_t v, FILE *f)
+static int write32_as_text(rel_off_t rel, FILE *f)
{
+ uint32_t v = (uint32_t)rel;
return fprintf(f, "\t.long 0x%08"PRIx32"\n", v) > 0 ? 0 : -1;
}
-static void emit_relocs(int as_text, int use_real_mode)
+static int write64(rel_off_t rel, FILE *f)
+{
+ unsigned char buf[sizeof(uint64_t)];
+ uint64_t v = (uint64_t)rel;
+
+ put_unaligned_le64(v, buf);
+ return fwrite(buf, 1, sizeof(buf), f) == sizeof(buf) ? 0 : -1;
+}
+
+static int write64_as_text(rel_off_t rel, FILE *f)
+{
+ uint64_t v = (uint64_t)rel;
+ return fprintf(f, "\t.quad 0x%016"PRIx64"\n", v) > 0 ? 0 : -1;
+}
+
+static void emit_relocs(int as_text, int use_real_mode, int use_large_reloc)
{
int i;
- int (*write_reloc)(uint32_t, FILE *) = write32;
+ int (*write_reloc)(rel_off_t, FILE *);
int (*do_reloc)(struct section *sec, Elf_Rel *rel, Elf_Sym *sym,
const char *symname);
+ if (use_large_reloc)
+ write_reloc = write64;
+ else
+ write_reloc = write32;
+
#if ELF_BITS == 64
if (!use_real_mode)
do_reloc = do_reloc64;
@@ -1091,6 +1119,9 @@ static void emit_relocs(int as_text, int use_real_mode)
do_reloc = do_reloc32;
else
do_reloc = do_reloc_real;
+
+ /* Large relocations only for 64-bit */
+ use_large_reloc = 0;
#endif
/* Collect up the relocations */
@@ -1114,8 +1145,13 @@ static void emit_relocs(int as_text, int use_real_mode)
* gas will like.
*/
printf(".section \".data.reloc\",\"a\"\n");
- printf(".balign 4\n");
- write_reloc = write32_as_text;
+ if (use_large_reloc) {
+ printf(".balign 8\n");
+ write_reloc = write64_as_text;
+ } else {
+ printf(".balign 4\n");
+ write_reloc = write32_as_text;
+ }
}
if (use_real_mode) {
@@ -1183,7 +1219,7 @@ static void print_reloc_info(void)
void process(FILE *fp, int use_real_mode, int as_text,
int show_absolute_syms, int show_absolute_relocs,
- int show_reloc_info)
+ int show_reloc_info, int use_large_reloc)
{
regex_init(use_real_mode);
read_ehdr(fp);
@@ -1206,5 +1242,5 @@ void process(FILE *fp, int use_real_mode, int as_text,
print_reloc_info();
return;
}
- emit_relocs(as_text, use_real_mode);
+ emit_relocs(as_text, use_real_mode, use_large_reloc);
}
diff --git a/arch/x86/tools/relocs.h b/arch/x86/tools/relocs.h
index 43c83c0fd22c..3d401da59df7 100644
--- a/arch/x86/tools/relocs.h
+++ b/arch/x86/tools/relocs.h
@@ -31,8 +31,8 @@ enum symtype {
void process_32(FILE *fp, int use_real_mode, int as_text,
int show_absolute_syms, int show_absolute_relocs,
- int show_reloc_info);
+ int show_reloc_info, int use_large_reloc);
void process_64(FILE *fp, int use_real_mode, int as_text,
int show_absolute_syms, int show_absolute_relocs,
- int show_reloc_info);
+ int show_reloc_info, int use_large_reloc);
#endif /* RELOCS_H */
diff --git a/arch/x86/tools/relocs_common.c b/arch/x86/tools/relocs_common.c
index 6634352a20bc..11f49adf1c06 100644
--- a/arch/x86/tools/relocs_common.c
+++ b/arch/x86/tools/relocs_common.c
@@ -12,14 +12,14 @@ void die(char *fmt, ...)
static void usage(void)
{
- die("relocs [--abs-syms|--abs-relocs|--reloc-info|--text|--realmode]" \
- " vmlinux\n");
+ die("relocs [--abs-syms|--abs-relocs|--reloc-info|--text|--realmode|" \
+ "--large-reloc] vmlinux\n");
}
int main(int argc, char **argv)
{
int show_absolute_syms, show_absolute_relocs, show_reloc_info;
- int as_text, use_real_mode;
+ int as_text, use_real_mode, use_large_reloc;
const char *fname;
FILE *fp;
int i;
@@ -30,6 +30,7 @@ int main(int argc, char **argv)
show_reloc_info = 0;
as_text = 0;
use_real_mode = 0;
+ use_large_reloc = 0;
fname = NULL;
for (i = 1; i < argc; i++) {
char *arg = argv[i];
@@ -54,6 +55,10 @@ int main(int argc, char **argv)
use_real_mode = 1;
continue;
}
+ if (strcmp(arg, "--large-reloc") == 0) {
+ use_large_reloc = 1;
+ continue;
+ }
}
else if (!fname) {
fname = arg;
@@ -75,11 +80,11 @@ int main(int argc, char **argv)
if (e_ident[EI_CLASS] == ELFCLASS64)
process_64(fp, use_real_mode, as_text,
show_absolute_syms, show_absolute_relocs,
- show_reloc_info);
+ show_reloc_info, use_large_reloc);
else
process_32(fp, use_real_mode, as_text,
show_absolute_syms, show_absolute_relocs,
- show_reloc_info);
+ show_reloc_info, use_large_reloc);
fclose(fp);
return 0;
}
--
2.17.0.441.gb46fe60e1d-goog
^ permalink raw reply related
* [PATCH v3 27/27] x86/kaslr: Add option to extend KASLR range from 1GB to 3GB
From: Thomas Garnier via Virtualization @ 2018-05-23 19:54 UTC (permalink / raw)
To: Herbert Xu, David S . Miller, Thomas Gleixner, Ingo Molnar,
H . Peter Anvin, Peter Zijlstra, Josh Poimboeuf,
Greg Kroah-Hartman, Thomas Garnier, Philippe Ombredanne,
Kate Stewart, Arnaldo Carvalho de Melo, Yonghong Song,
Andrey Ryabinin, Kees Cook, Tom Lendacky, Kirill A . Shutemov,
Andy Lutomirski, Dominik Brodowski, Borislav Petkov,
Borislav Petkov, Rafael J . Wysocki
Cc: linux-arch, kvm, linux-pm, x86, linux-doc, linux-kernel,
virtualization, linux-sparse, linux-crypto, kernel-hardening,
xen-devel
In-Reply-To: <20180523195421.180248-1-thgarnie@google.com>
Add a new CONFIG_RANDOMIZE_BASE_LARGE option to benefit from PIE
support. It increases the KASLR range from 1GB to 3GB. The new range
starts at 0xffffffff00000000 just above the EFI memory region. This
option is off by default.
The boot code is adapted to create the appropriate page table spanning
three PUD pages.
The relocation table uses 64-bit integers generated with the updated
relocation tool with the large-reloc option.
Signed-off-by: Thomas Garnier <thgarnie@google.com>
---
arch/x86/Kconfig | 21 +++++++++++++++++++++
arch/x86/boot/compressed/Makefile | 5 +++++
arch/x86/boot/compressed/misc.c | 10 +++++++++-
arch/x86/include/asm/page_64_types.h | 9 +++++++++
arch/x86/kernel/head64.c | 15 ++++++++++++---
arch/x86/kernel/head_64.S | 11 ++++++++++-
6 files changed, 66 insertions(+), 5 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 26d5d4942777..3596a7a76ff0 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2223,6 +2223,27 @@ config X86_PIE
select DYNAMIC_MODULE_BASE
select MODULE_REL_CRCS if MODVERSIONS
+config RANDOMIZE_BASE_LARGE
+ bool "Increase the randomization range of the kernel image"
+ depends on X86_64 && RANDOMIZE_BASE
+ select X86_PIE
+ select X86_MODULE_PLTS if MODULES
+ default n
+ ---help---
+ Build the kernel as a Position Independent Executable (PIE) and
+ increase the available randomization range from 1GB to 3GB.
+
+ This option impacts performance on kernel CPU intensive workloads up
+ to 10% due to PIE generated code. Impact on user-mode processes and
+ typical usage would be significantly less (0.50% when you build the
+ kernel).
+
+ The kernel and modules will generate slightly more assembly (1 to 2%
+ increase on the .text sections). The vmlinux binary will be
+ significantly smaller due to less relocations.
+
+ If unsure say N
+
config HOTPLUG_CPU
bool "Support for hot-pluggable CPUs"
depends on SMP
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index fa42f895fdde..8497ebd5e078 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -116,7 +116,12 @@ $(obj)/vmlinux.bin: vmlinux FORCE
targets += $(patsubst $(obj)/%,%,$(vmlinux-objs-y)) vmlinux.bin.all vmlinux.relocs
+# Large randomization requires a bigger relocation table
+ifeq ($(CONFIG_RANDOMIZE_BASE_LARGE),y)
+CMD_RELOCS = arch/x86/tools/relocs --large-reloc
+else
CMD_RELOCS = arch/x86/tools/relocs
+endif
quiet_cmd_relocs = RELOCS $@
cmd_relocs = $(CMD_RELOCS) $< > $@;$(CMD_RELOCS) --abs-relocs $<
$(obj)/vmlinux.relocs: vmlinux FORCE
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 8dd1d5ccae58..28d17bd5bad8 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -171,10 +171,18 @@ void __puthex(unsigned long value)
}
#if CONFIG_X86_NEED_RELOCS
+
+/* Large randomization goes below -2G and uses the large relocation table */
+#ifdef CONFIG_RANDOMIZE_BASE_LARGE
+typedef long rel_t;
+#else
+typedef int rel_t;
+#endif
+
static void handle_relocations(void *output, unsigned long output_len,
unsigned long virt_addr)
{
- int *reloc;
+ rel_t *reloc;
unsigned long delta, map, ptr;
unsigned long min_addr = (unsigned long)output;
unsigned long max_addr = min_addr + (VO___bss_start - VO__text);
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 2c5a966dc222..85ea681421d2 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -46,7 +46,11 @@
#define __PAGE_OFFSET __PAGE_OFFSET_BASE_L4
#endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */
+#ifdef CONFIG_RANDOMIZE_BASE_LARGE
+#define __START_KERNEL_map _AC(0xffffffff00000000, UL)
+#else
#define __START_KERNEL_map _AC(0xffffffff80000000, UL)
+#endif /* CONFIG_RANDOMIZE_BASE_LARGE */
/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
@@ -64,9 +68,14 @@
* 512MiB by default, leaving 1.5GiB for modules once the page tables
* are fully set up. If kernel ASLR is configured, it can extend the
* kernel page table mapping, reducing the size of the modules area.
+ * On PIE, we relocate the binary 2G lower so add this extra space.
*/
#if defined(CONFIG_RANDOMIZE_BASE)
+#ifdef CONFIG_RANDOMIZE_BASE_LARGE
+#define KERNEL_IMAGE_SIZE (_AC(3, UL) * 1024 * 1024 * 1024)
+#else
#define KERNEL_IMAGE_SIZE (1024 * 1024 * 1024)
+#endif
#else
#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024)
#endif
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 3a1ce822e1c0..e18cc23b9d99 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -63,6 +63,7 @@ EXPORT_SYMBOL(vmemmap_base);
#endif
#define __head __section(.head.text)
+#define pud_count(x) (((x + (PUD_SIZE - 1)) & ~(PUD_SIZE - 1)) >> PUD_SHIFT)
/* Required for read_cr3 when building as PIE */
unsigned long __force_order;
@@ -118,6 +119,8 @@ unsigned long __head __startup_64(unsigned long physaddr,
{
unsigned long load_delta, *p;
unsigned long pgtable_flags;
+ unsigned long level3_kernel_start, level3_kernel_count;
+ unsigned long level3_fixmap_start;
pgdval_t *pgd;
p4dval_t *p4d;
pudval_t *pud;
@@ -149,6 +152,11 @@ unsigned long __head __startup_64(unsigned long physaddr,
/* Include the SME encryption mask in the fixup value */
load_delta += sme_get_me_mask();
+ /* Look at the randomization spread to adapt page table used */
+ level3_kernel_start = pud_index(__START_KERNEL_map);
+ level3_kernel_count = pud_count(KERNEL_IMAGE_SIZE);
+ level3_fixmap_start = level3_kernel_start + level3_kernel_count;
+
/* Fixup the physical addresses in the page table */
pgd = fixup_pointer(&early_top_pgt, physaddr);
@@ -165,8 +173,9 @@ unsigned long __head __startup_64(unsigned long physaddr,
}
pud = fixup_pointer(&level3_kernel_pgt, physaddr);
- pud[510] += load_delta;
- pud[511] += load_delta;
+ for (i = 0; i < level3_kernel_count; i++)
+ pud[level3_kernel_start + i] += load_delta;
+ pud[level3_fixmap_start] += load_delta;
pmd = fixup_pointer(level2_fixmap_pgt, physaddr);
pmd[506] += load_delta;
@@ -224,7 +233,7 @@ unsigned long __head __startup_64(unsigned long physaddr,
*/
pmd = fixup_pointer(level2_kernel_pgt, physaddr);
- for (i = 0; i < PTRS_PER_PMD; i++) {
+ for (i = 0; i < PTRS_PER_PMD * level3_kernel_count; i++) {
if (pmd[i] & _PAGE_PRESENT)
pmd[i] += load_delta;
}
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index f44b259b26d3..50343b9ba5da 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -41,12 +41,16 @@
#define l4_index(x) (((x) >> 39) & 511)
#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
+#define pud_count(x) (((x + (PUD_SIZE - 1)) & ~(PUD_SIZE - 1)) >> PUD_SHIFT)
L4_PAGE_OFFSET = l4_index(__PAGE_OFFSET_BASE_L4)
L4_START_KERNEL = l4_index(__START_KERNEL_map)
L3_START_KERNEL = pud_index(__START_KERNEL_map)
+/* Adapt page table L3 space based on range of randomization */
+L3_KERNEL_ENTRY_COUNT = pud_count(KERNEL_IMAGE_SIZE)
+
.text
__HEAD
.code64
@@ -441,7 +445,12 @@ NEXT_PAGE(level4_kernel_pgt)
NEXT_PAGE(level3_kernel_pgt)
.fill L3_START_KERNEL,8,0
/* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
- .quad level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
+ i = 0
+ .rept L3_KERNEL_ENTRY_COUNT
+ .quad level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC \
+ + PAGE_SIZE*i
+ i = i + 1
+ .endr
.quad level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
NEXT_PAGE(level2_kernel_pgt)
--
2.17.0.441.gb46fe60e1d-goog
^ permalink raw reply related
* [PATCH] block drivers/block: Use octal not symbolic permissions
From: Joe Perches @ 2018-05-23 20:05 UTC (permalink / raw)
To: Jens Axboe, Ed L. Cashin, Philipp Reisner, Lars Ellenberg,
Jiri Kosina, Josef Bacik, linux-block, Ilya Dryomov, Sage Weil,
Alex Elder, Joshua Morris, Philip Kelleher, Michael S. Tsirkin,
Jason Wang, Konrad Rzeszutek Wilk, Roger Pau Monné,
Boris Ostrovsky, Juergen Gross
Cc: linux-kernel, nbd, xen-devel, ceph-devel, virtualization,
drbd-dev
Convert the S_<FOO> symbolic permissions to their octal equivalents as
using octal and not symbolic permissions is preferred by many as more
readable.
see: https://lkml.org/lkml/2016/8/2/1945
Done with automated conversion via:
$ ./scripts/checkpatch.pl -f --types=SYMBOLIC_PERMS --fix-inplace <files...>
Miscellanea:
o Wrapped modified multi-line calls to a single line where appropriate
o Realign modified multi-line calls to open parenthesis
Signed-off-by: Joe Perches <joe@perches.com>
---
block/blk-integrity.c | 12 +++----
block/blk-mq-sysfs.c | 6 ++--
block/blk-sysfs.c | 68 ++++++++++++++++++-------------------
block/cfq-iosched.c | 2 +-
block/deadline-iosched.c | 3 +-
block/genhd.c | 37 ++++++++++----------
block/mq-deadline.c | 3 +-
block/partition-generic.c | 22 ++++++------
drivers/block/DAC960.c | 3 +-
drivers/block/aoe/aoeblk.c | 10 +++---
drivers/block/brd.c | 6 ++--
drivers/block/drbd/drbd_debugfs.c | 20 +++++------
drivers/block/drbd/drbd_main.c | 4 +--
drivers/block/floppy.c | 2 +-
drivers/block/loop.c | 6 ++--
drivers/block/mtip32xx/mtip32xx.c | 11 +++---
drivers/block/nbd.c | 2 +-
drivers/block/null_blk.c | 30 ++++++++--------
drivers/block/pktcdvd.c | 4 +--
drivers/block/rbd.c | 44 ++++++++++++------------
drivers/block/rsxx/core.c | 6 ++--
drivers/block/virtio_blk.c | 6 ++--
drivers/block/xen-blkback/blkback.c | 2 +-
drivers/block/xen-blkback/xenbus.c | 4 +--
drivers/block/xen-blkfront.c | 7 ++--
25 files changed, 156 insertions(+), 164 deletions(-)
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index feb30570eaf5..6121611e1316 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -333,34 +333,34 @@ static ssize_t integrity_device_show(struct blk_integrity *bi, char *page)
}
static struct integrity_sysfs_entry integrity_format_entry = {
- .attr = { .name = "format", .mode = S_IRUGO },
+ .attr = { .name = "format", .mode = 0444 },
.show = integrity_format_show,
};
static struct integrity_sysfs_entry integrity_tag_size_entry = {
- .attr = { .name = "tag_size", .mode = S_IRUGO },
+ .attr = { .name = "tag_size", .mode = 0444 },
.show = integrity_tag_size_show,
};
static struct integrity_sysfs_entry integrity_interval_entry = {
- .attr = { .name = "protection_interval_bytes", .mode = S_IRUGO },
+ .attr = { .name = "protection_interval_bytes", .mode = 0444 },
.show = integrity_interval_show,
};
static struct integrity_sysfs_entry integrity_verify_entry = {
- .attr = { .name = "read_verify", .mode = S_IRUGO | S_IWUSR },
+ .attr = { .name = "read_verify", .mode = 0644 },
.show = integrity_verify_show,
.store = integrity_verify_store,
};
static struct integrity_sysfs_entry integrity_generate_entry = {
- .attr = { .name = "write_generate", .mode = S_IRUGO | S_IWUSR },
+ .attr = { .name = "write_generate", .mode = 0644 },
.show = integrity_generate_show,
.store = integrity_generate_store,
};
static struct integrity_sysfs_entry integrity_device_entry = {
- .attr = { .name = "device_is_integrity_capable", .mode = S_IRUGO },
+ .attr = { .name = "device_is_integrity_capable", .mode = 0444 },
.show = integrity_device_show,
};
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index a54b4b070f1c..aafb44224c89 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -166,15 +166,15 @@ static struct attribute *default_ctx_attrs[] = {
};
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_nr_tags = {
- .attr = {.name = "nr_tags", .mode = S_IRUGO },
+ .attr = {.name = "nr_tags", .mode = 0444 },
.show = blk_mq_hw_sysfs_nr_tags_show,
};
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_nr_reserved_tags = {
- .attr = {.name = "nr_reserved_tags", .mode = S_IRUGO },
+ .attr = {.name = "nr_reserved_tags", .mode = 0444 },
.show = blk_mq_hw_sysfs_nr_reserved_tags_show,
};
static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_cpus = {
- .attr = {.name = "cpu_list", .mode = S_IRUGO },
+ .attr = {.name = "cpu_list", .mode = 0444 },
.show = blk_mq_hw_sysfs_cpus_show,
};
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index cae525b7aae6..31347e31daa3 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -502,187 +502,187 @@ static ssize_t queue_dax_show(struct request_queue *q, char *page)
}
static struct queue_sysfs_entry queue_requests_entry = {
- .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
+ .attr = {.name = "nr_requests", .mode = 0644 },
.show = queue_requests_show,
.store = queue_requests_store,
};
static struct queue_sysfs_entry queue_ra_entry = {
- .attr = {.name = "read_ahead_kb", .mode = S_IRUGO | S_IWUSR },
+ .attr = {.name = "read_ahead_kb", .mode = 0644 },
.show = queue_ra_show,
.store = queue_ra_store,
};
static struct queue_sysfs_entry queue_max_sectors_entry = {
- .attr = {.name = "max_sectors_kb", .mode = S_IRUGO | S_IWUSR },
+ .attr = {.name = "max_sectors_kb", .mode = 0644 },
.show = queue_max_sectors_show,
.store = queue_max_sectors_store,
};
static struct queue_sysfs_entry queue_max_hw_sectors_entry = {
- .attr = {.name = "max_hw_sectors_kb", .mode = S_IRUGO },
+ .attr = {.name = "max_hw_sectors_kb", .mode = 0444 },
.show = queue_max_hw_sectors_show,
};
static struct queue_sysfs_entry queue_max_segments_entry = {
- .attr = {.name = "max_segments", .mode = S_IRUGO },
+ .attr = {.name = "max_segments", .mode = 0444 },
.show = queue_max_segments_show,
};
static struct queue_sysfs_entry queue_max_discard_segments_entry = {
- .attr = {.name = "max_discard_segments", .mode = S_IRUGO },
+ .attr = {.name = "max_discard_segments", .mode = 0444 },
.show = queue_max_discard_segments_show,
};
static struct queue_sysfs_entry queue_max_integrity_segments_entry = {
- .attr = {.name = "max_integrity_segments", .mode = S_IRUGO },
+ .attr = {.name = "max_integrity_segments", .mode = 0444 },
.show = queue_max_integrity_segments_show,
};
static struct queue_sysfs_entry queue_max_segment_size_entry = {
- .attr = {.name = "max_segment_size", .mode = S_IRUGO },
+ .attr = {.name = "max_segment_size", .mode = 0444 },
.show = queue_max_segment_size_show,
};
static struct queue_sysfs_entry queue_iosched_entry = {
- .attr = {.name = "scheduler", .mode = S_IRUGO | S_IWUSR },
+ .attr = {.name = "scheduler", .mode = 0644 },
.show = elv_iosched_show,
.store = elv_iosched_store,
};
static struct queue_sysfs_entry queue_hw_sector_size_entry = {
- .attr = {.name = "hw_sector_size", .mode = S_IRUGO },
+ .attr = {.name = "hw_sector_size", .mode = 0444 },
.show = queue_logical_block_size_show,
};
static struct queue_sysfs_entry queue_logical_block_size_entry = {
- .attr = {.name = "logical_block_size", .mode = S_IRUGO },
+ .attr = {.name = "logical_block_size", .mode = 0444 },
.show = queue_logical_block_size_show,
};
static struct queue_sysfs_entry queue_physical_block_size_entry = {
- .attr = {.name = "physical_block_size", .mode = S_IRUGO },
+ .attr = {.name = "physical_block_size", .mode = 0444 },
.show = queue_physical_block_size_show,
};
static struct queue_sysfs_entry queue_chunk_sectors_entry = {
- .attr = {.name = "chunk_sectors", .mode = S_IRUGO },
+ .attr = {.name = "chunk_sectors", .mode = 0444 },
.show = queue_chunk_sectors_show,
};
static struct queue_sysfs_entry queue_io_min_entry = {
- .attr = {.name = "minimum_io_size", .mode = S_IRUGO },
+ .attr = {.name = "minimum_io_size", .mode = 0444 },
.show = queue_io_min_show,
};
static struct queue_sysfs_entry queue_io_opt_entry = {
- .attr = {.name = "optimal_io_size", .mode = S_IRUGO },
+ .attr = {.name = "optimal_io_size", .mode = 0444 },
.show = queue_io_opt_show,
};
static struct queue_sysfs_entry queue_discard_granularity_entry = {
- .attr = {.name = "discard_granularity", .mode = S_IRUGO },
+ .attr = {.name = "discard_granularity", .mode = 0444 },
.show = queue_discard_granularity_show,
};
static struct queue_sysfs_entry queue_discard_max_hw_entry = {
- .attr = {.name = "discard_max_hw_bytes", .mode = S_IRUGO },
+ .attr = {.name = "discard_max_hw_bytes", .mode = 0444 },
.show = queue_discard_max_hw_show,
};
static struct queue_sysfs_entry queue_discard_max_entry = {
- .attr = {.name = "discard_max_bytes", .mode = S_IRUGO | S_IWUSR },
+ .attr = {.name = "discard_max_bytes", .mode = 0644 },
.show = queue_discard_max_show,
.store = queue_discard_max_store,
};
static struct queue_sysfs_entry queue_discard_zeroes_data_entry = {
- .attr = {.name = "discard_zeroes_data", .mode = S_IRUGO },
+ .attr = {.name = "discard_zeroes_data", .mode = 0444 },
.show = queue_discard_zeroes_data_show,
};
static struct queue_sysfs_entry queue_write_same_max_entry = {
- .attr = {.name = "write_same_max_bytes", .mode = S_IRUGO },
+ .attr = {.name = "write_same_max_bytes", .mode = 0444 },
.show = queue_write_same_max_show,
};
static struct queue_sysfs_entry queue_write_zeroes_max_entry = {
- .attr = {.name = "write_zeroes_max_bytes", .mode = S_IRUGO },
+ .attr = {.name = "write_zeroes_max_bytes", .mode = 0444 },
.show = queue_write_zeroes_max_show,
};
static struct queue_sysfs_entry queue_nonrot_entry = {
- .attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR },
+ .attr = {.name = "rotational", .mode = 0644 },
.show = queue_show_nonrot,
.store = queue_store_nonrot,
};
static struct queue_sysfs_entry queue_zoned_entry = {
- .attr = {.name = "zoned", .mode = S_IRUGO },
+ .attr = {.name = "zoned", .mode = 0444 },
.show = queue_zoned_show,
};
static struct queue_sysfs_entry queue_nomerges_entry = {
- .attr = {.name = "nomerges", .mode = S_IRUGO | S_IWUSR },
+ .attr = {.name = "nomerges", .mode = 0644 },
.show = queue_nomerges_show,
.store = queue_nomerges_store,
};
static struct queue_sysfs_entry queue_rq_affinity_entry = {
- .attr = {.name = "rq_affinity", .mode = S_IRUGO | S_IWUSR },
+ .attr = {.name = "rq_affinity", .mode = 0644 },
.show = queue_rq_affinity_show,
.store = queue_rq_affinity_store,
};
static struct queue_sysfs_entry queue_iostats_entry = {
- .attr = {.name = "iostats", .mode = S_IRUGO | S_IWUSR },
+ .attr = {.name = "iostats", .mode = 0644 },
.show = queue_show_iostats,
.store = queue_store_iostats,
};
static struct queue_sysfs_entry queue_random_entry = {
- .attr = {.name = "add_random", .mode = S_IRUGO | S_IWUSR },
+ .attr = {.name = "add_random", .mode = 0644 },
.show = queue_show_random,
.store = queue_store_random,
};
static struct queue_sysfs_entry queue_poll_entry = {
- .attr = {.name = "io_poll", .mode = S_IRUGO | S_IWUSR },
+ .attr = {.name = "io_poll", .mode = 0644 },
.show = queue_poll_show,
.store = queue_poll_store,
};
static struct queue_sysfs_entry queue_poll_delay_entry = {
- .attr = {.name = "io_poll_delay", .mode = S_IRUGO | S_IWUSR },
+ .attr = {.name = "io_poll_delay", .mode = 0644 },
.show = queue_poll_delay_show,
.store = queue_poll_delay_store,
};
static struct queue_sysfs_entry queue_wc_entry = {
- .attr = {.name = "write_cache", .mode = S_IRUGO | S_IWUSR },
+ .attr = {.name = "write_cache", .mode = 0644 },
.show = queue_wc_show,
.store = queue_wc_store,
};
static struct queue_sysfs_entry queue_fua_entry = {
- .attr = {.name = "fua", .mode = S_IRUGO },
+ .attr = {.name = "fua", .mode = 0444 },
.show = queue_fua_show,
};
static struct queue_sysfs_entry queue_dax_entry = {
- .attr = {.name = "dax", .mode = S_IRUGO },
+ .attr = {.name = "dax", .mode = 0444 },
.show = queue_dax_show,
};
static struct queue_sysfs_entry queue_wb_lat_entry = {
- .attr = {.name = "wbt_lat_usec", .mode = S_IRUGO | S_IWUSR },
+ .attr = {.name = "wbt_lat_usec", .mode = 0644 },
.show = queue_wb_lat_show,
.store = queue_wb_lat_store,
};
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
static struct queue_sysfs_entry throtl_sample_time_entry = {
- .attr = {.name = "throttle_sample_time", .mode = S_IRUGO | S_IWUSR },
+ .attr = {.name = "throttle_sample_time", .mode = 0644 },
.show = blk_throtl_sample_time_show,
.store = blk_throtl_sample_time_store,
};
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 6b9f6b1cd33b..82b6c27b3245 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -4786,7 +4786,7 @@ USEC_STORE_FUNCTION(cfq_target_latency_us_store, &cfqd->cfq_target_latency, 1, U
#undef USEC_STORE_FUNCTION
#define CFQ_ATTR(name) \
- __ATTR(name, S_IRUGO|S_IWUSR, cfq_##name##_show, cfq_##name##_store)
+ __ATTR(name, 0644, cfq_##name##_show, cfq_##name##_store)
static struct elv_fs_entry cfq_attrs[] = {
CFQ_ATTR(quantum),
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index 9de9f156e203..ef2f1f09e9b3 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -512,8 +512,7 @@ STORE_FUNCTION(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX, 0);
#undef STORE_FUNCTION
#define DD_ATTR(name) \
- __ATTR(name, S_IRUGO|S_IWUSR, deadline_##name##_show, \
- deadline_##name##_store)
+ __ATTR(name, 0644, deadline_##name##_show, deadline_##name##_store)
static struct elv_fs_entry deadline_attrs[] = {
DD_ATTR(read_expire),
diff --git a/block/genhd.c b/block/genhd.c
index 066de6d321c7..cb65a3013480 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1127,28 +1127,25 @@ static ssize_t disk_discard_alignment_show(struct device *dev,
return sprintf(buf, "%d\n", queue_discard_alignment(disk->queue));
}
-static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL);
-static DEVICE_ATTR(ext_range, S_IRUGO, disk_ext_range_show, NULL);
-static DEVICE_ATTR(removable, S_IRUGO, disk_removable_show, NULL);
-static DEVICE_ATTR(hidden, S_IRUGO, disk_hidden_show, NULL);
-static DEVICE_ATTR(ro, S_IRUGO, disk_ro_show, NULL);
-static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
-static DEVICE_ATTR(alignment_offset, S_IRUGO, disk_alignment_offset_show, NULL);
-static DEVICE_ATTR(discard_alignment, S_IRUGO, disk_discard_alignment_show,
- NULL);
-static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
-static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
-static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
-static DEVICE_ATTR(badblocks, S_IRUGO | S_IWUSR, disk_badblocks_show,
- disk_badblocks_store);
+static DEVICE_ATTR(range, 0444, disk_range_show, NULL);
+static DEVICE_ATTR(ext_range, 0444, disk_ext_range_show, NULL);
+static DEVICE_ATTR(removable, 0444, disk_removable_show, NULL);
+static DEVICE_ATTR(hidden, 0444, disk_hidden_show, NULL);
+static DEVICE_ATTR(ro, 0444, disk_ro_show, NULL);
+static DEVICE_ATTR(size, 0444, part_size_show, NULL);
+static DEVICE_ATTR(alignment_offset, 0444, disk_alignment_offset_show, NULL);
+static DEVICE_ATTR(discard_alignment, 0444, disk_discard_alignment_show, NULL);
+static DEVICE_ATTR(capability, 0444, disk_capability_show, NULL);
+static DEVICE_ATTR(stat, 0444, part_stat_show, NULL);
+static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL);
+static DEVICE_ATTR(badblocks, 0644, disk_badblocks_show, disk_badblocks_store);
#ifdef CONFIG_FAIL_MAKE_REQUEST
static struct device_attribute dev_attr_fail =
- __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
+ __ATTR(make-it-fail, 0644, part_fail_show, part_fail_store);
#endif
#ifdef CONFIG_FAIL_IO_TIMEOUT
static struct device_attribute dev_attr_fail_timeout =
- __ATTR(io-timeout-fail, S_IRUGO|S_IWUSR, part_timeout_show,
- part_timeout_store);
+ __ATTR(io-timeout-fail, 0644, part_timeout_show, part_timeout_store);
#endif
static struct attribute *disk_attrs[] = {
@@ -1900,9 +1897,9 @@ static ssize_t disk_events_poll_msecs_store(struct device *dev,
return count;
}
-static const DEVICE_ATTR(events, S_IRUGO, disk_events_show, NULL);
-static const DEVICE_ATTR(events_async, S_IRUGO, disk_events_async_show, NULL);
-static const DEVICE_ATTR(events_poll_msecs, S_IRUGO|S_IWUSR,
+static const DEVICE_ATTR(events, 0444, disk_events_show, NULL);
+static const DEVICE_ATTR(events_async, 0444, disk_events_async_show, NULL);
+static const DEVICE_ATTR(events_poll_msecs, 0644,
disk_events_poll_msecs_show,
disk_events_poll_msecs_store);
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 8ec0ba9f5386..099a9e05854c 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -630,8 +630,7 @@ STORE_FUNCTION(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX, 0);
#undef STORE_FUNCTION
#define DD_ATTR(name) \
- __ATTR(name, S_IRUGO|S_IWUSR, deadline_##name##_show, \
- deadline_##name##_store)
+ __ATTR(name, 0644, deadline_##name##_show, deadline_##name##_store)
static struct elv_fs_entry deadline_attrs[] = {
DD_ATTR(read_expire),
diff --git a/block/partition-generic.c b/block/partition-generic.c
index db57cced9b98..93ef8ce568a1 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -179,18 +179,17 @@ ssize_t part_fail_store(struct device *dev,
}
#endif
-static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL);
-static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL);
-static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
-static DEVICE_ATTR(ro, S_IRUGO, part_ro_show, NULL);
-static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL);
-static DEVICE_ATTR(discard_alignment, S_IRUGO, part_discard_alignment_show,
- NULL);
-static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
-static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
+static DEVICE_ATTR(partition, 0444, part_partition_show, NULL);
+static DEVICE_ATTR(start, 0444, part_start_show, NULL);
+static DEVICE_ATTR(size, 0444, part_size_show, NULL);
+static DEVICE_ATTR(ro, 0444, part_ro_show, NULL);
+static DEVICE_ATTR(alignment_offset, 0444, part_alignment_offset_show, NULL);
+static DEVICE_ATTR(discard_alignment, 0444, part_discard_alignment_show, NULL);
+static DEVICE_ATTR(stat, 0444, part_stat_show, NULL);
+static DEVICE_ATTR(inflight, 0444, part_inflight_show, NULL);
#ifdef CONFIG_FAIL_MAKE_REQUEST
static struct device_attribute dev_attr_fail =
- __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
+ __ATTR(make-it-fail, 0644, part_fail_show, part_fail_store);
#endif
static struct attribute *part_attrs[] = {
@@ -291,8 +290,7 @@ static ssize_t whole_disk_show(struct device *dev,
{
return 0;
}
-static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH,
- whole_disk_show, NULL);
+static DEVICE_ATTR(whole_disk, 0444, whole_disk_show, NULL);
/*
* Must be called either with bd_mutex held, before a disk can be opened or
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 55d9ff54ce15..b7e4aa624667 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -6552,7 +6552,8 @@ static void DAC960_CreateProcEntries(DAC960_Controller_T *Controller)
dac960_initial_status_proc_show, Controller);
proc_create_single_data("current_status", 0, ControllerProcEntry,
dac960_current_status_proc_show, Controller);
- proc_create_data("user_command", S_IWUSR | S_IRUSR, ControllerProcEntry, &dac960_user_command_proc_fops, Controller);
+ proc_create_data("user_command", 0600, ControllerProcEntry,
+ &dac960_user_command_proc_fops, Controller);
Controller->ControllerProcEntry = ControllerProcEntry;
}
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 027b876370bc..429ebb84b592 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -159,14 +159,14 @@ static int aoe_debugfs_open(struct inode *inode, struct file *file)
return single_open(file, aoedisk_debugfs_show, inode->i_private);
}
-static DEVICE_ATTR(state, S_IRUGO, aoedisk_show_state, NULL);
-static DEVICE_ATTR(mac, S_IRUGO, aoedisk_show_mac, NULL);
-static DEVICE_ATTR(netif, S_IRUGO, aoedisk_show_netif, NULL);
+static DEVICE_ATTR(state, 0444, aoedisk_show_state, NULL);
+static DEVICE_ATTR(mac, 0444, aoedisk_show_mac, NULL);
+static DEVICE_ATTR(netif, 0444, aoedisk_show_netif, NULL);
static struct device_attribute dev_attr_firmware_version = {
- .attr = { .name = "firmware-version", .mode = S_IRUGO },
+ .attr = { .name = "firmware-version", .mode = 0444 },
.show = aoedisk_show_fwver,
};
-static DEVICE_ATTR(payload, S_IRUGO, aoedisk_show_payload, NULL);
+static DEVICE_ATTR(payload, 0444, aoedisk_show_payload, NULL);
static struct attribute *aoe_attrs[] = {
&dev_attr_state.attr,
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 39c5b90cc187..bb976598ee43 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -331,15 +331,15 @@ static const struct block_device_operations brd_fops = {
* And now the modules code and kernel interface.
*/
static int rd_nr = CONFIG_BLK_DEV_RAM_COUNT;
-module_param(rd_nr, int, S_IRUGO);
+module_param(rd_nr, int, 0444);
MODULE_PARM_DESC(rd_nr, "Maximum number of brd devices");
unsigned long rd_size = CONFIG_BLK_DEV_RAM_SIZE;
-module_param(rd_size, ulong, S_IRUGO);
+module_param(rd_size, ulong, 0444);
MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes.");
static int max_part = 1;
-module_param(max_part, int, S_IRUGO);
+module_param(max_part, int, 0444);
MODULE_PARM_DESC(max_part, "Num Minors to reserve between devices");
MODULE_LICENSE("GPL");
diff --git a/drivers/block/drbd/drbd_debugfs.c b/drivers/block/drbd/drbd_debugfs.c
index ab21976a87b2..5d5e8d6a8a56 100644
--- a/drivers/block/drbd/drbd_debugfs.c
+++ b/drivers/block/drbd/drbd_debugfs.c
@@ -481,9 +481,9 @@ void drbd_debugfs_resource_add(struct drbd_resource *resource)
goto fail;
resource->debugfs_res_connections = dentry;
- dentry = debugfs_create_file("in_flight_summary", S_IRUSR|S_IRGRP,
- resource->debugfs_res, resource,
- &in_flight_summary_fops);
+ dentry = debugfs_create_file("in_flight_summary", 0440,
+ resource->debugfs_res, resource,
+ &in_flight_summary_fops);
if (IS_ERR_OR_NULL(dentry))
goto fail;
resource->debugfs_res_in_flight_summary = dentry;
@@ -645,16 +645,16 @@ void drbd_debugfs_connection_add(struct drbd_connection *connection)
goto fail;
connection->debugfs_conn = dentry;
- dentry = debugfs_create_file("callback_history", S_IRUSR|S_IRGRP,
- connection->debugfs_conn, connection,
- &connection_callback_history_fops);
+ dentry = debugfs_create_file("callback_history", 0440,
+ connection->debugfs_conn, connection,
+ &connection_callback_history_fops);
if (IS_ERR_OR_NULL(dentry))
goto fail;
connection->debugfs_conn_callback_history = dentry;
- dentry = debugfs_create_file("oldest_requests", S_IRUSR|S_IRGRP,
- connection->debugfs_conn, connection,
- &connection_oldest_requests_fops);
+ dentry = debugfs_create_file("oldest_requests", 0440,
+ connection->debugfs_conn, connection,
+ &connection_oldest_requests_fops);
if (IS_ERR_OR_NULL(dentry))
goto fail;
connection->debugfs_conn_oldest_requests = dentry;
@@ -824,7 +824,7 @@ void drbd_debugfs_device_add(struct drbd_device *device)
device->debugfs_minor = dentry;
#define DCF(name) do { \
- dentry = debugfs_create_file(#name, S_IRUSR|S_IRGRP, \
+ dentry = debugfs_create_file(#name, 0440, \
device->debugfs_vol, device, \
&device_ ## name ## _fops); \
if (IS_ERR_OR_NULL(dentry)) \
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index c2d154faac02..7916e0af25ec 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -3010,8 +3010,8 @@ static int __init drbd_init(void)
goto fail;
err = -ENOMEM;
- drbd_proc = proc_create_single("drbd", S_IFREG | S_IRUGO , NULL,
- drbd_seq_show);
+ drbd_proc = proc_create_single("drbd", S_IFREG | 0444, NULL,
+ drbd_seq_show);
if (!drbd_proc) {
pr_err("unable to register proc file\n");
goto fail;
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 8ec7235fc93b..8871b5044d9e 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -4450,7 +4450,7 @@ static ssize_t floppy_cmos_show(struct device *dev,
return sprintf(buf, "%X\n", UDP->cmos);
}
-static DEVICE_ATTR(cmos, S_IRUGO, floppy_cmos_show, NULL);
+static DEVICE_ATTR(cmos, 0444, floppy_cmos_show, NULL);
static struct attribute *floppy_dev_attrs[] = {
&dev_attr_cmos.attr,
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 5f0df2efc26c..66c1f0786ae8 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -732,7 +732,7 @@ static ssize_t loop_attr_do_show_##_name(struct device *d, \
return loop_attr_show(d, b, loop_attr_##_name##_show); \
} \
static struct device_attribute loop_attr_##_name = \
- __ATTR(_name, S_IRUGO, loop_attr_do_show_##_name, NULL);
+ __ATTR(_name, 0444, loop_attr_do_show_##_name, NULL);
static ssize_t loop_attr_backing_file_show(struct loop_device *lo, char *buf)
{
@@ -1677,9 +1677,9 @@ static const struct block_device_operations lo_fops = {
* And now the modules code and kernel interface.
*/
static int max_loop;
-module_param(max_loop, int, S_IRUGO);
+module_param(max_loop, int, 0444);
MODULE_PARM_DESC(max_loop, "Maximum number of loop devices");
-module_param(max_part, int, S_IRUGO);
+module_param(max_part, int, 0444);
MODULE_PARM_DESC(max_part, "Maximum number of partitions per loop device");
MODULE_LICENSE("GPL");
MODULE_ALIAS_BLOCKDEV_MAJOR(LOOP_MAJOR);
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 6df5b0b1517a..e873daca9d13 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -2285,7 +2285,7 @@ static ssize_t mtip_hw_show_status(struct device *dev,
return size;
}
-static DEVICE_ATTR(status, S_IRUGO, mtip_hw_show_status, NULL);
+static DEVICE_ATTR(status, 0444, mtip_hw_show_status, NULL);
/* debugsfs entries */
@@ -2566,10 +2566,9 @@ static int mtip_hw_debugfs_init(struct driver_data *dd)
return -1;
}
- debugfs_create_file("flags", S_IRUGO, dd->dfs_node, dd,
- &mtip_flags_fops);
- debugfs_create_file("registers", S_IRUGO, dd->dfs_node, dd,
- &mtip_regs_fops);
+ debugfs_create_file("flags", 0444, dd->dfs_node, dd, &mtip_flags_fops);
+ debugfs_create_file("registers", 0444, dd->dfs_node, dd,
+ &mtip_regs_fops);
return 0;
}
@@ -4613,7 +4612,7 @@ static int __init mtip_init(void)
}
if (dfs_parent) {
dfs_device_status = debugfs_create_file("device_status",
- S_IRUGO, dfs_parent, NULL,
+ 0444, dfs_parent, NULL,
&mtip_device_status_fops);
if (IS_ERR_OR_NULL(dfs_device_status)) {
pr_err("Error creating device_status node\n");
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index abc0a815354f..54dd91a67e03 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -166,7 +166,7 @@ static ssize_t pid_show(struct device *dev,
}
static const struct device_attribute pid_attr = {
- .attr = { .name = "pid", .mode = S_IRUGO},
+ .attr = { .name = "pid", .mode = 0444},
.show = pid_show,
};
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index a76553293a31..b4d368e3ddcd 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -157,23 +157,23 @@ enum {
};
static int g_no_sched;
-module_param_named(no_sched, g_no_sched, int, S_IRUGO);
+module_param_named(no_sched, g_no_sched, int, 0444);
MODULE_PARM_DESC(no_sched, "No io scheduler");
static int g_submit_queues = 1;
-module_param_named(submit_queues, g_submit_queues, int, S_IRUGO);
+module_param_named(submit_queues, g_submit_queues, int, 0444);
MODULE_PARM_DESC(submit_queues, "Number of submission queues");
static int g_home_node = NUMA_NO_NODE;
-module_param_named(home_node, g_home_node, int, S_IRUGO);
+module_param_named(home_node, g_home_node, int, 0444);
MODULE_PARM_DESC(home_node, "Home node for the device");
#ifdef CONFIG_BLK_DEV_NULL_BLK_FAULT_INJECTION
static char g_timeout_str[80];
-module_param_string(timeout, g_timeout_str, sizeof(g_timeout_str), S_IRUGO);
+module_param_string(timeout, g_timeout_str, sizeof(g_timeout_str), 0444);
static char g_requeue_str[80];
-module_param_string(requeue, g_requeue_str, sizeof(g_requeue_str), S_IRUGO);
+module_param_string(requeue, g_requeue_str, sizeof(g_requeue_str), 0444);
#endif
static int g_queue_mode = NULL_Q_MQ;
@@ -203,27 +203,27 @@ static const struct kernel_param_ops null_queue_mode_param_ops = {
.get = param_get_int,
};
-device_param_cb(queue_mode, &null_queue_mode_param_ops, &g_queue_mode, S_IRUGO);
+device_param_cb(queue_mode, &null_queue_mode_param_ops, &g_queue_mode, 0444);
MODULE_PARM_DESC(queue_mode, "Block interface to use (0=bio,1=rq,2=multiqueue)");
static int g_gb = 250;
-module_param_named(gb, g_gb, int, S_IRUGO);
+module_param_named(gb, g_gb, int, 0444);
MODULE_PARM_DESC(gb, "Size in GB");
static int g_bs = 512;
-module_param_named(bs, g_bs, int, S_IRUGO);
+module_param_named(bs, g_bs, int, 0444);
MODULE_PARM_DESC(bs, "Block size (in bytes)");
static int nr_devices = 1;
-module_param(nr_devices, int, S_IRUGO);
+module_param(nr_devices, int, 0444);
MODULE_PARM_DESC(nr_devices, "Number of devices to register");
static bool g_blocking;
-module_param_named(blocking, g_blocking, bool, S_IRUGO);
+module_param_named(blocking, g_blocking, bool, 0444);
MODULE_PARM_DESC(blocking, "Register as a blocking blk-mq driver device");
static bool shared_tags;
-module_param(shared_tags, bool, S_IRUGO);
+module_param(shared_tags, bool, 0444);
MODULE_PARM_DESC(shared_tags, "Share tag set between devices for blk-mq");
static int g_irqmode = NULL_IRQ_SOFTIRQ;
@@ -239,19 +239,19 @@ static const struct kernel_param_ops null_irqmode_param_ops = {
.get = param_get_int,
};
-device_param_cb(irqmode, &null_irqmode_param_ops, &g_irqmode, S_IRUGO);
+device_param_cb(irqmode, &null_irqmode_param_ops, &g_irqmode, 0444);
MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer");
static unsigned long g_completion_nsec = 10000;
-module_param_named(completion_nsec, g_completion_nsec, ulong, S_IRUGO);
+module_param_named(completion_nsec, g_completion_nsec, ulong, 0444);
MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in hardware. Default: 10,000ns");
static int g_hw_queue_depth = 64;
-module_param_named(hw_queue_depth, g_hw_queue_depth, int, S_IRUGO);
+module_param_named(hw_queue_depth, g_hw_queue_depth, int, 0444);
MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 64");
static bool g_use_per_node_hctx;
-module_param_named(use_per_node_hctx, g_use_per_node_hctx, bool, S_IRUGO);
+module_param_named(use_per_node_hctx, g_use_per_node_hctx, bool, 0444);
MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false");
static struct nullb_device *null_alloc_dev(void);
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 3c1a49545e06..949a648fe560 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -478,8 +478,8 @@ static void pkt_debugfs_dev_new(struct pktcdvd_device *pd)
if (!pd->dfs_d_root)
return;
- pd->dfs_f_info = debugfs_create_file("info", S_IRUGO,
- pd->dfs_d_root, pd, &debug_fops);
+ pd->dfs_f_info = debugfs_create_file("info", 0444,
+ pd->dfs_d_root, pd, &debug_fops);
}
static void pkt_debugfs_dev_remove(struct pktcdvd_device *pd)
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 33b36fea1d73..af354047ac4b 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -424,7 +424,7 @@ static struct workqueue_struct *rbd_wq;
* single-major requires >= 0.75 version of userspace rbd utility.
*/
static bool single_major = true;
-module_param(single_major, bool, S_IRUGO);
+module_param(single_major, bool, 0444);
MODULE_PARM_DESC(single_major, "Use a single major number for all rbd devices (default: true)");
static ssize_t rbd_add(struct bus_type *bus, const char *buf,
@@ -468,11 +468,11 @@ static ssize_t rbd_supported_features_show(struct bus_type *bus, char *buf)
return sprintf(buf, "0x%llx\n", RBD_FEATURES_SUPPORTED);
}
-static BUS_ATTR(add, S_IWUSR, NULL, rbd_add);
-static BUS_ATTR(remove, S_IWUSR, NULL, rbd_remove);
-static BUS_ATTR(add_single_major, S_IWUSR, NULL, rbd_add_single_major);
-static BUS_ATTR(remove_single_major, S_IWUSR, NULL, rbd_remove_single_major);
-static BUS_ATTR(supported_features, S_IRUGO, rbd_supported_features_show, NULL);
+static BUS_ATTR(add, 0200, NULL, rbd_add);
+static BUS_ATTR(remove, 0200, NULL, rbd_remove);
+static BUS_ATTR(add_single_major, 0200, NULL, rbd_add_single_major);
+static BUS_ATTR(remove_single_major, 0200, NULL, rbd_remove_single_major);
+static BUS_ATTR(supported_features, 0444, rbd_supported_features_show, NULL);
static struct attribute *rbd_bus_attrs[] = {
&bus_attr_add.attr,
@@ -4204,22 +4204,22 @@ static ssize_t rbd_image_refresh(struct device *dev,
return size;
}
-static DEVICE_ATTR(size, S_IRUGO, rbd_size_show, NULL);
-static DEVICE_ATTR(features, S_IRUGO, rbd_features_show, NULL);
-static DEVICE_ATTR(major, S_IRUGO, rbd_major_show, NULL);
-static DEVICE_ATTR(minor, S_IRUGO, rbd_minor_show, NULL);
-static DEVICE_ATTR(client_addr, S_IRUGO, rbd_client_addr_show, NULL);
-static DEVICE_ATTR(client_id, S_IRUGO, rbd_client_id_show, NULL);
-static DEVICE_ATTR(cluster_fsid, S_IRUGO, rbd_cluster_fsid_show, NULL);
-static DEVICE_ATTR(config_info, S_IRUSR, rbd_config_info_show, NULL);
-static DEVICE_ATTR(pool, S_IRUGO, rbd_pool_show, NULL);
-static DEVICE_ATTR(pool_id, S_IRUGO, rbd_pool_id_show, NULL);
-static DEVICE_ATTR(name, S_IRUGO, rbd_name_show, NULL);
-static DEVICE_ATTR(image_id, S_IRUGO, rbd_image_id_show, NULL);
-static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh);
-static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL);
-static DEVICE_ATTR(snap_id, S_IRUGO, rbd_snap_id_show, NULL);
-static DEVICE_ATTR(parent, S_IRUGO, rbd_parent_show, NULL);
+static DEVICE_ATTR(size, 0444, rbd_size_show, NULL);
+static DEVICE_ATTR(features, 0444, rbd_features_show, NULL);
+static DEVICE_ATTR(major, 0444, rbd_major_show, NULL);
+static DEVICE_ATTR(minor, 0444, rbd_minor_show, NULL);
+static DEVICE_ATTR(client_addr, 0444, rbd_client_addr_show, NULL);
+static DEVICE_ATTR(client_id, 0444, rbd_client_id_show, NULL);
+static DEVICE_ATTR(cluster_fsid, 0444, rbd_cluster_fsid_show, NULL);
+static DEVICE_ATTR(config_info, 0400, rbd_config_info_show, NULL);
+static DEVICE_ATTR(pool, 0444, rbd_pool_show, NULL);
+static DEVICE_ATTR(pool_id, 0444, rbd_pool_id_show, NULL);
+static DEVICE_ATTR(name, 0444, rbd_name_show, NULL);
+static DEVICE_ATTR(image_id, 0444, rbd_image_id_show, NULL);
+static DEVICE_ATTR(refresh, 0200, NULL, rbd_image_refresh);
+static DEVICE_ATTR(current_snap, 0444, rbd_snap_show, NULL);
+static DEVICE_ATTR(snap_id, 0444, rbd_snap_id_show, NULL);
+static DEVICE_ATTR(parent, 0444, rbd_parent_show, NULL);
static struct attribute *rbd_attrs[] = {
&dev_attr_size.attr,
diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c
index 34997df132e2..09537bee387f 100644
--- a/drivers/block/rsxx/core.c
+++ b/drivers/block/rsxx/core.c
@@ -247,19 +247,19 @@ static void rsxx_debugfs_dev_new(struct rsxx_cardinfo *card)
if (IS_ERR_OR_NULL(card->debugfs_dir))
goto failed_debugfs_dir;
- debugfs_stats = debugfs_create_file("stats", S_IRUGO,
+ debugfs_stats = debugfs_create_file("stats", 0444,
card->debugfs_dir, card,
&debugfs_stats_fops);
if (IS_ERR_OR_NULL(debugfs_stats))
goto failed_debugfs_stats;
- debugfs_pci_regs = debugfs_create_file("pci_regs", S_IRUGO,
+ debugfs_pci_regs = debugfs_create_file("pci_regs", 0444,
card->debugfs_dir, card,
&debugfs_pci_regs_fops);
if (IS_ERR_OR_NULL(debugfs_pci_regs))
goto failed_debugfs_pci_regs;
- debugfs_cram = debugfs_create_file("cram", S_IRUGO | S_IWUSR,
+ debugfs_cram = debugfs_create_file("cram", 0644,
card->debugfs_dir, card,
&debugfs_cram_fops);
if (IS_ERR_OR_NULL(debugfs_cram))
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 0617b9922d59..23752dc99b00 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -371,7 +371,7 @@ static ssize_t virtblk_serial_show(struct device *dev,
return err;
}
-static DEVICE_ATTR(serial, S_IRUGO, virtblk_serial_show, NULL);
+static DEVICE_ATTR(serial, 0444, virtblk_serial_show, NULL);
/* The queue's logical block size must be set before calling this */
static void virtblk_update_capacity(struct virtio_blk *vblk, bool resize)
@@ -576,10 +576,10 @@ virtblk_cache_type_show(struct device *dev, struct device_attribute *attr,
}
static const struct device_attribute dev_attr_cache_type_ro =
- __ATTR(cache_type, S_IRUGO,
+ __ATTR(cache_type, 0444,
virtblk_cache_type_show, NULL);
static const struct device_attribute dev_attr_cache_type_rw =
- __ATTR(cache_type, S_IRUGO|S_IWUSR,
+ __ATTR(cache_type, 0644,
virtblk_cache_type_show, virtblk_cache_type_store);
static int virtblk_init_request(struct blk_mq_tag_set *set, struct request *rq,
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 987d665e82de..b55b245e8052 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -98,7 +98,7 @@ MODULE_PARM_DESC(max_queues,
* backend, 4KB page granularity is used.
*/
unsigned int xen_blkif_max_ring_order = XENBUS_MAX_RING_GRANT_ORDER;
-module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, S_IRUGO);
+module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, 0444);
MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring");
/*
* The LRU mechanism to clean the lists of persistent grants needs to
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 21c1be1eb226..66412eededda 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -367,7 +367,7 @@ int __init xen_blkif_interface_init(void)
out: \
return sprintf(buf, format, result); \
} \
- static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
+ static DEVICE_ATTR(name, 0444, show_##name, NULL)
VBD_SHOW_ALLRING(oo_req, "%llu\n");
VBD_SHOW_ALLRING(rd_req, "%llu\n");
@@ -403,7 +403,7 @@ static const struct attribute_group xen_vbdstat_group = {
\
return sprintf(buf, format, ##args); \
} \
- static DEVICE_ATTR(name, S_IRUGO, show_##name, NULL)
+ static DEVICE_ATTR(name, 0444, show_##name, NULL)
VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor);
VBD_SHOW(mode, "%s\n", be->mode);
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 2a8e7813bd1a..ae00a82f350b 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -129,13 +129,12 @@ static const struct block_device_operations xlvbd_block_fops;
*/
static unsigned int xen_blkif_max_segments = 32;
-module_param_named(max_indirect_segments, xen_blkif_max_segments, uint,
- S_IRUGO);
+module_param_named(max_indirect_segments, xen_blkif_max_segments, uint, 0444);
MODULE_PARM_DESC(max_indirect_segments,
"Maximum amount of segments in indirect requests (default is 32)");
static unsigned int xen_blkif_max_queues = 4;
-module_param_named(max_queues, xen_blkif_max_queues, uint, S_IRUGO);
+module_param_named(max_queues, xen_blkif_max_queues, uint, 0444);
MODULE_PARM_DESC(max_queues, "Maximum number of hardware queues/rings used per virtual disk");
/*
@@ -143,7 +142,7 @@ MODULE_PARM_DESC(max_queues, "Maximum number of hardware queues/rings used per v
* backend, 4KB page granularity is used.
*/
static unsigned int xen_blkif_max_ring_order;
-module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, S_IRUGO);
+module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, 0444);
MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring");
#define BLK_RING_SIZE(info) \
--
2.15.0
^ permalink raw reply related
* RE: [PATCH v3] virtio_blk: add DISCARD and WRIET ZEROES command support
From: Liu, Changpeng @ 2018-05-23 20:37 UTC (permalink / raw)
To: Michael S. Tsirkin
Cc: virtualization@lists.linux-foundation.org, stefanha@redhat.com,
pbonzini@redhat.com, cavery@redhat.com
In-Reply-To: <20180330034517-mutt-send-email-mst@kernel.org>
> -----Original Message-----
> From: Michael S. Tsirkin [mailto:mst@redhat.com]
> Sent: Thursday, March 29, 2018 6:07 PM
> To: Liu, Changpeng <changpeng.liu@intel.com>
> Cc: virtualization@lists.linux-foundation.org; cavery@redhat.com;
> stefanha@redhat.com; jasowang@redhat.com; pbonzini@redhat.com;
> Wodkowski, PawelX <pawelx.wodkowski@intel.com>; Harris, James R
> <james.r.harris@intel.com>; fabio.miranda.martins@canonical.com
> Subject: Re: [PATCH v3] virtio_blk: add DISCARD and WRIET ZEROES command
> support
>
> On Fri, Mar 30, 2018 at 08:49:34AM +0800, Changpeng Liu wrote:
> > Existing virtio-blk protocol doesn't have DISCARD/WRITE ZEROES
> > command support, this will impact the performance when using SSD
> > backend over file systems.
> >
> > The idea here is using 16 Bytes payload as one descriptor for
> > DISCARD/WRITE ZEROES command, users can put several ranges into
> > one command, for the purpose to support such feature, two feature
> > flags VIRTIO_BLK_F_DISCARD/VIRTIO_BLK_F_WRITE_ZEROES and two
> > commands VIRTIO_BLK_T_DISCARD/VIRTIO_BLK_T_WRITE_ZEROES are
> > introduced, and some parameters are added to the configuration
> > space to tell the OS the granularity of DISCARD/WRITE ZEROES
> > commands.
>
> Pls fix grammar in this comment, I am not sure what are you
> trying to say.
Okay, will add more description here.
>
> >
> > The specification change list here:
> > https://github.com/oasis-tcs/virtio-spec
>
> Which commit?
>
> > CHANGELOG:
> > v3: finalized the specification change.
>
> Changelog belongs after -- and should be complete
> including changes v1 to v2.
>
> > Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
> > ---
> > drivers/block/virtio_blk.c | 96
> +++++++++++++++++++++++++++++++++++++++--
> > include/uapi/linux/virtio_blk.h | 39 +++++++++++++++++
> > 2 files changed, 132 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
> > index 4a07593c..1943adb 100644
> > --- a/drivers/block/virtio_blk.c
> > +++ b/drivers/block/virtio_blk.c
> > @@ -172,10 +172,53 @@ static int virtblk_add_req(struct virtqueue *vq, struct
> virtblk_req *vbr,
> > return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
> > }
> >
> > +static inline int virtblk_setup_discard_write_zeroes(struct request *req, bool
> unmap)
> > +{
> > + unsigned short segments = blk_rq_nr_discard_segments(req), n = 0;
>
> Split on two lines pls:
> unsigned short n = 0;
>
> > + u32 block_size = queue_logical_block_size(req->q);
>
> Seems to be unused except for the sanity check below. Why?
>
> > + struct virtio_blk_discard_write_zeroes *range;
> > + struct bio *bio;
> > +
> > + if (block_size < 512 || !block_size)
>
> Why 512? And when is it 0?
Ok, actually this check is not necessary, will remove it.
>
> > + return -1;
>
> -1 isn't a normal errno code.
>
> > +
> > + range = kmalloc_array(segments, sizeof(*range), GFP_ATOMIC);
>
> This might be pretty large: with 64K segments it looks like you are
> trying to allocate ~1Mbyte with GFP_ATOMIC which is unlikely to succeed.
> Can we split this up in chunks?
This is already split into chunks: if the backend can only support 1 segment, the number
is always 1. I can replace GFP_ATOMIC with GFP_KERNEL.
>
> > + if (!range)
> > + return -1;
> > +
> > + __rq_for_each_bio(bio, req) {
> > + u64 sector = bio->bi_iter.bi_sector;
> > + u32 num_sectors = bio->bi_iter.bi_size >> 9;
>
> why 9?
Sectors in both the virtio-blk protocol and Linux are expressed in 512-byte units.
>
> > +
> > + range[n].flags.unmap = cpu_to_le32(unmap);
> > + range[n].flags.reserved = cpu_to_le32(0);
> > + range[n].num_sectors = cpu_to_le32(num_sectors);
> > + range[n].sector = cpu_to_le64(sector);
>
> Isn't this causing sparse warnings?
No warnings when I compiled the module.
>
>
> > + n++;
> > + }
> > +
> > + if (WARN_ON_ONCE(n != segments)) {
>
> and when does this happen?
This condition shouldn't happen either; I will remove the check.
>
> > + kfree(range);
> > + return -1;
> > + }
> > +
> > + req->special_vec.bv_page = virt_to_page(range);
> > + req->special_vec.bv_offset = offset_in_page(range);
> > + req->special_vec.bv_len = sizeof(*range) * segments;
> > + req->rq_flags |= RQF_SPECIAL_PAYLOAD;
> > +
> > + return 0;
> > +}
> > +
> > static inline void virtblk_request_done(struct request *req)
> > {
> > struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
> >
> > + if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
> > + kfree(page_address(req->special_vec.bv_page) +
> > + req->special_vec.bv_offset);
> > + }
> > +
> > switch (req_op(req)) {
> > case REQ_OP_SCSI_IN:
> > case REQ_OP_SCSI_OUT:
> > @@ -225,6 +268,7 @@ static blk_status_t virtio_queue_rq(struct
> blk_mq_hw_ctx *hctx,
> > int qid = hctx->queue_num;
> > int err;
> > bool notify = false;
> > + bool unmap = false;
> > u32 type;
> >
> > BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
> > @@ -237,6 +281,13 @@ static blk_status_t virtio_queue_rq(struct
> blk_mq_hw_ctx *hctx,
> > case REQ_OP_FLUSH:
> > type = VIRTIO_BLK_T_FLUSH;
> > break;
> > + case REQ_OP_DISCARD:
> > + type = VIRTIO_BLK_T_DISCARD;
> > + break;
> > + case REQ_OP_WRITE_ZEROES:
> > + type = VIRTIO_BLK_T_WRITE_ZEROES;
> > + unmap = !(req->cmd_flags & REQ_NOUNMAP);
> > + break;
> > case REQ_OP_SCSI_IN:
> > case REQ_OP_SCSI_OUT:
> > type = VIRTIO_BLK_T_SCSI_CMD;
> > @@ -256,9 +307,16 @@ static blk_status_t virtio_queue_rq(struct
> blk_mq_hw_ctx *hctx,
> >
> > blk_mq_start_request(req);
> >
> > + if (type == VIRTIO_BLK_T_DISCARD || type ==
> VIRTIO_BLK_T_WRITE_ZEROES) {
> > + err = virtblk_setup_discard_write_zeroes(req, unmap);
> > + if (err)
> > + return BLK_STS_IOERR;
>
> Does a failure actually indicate an IO error?
Good catch, BLK_STS_RESOURCE makes more sense.
>
>
> > + }
> > +
> > num = blk_rq_map_sg(hctx->queue, req, vbr->sg);
> > if (num) {
> > - if (rq_data_dir(req) == WRITE)
> > + if (rq_data_dir(req) == WRITE || type == VIRTIO_BLK_T_DISCARD
> ||
> > + type == VIRTIO_BLK_T_WRITE_ZEROES)
> > vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev,
> VIRTIO_BLK_T_OUT);
> > else
> > vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev,
> VIRTIO_BLK_T_IN);
> > @@ -777,6 +835,38 @@ static int virtblk_probe(struct virtio_device *vdev)
> > if (!err && opt_io_size)
> > blk_queue_io_opt(q, blk_size * opt_io_size);
> >
> > + if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
> > + q->limits.discard_granularity = blk_size;
> > +
> > + virtio_cread(vdev, struct virtio_blk_config,
> discard_sector_alignment, &v);
> > + if (v)
> > + q->limits.discard_alignment = v << 9;
> > + else
> > + q->limits.discard_alignment = 0;
> > +
> > + virtio_cread(vdev, struct virtio_blk_config, max_discard_sectors,
> &v);
> > + if (v)
> > + blk_queue_max_discard_sectors(q, v);
> > + else
> > + blk_queue_max_discard_sectors(q, UINT_MAX);
> > +
> > + virtio_cread(vdev, struct virtio_blk_config, max_discard_seg, &v);
> > + if (v)
> > + blk_queue_max_discard_segments(q, v);
> > + else
> > + blk_queue_max_discard_segments(q, USHRT_MAX);
> > +
> > + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
> > + }
> > +
> > + if (virtio_has_feature(vdev, VIRTIO_BLK_F_WRITE_ZEROES)) {
> > + virtio_cread(vdev, struct virtio_blk_config,
> max_write_zeroes_sectors, &v);
> > + if (v)
> > + blk_queue_max_write_zeroes_sectors(q, v);
> > + else
> > + blk_queue_max_write_zeroes_sectors(q, UINT_MAX);
> > + }
> > +
> > virtblk_update_capacity(vblk, false);
> > virtio_device_ready(vdev);
> >
> > @@ -885,14 +975,14 @@ static int virtblk_restore(struct virtio_device *vdev)
> > VIRTIO_BLK_F_SCSI,
> > #endif
> > VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY,
> VIRTIO_BLK_F_CONFIG_WCE,
> > - VIRTIO_BLK_F_MQ,
> > + VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD,
> VIRTIO_BLK_F_WRITE_ZEROES,
> > }
> > ;
> > static unsigned int features[] = {
> > VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX,
> VIRTIO_BLK_F_GEOMETRY,
> > VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE,
> > VIRTIO_BLK_F_FLUSH, VIRTIO_BLK_F_TOPOLOGY,
> VIRTIO_BLK_F_CONFIG_WCE,
> > - VIRTIO_BLK_F_MQ,
> > + VIRTIO_BLK_F_MQ, VIRTIO_BLK_F_DISCARD,
> VIRTIO_BLK_F_WRITE_ZEROES,
> > };
> >
> > static struct virtio_driver virtio_blk = {
> > diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h
> > index 9ebe4d9..78a60e6 100644
> > --- a/include/uapi/linux/virtio_blk.h
> > +++ b/include/uapi/linux/virtio_blk.h
> > @@ -38,6 +38,8 @@
> > #define VIRTIO_BLK_F_BLK_SIZE 6 /* Block size of disk is available*/
> > #define VIRTIO_BLK_F_TOPOLOGY 10 /* Topology information is
> available */
> > #define VIRTIO_BLK_F_MQ 12 /* support more than one vq */
> > +#define VIRTIO_BLK_F_DISCARD 13 /* DISCARD command is
> supported */
> > +#define VIRTIO_BLK_F_WRITE_ZEROES 14 /* WRITE ZEROES
> command is supported */
> >
> > /* Legacy feature bits */
> > #ifndef VIRTIO_BLK_NO_LEGACY
> > @@ -86,6 +88,22 @@ struct virtio_blk_config {
> >
> > /* number of vqs, only available when VIRTIO_BLK_F_MQ is set */
> > __u16 num_queues;
> > + /* The maximum discard segment size (if VIRTIO_BLK_F_DISCARD) */
> > + __u32 max_discard_sectors;
> > + /* The maximum number of discard segments (if VIRTIO_BLK_F_DISCARD)
> */
> > + __u32 max_discard_seg;
> > + /* The sector alignment for discard (if VIRTIO_BLK_F_DISCARD) */
> > + __u32 discard_sector_alignment;
> > + /* The maximum number of write zeroes sectors (if
> VIRTIO_BLK_F_WRITE_ZEROES) */
> > + __u32 max_write_zeroes_sectors;
> > + /* The maximum number of write zeroes segments (if
> VIRTIO_BLK_F_WRITE_ZEROES) */
> > + __u32 max_write_zeroes_seg;
> > + /* Device clear this bit when write zeroes command cannot result in
> > + * deallocating one or more sectors
> > + * (if VIRTIO_BLK_F_WRITE_ZEROES with unmap bit)
>
> Pls fix grammar in this comment, I am not sure what are you trying to
> say.
>
> > + */
> > + __u8 write_zeroes_may_unmap;
> > + __u8 unused1[3];
> > } __attribute__((packed));
> >
> > /*
> > @@ -114,6 +132,12 @@ struct virtio_blk_config {
> > /* Get device ID command */
> > #define VIRTIO_BLK_T_GET_ID 8
> >
> > +/* Discard command */
> > +#define VIRTIO_BLK_T_DISCARD 11
> > +
> > +/* Write zeroes command */
> > +#define VIRTIO_BLK_T_WRITE_ZEROES 13
> > +
> > #ifndef VIRTIO_BLK_NO_LEGACY
> > /* Barrier before this op. */
> > #define VIRTIO_BLK_T_BARRIER 0x80000000
> > @@ -133,6 +157,21 @@ struct virtio_blk_outhdr {
> > __virtio64 sector;
> > };
> >
> > +/*
> > + * discard/write zeroes range for each request.
> > + */
> > +struct virtio_blk_discard_write_zeroes {
> > + /* discard/write zeroes start sector */
> > + __virtio64 sector;
> > + /* number of discard/write zeroes sectors */
> > + __virtio32 num_sectors;
> > + /* valid for write zeroes command */
> > + struct {
> > + __virtio32 unmap:1;
> > + __virtio32 reserved:31;
> > + } flags;
> > +};
> > +
>
> You can't use bitmaps in portable code.
> The format differs between architectures.
>
> > #ifndef VIRTIO_BLK_NO_LEGACY
> > struct virtio_scsi_inhdr {
> > __virtio32 errors;
> > --
> > 1.9.3
^ permalink raw reply
* Re: [PATCH v3 16/27] compiler: Option to add PROVIDE_HIDDEN replacement for weak symbols
From: Randy Dunlap @ 2018-05-23 21:16 UTC (permalink / raw)
To: Thomas Garnier, Herbert Xu, David S . Miller, Thomas Gleixner,
Ingo Molnar, H . Peter Anvin, Peter Zijlstra, Josh Poimboeuf,
Greg Kroah-Hartman, Philippe Ombredanne, Kate Stewart,
Arnaldo Carvalho de Melo, Yonghong Song, Andrey Ryabinin,
Kees Cook, Tom Lendacky, Kirill A . Shutemov, Andy Lutomirski,
Dominik Brodowski, Borislav Petkov, Borislav Petkov,
Rafael J . Wysocki
Cc: linux-arch, kvm, linux-pm, x86, linux-doc, linux-kernel,
virtualization, linux-sparse, linux-crypto, kernel-hardening,
xen-devel
In-Reply-To: <20180523195421.180248-17-thgarnie@google.com>
On 05/23/2018 12:54 PM, Thomas Garnier wrote:
> Provide an option to have a PROVIDE_HIDDEN (linker script) entry for
> each weak symbol. This option solve an error in x86_64 where the linker
solves
> optimizes pie generate code to be non-pie because --emit-relocs was used
generated
> instead of -pie (to reduce dynamic relocations).
>
> Signed-off-by: Thomas Garnier <thgarnie@google.com>
> ---
> init/Kconfig | 7 +++++++
> scripts/link-vmlinux.sh | 14 ++++++++++++++
> 2 files changed, 21 insertions(+)
>
> diff --git a/init/Kconfig b/init/Kconfig
> index 0fc3a58d9f2f..2866cca86b4a 100644
> --- a/init/Kconfig
> +++ b/init/Kconfig
> @@ -1954,6 +1954,13 @@ config ASN1
> inform it as to what tags are to be expected in a stream and what
> functions to call on what tags.
>
> +config WEAK_PROVIDE_HIDDEN
> + bool
> + help
> + Generate linker script PROVIDE_HIDDEN entries for all weak symbols. It
> + allows to prevent non-pie code being replaced by the linker if the
non-PIE
> + emit-relocs option is used instead of pie (useful for x86_64 pie).
PIE PIE).
> +
--
~Randy
^ permalink raw reply
* Re: [PATCH v3 23/27] x86/modules: Adapt module loading for PIE support
From: Randy Dunlap @ 2018-05-23 21:26 UTC (permalink / raw)
To: Thomas Garnier, Herbert Xu, David S . Miller, Thomas Gleixner,
Ingo Molnar, H . Peter Anvin, Peter Zijlstra, Josh Poimboeuf,
Greg Kroah-Hartman, Philippe Ombredanne, Kate Stewart,
Arnaldo Carvalho de Melo, Yonghong Song, Andrey Ryabinin,
Kees Cook, Tom Lendacky, Kirill A . Shutemov, Andy Lutomirski,
Dominik Brodowski, Borislav Petkov, Borislav Petkov,
Rafael J . Wysocki
Cc: linux-arch, kvm, linux-pm, x86, linux-doc, linux-kernel,
virtualization, linux-sparse, linux-crypto, kernel-hardening,
xen-devel
In-Reply-To: <20180523195421.180248-24-thgarnie@google.com>
Hi,
(for several patches in this series:)
The commit message is confusing. See below.
On 05/23/2018 12:54 PM, Thomas Garnier wrote:
> Adapt module loading to support PIE relocations. Generate dynamic GOT if
> a symbol requires it but no entry exist in the kernel GOT.
exists
>
> Position Independent Executable (PIE) support will allow to extended the
will allow us to extend the
> KASLR randomization range below the -2G memory limit.
Does that say "below the negative 2G memory limit"?
I don't get it.
>
> Signed-off-by: Thomas Garnier <thgarnie@google.com>
> ---
> arch/x86/Makefile | 4 +
> arch/x86/include/asm/module.h | 11 ++
> arch/x86/include/asm/sections.h | 4 +
> arch/x86/kernel/module.c | 181 +++++++++++++++++++++++++++++++-
> arch/x86/kernel/module.lds | 3 +
> 5 files changed, 198 insertions(+), 5 deletions(-)
> create mode 100644 arch/x86/kernel/module.lds
Thanks,
--
~Randy
^ permalink raw reply
* Re: [PATCH] block drivers/block: Use octal not symbolic permissions
From: Jens Axboe @ 2018-05-23 21:27 UTC (permalink / raw)
To: Joe Perches, Ed L. Cashin, Philipp Reisner, Lars Ellenberg,
Jiri Kosina, Josef Bacik, linux-block, Ilya Dryomov, Sage Weil,
Alex Elder, Joshua Morris, Philip Kelleher, Michael S. Tsirkin,
Jason Wang, Konrad Rzeszutek Wilk, Roger Pau Monné,
Boris Ostrovsky, Juergen Gross
Cc: linux-kernel, nbd, xen-devel, ceph-devel, virtualization,
drbd-dev
In-Reply-To: <5e97a292c1ba38a6b5a0caa271d85dc3de1b2aa7.1527105857.git.joe@perches.com>
On 5/23/18 2:05 PM, Joe Perches wrote:
> Convert the S_<FOO> symbolic permissions to their octal equivalents as
> using octal and not symbolic permissions is preferred by many as more
> readable.
>
> see: https://lkml.org/lkml/2016/8/2/1945
>
> Done with automated conversion via:
> $ ./scripts/checkpatch.pl -f --types=SYMBOLIC_PERMS --fix-inplace <files...>
>
> Miscellanea:
>
> o Wrapped modified multi-line calls to a single line where appropriate
> o Realign modified multi-line calls to open parenthesis
Honestly, I see this as pretty needless churn.
--
Jens Axboe
^ permalink raw reply
* Re: [PATCH] block drivers/block: Use octal not symbolic permissions
From: Joe Perches @ 2018-05-23 21:41 UTC (permalink / raw)
To: Jens Axboe, Ed L. Cashin, Philipp Reisner, Lars Ellenberg,
Jiri Kosina, Josef Bacik, linux-block, Ilya Dryomov, Sage Weil,
Alex Elder, Philip Kelleher, Michael S. Tsirkin, Jason Wang,
Konrad Rzeszutek Wilk, Roger Pau Monné, Boris Ostrovsky,
Juergen Gross
Cc: linux-kernel, nbd, xen-devel, ceph-devel, virtualization,
drbd-dev
In-Reply-To: <6bf44255-145f-bf14-e254-860731ff9296@kernel.dk>
On Wed, 2018-05-23 at 15:27 -0600, Jens Axboe wrote:
> On 5/23/18 2:05 PM, Joe Perches wrote:
> > Convert the S_<FOO> symbolic permissions to their octal equivalents as
> > using octal and not symbolic permissions is preferred by many as more
> > readable.
> >
> > see: https://lkml.org/lkml/2016/8/2/1945
> >
> > Done with automated conversion via:
> > $ ./scripts/checkpatch.pl -f --types=SYMBOLIC_PERMS --fix-inplace <files...>
> >
> > Miscellanea:
> >
> > o Wrapped modified multi-line calls to a single line where appropriate
> > o Realign modified multi-line calls to open parenthesis
>
> Honestly, I see this as pretty needless churn.
It's just for consistency and the ability to highlight
somewhat unusual permissions uses like just 0400.
Apply it at your leisure or ignore it.
btw: Joshua Morris' email address is bouncing.
Maybe it should be removed from MAINTAINERS.
---
MAINTAINERS | 1 -
1 file changed, 1 deletion(-)
diff --git a/MAINTAINERS b/MAINTAINERS
index 9051a9ca24a2..0d546a10d0b9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5598,7 +5598,6 @@ F: drivers/base/firmware_loader/
F: include/linux/firmware.h
FLASH ADAPTER DRIVER (IBM Flash Adapter 900GB Full Height PCI Flash
Card)
-M: Joshua Morris <josh.h.morris@us.ibm.com>
M: Philip Kelleher <pjk1939@linux.vnet.ibm.com>
S: Maintained
F: drivers/block/rsxx/
^ permalink raw reply related
* Re: [PATCH v3 23/27] x86/modules: Adapt module loading for PIE support
From: Thomas Garnier via Virtualization @ 2018-05-23 22:01 UTC (permalink / raw)
To: Randy Dunlap
Cc: Kate Stewart, Nicolas Pitre, the arch/x86 maintainers,
Sergey Senozhatsky, Petr Mladek, Len Brown, Peter Zijlstra,
Yonghong Song, Christopher Li, Dave Hansen, Dominik Brodowski,
LKML, Masahiro Yamada, Jan Beulich, Pavel Machek, H . Peter Anvin,
Kernel Hardening, Christoph Lameter, Alok Kataria,
Linux Doc Mailing List, linux-arch, Herbert Xu
In-Reply-To: <168ebedb-7c27-d1f3-c2f9-223d44186a52@infradead.org>
On Wed, May 23, 2018 at 2:27 PM Randy Dunlap <rdunlap@infradead.org> wrote:
> Hi,
> (for several patches in this series:)
> The commit message is confusing. See below.
Thanks for the edits, I will change the different commit messages.
> On 05/23/2018 12:54 PM, Thomas Garnier wrote:
> > Adapt module loading to support PIE relocations. Generate dynamic GOT if
> > a symbol requires it but no entry exist in the kernel GOT.
> exists
> >
> > Position Independent Executable (PIE) support will allow to extended the
> will allow us to extend the
> > KASLR randomization range below the -2G memory limit.
> Does that say "below th negative 2G memory limit"?
> I don't get it.
Yes, below 0xffffffff80000000 basically. I think I will just say that.
> >
> > Signed-off-by: Thomas Garnier <thgarnie@google.com>
> > ---
> > arch/x86/Makefile | 4 +
> > arch/x86/include/asm/module.h | 11 ++
> > arch/x86/include/asm/sections.h | 4 +
> > arch/x86/kernel/module.c | 181 +++++++++++++++++++++++++++++++-
> > arch/x86/kernel/module.lds | 3 +
> > 5 files changed, 198 insertions(+), 5 deletions(-)
> > create mode 100644 arch/x86/kernel/module.lds
> Thanks,
> --
> ~Randy
--
Thomas
^ permalink raw reply
* Re: [RFC V2] virtio: Add platform specific DMA API translation for virito devices
From: Benjamin Herrenschmidt @ 2018-05-23 22:27 UTC (permalink / raw)
To: Michael S. Tsirkin, Anshuman Khandual
Cc: robh, mpe, linux-kernel, virtualization, hch, joe, linuxppc-dev,
elfring, david
In-Reply-To: <20180523213703-mutt-send-email-mst@kernel.org>
On Wed, 2018-05-23 at 21:50 +0300, Michael S. Tsirkin wrote:
> I re-read that discussion and I'm still unclear on the
> original question, since I got several apparently
> conflicting answers.
>
> I asked:
>
> Why isn't setting VIRTIO_F_IOMMU_PLATFORM on the
> hypervisor side sufficient?
I thought I had replied to this...
There are a couple of reasons:
- First qemu doesn't know that the guest will switch to "secure mode"
in advance. There is no difference between a normal and a secure
partition until the partition does the magic UV call to "enter secure
mode" and qemu doesn't see any of it. So who can set the flag here ?
- Second, when using VIRTIO_F_IOMMU_PLATFORM, we also make qemu (or
vhost) go through the emulated MMIO for every access to the guest,
which adds additional overhead.
Cheers,
Ben.
>
>
> > arch/powerpc/include/asm/dma-mapping.h | 6 ++++++
> > arch/powerpc/platforms/pseries/iommu.c | 11 +++++++++++
> > drivers/virtio/virtio_ring.c | 10 ++++++++++
> > 3 files changed, 27 insertions(+)
> >
> > diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h
> > index 8fa3945..056e578 100644
> > --- a/arch/powerpc/include/asm/dma-mapping.h
> > +++ b/arch/powerpc/include/asm/dma-mapping.h
> > @@ -115,4 +115,10 @@ extern u64 __dma_get_required_mask(struct device *dev);
> > #define ARCH_HAS_DMA_MMAP_COHERENT
> >
> > #endif /* __KERNEL__ */
> > +
> > +#define platform_forces_virtio_dma platform_forces_virtio_dma
> > +
> > +struct virtio_device;
> > +
> > +extern bool platform_forces_virtio_dma(struct virtio_device *vdev);
> > #endif /* _ASM_DMA_MAPPING_H */
> > diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
> > index 06f0296..a2ec15a 100644
> > --- a/arch/powerpc/platforms/pseries/iommu.c
> > +++ b/arch/powerpc/platforms/pseries/iommu.c
> > @@ -38,6 +38,7 @@
> > #include <linux/of.h>
> > #include <linux/iommu.h>
> > #include <linux/rculist.h>
> > +#include <linux/virtio.h>
> > #include <asm/io.h>
> > #include <asm/prom.h>
> > #include <asm/rtas.h>
> > @@ -1396,3 +1397,13 @@ static int __init disable_multitce(char *str)
> > __setup("multitce=", disable_multitce);
> >
> > machine_subsys_initcall_sync(pseries, tce_iommu_bus_notifier_init);
> > +
> > +bool platform_forces_virtio_dma(struct virtio_device *vdev)
> > +{
> > + /*
> > + * On protected guest platforms, force virtio core to use DMA
> > + * MAP API for all virtio devices. But there can also be some
> > + * exceptions for individual devices like virtio balloon.
> > + */
> > + return (of_find_compatible_node(NULL, NULL, "ibm,ultravisor") != NULL);
> > +}
>
> Isn't this kind of slow? vring_use_dma_api is on
> data path and supposed to be very fast.
>
> > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > index 21d464a..47ea6c3 100644
> > --- a/drivers/virtio/virtio_ring.c
> > +++ b/drivers/virtio/virtio_ring.c
> > @@ -141,8 +141,18 @@ struct vring_virtqueue {
> > * unconditionally on data path.
> > */
> >
> > +#ifndef platform_forces_virtio_dma
> > +static inline bool platform_forces_virtio_dma(struct virtio_device *vdev)
> > +{
> > + return false;
> > +}
> > +#endif
> > +
> > static bool vring_use_dma_api(struct virtio_device *vdev)
> > {
> > + if (platform_forces_virtio_dma(vdev))
> > + return true;
> > +
> > if (!virtio_has_iommu_quirk(vdev))
> > return true;
> >
> > --
> > 2.9.3
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox