From: Thomas Gleixner <tglx@linutronix.de>
To: LKML <linux-kernel@vger.kernel.org>
Cc: x86@kernel.org, Linus Torvalds <torvalds@linux-foundation.org>,
Tim Chen <tim.c.chen@linux.intel.com>,
Josh Poimboeuf <jpoimboe@kernel.org>,
Andrew Cooper <Andrew.Cooper3@citrix.com>,
Pawan Gupta <pawan.kumar.gupta@linux.intel.com>,
Johannes Wikner <kwikner@ethz.ch>,
Alyssa Milburn <alyssa.milburn@linux.intel.com>,
Jann Horn <jannh@google.com>, "H.J. Lu" <hjl.tools@gmail.com>,
Joao Moreira <joao.moreira@intel.com>,
Joseph Nuzman <joseph.nuzman@intel.com>,
Steven Rostedt <rostedt@goodmis.org>,
Juergen Gross <jgross@suse.com>,
"Peter Zijlstra (Intel)" <peterz@infradead.org>,
Masami Hiramatsu <mhiramat@kernel.org>,
Alexei Starovoitov <ast@kernel.org>,
Daniel Borkmann <daniel@iogearbox.net>
Subject: Re: [patch 00/38] x86/retbleed: Call depth tracking mitigation
Date: Mon, 18 Jul 2022 21:30:45 +0200 [thread overview]
Message-ID: <87tu7euska.ffs@tglx> (raw)
In-Reply-To: <87wncauslw.ffs@tglx>
On Mon, Jul 18 2022 at 21:29, Thomas Gleixner wrote:
>> The implementation falls back to the allocated thunks when padding is not
>> available. I'll send out the GCC patch and the required kernel patch as a
>> reply to this series after polishing it a bit.
>
> Here it goes. GCC hackery first.
And the kernel counterpart.
---
Subject: x86/callthunks: Put thunks into compiler provided padding area
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 15 Jul 2022 16:12:47 +0200
- NOT FOR INCLUSION -
Let the compiler add a 16-byte padding in front of each function entry
point and put the call depth accounting there. That avoids calling out
into the module area and reduces ITLB pressure.
Not-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/x86/Kconfig | 14 ++++++
arch/x86/Makefile | 4 +
arch/x86/kernel/callthunks.c | 99 ++++++++++++++++++++++++++++++++++++++++++-
3 files changed, 115 insertions(+), 2 deletions(-)
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2440,6 +2440,9 @@ config CC_HAS_SLS
config CC_HAS_RETURN_THUNK
def_bool $(cc-option,-mfunction-return=thunk-extern)
+config CC_HAS_PADDING
+ def_bool $(cc-option,-mforce-function-padding)
+
config HAVE_CALL_THUNKS
def_bool y
depends on RETHUNK && OBJTOOL
@@ -2512,6 +2515,17 @@ config CALL_DEPTH_TRACKING
of this option is marginal as the call depth tracking is using
run-time generated call thunks and call patching.
+config CALL_THUNKS_IN_PADDING
+ bool "Put call depth into padding area before function"
+ depends on CALL_DEPTH_TRACKING && CC_HAS_PADDING
+ default n
+ help
+ Put the call depth accounting into a padding area before the
+ function entry. This padding area is generated by the
+ compiler. This increases text size by ~5%. For non-affected
+ systems this space is unused. On affected SKL systems this
+ results in a significant performance gain.
+
config CALL_THUNKS_DEBUG
bool "Enable call thunks and call depth tracking debugging"
depends on CALL_DEPTH_TRACKING
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -197,6 +197,10 @@ ifdef CONFIG_SLS
KBUILD_CFLAGS += -mharden-sls=all
endif
+ifdef CONFIG_CALL_THUNKS_IN_PADDING
+ KBUILD_CFLAGS += -mforce-function-padding
+endif
+
KBUILD_LDFLAGS += -m elf_$(UTS_MACHINE)
ifdef CONFIG_LTO_CLANG
--- a/arch/x86/kernel/callthunks.c
+++ b/arch/x86/kernel/callthunks.c
@@ -92,6 +92,7 @@ struct thunk_mem {
struct thunk_mem_area {
struct thunk_mem *tmem;
+ unsigned long *dests;
unsigned long start;
unsigned long nthunks;
};
@@ -181,6 +182,16 @@ static __init_or_module void callthunk_f
tmem->base + area->start * callthunk_desc.thunk_size,
area->start, area->nthunks);
+ /* Remove thunks in the padding area */
+ for (i = 0; area->dests && i < area->nthunks; i++) {
+ void *dest = (void *)area->dests[i];
+
+ if (!dest)
+ continue;
+ pr_info("Remove %px at index %u\n", dest, i);
+ btree_remove64(&call_thunks, (unsigned long)dest);
+ }
+
/* Jump starts right after the template */
thunk = tmem->base + area->start * callthunk_desc.thunk_size;
tp = thunk + callthunk_desc.template_size;
@@ -204,6 +215,7 @@ static __init_or_module void callthunk_f
size = area->nthunks * callthunk_desc.thunk_size;
text_poke_set_locked(thunk, 0xcc, size);
}
+ vfree(area->dests);
kfree(area);
}
@@ -289,7 +301,8 @@ patch_paravirt_call_sites(struct paravir
patch_call(p->instr, layout);
}
-static struct thunk_mem_area *callthunks_alloc(unsigned int nthunks)
+static struct thunk_mem_area *callthunks_alloc(unsigned int nthunks,
+ bool module)
{
struct thunk_mem_area *area;
unsigned int size, mapsize;
@@ -299,6 +312,13 @@ static struct thunk_mem_area *callthunks
if (!area)
return NULL;
+ if (module) {
+ area->dests = vzalloc(nthunks * sizeof(unsigned long));
+ if (!area->dests)
+ goto free_area;
+ pr_info("Allocated dests array: %px\n", area->dests);
+ }
+
list_for_each_entry(tmem, &thunk_mem_list, list) {
unsigned long start;
@@ -340,6 +360,7 @@ static struct thunk_mem_area *callthunks
free_tmem:
kfree(tmem);
free_area:
+ vfree(area->dests);
kfree(area);
return NULL;
}
@@ -372,6 +393,73 @@ static __init_or_module int callthunk_se
return 0;
}
+int setup_padding_thunks(s32 *start, s32 *end, struct thunk_mem_area *area,
+ struct module_layout *layout)
+{
+ int nthunks = 0, idx = 0;
+ s32 *s;
+
+ if (callthunk_desc.template_size > 16)
+ return 0;
+
+ for (s = start; s < end; s++) {
+ void *thunk, *tp, *dest = (void *)s + *s;
+ unsigned long key = (unsigned long)dest;
+ int fail, i;
+ u8 opcode;
+
+ if (is_inittext(layout, dest)) {
+ prdbg("Ignoring init dest: %pS %px\n", dest, dest);
+ return 0;
+ }
+
+ /* Multiple symbols can have the same location. */
+ if (btree_lookup64(&call_thunks, key)) {
+ prdbg("Ignoring duplicate dest: %pS %px\n", dest, dest);
+ continue;
+ }
+
+ thunk = tp = dest - 16;
+ prdbg("Probing dest: %pS %px at %px\n", dest, dest, tp);
+ pagefault_disable();
+ fail = 0;
+ for (i = 0; !fail && i < 16; i++) {
+ if (get_kernel_nofault(opcode, tp + i)) {
+ fail = 1;
+ } else if (opcode != 0xcc) {
+ fail = 2;
+ }
+ }
+ pagefault_enable();
+ switch (fail) {
+ case 1:
+ prdbg("Faulted for dest: %pS %px\n", dest, dest);
+ nthunks++;
+ continue;
+ case 2:
+ prdbg("No padding for dest: %pS %px\n", dest, dest);
+ nthunks++;
+ continue;
+ }
+
+ prdbg("Thunk for dest: %pS %px at %px\n", dest, dest, tp);
+ memcpy(tp, callthunk_desc.template, callthunk_desc.template_size);
+ tp += callthunk_desc.template_size;
+ memcpy(tp, x86_nops[6], 6);
+
+ if (area->dests) {
+ pr_info("Insert %px at index %d\n", dest, idx);
+ area->dests[idx++] = key;
+ }
+
+ fail = btree_insert64(&call_thunks, key, (void *)thunk, GFP_KERNEL);
+ if (fail)
+ return fail;
+ }
+ prdbg("%d external thunks required\n", nthunks);
+ return 0;
+}
+
static __init_or_module int callthunks_setup(struct callthunk_sites *cs,
struct module_layout *layout)
{
@@ -394,7 +482,7 @@ static __init_or_module int callthunks_s
if (!nthunks)
goto patch;
- area = callthunks_alloc(nthunks);
+ area = callthunks_alloc(nthunks, !!layout->mtn.mod);
if (!area)
return -ENOMEM;
@@ -420,6 +508,13 @@ static __init_or_module int callthunks_s
prdbg("Using thunk vbuf %px\n", vbuf);
}
+ if (IS_ENABLED(CONFIG_CALL_THUNKS_IN_PADDING)) {
+ ret = setup_padding_thunks(cs->syms_start, cs->syms_end,
+ area, layout);
+ if (ret < 0)
+ goto fail;
+ }
+
for (s = cs->syms_start; s < cs->syms_end; s++) {
void *dest = (void *)s + *s;
next prev parent reply other threads:[~2022-07-18 19:33 UTC|newest]
Thread overview: 142+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-07-16 23:17 [patch 00/38] x86/retbleed: Call depth tracking mitigation Thomas Gleixner
2022-07-16 23:17 ` [patch 01/38] x86/paravirt: Ensure proper alignment Thomas Gleixner
2022-07-16 23:17 ` [patch 02/38] x86/cpu: Use native_wrmsrl() in load_percpu_segment() Thomas Gleixner
2022-07-17 0:22 ` Andrew Cooper
2022-07-17 15:20 ` Linus Torvalds
2022-07-17 19:08 ` Thomas Gleixner
2022-07-17 20:08 ` Thomas Gleixner
2022-07-17 20:13 ` Thomas Gleixner
2022-07-17 21:54 ` Thomas Gleixner
2022-07-18 5:11 ` Juergen Gross
2022-07-18 6:54 ` Thomas Gleixner
2022-07-18 8:55 ` Thomas Gleixner
2022-07-18 9:31 ` Peter Zijlstra
2022-07-18 10:33 ` Thomas Gleixner
2022-07-18 11:42 ` Thomas Gleixner
2022-07-18 17:52 ` [patch 0/3] x86/cpu: Sanitize switch_to_new_gdt() Thomas Gleixner
2022-07-18 17:52 ` [patch 1/3] x86/cpu: Remove segment load from switch_to_new_gdt() Thomas Gleixner
2022-07-18 18:43 ` Linus Torvalds
2022-07-18 18:55 ` Thomas Gleixner
2022-07-18 17:52 ` [patch 2/3] x86/cpu: Get rid of redundant switch_to_new_gdt() invocations Thomas Gleixner
2022-07-18 17:52 ` [patch 3/3] x86/cpu: Re-enable stackprotector Thomas Gleixner
2022-07-16 23:17 ` [patch 03/38] x86/modules: Set VM_FLUSH_RESET_PERMS in module_alloc() Thomas Gleixner
2022-07-16 23:17 ` [patch 04/38] x86/vdso: Ensure all kernel code is seen by objtool Thomas Gleixner
2022-07-16 23:17 ` [patch 05/38] btree: Initialize early when builtin Thomas Gleixner
2022-07-16 23:17 ` [patch 06/38] objtool: Allow GS relative relocs Thomas Gleixner
2022-07-16 23:17 ` [patch 07/38] objtool: Track init section Thomas Gleixner
2022-07-16 23:17 ` [patch 08/38] objtool: Add .call_sites section Thomas Gleixner
2022-07-16 23:17 ` [patch 09/38] objtool: Add .sym_sites section Thomas Gleixner
2022-07-16 23:17 ` [patch 10/38] objtool: Add --hacks=skylake Thomas Gleixner
2022-07-16 23:17 ` [patch 11/38] objtool: Allow STT_NOTYPE -> STT_FUNC+0 tail-calls Thomas Gleixner
2022-07-16 23:17 ` [patch 12/38] x86/entry: Make sync_regs() invocation a tail call Thomas Gleixner
2022-07-16 23:17 ` [patch 13/38] x86/modules: Make module_alloc() generally available Thomas Gleixner
2022-07-16 23:17 ` [patch 14/38] x86/Kconfig: Add CONFIG_CALL_THUNKS Thomas Gleixner
2022-07-16 23:17 ` [patch 15/38] x86/retbleed: Add X86_FEATURE_CALL_DEPTH Thomas Gleixner
2022-07-16 23:17 ` [patch 16/38] modules: Make struct module_layout unconditionally available Thomas Gleixner
2022-07-16 23:17 ` [patch 17/38] module: Add arch_data to module_layout Thomas Gleixner
2022-07-16 23:17 ` [patch 18/38] mm/vmalloc: Provide huge page mappings Thomas Gleixner
2022-07-16 23:17 ` [patch 19/38] x86/module: Provide __module_alloc() Thomas Gleixner
2022-07-16 23:17 ` [patch 20/38] x86/alternatives: Provide text_poke_[copy|set]_locked() Thomas Gleixner
2022-07-16 23:17 ` [patch 21/38] x86/entry: Make some entry symbols global Thomas Gleixner
2022-07-16 23:17 ` [patch 22/38] x86/paravirt: Make struct paravirt_call_site unconditionally available Thomas Gleixner
2022-07-16 23:17 ` [patch 23/38] x86/callthunks: Add call patching for call depth tracking Thomas Gleixner
2022-07-16 23:17 ` [patch 24/38] module: Add layout for callthunks tracking Thomas Gleixner
2022-07-16 23:17 ` [patch 25/38] x86/modules: Add call thunk patching Thomas Gleixner
2022-07-16 23:17 ` [patch 26/38] x86/returnthunk: Allow different return thunks Thomas Gleixner
2022-07-16 23:17 ` [patch 27/38] x86/asm: Provide ALTERNATIVE_3 Thomas Gleixner
2022-07-16 23:17 ` [patch 28/38] x86/retbleed: Add SKL return thunk Thomas Gleixner
2022-07-16 23:17 ` [patch 29/38] x86/retpoline: Add SKL retthunk retpolines Thomas Gleixner
2022-07-16 23:17 ` [patch 30/38] x86/retbleed: Add SKL call thunk Thomas Gleixner
2022-07-16 23:18 ` [patch 31/38] x86/calldepth: Add ret/call counting for debug Thomas Gleixner
2022-07-16 23:18 ` [patch 32/38] static_call: Add call depth tracking support Thomas Gleixner
2022-07-16 23:18 ` [patch 33/38] kallsyms: Take callthunks into account Thomas Gleixner
2022-07-16 23:18 ` [patch 34/38] x86/orc: Make it callthunk aware Thomas Gleixner
2022-07-16 23:18 ` [patch 35/38] kprobes: Add callthunk blacklisting Thomas Gleixner
2022-07-16 23:18 ` [patch 36/38] x86/ftrace: Make it call depth tracking aware Thomas Gleixner
2022-07-18 21:01 ` Steven Rostedt
2022-07-19 8:46 ` Peter Zijlstra
2022-07-19 13:06 ` Steven Rostedt
2022-07-16 23:18 ` [patch 37/38] x86/bpf: Emit call depth accounting if required Thomas Gleixner
2022-07-19 5:30 ` Alexei Starovoitov
2022-07-19 8:34 ` Peter Zijlstra
2022-07-16 23:18 ` [patch 38/38] x86/retbleed: Add call depth tracking mitigation Thomas Gleixner
2022-07-17 9:45 ` [patch 00/38] x86/retbleed: Call " David Laight
2022-07-17 15:07 ` Thomas Gleixner
2022-07-17 17:56 ` David Laight
2022-07-17 19:15 ` Thomas Gleixner
2022-07-18 19:29 ` Thomas Gleixner
2022-07-18 19:30 ` Thomas Gleixner [this message]
2022-07-18 19:51 ` Linus Torvalds
2022-07-18 20:44 ` Thomas Gleixner
2022-07-18 21:01 ` Linus Torvalds
2022-07-18 21:43 ` Peter Zijlstra
2022-07-18 22:34 ` Linus Torvalds
2022-07-18 23:52 ` Peter Zijlstra
2022-07-18 21:18 ` Peter Zijlstra
2022-07-18 22:22 ` Thomas Gleixner
2022-07-18 22:47 ` Joao Moreira
2022-07-18 22:55 ` Sami Tolvanen
2022-07-18 23:08 ` Joao Moreira
2022-07-18 23:19 ` Thomas Gleixner
2022-07-18 23:42 ` Linus Torvalds
2022-07-18 23:52 ` Linus Torvalds
2022-07-18 23:57 ` Peter Zijlstra
2022-07-19 0:03 ` Linus Torvalds
2022-07-19 0:11 ` Linus Torvalds
2022-07-19 0:23 ` Peter Zijlstra
2022-07-19 1:02 ` Linus Torvalds
2022-07-19 17:19 ` Sami Tolvanen
2022-07-20 21:13 ` Peter Zijlstra
2022-07-21 8:21 ` David Laight
2022-07-21 10:56 ` David Laight
2022-07-21 15:54 ` Peter Zijlstra
2022-07-21 17:55 ` Peter Zijlstra
2022-07-21 18:06 ` Linus Torvalds
2022-07-21 18:27 ` Peter Zijlstra
2022-07-21 18:32 ` Linus Torvalds
2022-07-21 20:22 ` Joao Moreira
2022-07-22 0:16 ` Sami Tolvanen
2022-07-22 10:23 ` Peter Zijlstra
2022-07-22 15:38 ` Sami Tolvanen
2022-07-21 22:01 ` David Laight
2022-07-22 11:03 ` Peter Zijlstra
2022-07-22 13:27 ` David Laight
2022-07-23 9:50 ` Thomas Gleixner
2022-07-19 0:01 ` Linus Torvalds
2022-07-19 0:19 ` Joao Moreira
2022-07-19 17:21 ` Sami Tolvanen
2022-07-19 17:58 ` Joao Moreira
2022-07-19 8:26 ` David Laight
2022-07-19 16:27 ` Linus Torvalds
2022-07-19 17:23 ` Sami Tolvanen
2022-07-19 17:27 ` Linus Torvalds
2022-07-19 18:06 ` Sami Tolvanen
2022-07-19 20:10 ` Peter Zijlstra
2022-07-18 22:48 ` Sami Tolvanen
2022-07-18 22:59 ` Thomas Gleixner
2022-07-18 23:10 ` Sami Tolvanen
2022-07-18 23:39 ` Linus Torvalds
2022-07-18 23:51 ` Peter Zijlstra
2022-07-20 9:00 ` Thomas Gleixner
2022-07-20 16:55 ` Sami Tolvanen
2022-07-20 19:42 ` Sami Tolvanen
2022-07-22 20:11 ` Tim Chen
2022-07-22 22:18 ` Linus Torvalds
2022-07-18 19:55 ` Thomas Gleixner
2022-07-19 10:24 ` Virt " Andrew Cooper
2022-07-19 14:13 ` Thomas Gleixner
2022-07-19 16:23 ` Andrew Cooper
2022-07-19 21:17 ` Thomas Gleixner
2022-07-19 14:45 ` Michael Kelley (LINUX)
2022-07-19 20:16 ` Peter Zijlstra
2022-07-20 16:57 ` [patch 00/38] x86/retbleed: " Steven Rostedt
2022-07-20 17:09 ` Linus Torvalds
2022-07-20 17:24 ` Peter Zijlstra
2022-07-20 17:50 ` Steven Rostedt
2022-07-20 18:07 ` Linus Torvalds
2022-07-20 18:31 ` Steven Rostedt
2022-07-20 18:43 ` Linus Torvalds
2022-07-20 19:11 ` Steven Rostedt
2022-07-20 19:36 ` Kees Cook
2022-07-20 19:43 ` Steven Rostedt
2022-07-20 21:36 ` Peter Zijlstra
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=87tu7euska.ffs@tglx \
--to=tglx@linutronix.de \
--cc=Andrew.Cooper3@citrix.com \
--cc=alyssa.milburn@linux.intel.com \
--cc=ast@kernel.org \
--cc=daniel@iogearbox.net \
--cc=hjl.tools@gmail.com \
--cc=jannh@google.com \
--cc=jgross@suse.com \
--cc=joao.moreira@intel.com \
--cc=joseph.nuzman@intel.com \
--cc=jpoimboe@kernel.org \
--cc=kwikner@ethz.ch \
--cc=linux-kernel@vger.kernel.org \
--cc=mhiramat@kernel.org \
--cc=pawan.kumar.gupta@linux.intel.com \
--cc=peterz@infradead.org \
--cc=rostedt@goodmis.org \
--cc=tim.c.chen@linux.intel.com \
--cc=torvalds@linux-foundation.org \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox