From: Thomas Gleixner <tglx@linutronix.de>
To: LKML <linux-kernel@vger.kernel.org>
Cc: x86@kernel.org, Linus Torvalds <torvalds@linux-foundation.org>,
Tim Chen <tim.c.chen@linux.intel.com>,
Josh Poimboeuf <jpoimboe@kernel.org>,
Andrew Cooper <Andrew.Cooper3@citrix.com>,
Pawan Gupta <pawan.kumar.gupta@linux.intel.com>,
Johannes Wikner <kwikner@ethz.ch>,
Alyssa Milburn <alyssa.milburn@linux.intel.com>,
Jann Horn <jannh@google.com>, "H.J. Lu" <hjl.tools@gmail.com>,
Joao Moreira <joao.moreira@intel.com>,
Joseph Nuzman <joseph.nuzman@intel.com>,
Steven Rostedt <rostedt@goodmis.org>,
Juergen Gross <jgross@suse.com>,
"Peter Zijlstra (Intel)" <peterz@infradead.org>,
Masami Hiramatsu <mhiramat@kernel.org>,
Alexei Starovoitov <ast@kernel.org>,
Daniel Borkmann <daniel@iogearbox.net>
Subject: Re: [patch 00/38] x86/retbleed: Call depth tracking mitigation
Date: Mon, 18 Jul 2022 21:30:45 +0200 [thread overview]
Message-ID: <87tu7euska.ffs@tglx> (raw)
In-Reply-To: <87wncauslw.ffs@tglx>
On Mon, Jul 18 2022 at 21:29, Thomas Gleixner wrote:
>> The implementation falls back to the allocated thunks when padding is not
>> available. I'll send out the GCC patch and the required kernel patch as a
>> reply to this series after polishing it a bit.
>
> Here it goes. GCC hackery first.
And the kernel counterpart.
---
Subject: x86/callthunks: Put thunks into compiler provided padding area
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 15 Jul 2022 16:12:47 +0200
- NOT FOR INCLUSION -
Let the compiler add a 16 byte padding in front of each function entry
point and put the call depth accounting there. That avoids calling out
into the module area and reduces ITLB pressure.
Not-Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
arch/x86/Kconfig | 14 ++++++
arch/x86/Makefile | 4 +
arch/x86/kernel/callthunks.c | 99 ++++++++++++++++++++++++++++++++++++++++++-
3 files changed, 115 insertions(+), 2 deletions(-)
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2440,6 +2440,9 @@ config CC_HAS_SLS
config CC_HAS_RETURN_THUNK
def_bool $(cc-option,-mfunction-return=thunk-extern)
+config CC_HAS_PADDING
+ def_bool $(cc-option,-mforce-function-padding)
+
config HAVE_CALL_THUNKS
def_bool y
depends on RETHUNK && OBJTOOL
@@ -2512,6 +2515,17 @@ config CALL_DEPTH_TRACKING
of this option is marginal as the call depth tracking is using
run-time generated call thunks and call patching.
+config CALL_THUNKS_IN_PADDING
+ bool "Put call depth into padding area before function"
+ depends on CALL_DEPTH_TRACKING && CC_HAS_PADDING
+ default n
+ help
+ Put the call depth accounting into a padding area before the
+ function entry. This padding area is generated by the
+ compiler. This increases text size by ~5%. For non affected
+ systems this space is unused. On affected SKL systems this
+ results in a significant performance gain.
+
config CALL_THUNKS_DEBUG
bool "Enable call thunks and call depth tracking debugging"
depends on CALL_DEPTH_TRACKING
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -197,6 +197,10 @@ ifdef CONFIG_SLS
KBUILD_CFLAGS += -mharden-sls=all
endif
+ifdef CONFIG_CALL_THUNKS_IN_PADDING
+ KBUILD_CFLAGS += -mforce-function-padding
+endif
+
KBUILD_LDFLAGS += -m elf_$(UTS_MACHINE)
ifdef CONFIG_LTO_CLANG
--- a/arch/x86/kernel/callthunks.c
+++ b/arch/x86/kernel/callthunks.c
@@ -92,6 +92,7 @@ struct thunk_mem {
struct thunk_mem_area {
struct thunk_mem *tmem;
+ unsigned long *dests;
unsigned long start;
unsigned long nthunks;
};
@@ -181,6 +182,16 @@ static __init_or_module void callthunk_f
tmem->base + area->start * callthunk_desc.thunk_size,
area->start, area->nthunks);
+ /* Remove thunks in the padding area */
+ for (i = 0; area->dests && i < area->nthunks; i++) {
+ void *dest = (void *)area->dests[i];
+
+ if (!dest)
+ continue;
+ pr_info("Remove %px at index %u\n", dest, i);
+ btree_remove64(&call_thunks, (unsigned long)dest);
+ }
+
/* Jump starts right after the template */
thunk = tmem->base + area->start * callthunk_desc.thunk_size;
tp = thunk + callthunk_desc.template_size;
@@ -204,6 +215,7 @@ static __init_or_module void callthunk_f
size = area->nthunks * callthunk_desc.thunk_size;
text_poke_set_locked(thunk, 0xcc, size);
}
+ vfree(area->dests);
kfree(area);
}
@@ -289,7 +301,8 @@ patch_paravirt_call_sites(struct paravir
patch_call(p->instr, layout);
}
-static struct thunk_mem_area *callthunks_alloc(unsigned int nthunks)
+static struct thunk_mem_area *callthunks_alloc(unsigned int nthunks,
+ bool module)
{
struct thunk_mem_area *area;
unsigned int size, mapsize;
@@ -299,6 +312,13 @@ static struct thunk_mem_area *callthunks
if (!area)
return NULL;
+ if (module) {
+ area->dests = vzalloc(nthunks * sizeof(unsigned long));
+ if (!area->dests)
+ goto free_area;
+ pr_info("Allocated dests array: %px\n", area->dests);
+ }
+
list_for_each_entry(tmem, &thunk_mem_list, list) {
unsigned long start;
@@ -340,6 +360,7 @@ static struct thunk_mem_area *callthunks
free_tmem:
kfree(tmem);
free_area:
+ vfree(area->dests);
kfree(area);
return NULL;
}
@@ -372,6 +393,73 @@ static __init_or_module int callthunk_se
return 0;
}
+/* Install call thunks in the 16 byte padding preceding each symbol */
+int setup_padding_thunks(s32 *start, s32 *end, struct thunk_mem_area *area,
+			 struct module_layout *layout)
+{
+	int nthunks = 0, idx = 0;
+	s32 *s;
+
+	if (callthunk_desc.template_size > 16)
+		return 0;
+
+	for (s = start; s < end; s++) {
+		void *thunk, *tp, *dest = (void *)s + *s;
+		unsigned long key = (unsigned long)dest;
+		int fail, i;
+		u8 opcode;
+
+		if (is_inittext(layout, dest)) {
+			prdbg("Ignoring init dest: %pS %px\n", dest, dest);
+			continue;
+		}
+
+		/* Multiple symbols can have the same location. */
+		if (btree_lookup64(&call_thunks, key)) {
+			prdbg("Ignoring duplicate dest: %pS %px\n", dest, dest);
+			continue;
+		}
+
+		/* Usable padding is 16 readable bytes of 0xcc before dest */
+		thunk = tp = dest - 16;
+		prdbg("Probing dest: %pS %px at %px\n", dest, dest, tp);
+		pagefault_disable();
+		for (fail = 0, i = 0; !fail && i < 16; i++) {
+			if (get_kernel_nofault(opcode, tp + i))
+				fail = 1;
+			else if (opcode != 0xcc)
+				fail = 2;
+		}
+		pagefault_enable();
+		switch (fail) {
+		case 1:
+			prdbg("Faulted for dest: %pS %px\n", dest, dest);
+			nthunks++;
+			continue;
+		case 2:
+			prdbg("No padding for dest: %pS %px\n", dest, dest);
+			nthunks++;
+			continue;
+		}
+
+		prdbg("Thunk for dest: %pS %px at %px\n", dest, dest, tp);
+		memcpy(tp, callthunk_desc.template, callthunk_desc.template_size);
+		tp += callthunk_desc.template_size;
+		memcpy(tp, x86_nops[6], 6);
+
+		if (area->dests) {
+			pr_info("Insert %px at index %d\n", dest, idx);
+			area->dests[idx++] = key;
+		}
+
+		fail = btree_insert64(&call_thunks, key, (void *)thunk, GFP_KERNEL);
+		if (fail)
+			return fail;
+	}
+	prdbg("%d external thunks required\n", nthunks);
+	return 0;
+}
+
static __init_or_module int callthunks_setup(struct callthunk_sites *cs,
struct module_layout *layout)
{
@@ -394,7 +482,7 @@ static __init_or_module int callthunks_s
if (!nthunks)
goto patch;
- area = callthunks_alloc(nthunks);
+ area = callthunks_alloc(nthunks, !!layout->mtn.mod);
if (!area)
return -ENOMEM;
@@ -420,6 +508,13 @@ static __init_or_module int callthunks_s
prdbg("Using thunk vbuf %px\n", vbuf);
}
+ if (IS_ENABLED(CONFIG_CALL_THUNKS_IN_PADDING)) {
+ ret = setup_padding_thunks(cs->syms_start, cs->syms_end,
+ area, layout);
+ if (ret < 0)
+ goto fail;
+ }
+
for (s = cs->syms_start; s < cs->syms_end; s++) {
void *dest = (void *)s + *s;
next prev parent reply other threads:[~2022-07-18 19:33 UTC|newest]
Thread overview: 142+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-07-16 23:17 [patch 00/38] x86/retbleed: Call depth tracking mitigation Thomas Gleixner
2022-07-16 23:17 ` [patch 01/38] x86/paravirt: Ensure proper alignment Thomas Gleixner
2022-07-16 23:17 ` [patch 02/38] x86/cpu: Use native_wrmsrl() in load_percpu_segment() Thomas Gleixner
2022-07-17 0:22 ` Andrew Cooper
2022-07-17 15:20 ` Linus Torvalds
2022-07-17 19:08 ` Thomas Gleixner
2022-07-17 20:08 ` Thomas Gleixner
2022-07-17 20:13 ` Thomas Gleixner
2022-07-17 21:54 ` Thomas Gleixner
2022-07-18 5:11 ` Juergen Gross
2022-07-18 6:54 ` Thomas Gleixner
2022-07-18 8:55 ` Thomas Gleixner
2022-07-18 9:31 ` Peter Zijlstra
2022-07-18 10:33 ` Thomas Gleixner
2022-07-18 11:42 ` Thomas Gleixner
2022-07-18 17:52 ` [patch 0/3] x86/cpu: Sanitize switch_to_new_gdt() Thomas Gleixner
2022-07-18 17:52 ` [patch 1/3] x86/cpu: Remove segment load from switch_to_new_gdt() Thomas Gleixner
2022-07-18 18:43 ` Linus Torvalds
2022-07-18 18:55 ` Thomas Gleixner
2022-07-18 17:52 ` [patch 2/3] x86/cpu: Get rid of redundant switch_to_new_gdt() invocations Thomas Gleixner
2022-07-18 17:52 ` [patch 3/3] x86/cpu: Re-enable stackprotector Thomas Gleixner
2022-07-16 23:17 ` [patch 03/38] x86/modules: Set VM_FLUSH_RESET_PERMS in module_alloc() Thomas Gleixner
2022-07-16 23:17 ` [patch 04/38] x86/vdso: Ensure all kernel code is seen by objtool Thomas Gleixner
2022-07-16 23:17 ` [patch 05/38] btree: Initialize early when builtin Thomas Gleixner
2022-07-16 23:17 ` [patch 06/38] objtool: Allow GS relative relocs Thomas Gleixner
2022-07-16 23:17 ` [patch 07/38] objtool: Track init section Thomas Gleixner
2022-07-16 23:17 ` [patch 08/38] objtool: Add .call_sites section Thomas Gleixner
2022-07-16 23:17 ` [patch 09/38] objtool: Add .sym_sites section Thomas Gleixner
2022-07-16 23:17 ` [patch 10/38] objtool: Add --hacks=skylake Thomas Gleixner
2022-07-16 23:17 ` [patch 11/38] objtool: Allow STT_NOTYPE -> STT_FUNC+0 tail-calls Thomas Gleixner
2022-07-16 23:17 ` [patch 12/38] x86/entry: Make sync_regs() invocation a tail call Thomas Gleixner
2022-07-16 23:17 ` [patch 13/38] x86/modules: Make module_alloc() generally available Thomas Gleixner
2022-07-16 23:17 ` [patch 14/38] x86/Kconfig: Add CONFIG_CALL_THUNKS Thomas Gleixner
2022-07-16 23:17 ` [patch 15/38] x86/retbleed: Add X86_FEATURE_CALL_DEPTH Thomas Gleixner
2022-07-16 23:17 ` [patch 16/38] modules: Make struct module_layout unconditionally available Thomas Gleixner
2022-07-16 23:17 ` [patch 17/38] module: Add arch_data to module_layout Thomas Gleixner
2022-07-16 23:17 ` [patch 18/38] mm/vmalloc: Provide huge page mappings Thomas Gleixner
2022-07-16 23:17 ` [patch 19/38] x86/module: Provide __module_alloc() Thomas Gleixner
2022-07-16 23:17 ` [patch 20/38] x86/alternatives: Provide text_poke_[copy|set]_locked() Thomas Gleixner
2022-07-16 23:17 ` [patch 21/38] x86/entry: Make some entry symbols global Thomas Gleixner
2022-07-16 23:17 ` [patch 22/38] x86/paravirt: Make struct paravirt_call_site unconditionally available Thomas Gleixner
2022-07-16 23:17 ` [patch 23/38] x86/callthunks: Add call patching for call depth tracking Thomas Gleixner
2022-07-16 23:17 ` [patch 24/38] module: Add layout for callthunks tracking Thomas Gleixner
2022-07-16 23:17 ` [patch 25/38] x86/modules: Add call thunk patching Thomas Gleixner
2022-07-16 23:17 ` [patch 26/38] x86/returnthunk: Allow different return thunks Thomas Gleixner
2022-07-16 23:17 ` [patch 27/38] x86/asm: Provide ALTERNATIVE_3 Thomas Gleixner
2022-07-16 23:17 ` [patch 28/38] x86/retbleed: Add SKL return thunk Thomas Gleixner
2022-07-16 23:17 ` [patch 29/38] x86/retpoline: Add SKL retthunk retpolines Thomas Gleixner
2022-07-16 23:17 ` [patch 30/38] x86/retbleed: Add SKL call thunk Thomas Gleixner
2022-07-16 23:18 ` [patch 31/38] x86/calldepth: Add ret/call counting for debug Thomas Gleixner
2022-07-16 23:18 ` [patch 32/38] static_call: Add call depth tracking support Thomas Gleixner
2022-07-16 23:18 ` [patch 33/38] kallsyms: Take callthunks into account Thomas Gleixner
2022-07-16 23:18 ` [patch 34/38] x86/orc: Make it callthunk aware Thomas Gleixner
2022-07-16 23:18 ` [patch 35/38] kprobes: Add callthunk blacklisting Thomas Gleixner
2022-07-16 23:18 ` [patch 36/38] x86/ftrace: Make it call depth tracking aware Thomas Gleixner
2022-07-18 21:01 ` Steven Rostedt
2022-07-19 8:46 ` Peter Zijlstra
2022-07-19 13:06 ` Steven Rostedt
2022-07-16 23:18 ` [patch 37/38] x86/bpf: Emit call depth accounting if required Thomas Gleixner
2022-07-19 5:30 ` Alexei Starovoitov
2022-07-19 8:34 ` Peter Zijlstra
2022-07-16 23:18 ` [patch 38/38] x86/retbleed: Add call depth tracking mitigation Thomas Gleixner
2022-07-17 9:45 ` [patch 00/38] x86/retbleed: Call " David Laight
2022-07-17 15:07 ` Thomas Gleixner
2022-07-17 17:56 ` David Laight
2022-07-17 19:15 ` Thomas Gleixner
2022-07-18 19:29 ` Thomas Gleixner
2022-07-18 19:30 ` Thomas Gleixner [this message]
2022-07-18 19:51 ` Linus Torvalds
2022-07-18 20:44 ` Thomas Gleixner
2022-07-18 21:01 ` Linus Torvalds
2022-07-18 21:43 ` Peter Zijlstra
2022-07-18 22:34 ` Linus Torvalds
2022-07-18 23:52 ` Peter Zijlstra
2022-07-18 21:18 ` Peter Zijlstra
2022-07-18 22:22 ` Thomas Gleixner
2022-07-18 22:47 ` Joao Moreira
2022-07-18 22:55 ` Sami Tolvanen
2022-07-18 23:08 ` Joao Moreira
2022-07-18 23:19 ` Thomas Gleixner
2022-07-18 23:42 ` Linus Torvalds
2022-07-18 23:52 ` Linus Torvalds
2022-07-18 23:57 ` Peter Zijlstra
2022-07-19 0:03 ` Linus Torvalds
2022-07-19 0:11 ` Linus Torvalds
2022-07-19 0:23 ` Peter Zijlstra
2022-07-19 1:02 ` Linus Torvalds
2022-07-19 17:19 ` Sami Tolvanen
2022-07-20 21:13 ` Peter Zijlstra
2022-07-21 8:21 ` David Laight
2022-07-21 10:56 ` David Laight
2022-07-21 15:54 ` Peter Zijlstra
2022-07-21 17:55 ` Peter Zijlstra
2022-07-21 18:06 ` Linus Torvalds
2022-07-21 18:27 ` Peter Zijlstra
2022-07-21 18:32 ` Linus Torvalds
2022-07-21 20:22 ` Joao Moreira
2022-07-22 0:16 ` Sami Tolvanen
2022-07-22 10:23 ` Peter Zijlstra
2022-07-22 15:38 ` Sami Tolvanen
2022-07-21 22:01 ` David Laight
2022-07-22 11:03 ` Peter Zijlstra
2022-07-22 13:27 ` David Laight
2022-07-23 9:50 ` Thomas Gleixner
2022-07-19 0:01 ` Linus Torvalds
2022-07-19 0:19 ` Joao Moreira
2022-07-19 17:21 ` Sami Tolvanen
2022-07-19 17:58 ` Joao Moreira
2022-07-19 8:26 ` David Laight
2022-07-19 16:27 ` Linus Torvalds
2022-07-19 17:23 ` Sami Tolvanen
2022-07-19 17:27 ` Linus Torvalds
2022-07-19 18:06 ` Sami Tolvanen
2022-07-19 20:10 ` Peter Zijlstra
2022-07-18 22:48 ` Sami Tolvanen
2022-07-18 22:59 ` Thomas Gleixner
2022-07-18 23:10 ` Sami Tolvanen
2022-07-18 23:39 ` Linus Torvalds
2022-07-18 23:51 ` Peter Zijlstra
2022-07-20 9:00 ` Thomas Gleixner
2022-07-20 16:55 ` Sami Tolvanen
2022-07-20 19:42 ` Sami Tolvanen
2022-07-22 20:11 ` Tim Chen
2022-07-22 22:18 ` Linus Torvalds
2022-07-18 19:55 ` Thomas Gleixner
2022-07-19 10:24 ` Virt " Andrew Cooper
2022-07-19 14:13 ` Thomas Gleixner
2022-07-19 16:23 ` Andrew Cooper
2022-07-19 21:17 ` Thomas Gleixner
2022-07-19 14:45 ` Michael Kelley (LINUX)
2022-07-19 20:16 ` Peter Zijlstra
2022-07-20 16:57 ` [patch 00/38] x86/retbleed: " Steven Rostedt
2022-07-20 17:09 ` Linus Torvalds
2022-07-20 17:24 ` Peter Zijlstra
2022-07-20 17:50 ` Steven Rostedt
2022-07-20 18:07 ` Linus Torvalds
2022-07-20 18:31 ` Steven Rostedt
2022-07-20 18:43 ` Linus Torvalds
2022-07-20 19:11 ` Steven Rostedt
2022-07-20 19:36 ` Kees Cook
2022-07-20 19:43 ` Steven Rostedt
2022-07-20 21:36 ` Peter Zijlstra
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=87tu7euska.ffs@tglx \
--to=tglx@linutronix.de \
--cc=Andrew.Cooper3@citrix.com \
--cc=alyssa.milburn@linux.intel.com \
--cc=ast@kernel.org \
--cc=daniel@iogearbox.net \
--cc=hjl.tools@gmail.com \
--cc=jannh@google.com \
--cc=jgross@suse.com \
--cc=joao.moreira@intel.com \
--cc=joseph.nuzman@intel.com \
--cc=jpoimboe@kernel.org \
--cc=kwikner@ethz.ch \
--cc=linux-kernel@vger.kernel.org \
--cc=mhiramat@kernel.org \
--cc=pawan.kumar.gupta@linux.intel.com \
--cc=peterz@infradead.org \
--cc=rostedt@goodmis.org \
--cc=tim.c.chen@linux.intel.com \
--cc=torvalds@linux-foundation.org \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.