From: Song Liu <song@kernel.org>
To: <linux-mm@kvack.org>, <linux-kernel@vger.kernel.org>
Cc: <akpm@linux-foundation.org>, <x86@kernel.org>,
<peterz@infradead.org>, <hch@lst.de>, <kernel-team@fb.com>,
<rick.p.edgecombe@intel.com>, <dave.hansen@intel.com>,
<urezki@gmail.com>, Song Liu <song@kernel.org>
Subject: [RFC v2 3/4] modules, x86: use vmalloc_exec for module core
Date: Fri, 7 Oct 2022 16:43:14 -0700 [thread overview]
Message-ID: <20221007234315.2877365-4-song@kernel.org> (raw)
In-Reply-To: <20221007234315.2877365-1-song@kernel.org>
This is a prototype that allows modules to share 2MB text pages with other
modules and BPF programs.
The current version only covers core_layout.
---
arch/x86/Kconfig | 1 +
arch/x86/kernel/alternative.c | 30 ++++++++++++++++++++++++------
arch/x86/kernel/module.c | 1 +
kernel/module/main.c | 23 +++++++++++++----------
kernel/module/strict_rwx.c | 3 ---
kernel/trace/ftrace.c | 3 ++-
6 files changed, 41 insertions(+), 20 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f9920f1341c8..0b1ea05a1da6 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -91,6 +91,7 @@ config X86
select ARCH_HAS_SET_DIRECT_MAP
select ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_HAS_STRICT_MODULE_RWX
+ select ARCH_WANTS_MODULES_DATA_IN_VMALLOC if X86_64
select ARCH_HAS_SYNC_CORE_BEFORE_USERMODE
select ARCH_HAS_SYSCALL_WRAPPER
select ARCH_HAS_UBSAN_SANITIZE_ALL
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 4f3204364caa..0e47a558c5bc 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -332,7 +332,13 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
DUMP_BYTES(insn_buff, insn_buff_sz, "%px: final_insn: ", instr);
- text_poke_early(instr, insn_buff, insn_buff_sz);
+ if (system_state < SYSTEM_RUNNING) {
+ text_poke_early(instr, insn_buff, insn_buff_sz);
+ } else {
+ mutex_lock(&text_mutex);
+ text_poke(instr, insn_buff, insn_buff_sz);
+ mutex_unlock(&text_mutex);
+ }
next:
optimize_nops(instr, a->instrlen);
@@ -503,7 +509,13 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
optimize_nops(bytes, len);
DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr);
DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr);
- text_poke_early(addr, bytes, len);
+ if (system_state == SYSTEM_BOOTING) {
+ text_poke_early(addr, bytes, len);
+ } else {
+ mutex_lock(&text_mutex);
+ text_poke(addr, bytes, len);
+ mutex_unlock(&text_mutex);
+ }
}
}
}
@@ -568,7 +580,13 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end)
if (len == insn.length) {
DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr);
DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr);
- text_poke_early(addr, bytes, len);
+ if (unlikely(system_state == SYSTEM_BOOTING)) {
+ text_poke_early(addr, bytes, len);
+ } else {
+ mutex_lock(&text_mutex);
+ text_poke(addr, bytes, len);
+ mutex_unlock(&text_mutex);
+ }
}
}
}
@@ -609,7 +627,7 @@ void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end)
*/
DUMP_BYTES(((u8*)addr), 4, "%px: orig: ", addr);
DUMP_BYTES(((u8*)&poison), 4, "%px: repl: ", addr);
- text_poke_early(addr, &poison, 4);
+ text_poke(addr, &poison, 4);
}
}
@@ -791,7 +809,7 @@ void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
/* Pad the rest with nops */
add_nops(insn_buff + used, p->len - used);
- text_poke_early(p->instr, insn_buff, p->len);
+ text_poke(p->instr, insn_buff, p->len);
}
}
extern struct paravirt_patch_site __start_parainstructions[],
@@ -1699,7 +1717,7 @@ void __ref text_poke_bp(void *addr, const void *opcode, size_t len, const void *
struct text_poke_loc tp;
if (unlikely(system_state == SYSTEM_BOOTING)) {
- text_poke_early(addr, opcode, len);
+ text_poke(addr, opcode, len);
return;
}
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index b1abf663417c..577e31647dc4 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -229,6 +229,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
bool early = me->state == MODULE_STATE_UNFORMED;
void *(*write)(void *, const void *, size_t) = memcpy;
+ early = false;
if (!early) {
write = text_poke;
mutex_lock(&text_mutex);
diff --git a/kernel/module/main.c b/kernel/module/main.c
index a4e4d84b6f4e..b44806e31a56 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -53,6 +53,7 @@
#include <linux/bsearch.h>
#include <linux/dynamic_debug.h>
#include <linux/audit.h>
+#include <linux/bpf.h>
#include <uapi/linux/module.h>
#include "internal.h"
@@ -1203,7 +1204,7 @@ static void free_module(struct module *mod)
lockdep_free_key_range(mod->data_layout.base, mod->data_layout.size);
/* Finally, free the core (containing the module structure) */
- module_memfree(mod->core_layout.base);
+ vfree_exec(mod->core_layout.base);
#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
vfree(mod->data_layout.base);
#endif
@@ -1321,7 +1322,8 @@ static int simplify_symbols(struct module *mod, const struct load_info *info)
ksym = resolve_symbol_wait(mod, info, name);
/* Ok if resolved. */
if (ksym && !IS_ERR(ksym)) {
- sym[i].st_value = kernel_symbol_value(ksym);
+ unsigned long val = kernel_symbol_value(ksym);
+ bpf_arch_text_copy(&sym[i].st_value, &val, sizeof(val));
break;
}
@@ -1342,7 +1344,8 @@ static int simplify_symbols(struct module *mod, const struct load_info *info)
secbase = (unsigned long)mod_percpu(mod);
else
secbase = info->sechdrs[sym[i].st_shndx].sh_addr;
- sym[i].st_value += secbase;
+ secbase += sym[i].st_value;
+ bpf_arch_text_copy(&sym[i].st_value, &secbase, sizeof(secbase));
break;
}
}
@@ -2123,7 +2126,7 @@ static int move_module(struct module *mod, struct load_info *info)
void *ptr;
/* Do the allocs. */
- ptr = module_alloc(mod->core_layout.size);
+ ptr = vmalloc_exec(mod->core_layout.size, PAGE_SIZE);
/*
* The pointer to this block is stored in the module structure
* which is inside the block. Just mark it as not being a
@@ -2133,7 +2136,7 @@ static int move_module(struct module *mod, struct load_info *info)
if (!ptr)
return -ENOMEM;
- memset(ptr, 0, mod->core_layout.size);
+/* memset(ptr, 0, mod->core_layout.size); */
mod->core_layout.base = ptr;
if (mod->init_layout.size) {
@@ -2146,7 +2149,7 @@ static int move_module(struct module *mod, struct load_info *info)
*/
kmemleak_ignore(ptr);
if (!ptr) {
- module_memfree(mod->core_layout.base);
+ vfree_exec(mod->core_layout.base);
return -ENOMEM;
}
memset(ptr, 0, mod->init_layout.size);
@@ -2156,7 +2159,7 @@ static int move_module(struct module *mod, struct load_info *info)
#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
/* Do the allocs. */
- ptr = vzalloc(mod->data_layout.size);
+ ptr = module_alloc(mod->data_layout.size);
/*
* The pointer to this block is stored in the module structure
* which is inside the block. Just mark it as not being a
@@ -2164,7 +2167,7 @@ static int move_module(struct module *mod, struct load_info *info)
*/
kmemleak_not_leak(ptr);
if (!ptr) {
- module_memfree(mod->core_layout.base);
+ vfree_exec(mod->core_layout.base);
module_memfree(mod->init_layout.base);
return -ENOMEM;
}
@@ -2189,7 +2192,7 @@ static int move_module(struct module *mod, struct load_info *info)
dest = mod->core_layout.base + shdr->sh_entsize;
if (shdr->sh_type != SHT_NOBITS)
- memcpy(dest, (void *)shdr->sh_addr, shdr->sh_size);
+ bpf_arch_text_copy(dest, (void *)shdr->sh_addr, shdr->sh_size);
/* Update sh_addr to point to copy in image. */
shdr->sh_addr = (unsigned long)dest;
pr_debug("\t0x%lx %s\n",
@@ -2345,7 +2348,7 @@ static void module_deallocate(struct module *mod, struct load_info *info)
percpu_modfree(mod);
module_arch_freeing_init(mod);
module_memfree(mod->init_layout.base);
- module_memfree(mod->core_layout.base);
+ vfree_exec(mod->core_layout.base);
#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
vfree(mod->data_layout.base);
#endif
diff --git a/kernel/module/strict_rwx.c b/kernel/module/strict_rwx.c
index 14fbea66f12f..d392eb7bf574 100644
--- a/kernel/module/strict_rwx.c
+++ b/kernel/module/strict_rwx.c
@@ -85,7 +85,6 @@ void module_enable_x(const struct module *mod)
!PAGE_ALIGNED(mod->init_layout.base))
return;
- frob_text(&mod->core_layout, set_memory_x);
frob_text(&mod->init_layout, set_memory_x);
}
@@ -98,9 +97,7 @@ void module_enable_ro(const struct module *mod, bool after_init)
return;
#endif
- set_vm_flush_reset_perms(mod->core_layout.base);
set_vm_flush_reset_perms(mod->init_layout.base);
- frob_text(&mod->core_layout, set_memory_ro);
frob_rodata(&mod->data_layout, set_memory_ro);
frob_text(&mod->init_layout, set_memory_ro);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 439e2ab6905e..818418d5b853 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3142,6 +3142,7 @@ static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs)
if (mod)
rec_flags |= FTRACE_FL_DISABLED;
+ ftrace_arch_code_modify_prepare();
for (pg = new_pgs; pg; pg = pg->next) {
for (i = 0; i < pg->index; i++) {
@@ -3163,7 +3164,7 @@ static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs)
update_cnt++;
}
}
-
+ ftrace_arch_code_modify_post_process();
stop = ftrace_now(raw_smp_processor_id());
ftrace_update_time = stop - start;
ftrace_update_tot_cnt += update_cnt;
--
2.30.2
next prev parent reply other threads:[~2022-10-07 23:46 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-10-07 23:43 [RFC v2 0/4] vmalloc_exec for modules and BPF programs Song Liu
2022-10-07 23:43 ` [RFC v2 1/4] vmalloc: introduce vmalloc_exec and vfree_exec Song Liu
2022-10-10 18:13 ` Edgecombe, Rick P
2022-10-10 19:04 ` Song Liu
2022-10-10 19:59 ` Edgecombe, Rick P
2022-10-07 23:43 ` [RFC v2 2/4] bpf: use vmalloc_exec Song Liu
2022-10-07 23:43 ` Song Liu [this message]
2022-10-14 3:48 ` [RFC v2 3/4] modules, x86: use vmalloc_exec for module core Aaron Lu
2022-10-14 6:07 ` Song Liu
[not found] ` <fb7a38faa52ce0f35061473c9c8b56394a726e59.camel@intel.com>
2022-10-14 18:26 ` Song Liu
2022-10-07 23:43 ` [RFC v2 4/4] vmalloc_exec: share a huge page with kernel text Song Liu
2022-10-10 18:32 ` Edgecombe, Rick P
2022-10-10 19:08 ` Song Liu
2022-10-10 20:09 ` Edgecombe, Rick P
[not found] ` <2B66E2E7-7D32-418C-9DFD-1E17180300B4@fb.com>
2022-10-11 20:40 ` Edgecombe, Rick P
2022-10-12 5:37 ` Song Liu
2022-10-12 18:38 ` Edgecombe, Rick P
2022-10-12 19:01 ` Song Liu
2022-10-08 0:17 ` [RFC v2 0/4] vmalloc_exec for modules and BPF programs Song Liu
2022-10-12 19:03 ` Song Liu
2022-10-17 7:26 ` Christoph Hellwig
2022-10-17 16:23 ` Song Liu
2022-10-18 14:50 ` Christoph Hellwig
2022-10-18 15:05 ` Song Liu
2022-10-18 15:40 ` Christoph Hellwig
2022-10-18 15:40 ` Christoph Hellwig
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20221007234315.2877365-4-song@kernel.org \
--to=song@kernel.org \
--cc=akpm@linux-foundation.org \
--cc=dave.hansen@intel.com \
--cc=hch@lst.de \
--cc=kernel-team@fb.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=peterz@infradead.org \
--cc=rick.p.edgecombe@intel.com \
--cc=urezki@gmail.com \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).