From: Song Liu <song@kernel.org>
To: <linux-mm@kvack.org>, <linux-kernel@vger.kernel.org>
Cc: <akpm@linux-foundation.org>, <x86@kernel.org>,
<peterz@infradead.org>, <hch@lst.de>, <kernel-team@fb.com>,
<rick.p.edgecombe@intel.com>, <mcgrof@kernel.org>,
<dave.hansen@intel.com>, Song Liu <song@kernel.org>
Subject: [RFC 3/5] modules, x86: use vmalloc_exec for module core
Date: Thu, 18 Aug 2022 15:42:16 -0700 [thread overview]
Message-ID: <20220818224218.2399791-4-song@kernel.org> (raw)
In-Reply-To: <20220818224218.2399791-1-song@kernel.org>
This is a prototype that allows modules to share 2MB text pages with other
modules and BPF programs.
The current version only covers core_layout.
---
arch/x86/Kconfig | 1 +
arch/x86/kernel/alternative.c | 30 ++++++++++++++++++++++++------
arch/x86/kernel/module.c | 1 +
kernel/module/main.c | 23 +++++++++++++----------
kernel/module/strict_rwx.c | 3 ---
kernel/trace/ftrace.c | 3 ++-
6 files changed, 41 insertions(+), 20 deletions(-)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index fb5900e2c29a..e932bceb7f23 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -91,6 +91,7 @@ config X86
select ARCH_HAS_SET_DIRECT_MAP
select ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_HAS_STRICT_MODULE_RWX
+ select ARCH_WANTS_MODULES_DATA_IN_VMALLOC if X86_64
select ARCH_HAS_SYNC_CORE_BEFORE_USERMODE
select ARCH_HAS_SYSCALL_WRAPPER
select ARCH_HAS_UBSAN_SANITIZE_ALL
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 62f6b8b7c4a5..c83888ec232b 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -332,7 +332,13 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
DUMP_BYTES(insn_buff, insn_buff_sz, "%px: final_insn: ", instr);
- text_poke_early(instr, insn_buff, insn_buff_sz);
+ if (system_state < SYSTEM_RUNNING) {
+ text_poke_early(instr, insn_buff, insn_buff_sz);
+ } else {
+ mutex_lock(&text_mutex);
+ text_poke(instr, insn_buff, insn_buff_sz);
+ mutex_unlock(&text_mutex);
+ }
next:
optimize_nops(instr, a->instrlen);
@@ -503,7 +509,13 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
optimize_nops(bytes, len);
DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr);
DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr);
- text_poke_early(addr, bytes, len);
+ if (system_state == SYSTEM_BOOTING) {
+ text_poke_early(addr, bytes, len);
+ } else {
+ mutex_lock(&text_mutex);
+ text_poke(addr, bytes, len);
+ mutex_unlock(&text_mutex);
+ }
}
}
}
@@ -568,7 +580,13 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end)
if (len == insn.length) {
DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr);
DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr);
- text_poke_early(addr, bytes, len);
+ if (unlikely(system_state == SYSTEM_BOOTING)) {
+ text_poke_early(addr, bytes, len);
+ } else {
+ mutex_lock(&text_mutex);
+ text_poke(addr, bytes, len);
+ mutex_unlock(&text_mutex);
+ }
}
}
}
@@ -609,7 +627,7 @@ void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end)
*/
DUMP_BYTES(((u8*)addr), 4, "%px: orig: ", addr);
DUMP_BYTES(((u8*)&poison), 4, "%px: repl: ", addr);
- text_poke_early(addr, &poison, 4);
+ text_poke(addr, &poison, 4);
}
}
@@ -791,7 +809,7 @@ void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
/* Pad the rest with nops */
add_nops(insn_buff + used, p->len - used);
- text_poke_early(p->instr, insn_buff, p->len);
+ text_poke(p->instr, insn_buff, p->len);
}
}
extern struct paravirt_patch_site __start_parainstructions[],
@@ -1698,7 +1716,7 @@ void __ref text_poke_bp(void *addr, const void *opcode, size_t len, const void *
struct text_poke_loc tp;
if (unlikely(system_state == SYSTEM_BOOTING)) {
- text_poke_early(addr, opcode, len);
+ text_poke(addr, opcode, len);
return;
}
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 100446ffdc1d..570af623e28f 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -229,6 +229,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
bool early = me->state == MODULE_STATE_UNFORMED;
void *(*write)(void *, const void *, size_t) = memcpy;
+ early = false;
if (!early) {
write = text_poke;
mutex_lock(&text_mutex);
diff --git a/kernel/module/main.c b/kernel/module/main.c
index 57fc2821be63..c51dafa1089a 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -53,6 +53,7 @@
#include <linux/bsearch.h>
#include <linux/dynamic_debug.h>
#include <linux/audit.h>
+#include <linux/bpf.h>
#include <uapi/linux/module.h>
#include "internal.h"
@@ -1198,7 +1199,7 @@ static void free_module(struct module *mod)
lockdep_free_key_range(mod->data_layout.base, mod->data_layout.size);
/* Finally, free the core (containing the module structure) */
- module_memfree(mod->core_layout.base);
+ vfree_exec(mod->core_layout.base);
#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
vfree(mod->data_layout.base);
#endif
@@ -1316,7 +1317,8 @@ static int simplify_symbols(struct module *mod, const struct load_info *info)
ksym = resolve_symbol_wait(mod, info, name);
/* Ok if resolved. */
if (ksym && !IS_ERR(ksym)) {
- sym[i].st_value = kernel_symbol_value(ksym);
+ unsigned long val = kernel_symbol_value(ksym);
+ bpf_arch_text_copy(&sym[i].st_value, &val, sizeof(val));
break;
}
@@ -1337,7 +1339,8 @@ static int simplify_symbols(struct module *mod, const struct load_info *info)
secbase = (unsigned long)mod_percpu(mod);
else
secbase = info->sechdrs[sym[i].st_shndx].sh_addr;
- sym[i].st_value += secbase;
+ secbase += sym[i].st_value;
+ bpf_arch_text_copy(&sym[i].st_value, &secbase, sizeof(secbase));
break;
}
}
@@ -2118,7 +2121,7 @@ static int move_module(struct module *mod, struct load_info *info)
void *ptr;
/* Do the allocs. */
- ptr = module_alloc(mod->core_layout.size);
+ ptr = vmalloc_exec(mod->core_layout.size, PAGE_SIZE);
/*
* The pointer to this block is stored in the module structure
* which is inside the block. Just mark it as not being a
@@ -2128,7 +2131,7 @@ static int move_module(struct module *mod, struct load_info *info)
if (!ptr)
return -ENOMEM;
- memset(ptr, 0, mod->core_layout.size);
+/* memset(ptr, 0, mod->core_layout.size); */
mod->core_layout.base = ptr;
if (mod->init_layout.size) {
@@ -2141,7 +2144,7 @@ static int move_module(struct module *mod, struct load_info *info)
*/
kmemleak_ignore(ptr);
if (!ptr) {
- module_memfree(mod->core_layout.base);
+ vfree_exec(mod->core_layout.base);
return -ENOMEM;
}
memset(ptr, 0, mod->init_layout.size);
@@ -2151,7 +2154,7 @@ static int move_module(struct module *mod, struct load_info *info)
#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
/* Do the allocs. */
- ptr = vmalloc(mod->data_layout.size);
+ ptr = module_alloc(mod->data_layout.size);
/*
* The pointer to this block is stored in the module structure
* which is inside the block. Just mark it as not being a
@@ -2159,7 +2162,7 @@ static int move_module(struct module *mod, struct load_info *info)
*/
kmemleak_not_leak(ptr);
if (!ptr) {
- module_memfree(mod->core_layout.base);
+ vfree_exec(mod->core_layout.base);
module_memfree(mod->init_layout.base);
return -ENOMEM;
}
@@ -2185,7 +2188,7 @@ static int move_module(struct module *mod, struct load_info *info)
dest = mod->core_layout.base + shdr->sh_entsize;
if (shdr->sh_type != SHT_NOBITS)
- memcpy(dest, (void *)shdr->sh_addr, shdr->sh_size);
+ bpf_arch_text_copy(dest, (void *)shdr->sh_addr, shdr->sh_size);
/* Update sh_addr to point to copy in image. */
shdr->sh_addr = (unsigned long)dest;
pr_debug("\t0x%lx %s\n",
@@ -2341,7 +2344,7 @@ static void module_deallocate(struct module *mod, struct load_info *info)
percpu_modfree(mod);
module_arch_freeing_init(mod);
module_memfree(mod->init_layout.base);
- module_memfree(mod->core_layout.base);
+ vfree_exec(mod->core_layout.base);
#ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
vfree(mod->data_layout.base);
#endif
diff --git a/kernel/module/strict_rwx.c b/kernel/module/strict_rwx.c
index 14fbea66f12f..d392eb7bf574 100644
--- a/kernel/module/strict_rwx.c
+++ b/kernel/module/strict_rwx.c
@@ -85,7 +85,6 @@ void module_enable_x(const struct module *mod)
!PAGE_ALIGNED(mod->init_layout.base))
return;
- frob_text(&mod->core_layout, set_memory_x);
frob_text(&mod->init_layout, set_memory_x);
}
@@ -98,9 +97,7 @@ void module_enable_ro(const struct module *mod, bool after_init)
return;
#endif
- set_vm_flush_reset_perms(mod->core_layout.base);
set_vm_flush_reset_perms(mod->init_layout.base);
- frob_text(&mod->core_layout, set_memory_ro);
frob_rodata(&mod->data_layout, set_memory_ro);
frob_text(&mod->init_layout, set_memory_ro);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index bc921a3f7ea8..8cd31dc9ac84 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3177,6 +3177,7 @@ static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs)
if (mod)
rec_flags |= FTRACE_FL_DISABLED;
+ ftrace_arch_code_modify_prepare();
for (pg = new_pgs; pg; pg = pg->next) {
for (i = 0; i < pg->index; i++) {
@@ -3198,7 +3199,7 @@ static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs)
update_cnt++;
}
}
-
+ ftrace_arch_code_modify_post_process();
stop = ftrace_now(raw_smp_processor_id());
ftrace_update_time = stop - start;
ftrace_update_tot_cnt += update_cnt;
--
2.30.2
next prev parent reply other threads:[~2022-08-18 23:14 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-08-18 22:42 [RFC 0/5] vmalloc_exec for modules and BPF programs Song Liu
2022-08-18 22:42 ` [RFC 1/5] vmalloc: introduce vmalloc_exec and vfree_exec Song Liu
2022-10-06 23:15 ` Luis Chamberlain
2022-10-07 6:39 ` Song Liu
2022-08-18 22:42 ` [RFC 2/5] bpf: use vmalloc_exec Song Liu
2022-08-18 22:42 ` Song Liu [this message]
2022-10-06 23:38 ` [RFC 3/5] modules, x86: use vmalloc_exec for module core Luis Chamberlain
2022-10-07 6:46 ` Song Liu
2022-08-18 22:42 ` [RFC 4/5] vmalloc_exec: share a huge page with kernel text Song Liu
2022-10-06 23:44 ` Luis Chamberlain
2022-10-07 6:53 ` Song Liu
2022-08-18 22:42 ` [RFC 5/5] vmalloc: vfree_exec: free unused vm_struct Song Liu
2022-08-22 15:46 ` [RFC 0/5] vmalloc_exec for modules and BPF programs Song Liu
2022-08-22 16:34 ` Peter Zijlstra
2022-08-22 16:56 ` Song Liu
2022-08-23 5:42 ` Peter Zijlstra
2022-08-23 6:39 ` Christophe Leroy
2022-08-23 6:57 ` Song Liu
2022-08-23 6:55 ` Song Liu
2022-08-24 17:06 ` Song Liu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220818224218.2399791-4-song@kernel.org \
--to=song@kernel.org \
--cc=akpm@linux-foundation.org \
--cc=dave.hansen@intel.com \
--cc=hch@lst.de \
--cc=kernel-team@fb.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mcgrof@kernel.org \
--cc=peterz@infradead.org \
--cc=rick.p.edgecombe@intel.com \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).