linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Song Liu <song@kernel.org>
To: <linux-mm@kvack.org>, <linux-kernel@vger.kernel.org>
Cc: <akpm@linux-foundation.org>, <x86@kernel.org>,
	<peterz@infradead.org>, <hch@lst.de>, <kernel-team@fb.com>,
	<rick.p.edgecombe@intel.com>, <mcgrof@kernel.org>,
	<dave.hansen@intel.com>, Song Liu <song@kernel.org>
Subject: [RFC 3/5] modules, x86: use vmalloc_exec for module core
Date: Thu, 18 Aug 2022 15:42:16 -0700	[thread overview]
Message-ID: <20220818224218.2399791-4-song@kernel.org> (raw)
In-Reply-To: <20220818224218.2399791-1-song@kernel.org>

This is a prototype that allows modules to share 2MB text pages with other
modules and BPF programs.

The current version only covers core_layout.
---
 arch/x86/Kconfig              |  1 +
 arch/x86/kernel/alternative.c | 30 ++++++++++++++++++++++++------
 arch/x86/kernel/module.c      |  1 +
 kernel/module/main.c          | 23 +++++++++++++----------
 kernel/module/strict_rwx.c    |  3 ---
 kernel/trace/ftrace.c         |  3 ++-
 6 files changed, 41 insertions(+), 20 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index fb5900e2c29a..e932bceb7f23 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -91,6 +91,7 @@ config X86
 	select ARCH_HAS_SET_DIRECT_MAP
 	select ARCH_HAS_STRICT_KERNEL_RWX
 	select ARCH_HAS_STRICT_MODULE_RWX
+	select ARCH_WANTS_MODULES_DATA_IN_VMALLOC	if X86_64
 	select ARCH_HAS_SYNC_CORE_BEFORE_USERMODE
 	select ARCH_HAS_SYSCALL_WRAPPER
 	select ARCH_HAS_UBSAN_SANITIZE_ALL
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 62f6b8b7c4a5..c83888ec232b 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -332,7 +332,13 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
 
 		DUMP_BYTES(insn_buff, insn_buff_sz, "%px: final_insn: ", instr);
 
-		text_poke_early(instr, insn_buff, insn_buff_sz);
+		if (system_state < SYSTEM_RUNNING) {
+			text_poke_early(instr, insn_buff, insn_buff_sz);
+		} else {
+			mutex_lock(&text_mutex);
+			text_poke(instr, insn_buff, insn_buff_sz);
+			mutex_unlock(&text_mutex);
+		}
 
 next:
 		optimize_nops(instr, a->instrlen);
@@ -503,7 +509,13 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
 			optimize_nops(bytes, len);
 			DUMP_BYTES(((u8*)addr),  len, "%px: orig: ", addr);
 			DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr);
-			text_poke_early(addr, bytes, len);
+			if (system_state == SYSTEM_BOOTING) {
+				text_poke_early(addr, bytes, len);
+			} else {
+				mutex_lock(&text_mutex);
+				text_poke(addr, bytes, len);
+				mutex_unlock(&text_mutex);
+			}
 		}
 	}
 }
@@ -568,7 +580,13 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end)
 		if (len == insn.length) {
 			DUMP_BYTES(((u8*)addr),  len, "%px: orig: ", addr);
 			DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr);
-			text_poke_early(addr, bytes, len);
+			if (unlikely(system_state == SYSTEM_BOOTING)) {
+				text_poke_early(addr, bytes, len);
+			} else {
+				mutex_lock(&text_mutex);
+				text_poke(addr, bytes, len);
+				mutex_unlock(&text_mutex);
+			}
 		}
 	}
 }
@@ -609,7 +627,7 @@ void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end)
 		 */
 		DUMP_BYTES(((u8*)addr), 4, "%px: orig: ", addr);
 		DUMP_BYTES(((u8*)&poison), 4, "%px: repl: ", addr);
-		text_poke_early(addr, &poison, 4);
+		text_poke(addr, &poison, 4);
 	}
 }
 
@@ -791,7 +809,7 @@ void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
 
 		/* Pad the rest with nops */
 		add_nops(insn_buff + used, p->len - used);
-		text_poke_early(p->instr, insn_buff, p->len);
+		text_poke(p->instr, insn_buff, p->len);
 	}
 }
 extern struct paravirt_patch_site __start_parainstructions[],
@@ -1698,7 +1716,7 @@ void __ref text_poke_bp(void *addr, const void *opcode, size_t len, const void *
 	struct text_poke_loc tp;
 
 	if (unlikely(system_state == SYSTEM_BOOTING)) {
-		text_poke_early(addr, opcode, len);
+		text_poke(addr, opcode, len);
 		return;
 	}
 
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 100446ffdc1d..570af623e28f 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -229,6 +229,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 	bool early = me->state == MODULE_STATE_UNFORMED;
 	void *(*write)(void *, const void *, size_t) = memcpy;
 
+	early = false;
 	if (!early) {
 		write = text_poke;
 		mutex_lock(&text_mutex);
diff --git a/kernel/module/main.c b/kernel/module/main.c
index 57fc2821be63..c51dafa1089a 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -53,6 +53,7 @@
 #include <linux/bsearch.h>
 #include <linux/dynamic_debug.h>
 #include <linux/audit.h>
+#include <linux/bpf.h>
 #include <uapi/linux/module.h>
 #include "internal.h"
 
@@ -1198,7 +1199,7 @@ static void free_module(struct module *mod)
 	lockdep_free_key_range(mod->data_layout.base, mod->data_layout.size);
 
 	/* Finally, free the core (containing the module structure) */
-	module_memfree(mod->core_layout.base);
+	vfree_exec(mod->core_layout.base);
 #ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
 	vfree(mod->data_layout.base);
 #endif
@@ -1316,7 +1317,8 @@ static int simplify_symbols(struct module *mod, const struct load_info *info)
 			ksym = resolve_symbol_wait(mod, info, name);
 			/* Ok if resolved.  */
 			if (ksym && !IS_ERR(ksym)) {
-				sym[i].st_value = kernel_symbol_value(ksym);
+				unsigned long val = kernel_symbol_value(ksym);
+				bpf_arch_text_copy(&sym[i].st_value, &val, sizeof(val));
 				break;
 			}
 
@@ -1337,7 +1339,8 @@ static int simplify_symbols(struct module *mod, const struct load_info *info)
 				secbase = (unsigned long)mod_percpu(mod);
 			else
 				secbase = info->sechdrs[sym[i].st_shndx].sh_addr;
-			sym[i].st_value += secbase;
+			secbase += sym[i].st_value;
+			bpf_arch_text_copy(&sym[i].st_value, &secbase, sizeof(secbase));
 			break;
 		}
 	}
@@ -2118,7 +2121,7 @@ static int move_module(struct module *mod, struct load_info *info)
 	void *ptr;
 
 	/* Do the allocs. */
-	ptr = module_alloc(mod->core_layout.size);
+	ptr = vmalloc_exec(mod->core_layout.size, PAGE_SIZE);
 	/*
 	 * The pointer to this block is stored in the module structure
 	 * which is inside the block. Just mark it as not being a
@@ -2128,7 +2131,7 @@ static int move_module(struct module *mod, struct load_info *info)
 	if (!ptr)
 		return -ENOMEM;
 
-	memset(ptr, 0, mod->core_layout.size);
+/* 	memset(ptr, 0, mod->core_layout.size); */
 	mod->core_layout.base = ptr;
 
 	if (mod->init_layout.size) {
@@ -2141,7 +2144,7 @@ static int move_module(struct module *mod, struct load_info *info)
 		 */
 		kmemleak_ignore(ptr);
 		if (!ptr) {
-			module_memfree(mod->core_layout.base);
+			vfree_exec(mod->core_layout.base);
 			return -ENOMEM;
 		}
 		memset(ptr, 0, mod->init_layout.size);
@@ -2151,7 +2154,7 @@ static int move_module(struct module *mod, struct load_info *info)
 
 #ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
 	/* Do the allocs. */
-	ptr = vmalloc(mod->data_layout.size);
+	ptr = module_alloc(mod->data_layout.size);
 	/*
 	 * The pointer to this block is stored in the module structure
 	 * which is inside the block. Just mark it as not being a
@@ -2159,7 +2162,7 @@ static int move_module(struct module *mod, struct load_info *info)
 	 */
 	kmemleak_not_leak(ptr);
 	if (!ptr) {
-		module_memfree(mod->core_layout.base);
+		vfree_exec(mod->core_layout.base);
 		module_memfree(mod->init_layout.base);
 		return -ENOMEM;
 	}
@@ -2185,7 +2188,7 @@ static int move_module(struct module *mod, struct load_info *info)
 			dest = mod->core_layout.base + shdr->sh_entsize;
 
 		if (shdr->sh_type != SHT_NOBITS)
-			memcpy(dest, (void *)shdr->sh_addr, shdr->sh_size);
+			bpf_arch_text_copy(dest, (void *)shdr->sh_addr, shdr->sh_size);
 		/* Update sh_addr to point to copy in image. */
 		shdr->sh_addr = (unsigned long)dest;
 		pr_debug("\t0x%lx %s\n",
@@ -2341,7 +2344,7 @@ static void module_deallocate(struct module *mod, struct load_info *info)
 	percpu_modfree(mod);
 	module_arch_freeing_init(mod);
 	module_memfree(mod->init_layout.base);
-	module_memfree(mod->core_layout.base);
+	vfree_exec(mod->core_layout.base);
 #ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
 	vfree(mod->data_layout.base);
 #endif
diff --git a/kernel/module/strict_rwx.c b/kernel/module/strict_rwx.c
index 14fbea66f12f..d392eb7bf574 100644
--- a/kernel/module/strict_rwx.c
+++ b/kernel/module/strict_rwx.c
@@ -85,7 +85,6 @@ void module_enable_x(const struct module *mod)
 	    !PAGE_ALIGNED(mod->init_layout.base))
 		return;
 
-	frob_text(&mod->core_layout, set_memory_x);
 	frob_text(&mod->init_layout, set_memory_x);
 }
 
@@ -98,9 +97,7 @@ void module_enable_ro(const struct module *mod, bool after_init)
 		return;
 #endif
 
-	set_vm_flush_reset_perms(mod->core_layout.base);
 	set_vm_flush_reset_perms(mod->init_layout.base);
-	frob_text(&mod->core_layout, set_memory_ro);
 
 	frob_rodata(&mod->data_layout, set_memory_ro);
 	frob_text(&mod->init_layout, set_memory_ro);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index bc921a3f7ea8..8cd31dc9ac84 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3177,6 +3177,7 @@ static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs)
 	if (mod)
 		rec_flags |= FTRACE_FL_DISABLED;
 
+	ftrace_arch_code_modify_prepare();
 	for (pg = new_pgs; pg; pg = pg->next) {
 
 		for (i = 0; i < pg->index; i++) {
@@ -3198,7 +3199,7 @@ static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs)
 			update_cnt++;
 		}
 	}
-
+	ftrace_arch_code_modify_post_process();
 	stop = ftrace_now(raw_smp_processor_id());
 	ftrace_update_time = stop - start;
 	ftrace_update_tot_cnt += update_cnt;
-- 
2.30.2



  parent reply	other threads:[~2022-08-18 23:14 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-08-18 22:42 [RFC 0/5] vmalloc_exec for modules and BPF programs Song Liu
2022-08-18 22:42 ` [RFC 1/5] vmalloc: introduce vmalloc_exec and vfree_exec Song Liu
2022-10-06 23:15   ` Luis Chamberlain
2022-10-07  6:39     ` Song Liu
2022-08-18 22:42 ` [RFC 2/5] bpf: use vmalloc_exec Song Liu
2022-08-18 22:42 ` Song Liu [this message]
2022-10-06 23:38   ` [RFC 3/5] modules, x86: use vmalloc_exec for module core Luis Chamberlain
2022-10-07  6:46     ` Song Liu
2022-08-18 22:42 ` [RFC 4/5] vmalloc_exec: share a huge page with kernel text Song Liu
2022-10-06 23:44   ` Luis Chamberlain
2022-10-07  6:53     ` Song Liu
2022-08-18 22:42 ` [RFC 5/5] vmalloc: vfree_exec: free unused vm_struct Song Liu
2022-08-22 15:46 ` [RFC 0/5] vmalloc_exec for modules and BPF programs Song Liu
2022-08-22 16:34   ` Peter Zijlstra
2022-08-22 16:56     ` Song Liu
2022-08-23  5:42       ` Peter Zijlstra
2022-08-23  6:39         ` Christophe Leroy
2022-08-23  6:57           ` Song Liu
2022-08-23  6:55         ` Song Liu
2022-08-24 17:06       ` Song Liu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220818224218.2399791-4-song@kernel.org \
    --to=song@kernel.org \
    --cc=akpm@linux-foundation.org \
    --cc=dave.hansen@intel.com \
    --cc=hch@lst.de \
    --cc=kernel-team@fb.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mcgrof@kernel.org \
    --cc=peterz@infradead.org \
    --cc=rick.p.edgecombe@intel.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).