linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Song Liu <song@kernel.org>
To: <linux-mm@kvack.org>, <linux-kernel@vger.kernel.org>
Cc: <akpm@linux-foundation.org>, <x86@kernel.org>,
	<peterz@infradead.org>, <hch@lst.de>, <kernel-team@fb.com>,
	<rick.p.edgecombe@intel.com>, <dave.hansen@intel.com>,
	<urezki@gmail.com>, Song Liu <song@kernel.org>
Subject: [RFC v2 3/4] modules, x86: use vmalloc_exec for module core
Date: Fri, 7 Oct 2022 16:43:14 -0700	[thread overview]
Message-ID: <20221007234315.2877365-4-song@kernel.org> (raw)
In-Reply-To: <20221007234315.2877365-1-song@kernel.org>

This is a prototype that allows modules to share 2MB text pages with other
modules and BPF programs.

The current version only covers core_layout; init_layout is left unchanged.
---
 arch/x86/Kconfig              |  1 +
 arch/x86/kernel/alternative.c | 30 ++++++++++++++++++++++++------
 arch/x86/kernel/module.c      |  1 +
 kernel/module/main.c          | 23 +++++++++++++----------
 kernel/module/strict_rwx.c    |  3 ---
 kernel/trace/ftrace.c         |  3 ++-
 6 files changed, 41 insertions(+), 20 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f9920f1341c8..0b1ea05a1da6 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -91,6 +91,7 @@ config X86
 	select ARCH_HAS_SET_DIRECT_MAP
 	select ARCH_HAS_STRICT_KERNEL_RWX
 	select ARCH_HAS_STRICT_MODULE_RWX
+	select ARCH_WANTS_MODULES_DATA_IN_VMALLOC	if X86_64
 	select ARCH_HAS_SYNC_CORE_BEFORE_USERMODE
 	select ARCH_HAS_SYSCALL_WRAPPER
 	select ARCH_HAS_UBSAN_SANITIZE_ALL
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 4f3204364caa..0e47a558c5bc 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -332,7 +332,13 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
 
 		DUMP_BYTES(insn_buff, insn_buff_sz, "%px: final_insn: ", instr);
 
-		text_poke_early(instr, insn_buff, insn_buff_sz);
+		if (system_state < SYSTEM_RUNNING) {
+			text_poke_early(instr, insn_buff, insn_buff_sz);
+		} else {
+			mutex_lock(&text_mutex);
+			text_poke(instr, insn_buff, insn_buff_sz);
+			mutex_unlock(&text_mutex);
+		}
 
 next:
 		optimize_nops(instr, a->instrlen);
@@ -503,7 +509,13 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
 			optimize_nops(bytes, len);
 			DUMP_BYTES(((u8*)addr),  len, "%px: orig: ", addr);
 			DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr);
-			text_poke_early(addr, bytes, len);
+			if (system_state == SYSTEM_BOOTING) {
+				text_poke_early(addr, bytes, len);
+			} else {
+				mutex_lock(&text_mutex);
+				text_poke(addr, bytes, len);
+				mutex_unlock(&text_mutex);
+			}
 		}
 	}
 }
@@ -568,7 +580,13 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end)
 		if (len == insn.length) {
 			DUMP_BYTES(((u8*)addr),  len, "%px: orig: ", addr);
 			DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr);
-			text_poke_early(addr, bytes, len);
+			if (unlikely(system_state == SYSTEM_BOOTING)) {
+				text_poke_early(addr, bytes, len);
+			} else {
+				mutex_lock(&text_mutex);
+				text_poke(addr, bytes, len);
+				mutex_unlock(&text_mutex);
+			}
 		}
 	}
 }
@@ -609,7 +627,7 @@ void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end)
 		 */
 		DUMP_BYTES(((u8*)addr), 4, "%px: orig: ", addr);
 		DUMP_BYTES(((u8*)&poison), 4, "%px: repl: ", addr);
-		text_poke_early(addr, &poison, 4);
+		text_poke(addr, &poison, 4);
 	}
 }
 
@@ -791,7 +809,7 @@ void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
 
 		/* Pad the rest with nops */
 		add_nops(insn_buff + used, p->len - used);
-		text_poke_early(p->instr, insn_buff, p->len);
+		text_poke(p->instr, insn_buff, p->len);
 	}
 }
 extern struct paravirt_patch_site __start_parainstructions[],
@@ -1699,7 +1717,7 @@ void __ref text_poke_bp(void *addr, const void *opcode, size_t len, const void *
 	struct text_poke_loc tp;
 
 	if (unlikely(system_state == SYSTEM_BOOTING)) {
-		text_poke_early(addr, opcode, len);
+		text_poke(addr, opcode, len);
 		return;
 	}
 
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index b1abf663417c..577e31647dc4 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -229,6 +229,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 	bool early = me->state == MODULE_STATE_UNFORMED;
 	void *(*write)(void *, const void *, size_t) = memcpy;
 
+	early = false;
 	if (!early) {
 		write = text_poke;
 		mutex_lock(&text_mutex);
diff --git a/kernel/module/main.c b/kernel/module/main.c
index a4e4d84b6f4e..b44806e31a56 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -53,6 +53,7 @@
 #include <linux/bsearch.h>
 #include <linux/dynamic_debug.h>
 #include <linux/audit.h>
+#include <linux/bpf.h>
 #include <uapi/linux/module.h>
 #include "internal.h"
 
@@ -1203,7 +1204,7 @@ static void free_module(struct module *mod)
 	lockdep_free_key_range(mod->data_layout.base, mod->data_layout.size);
 
 	/* Finally, free the core (containing the module structure) */
-	module_memfree(mod->core_layout.base);
+	vfree_exec(mod->core_layout.base);
 #ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
 	vfree(mod->data_layout.base);
 #endif
@@ -1321,7 +1322,8 @@ static int simplify_symbols(struct module *mod, const struct load_info *info)
 			ksym = resolve_symbol_wait(mod, info, name);
 			/* Ok if resolved.  */
 			if (ksym && !IS_ERR(ksym)) {
-				sym[i].st_value = kernel_symbol_value(ksym);
+				unsigned long val = kernel_symbol_value(ksym);
+				bpf_arch_text_copy(&sym[i].st_value, &val, sizeof(val));
 				break;
 			}
 
@@ -1342,7 +1344,8 @@ static int simplify_symbols(struct module *mod, const struct load_info *info)
 				secbase = (unsigned long)mod_percpu(mod);
 			else
 				secbase = info->sechdrs[sym[i].st_shndx].sh_addr;
-			sym[i].st_value += secbase;
+			secbase += sym[i].st_value;
+			bpf_arch_text_copy(&sym[i].st_value, &secbase, sizeof(secbase));
 			break;
 		}
 	}
@@ -2123,7 +2126,7 @@ static int move_module(struct module *mod, struct load_info *info)
 	void *ptr;
 
 	/* Do the allocs. */
-	ptr = module_alloc(mod->core_layout.size);
+	ptr = vmalloc_exec(mod->core_layout.size, PAGE_SIZE);
 	/*
 	 * The pointer to this block is stored in the module structure
 	 * which is inside the block. Just mark it as not being a
@@ -2133,7 +2136,7 @@ static int move_module(struct module *mod, struct load_info *info)
 	if (!ptr)
 		return -ENOMEM;
 
-	memset(ptr, 0, mod->core_layout.size);
+/* 	memset(ptr, 0, mod->core_layout.size); */
 	mod->core_layout.base = ptr;
 
 	if (mod->init_layout.size) {
@@ -2146,7 +2149,7 @@ static int move_module(struct module *mod, struct load_info *info)
 		 */
 		kmemleak_ignore(ptr);
 		if (!ptr) {
-			module_memfree(mod->core_layout.base);
+			vfree_exec(mod->core_layout.base);
 			return -ENOMEM;
 		}
 		memset(ptr, 0, mod->init_layout.size);
@@ -2156,7 +2159,7 @@ static int move_module(struct module *mod, struct load_info *info)
 
 #ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
 	/* Do the allocs. */
-	ptr = vzalloc(mod->data_layout.size);
+	ptr = module_alloc(mod->data_layout.size);
 	/*
 	 * The pointer to this block is stored in the module structure
 	 * which is inside the block. Just mark it as not being a
@@ -2164,7 +2167,7 @@ static int move_module(struct module *mod, struct load_info *info)
 	 */
 	kmemleak_not_leak(ptr);
 	if (!ptr) {
-		module_memfree(mod->core_layout.base);
+		vfree_exec(mod->core_layout.base);
 		module_memfree(mod->init_layout.base);
 		return -ENOMEM;
 	}
@@ -2189,7 +2192,7 @@ static int move_module(struct module *mod, struct load_info *info)
 			dest = mod->core_layout.base + shdr->sh_entsize;
 
 		if (shdr->sh_type != SHT_NOBITS)
-			memcpy(dest, (void *)shdr->sh_addr, shdr->sh_size);
+			bpf_arch_text_copy(dest, (void *)shdr->sh_addr, shdr->sh_size);
 		/* Update sh_addr to point to copy in image. */
 		shdr->sh_addr = (unsigned long)dest;
 		pr_debug("\t0x%lx %s\n",
@@ -2345,7 +2348,7 @@ static void module_deallocate(struct module *mod, struct load_info *info)
 	percpu_modfree(mod);
 	module_arch_freeing_init(mod);
 	module_memfree(mod->init_layout.base);
-	module_memfree(mod->core_layout.base);
+	vfree_exec(mod->core_layout.base);
 #ifdef CONFIG_ARCH_WANTS_MODULES_DATA_IN_VMALLOC
 	vfree(mod->data_layout.base);
 #endif
diff --git a/kernel/module/strict_rwx.c b/kernel/module/strict_rwx.c
index 14fbea66f12f..d392eb7bf574 100644
--- a/kernel/module/strict_rwx.c
+++ b/kernel/module/strict_rwx.c
@@ -85,7 +85,6 @@ void module_enable_x(const struct module *mod)
 	    !PAGE_ALIGNED(mod->init_layout.base))
 		return;
 
-	frob_text(&mod->core_layout, set_memory_x);
 	frob_text(&mod->init_layout, set_memory_x);
 }
 
@@ -98,9 +97,7 @@ void module_enable_ro(const struct module *mod, bool after_init)
 		return;
 #endif
 
-	set_vm_flush_reset_perms(mod->core_layout.base);
 	set_vm_flush_reset_perms(mod->init_layout.base);
-	frob_text(&mod->core_layout, set_memory_ro);
 
 	frob_rodata(&mod->data_layout, set_memory_ro);
 	frob_text(&mod->init_layout, set_memory_ro);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 439e2ab6905e..818418d5b853 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3142,6 +3142,7 @@ static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs)
 	if (mod)
 		rec_flags |= FTRACE_FL_DISABLED;
 
+	ftrace_arch_code_modify_prepare();
 	for (pg = new_pgs; pg; pg = pg->next) {
 
 		for (i = 0; i < pg->index; i++) {
@@ -3163,7 +3164,7 @@ static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs)
 			update_cnt++;
 		}
 	}
-
+	ftrace_arch_code_modify_post_process();
 	stop = ftrace_now(raw_smp_processor_id());
 	ftrace_update_time = stop - start;
 	ftrace_update_tot_cnt += update_cnt;
-- 
2.30.2



  parent reply	other threads:[~2022-10-07 23:46 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-10-07 23:43 [RFC v2 0/4] vmalloc_exec for modules and BPF programs Song Liu
2022-10-07 23:43 ` [RFC v2 1/4] vmalloc: introduce vmalloc_exec and vfree_exec Song Liu
2022-10-10 18:13   ` Edgecombe, Rick P
2022-10-10 19:04     ` Song Liu
2022-10-10 19:59       ` Edgecombe, Rick P
2022-10-07 23:43 ` [RFC v2 2/4] bpf: use vmalloc_exec Song Liu
2022-10-07 23:43 ` Song Liu [this message]
2022-10-14  3:48   ` [RFC v2 3/4] modules, x86: use vmalloc_exec for module core Aaron Lu
2022-10-14  6:07     ` Song Liu
     [not found]   ` <fb7a38faa52ce0f35061473c9c8b56394a726e59.camel@intel.com>
2022-10-14 18:26     ` Song Liu
2022-10-07 23:43 ` [RFC v2 4/4] vmalloc_exec: share a huge page with kernel text Song Liu
2022-10-10 18:32   ` Edgecombe, Rick P
2022-10-10 19:08     ` Song Liu
2022-10-10 20:09       ` Edgecombe, Rick P
     [not found]         ` <2B66E2E7-7D32-418C-9DFD-1E17180300B4@fb.com>
2022-10-11 20:40           ` Edgecombe, Rick P
2022-10-12  5:37             ` Song Liu
2022-10-12 18:38               ` Edgecombe, Rick P
2022-10-12 19:01                 ` Song Liu
2022-10-08  0:17 ` [RFC v2 0/4] vmalloc_exec for modules and BPF programs Song Liu
2022-10-12 19:03 ` Song Liu
2022-10-17  7:26 ` Christoph Hellwig
2022-10-17 16:23   ` Song Liu
2022-10-18 14:50     ` Christoph Hellwig
2022-10-18 15:05       ` Song Liu
2022-10-18 15:40         ` Christoph Hellwig
2022-10-18 15:40           ` Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20221007234315.2877365-4-song@kernel.org \
    --to=song@kernel.org \
    --cc=akpm@linux-foundation.org \
    --cc=dave.hansen@intel.com \
    --cc=hch@lst.de \
    --cc=kernel-team@fb.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=peterz@infradead.org \
    --cc=rick.p.edgecombe@intel.com \
    --cc=urezki@gmail.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).