All of lore.kernel.org
 help / color / mirror / Atom feed
From: Borislav Petkov <bp@alien8.de>
To: X86 ML <x86@kernel.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>,
	LKML <linux-kernel@vger.kernel.org>,
	Thomas Gleixner <tglx@linutronix.de>
Subject: [PATCH 1/4] x86/alternatives: Use a temporary buffer when optimizing NOPs
Date: Tue, 30 Jan 2024 11:59:38 +0100	[thread overview]
Message-ID: <20240130105941.19707-2-bp@alien8.de> (raw)
In-Reply-To: <20240130105941.19707-1-bp@alien8.de>

From: "Borislav Petkov (AMD)" <bp@alien8.de>

Instead of optimizing NOPs inplace, use a temporary buffer like the
usual alternatives patching flow does. This obviates the need to grab
locks when patching, see

  6778977590da ("x86/alternatives: Disable interrupts and sync when optimizing NOPs in place")

While at it, add nomenclature definitions clarifying and simplifying the
naming of function-local variables in the alternatives code.

Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/alternative.c | 78 +++++++++++++++++++----------------
 1 file changed, 42 insertions(+), 36 deletions(-)

diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index cc130b57542a..d633eb59f2b6 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -124,6 +124,20 @@ const unsigned char * const x86_nops[ASM_NOP_MAX+1] =
 #endif
 };
 
+/*
+ * Nomenclature for variable names to simplify and clarify this code and ease
+ * any potential staring at it:
+ *
+ * @instr: source address of the original instructions in the kernel text as
+ * generated by the compiler.
+ *
+ * @buf: temporary buffer on which the patching operates. This buffer is
+ * eventually text-poked into the kernel image.
+ *
+ * @replacement: pointer to the opcodes which are replacing @instr, located in
+ * the .altinstr_replacement section.
+ */
+
 /*
  * Fill the buffer with a single effective instruction of size @len.
  *
@@ -133,28 +147,28 @@ const unsigned char * const x86_nops[ASM_NOP_MAX+1] =
  * each single-byte NOPs). If @len to fill out is > ASM_NOP_MAX, pad with INT3 and
  * *jump* over instead of executing long and daft NOPs.
  */
-static void __init_or_module add_nop(u8 *instr, unsigned int len)
+static void __init_or_module add_nop(u8 *buf, unsigned int len)
 {
-	u8 *target = instr + len;
+	u8 *target = buf + len;
 
 	if (!len)
 		return;
 
 	if (len <= ASM_NOP_MAX) {
-		memcpy(instr, x86_nops[len], len);
+		memcpy(buf, x86_nops[len], len);
 		return;
 	}
 
 	if (len < 128) {
-		__text_gen_insn(instr, JMP8_INSN_OPCODE, instr, target, JMP8_INSN_SIZE);
-		instr += JMP8_INSN_SIZE;
+		__text_gen_insn(buf, JMP8_INSN_OPCODE, buf, target, JMP8_INSN_SIZE);
+		buf += JMP8_INSN_SIZE;
 	} else {
-		__text_gen_insn(instr, JMP32_INSN_OPCODE, instr, target, JMP32_INSN_SIZE);
-		instr += JMP32_INSN_SIZE;
+		__text_gen_insn(buf, JMP32_INSN_OPCODE, buf, target, JMP32_INSN_SIZE);
+		buf += JMP32_INSN_SIZE;
 	}
 
-	for (;instr < target; instr++)
-		*instr = INT3_INSN_OPCODE;
+	for (;buf < target; buf++)
+		*buf = INT3_INSN_OPCODE;
 }
 
 extern s32 __retpoline_sites[], __retpoline_sites_end[];
@@ -187,12 +201,12 @@ static bool insn_is_nop(struct insn *insn)
  * Find the offset of the first non-NOP instruction starting at @offset
  * but no further than @len.
  */
-static int skip_nops(u8 *instr, int offset, int len)
+static int skip_nops(u8 *buf, int offset, int len)
 {
 	struct insn insn;
 
 	for (; offset < len; offset += insn.length) {
-		if (insn_decode_kernel(&insn, &instr[offset]))
+		if (insn_decode_kernel(&insn, &buf[offset]))
 			break;
 
 		if (!insn_is_nop(&insn))
@@ -207,7 +221,7 @@ static int skip_nops(u8 *instr, int offset, int len)
  * to the end of the NOP sequence into a single NOP.
  */
 static bool __init_or_module
-__optimize_nops(u8 *instr, size_t len, struct insn *insn, int *next, int *prev, int *target)
+__optimize_nops(const u8 * const instr, u8 *buf, size_t len, struct insn *insn, int *next, int *prev, int *target)
 {
 	int i = *next - insn->length;
 
@@ -222,12 +236,12 @@ __optimize_nops(u8 *instr, size_t len, struct insn *insn, int *next, int *prev,
 	if (insn_is_nop(insn)) {
 		int nop = i;
 
-		*next = skip_nops(instr, *next, len);
+		*next = skip_nops(buf, *next, len);
 		if (*target && *next == *target)
 			nop = *prev;
 
-		add_nop(instr + nop, *next - nop);
-		DUMP_BYTES(ALT, instr, len, "%px: [%d:%d) optimized NOPs: ", instr, nop, *next);
+		add_nop(buf + nop, *next - nop);
+		DUMP_BYTES(ALT, buf, len, "%px: [%d:%d) optimized NOPs: ", instr, nop, *next);
 		return true;
 	}
 
@@ -239,32 +253,22 @@ __optimize_nops(u8 *instr, size_t len, struct insn *insn, int *next, int *prev,
  * "noinline" to cause control flow change and thus invalidate I$ and
  * cause refetch after modification.
  */
-static void __init_or_module noinline optimize_nops(u8 *instr, size_t len)
+static void __init_or_module noinline optimize_nops(const u8 * const instr, u8 *buf, size_t len)
 {
 	int prev, target = 0;
 
 	for (int next, i = 0; i < len; i = next) {
 		struct insn insn;
 
-		if (insn_decode_kernel(&insn, &instr[i]))
+		if (insn_decode_kernel(&insn, &buf[i]))
 			return;
 
 		next = i + insn.length;
 
-		__optimize_nops(instr, len, &insn, &next, &prev, &target);
+		__optimize_nops(instr, buf, len, &insn, &next, &prev, &target);
 	}
 }
 
-static void __init_or_module noinline optimize_nops_inplace(u8 *instr, size_t len)
-{
-	unsigned long flags;
-
-	local_irq_save(flags);
-	optimize_nops(instr, len);
-	sync_core();
-	local_irq_restore(flags);
-}
-
 /*
  * In this context, "source" is where the instructions are placed in the
  * section .altinstr_replacement, for example during kernel build by the
@@ -336,7 +340,7 @@ bool need_reloc(unsigned long offset, u8 *src, size_t src_len)
 }
 
 static void __init_or_module noinline
-apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len)
+apply_relocation(const u8 * const instr, u8 *buf, size_t len, u8 *src, size_t src_len)
 {
 	int prev, target = 0;
 
@@ -348,7 +352,7 @@ apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len)
 
 		next = i + insn.length;
 
-		if (__optimize_nops(buf, len, &insn, &next, &prev, &target))
+		if (__optimize_nops(instr, buf, len, &insn, &next, &prev, &target))
 			continue;
 
 		switch (insn.opcode.bytes[0]) {
@@ -365,7 +369,7 @@ apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len)
 			if (need_reloc(next + insn.immediate.value, src, src_len)) {
 				apply_reloc(insn.immediate.nbytes,
 					    buf + i + insn_offset_immediate(&insn),
-					    src - dest);
+					    src - instr);
 			}
 
 			/*
@@ -373,7 +377,7 @@ apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len)
 			 */
 			if (insn.opcode.bytes[0] == JMP32_INSN_OPCODE) {
 				s32 imm = insn.immediate.value;
-				imm += src - dest;
+				imm += src - instr;
 				imm += JMP32_INSN_SIZE - JMP8_INSN_SIZE;
 				if ((imm >> 31) == (imm >> 7)) {
 					buf[i+0] = JMP8_INSN_OPCODE;
@@ -389,7 +393,7 @@ apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len)
 			if (need_reloc(next + insn.displacement.value, src, src_len)) {
 				apply_reloc(insn.displacement.nbytes,
 					    buf + i + insn_offset_displacement(&insn),
-					    src - dest);
+					    src - instr);
 			}
 		}
 	}
@@ -505,7 +509,9 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
 		 *   patch if feature is *NOT* present.
 		 */
 		if (!boot_cpu_has(a->cpuid) == !(a->flags & ALT_FLAG_NOT)) {
-			optimize_nops_inplace(instr, a->instrlen);
+			memcpy(insn_buff, instr, a->instrlen);
+			optimize_nops(instr, insn_buff, a->instrlen);
+			text_poke_early(instr, insn_buff, a->instrlen);
 			continue;
 		}
 
@@ -527,7 +533,7 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start,
 		for (; insn_buff_sz < a->instrlen; insn_buff_sz++)
 			insn_buff[insn_buff_sz] = 0x90;
 
-		apply_relocation(insn_buff, a->instrlen, instr, replacement, a->replacementlen);
+		apply_relocation(instr, insn_buff, a->instrlen, replacement, a->replacementlen);
 
 		DUMP_BYTES(ALT, instr, a->instrlen, "%px:   old_insn: ", instr);
 		DUMP_BYTES(ALT, replacement, a->replacementlen, "%px:   rpl_insn: ", replacement);
@@ -762,7 +768,7 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
 
 		len = patch_retpoline(addr, &insn, bytes);
 		if (len == insn.length) {
-			optimize_nops(bytes, len);
+			optimize_nops(addr, bytes, len);
 			DUMP_BYTES(RETPOLINE, ((u8*)addr),  len, "%px: orig: ", addr);
 			DUMP_BYTES(RETPOLINE, ((u8*)bytes), len, "%px: repl: ", addr);
 			text_poke_early(addr, bytes, len);
-- 
2.43.0


  reply	other threads:[~2024-01-30 11:00 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-01-30 10:59 [PATCH 0/4] x86/alternatives: Do NOPs optimization on a temporary buffer Borislav Petkov
2024-01-30 10:59 ` Borislav Petkov [this message]
2024-02-13 15:36   ` [tip: x86/alternatives] x86/alternatives: Use a temporary buffer when optimizing NOPs tip-bot2 for Borislav Petkov (AMD)
2024-04-09 17:11   ` tip-bot2 for Borislav Petkov (AMD)
2024-01-30 10:59 ` [PATCH 2/4] x86/alternatives: Get rid of __optimize_nops() Borislav Petkov
2024-02-13 15:35   ` [tip: x86/alternatives] " tip-bot2 for Borislav Petkov (AMD)
2024-04-09 17:11   ` tip-bot2 for Borislav Petkov (AMD)
2024-01-30 10:59 ` [PATCH 3/4] x86/alternatives: Optimize optimize_nops() Borislav Petkov
2024-02-13 15:35   ` [tip: x86/alternatives] " tip-bot2 for Borislav Petkov (AMD)
2024-04-09 17:11   ` tip-bot2 for Borislav Petkov (AMD)
2024-01-30 10:59 ` [PATCH 4/4] x86/alternatives: Sort local vars in apply_alternatives() Borislav Petkov
2024-02-13 15:35   ` [tip: x86/alternatives] " tip-bot2 for Borislav Petkov (AMD)
2024-04-09 17:11   ` tip-bot2 for Borislav Petkov (AMD)
2024-01-31 16:17 ` [PATCH 0/4] x86/alternatives: Do NOPs optimization on a temporary buffer Paul Gortmaker
2024-01-31 16:25   ` Borislav Petkov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240130105941.19707-2-bp@alien8.de \
    --to=bp@alien8.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=paul.gortmaker@windriver.com \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.