From: Feng Wu <feng.wu@intel.com>
To: xen-devel@lists.xen.org
Cc: Feng Wu <feng.wu@intel.com>,
ian.campbell@citrix.com, andrew.cooper3@citrix.com, tim@xen.org,
keir.xen@gmail.com, stefano.stabellini@citrix.com,
JBeulich@suse.com, boris.ostrovsky@oracle.com
Subject: [PATCH v3 4/5] x86: Port the basic alternative mechanism from Linux to Xen
Date: Fri, 30 May 2014 16:56:24 +0800 [thread overview]
Message-ID: <1401440185-11266-5-git-send-email-feng.wu@intel.com> (raw)
In-Reply-To: <1401440185-11266-1-git-send-email-feng.wu@intel.com>
This patch ports the basic alternative mechanism from Linux to Xen.
With this mechanism, we can patch code based on the CPU features.
Signed-off-by: Feng Wu <feng.wu@intel.com>
---
xen/arch/x86/Makefile | 1 +
xen/arch/x86/alternative.c | 213 ++++++++++++++++++++++++++++++++++++++
xen/arch/x86/setup.c | 3 +
xen/arch/x86/xen.lds.S | 15 +++
xen/include/asm-x86/alternative.h | 78 ++++++++++++++
5 files changed, 310 insertions(+)
create mode 100644 xen/arch/x86/alternative.c
create mode 100644 xen/include/asm-x86/alternative.h
diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile
index d502bdf..3734884 100644
--- a/xen/arch/x86/Makefile
+++ b/xen/arch/x86/Makefile
@@ -58,6 +58,7 @@ obj-y += crash.o
obj-y += tboot.o
obj-y += hpet.o
obj-y += xstate.o
+obj-y += alternative.o
obj-$(crash_debug) += gdbstub.o
diff --git a/xen/arch/x86/alternative.c b/xen/arch/x86/alternative.c
new file mode 100644
index 0000000..18cf17c
--- /dev/null
+++ b/xen/arch/x86/alternative.c
@@ -0,0 +1,213 @@
+/******************************************************************************
+ * alternative.c
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#include <xen/types.h>
+#include <asm/processor.h>
+#include <asm/alternative.h>
+#include <xen/init.h>
+#include <asm/system.h>
+#include <asm/traps.h>
+#include <asm/nmi.h>
+
+#define MAX_PATCH_LEN (255-1)
+
+extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
+
+#ifdef K8_NOP1
+static const unsigned char k8nops[] __initconst = { /* 1..8-byte AMD K8 NOP encodings, concatenated back to back */
+ K8_NOP1,
+ K8_NOP2,
+ K8_NOP3,
+ K8_NOP4,
+ K8_NOP5,
+ K8_NOP6,
+ K8_NOP7,
+ K8_NOP8
+};
+static const unsigned char * const k8_nops[ASM_NOP_MAX+1] __initconst = { /* k8_nops[n] points at an n-byte NOP */
+ NULL,
+ k8nops,
+ k8nops + 1,
+ k8nops + 1 + 2,
+ k8nops + 1 + 2 + 3,
+ k8nops + 1 + 2 + 3 + 4,
+ k8nops + 1 + 2 + 3 + 4 + 5,
+ k8nops + 1 + 2 + 3 + 4 + 5 + 6,
+ k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7
+};
+#endif
+
+#ifdef P6_NOP1
+static const unsigned char p6nops[] __initconst = { /* 1..8-byte Intel P6 (SDM-recommended) NOP encodings */
+ P6_NOP1,
+ P6_NOP2,
+ P6_NOP3,
+ P6_NOP4,
+ P6_NOP5,
+ P6_NOP6,
+ P6_NOP7,
+ P6_NOP8
+};
+static const unsigned char * const p6_nops[ASM_NOP_MAX+1] __initconst = { /* p6_nops[n] points at an n-byte NOP */
+ NULL,
+ p6nops,
+ p6nops + 1,
+ p6nops + 1 + 2,
+ p6nops + 1 + 2 + 3,
+ p6nops + 1 + 2 + 3 + 4,
+ p6nops + 1 + 2 + 3 + 4 + 5,
+ p6nops + 1 + 2 + 3 + 4 + 5 + 6,
+ p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7
+};
+#endif
+
+static const unsigned char * const *ideal_nops __initdata = k8_nops; /* default; arch_init_ideal_nops() may switch to p6_nops. NOTE(review): this and arch_init_ideal_nops() reference k8_nops/p6_nops unguarded — confirm asm/nops.h always defines both K8_NOP1 and P6_NOP1 */
+
+static int __init mask_nmi_callback(struct cpu_user_regs *regs, int cpu) /* swallow NMIs while code is being patched */
+{
+ return 1; /* non-zero: report the NMI as handled so no further processing occurs */
+}
+
+static void __init arch_init_ideal_nops(void)
+{
+ /*
+ * Prefer P6 NOPs on Intel CPUs, except that due to a decoder
+ * quirk some family-6 models (0x0f-0x2f, other than the
+ * 0x1c/0x26/0x27 Atoms) perform better with the "k8_nops"
+ * than with the SDM-recommended NOPs, so those keep the
+ * k8_nops default set at ideal_nops' definition.
+ */
+ if ( (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
+ !(boot_cpu_data.x86 == 6 &&
+ boot_cpu_data.x86_model >= 0x0f &&
+ boot_cpu_data.x86_model != 0x1c &&
+ boot_cpu_data.x86_model != 0x26 &&
+ boot_cpu_data.x86_model != 0x27 &&
+ boot_cpu_data.x86_model < 0x30) )
+ ideal_nops = p6_nops;
+}
+
+/* Fill len bytes at insns with ideal NOPs (longest first, at most ASM_NOP_MAX each); then text_poke the whole buffer. */
+static void __init add_nops(void *insns, unsigned int len)
+{
+ while ( len > 0 )
+ {
+ unsigned int noplen = len; /* clamp each chunk to the longest available NOP encoding */
+ if ( noplen > ASM_NOP_MAX )
+ noplen = ASM_NOP_MAX;
+ memcpy(insns, ideal_nops[noplen], noplen);
+ insns += noplen;
+ len -= noplen;
+ }
+}
+
+/*
+ * text_poke_early - Update instructions on a live kernel at boot time
+ * @addr: address to modify
+ * @opcode: source of the copy
+ * @len: length to copy
+ *
+ * When you use this code to patch more than one byte of an instruction
+ * you need to make sure that other CPUs cannot execute this code in parallel.
+ * Also no thread must be currently preempted in the middle of these
+ * instructions. And on the local CPU you need to be protected against NMI or MCE
+ * handlers seeing an inconsistent instruction while you patch.
+ *
+ * This routine is called with local interrupts disabled.
+ */
+static void *__init text_poke_early(void *addr, const void *opcode, size_t len)
+{
+ memcpy(addr, opcode, len);
+ sync_core(); /* serialize: make the freshly written instructions visible to this CPU's front end */
+
+ return addr;
+}
+
+/*
+ * Replace instructions with better alternatives for this CPU type.
+ * This runs before SMP is initialized to avoid SMP problems with
+ * self modifying code. This implies that asymmetric systems where
+ * APs have less capabilities than the boot processor are not handled.
+ * Tough. Make sure you disable such features by hand.
+ */
+static void __init apply_alternatives(struct alt_instr *start, struct alt_instr *end)
+{
+ struct alt_instr *a;
+ u8 *instr, *replacement;
+ u8 insnbuf[MAX_PATCH_LEN];
+
+ ASSERT(!local_irq_is_enabled()); /* patching is not atomic; caller must have IRQs off */
+
+ printk(KERN_INFO "alt table %p -> %p\n", start, end);
+
+ /*
+ * The scan order should be from start to end. A later scanned
+ * alternative code can overwrite a previous scanned alternative code.
+ * Some kernel functions (e.g. memcpy, memset, etc) use this order to
+ * patch code.
+ *
+ * So be careful if you want to change the scan order to any other
+ * order.
+ */
+ for ( a = start; a < end; a++ )
+ {
+ instr = (u8 *)&a->instr_offset + a->instr_offset; /* self-relative offsets -> absolute addresses */
+ replacement = (u8 *)&a->repl_offset + a->repl_offset;
+ BUG_ON(a->replacementlen > a->instrlen); /* replacement must fit in the original slot */
+ BUG_ON(a->instrlen > sizeof(insnbuf));
+ BUG_ON(a->cpuid >= NCAPINTS * 32); /* feature bit must be a valid cpufeature index */
+ if ( !boot_cpu_has(a->cpuid) )
+ continue;
+
+ memcpy(insnbuf, replacement, a->replacementlen);
+
+ /* 0xe8/0xe9 is a rel32 call/jmp; retarget its displacement for the patched location. */
+ if ( (*insnbuf & 0xfe) == 0xe8 && a->replacementlen == 5 )
+ *(s32 *)(insnbuf + 1) += replacement - instr;
+
+ add_nops(insnbuf + a->replacementlen,
+ a->instrlen - a->replacementlen);
+ text_poke_early(instr, insnbuf, a->instrlen);
+ }
+}
+
+void __init alternative_instructions(void)
+{
+ nmi_callback_t saved_nmi_callback;
+
+ arch_init_ideal_nops();
+
+ /*
+ * The patching is not fully atomic, so try to avoid local interruptions
+ * that might execute the to-be-patched code.
+ * Other CPUs are not running.
+ */
+ saved_nmi_callback = set_nmi_callback(mask_nmi_callback);
+
+ /*
+ * Don't stop machine check exceptions while patching.
+ * MCEs only happen when something got corrupted and in this
+ * case we must do something about the corruption.
+ * Ignoring it is worse than an unlikely patching race.
+ * Also machine checks tend to be broadcast and if one CPU
+ * goes into machine check the others follow quickly, so we don't
+ * expect a machine check to cause undue problems during code
+ * patching.
+ */
+ apply_alternatives(__alt_instructions, __alt_instructions_end);
+
+ set_nmi_callback(saved_nmi_callback); /* restore whatever NMI handling was in place before */
+}
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index 508649d..d16453a 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -48,6 +48,7 @@
#include <asm/setup.h>
#include <xen/cpu.h>
#include <asm/nmi.h>
+#include <asm/alternative.h>
/* opt_nosmp: If true, secondary processors are ignored. */
static bool_t __initdata opt_nosmp;
@@ -1288,6 +1289,8 @@ void __init noreturn __start_xen(unsigned long mbi_p)
if ( cpu_has_fsgsbase )
set_in_cr4(X86_CR4_FSGSBASE);
+ alternative_instructions();
+
local_irq_enable();
pt_pci_init();
diff --git a/xen/arch/x86/xen.lds.S b/xen/arch/x86/xen.lds.S
index 17db361..d4b1f1a 100644
--- a/xen/arch/x86/xen.lds.S
+++ b/xen/arch/x86/xen.lds.S
@@ -105,6 +105,12 @@ SECTIONS
.init.text : {
_sinittext = .;
*(.init.text)
+ /*
+ * Here are the replacement instructions. The linker sticks them
+ * as binary blobs. The .altinstructions has enough data to get
+ * the address and the length of them to patch the kernel safely.
+ */
+ *(.altinstr_replacement)
_einittext = .;
} :text
.init.data : {
@@ -120,6 +126,15 @@ SECTIONS
__trampoline_seg_start = .;
*(.trampoline_seg)
__trampoline_seg_stop = .;
+ /*
+ * struct alt_instr entries. From the header (alternative.h):
+ * "Alternative instructions for different CPU types or capabilities"
+ * Think locking instructions on spinlocks.
+ */
+ . = ALIGN(8);
+ __alt_instructions = .;
+ *(.altinstructions)
+ __alt_instructions_end = .;
. = ALIGN(8);
__ctors_start = .;
diff --git a/xen/include/asm-x86/alternative.h b/xen/include/asm-x86/alternative.h
new file mode 100644
index 0000000..c746047
--- /dev/null
+++ b/xen/include/asm-x86/alternative.h
@@ -0,0 +1,78 @@
+#ifndef __X86_ALTERNATIVE_H__
+#define __X86_ALTERNATIVE_H__
+
+#include <asm/nops.h>
+
+#ifdef __ASSEMBLY__
+.macro altinstruction_entry orig alt feature orig_len alt_len /* emits one struct alt_instr; layout must match the C struct below */
+ .long \orig - .
+ .long \alt - .
+ .word \feature
+ .byte \orig_len
+ .byte \alt_len
+.endm
+#else
+#include <xen/types.h>
+
+struct alt_instr {
+ s32 instr_offset; /* self-relative offset to the original instruction */
+ s32 repl_offset; /* self-relative offset to the replacement instruction */
+ u16 cpuid; /* cpuid feature bit that selects the replacement */
+ u8 instrlen; /* length of original instruction */
+ u8 replacementlen; /* length of new instruction, <= instrlen */
+};
+
+extern void alternative_instructions(void);
+
+#define OLDINSTR(oldinstr) "661:\n\t" oldinstr "\n662:\n"
+
+#define b_replacement(number) "663"#number
+#define e_replacement(number) "664"#number
+
+#define alt_slen "662b-661b"
+#define alt_rlen(number) e_replacement(number)"f-"b_replacement(number)"f"
+
+#define ALTINSTR_ENTRY(feature, number) \
+ " .long 661b - .\n" /* label */ \
+ " .long " b_replacement(number)"f - .\n" /* new instruction */ \
+ " .word " __stringify(feature) "\n" /* feature bit */ \
+ " .byte " alt_slen "\n" /* source len */ \
+ " .byte " alt_rlen(number) "\n" /* replacement len */
+
+#define DISCARD_ENTRY(number) /* build-time check: .byte overflows (> 0xff) unless rlen <= slen */ \
+ " .byte 0xff + (" alt_rlen(number) ") - (" alt_slen ")\n"
+
+#define ALTINSTR_REPLACEMENT(newinstr, feature, number) /* replacement */ \
+ b_replacement(number)":\n\t" newinstr "\n" e_replacement(number) ":\n\t"
+
+/* alternative assembly primitive: */
+#define ALTERNATIVE(oldinstr, newinstr, feature) \
+ OLDINSTR(oldinstr) \
+ ".pushsection .altinstructions,\"a\"\n" \
+ ALTINSTR_ENTRY(feature, 1) \
+ ".popsection\n" \
+ ".pushsection .discard,\"aw\",@progbits\n" \
+ DISCARD_ENTRY(1) \
+ ".popsection\n" \
+ ".pushsection .altinstr_replacement, \"ax\"\n" \
+ ALTINSTR_REPLACEMENT(newinstr, feature, 1) \
+ ".popsection"
+
+/*
+ * Alternative instructions for different CPU types or capabilities.
+ *
+ * This allows the use of optimized instructions even in a generic
+ * binary build.
+ *
+ * The length of oldinstr must be greater than or equal to that of
+ * newinstr; it can be padded with nops as needed.
+ *
+ * For non barrier like inlines please define new variants
+ * without volatile and memory clobber.
+ */
+#define alternative(oldinstr, newinstr, feature) \
+ asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory")
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __X86_ALTERNATIVE_H__ */
--
1.8.3.1
next prev parent reply other threads:[~2014-05-30 8:56 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-05-30 8:56 [PATCH v3 0/5] x86: Use alternative mechanism to define CLAC/STAC Feng Wu
2014-05-30 8:56 ` [PATCH v3 1/5] Use __stringify() as the only method for performing preprocessor stringificaion Feng Wu
2014-05-30 11:35 ` Jan Beulich
2014-06-03 0:42 ` Wu, Feng
2014-06-03 12:37 ` Wu, Feng
2014-06-03 13:05 ` Jan Beulich
2014-06-03 13:13 ` Wu, Feng
2014-05-30 8:56 ` [PATCH v3 2/5] x86: Add definitions for NOP operation Feng Wu
2014-05-30 8:56 ` [PATCH v3 3/5] x86: Make set_nmi_callback return the old nmi callback Feng Wu
2014-05-30 8:56 ` Feng Wu [this message]
2014-06-03 10:00 ` [PATCH v3 4/5] x86: Port the basic alternative mechanism from Linux to Xen Jan Beulich
2014-06-03 10:13 ` Wu, Feng
2014-06-03 10:25 ` Jan Beulich
2014-06-03 10:35 ` Wu, Feng
2014-06-03 10:51 ` Jan Beulich
2014-06-03 11:42 ` Wu, Feng
2014-06-03 11:59 ` Jan Beulich
2014-06-03 13:05 ` Wu, Feng
2014-06-03 13:10 ` Jan Beulich
2014-06-03 13:27 ` Wu, Feng
2014-05-30 8:56 ` [PATCH v3 5/5] x86: Use alternative mechanism to define CLAC/STAC Feng Wu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1401440185-11266-5-git-send-email-feng.wu@intel.com \
--to=feng.wu@intel.com \
--cc=JBeulich@suse.com \
--cc=andrew.cooper3@citrix.com \
--cc=boris.ostrovsky@oracle.com \
--cc=ian.campbell@citrix.com \
--cc=keir.xen@gmail.com \
--cc=stefano.stabellini@citrix.com \
--cc=tim@xen.org \
--cc=xen-devel@lists.xen.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).