All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andrew Cooper <andrew.cooper3@citrix.com>
To: Feng Wu <feng.wu@intel.com>, xen-devel@lists.xen.org
Cc: tim@xen.org, keir.xen@gmail.com, stefano.stabellini@citrix.com,
	ian.campbell@citrix.com, JBeulich@suse.com
Subject: Re: [PATCH v2 4/5] x86: Port the basic alternative mechanism from Linux to Xen
Date: Thu, 29 May 2014 09:55:58 +0100	[thread overview]
Message-ID: <5386F61E.7030408@citrix.com> (raw)
In-Reply-To: <1401341669-5237-5-git-send-email-feng.wu@intel.com>

On 29/05/2014 06:34, Feng Wu wrote:
> This patch ports the basic alternative mechanism from Linux to Xen.
> With this mechanism, we can patch code based on the CPU features.
>
> Signed-off-by: Feng Wu <feng.wu@intel.com>
> ---
>   xen/arch/x86/Makefile             |   1 +
>   xen/arch/x86/alternative.c        | 213 ++++++++++++++++++++++++++++++++++++++
>   xen/arch/x86/setup.c              |   3 +
>   xen/arch/x86/xen.lds.S            |  15 +++
>   xen/include/asm-x86/alternative.h |  78 ++++++++++++++
>   5 files changed, 310 insertions(+)
>   create mode 100644 xen/arch/x86/alternative.c
>   create mode 100644 xen/include/asm-x86/alternative.h
>
> diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile
> index d502bdf..3734884 100644
> --- a/xen/arch/x86/Makefile
> +++ b/xen/arch/x86/Makefile
> @@ -58,6 +58,7 @@ obj-y += crash.o
>   obj-y += tboot.o
>   obj-y += hpet.o
>   obj-y += xstate.o
> +obj-y += alternative.o
>   
>   obj-$(crash_debug) += gdbstub.o
>   
> diff --git a/xen/arch/x86/alternative.c b/xen/arch/x86/alternative.c
> new file mode 100644
> index 0000000..3dbc811
> --- /dev/null
> +++ b/xen/arch/x86/alternative.c
> @@ -0,0 +1,213 @@
> +/******************************************************************************
> + * alternative.c
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
> + */
> +
> +#include <xen/types.h>
> +#include <asm/processor.h>
> +#include <asm/alternative.h>
> +#include <xen/init.h>
> +#include <asm/system.h>
> +#include <asm/traps.h>
> +#include <asm/nmi.h>
> +
> +#define MAX_PATCH_LEN (255-1)
> +
> +extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
> +
> +#ifdef K8_NOP1
> +static const unsigned char k8nops[] __initconst = {
> +    K8_NOP1,
> +    K8_NOP2,
> +    K8_NOP3,
> +    K8_NOP4,
> +    K8_NOP5,
> +    K8_NOP6,
> +    K8_NOP7,
> +    K8_NOP8
> +};
> +static const unsigned char * const k8_nops[ASM_NOP_MAX+1] __initconst = {
> +    NULL,
> +    k8nops,
> +    k8nops + 1,
> +    k8nops + 1 + 2,
> +    k8nops + 1 + 2 + 3,
> +    k8nops + 1 + 2 + 3 + 4,
> +    k8nops + 1 + 2 + 3 + 4 + 5,
> +    k8nops + 1 + 2 + 3 + 4 + 5 + 6,
> +    k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7
> +};
> +#endif
> +
> +#ifdef P6_NOP1
> +static const unsigned char p6nops[] __initconst = {
> +    P6_NOP1,
> +    P6_NOP2,
> +    P6_NOP3,
> +    P6_NOP4,
> +    P6_NOP5,
> +    P6_NOP6,
> +    P6_NOP7,
> +    P6_NOP8
> +};
> +static const unsigned char * const p6_nops[ASM_NOP_MAX+1] __initconst = {
> +    NULL,
> +    p6nops,
> +    p6nops + 1,
> +    p6nops + 1 + 2,
> +    p6nops + 1 + 2 + 3,
> +    p6nops + 1 + 2 + 3 + 4,
> +    p6nops + 1 + 2 + 3 + 4 + 5,
> +    p6nops + 1 + 2 + 3 + 4 + 5 + 6,
> +    p6nops + 1 + 2 + 3 + 4 + 5 + 6 + 7
> +};
> +#endif
> +
> +static const unsigned char * const *ideal_nops __initdata = k8_nops;
> +
> +static int __init mask_nmi_callback(struct cpu_user_regs *regs, int cpu)
> +{
> +    return 1;
> +}
> +
> +static void __init arch_init_ideal_nops(void)
> +{
> +    /*
> +     * Due to a decoder implementation quirk, some
> +     * specific Intel CPUs actually perform better with
> +     * the "k8_nops" than with the SDM-recommended NOPs.
> +     */
> +    if ( (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
> +         !(boot_cpu_data.x86 == 6 &&
> +           boot_cpu_data.x86_model >= 0x0f &&
> +           boot_cpu_data.x86_model != 0x1c &&
> +           boot_cpu_data.x86_model != 0x26 &&
> +           boot_cpu_data.x86_model != 0x27 &&
> +           boot_cpu_data.x86_model < 0x30) )
> +        ideal_nops = p6_nops;
> +}
> +
> +/* Use this to add nops to a buffer, then text_poke the whole buffer. */
> +static void __init add_nops(void *insns, unsigned int len)
> +{
> +    while ( len > 0 )
> +    {
> +        unsigned int noplen = len;
> +        if ( noplen > ASM_NOP_MAX )
> +            noplen = ASM_NOP_MAX;
> +        memcpy(insns, ideal_nops[noplen], noplen);
> +        insns += noplen;
> +        len -= noplen;
> +    }
> +}
> +
> +/*
> + * text_poke_early - Update instructions on a live kernel at boot time
> + * @addr: address to modify
> + * @opcode: source of the copy
> + * @len: length to copy
> + *
> + * When you use this code to patch more than one byte of an instruction
> + * you need to make sure that other CPUs cannot execute this code in parallel.
> + * Also no thread must be currently preempted in the middle of these
> + * instructions. And on the local CPU you need to be protected again NMI or MCE
> + * handlers seeing an inconsistent instruction while you patch.
> + */
> +static void *__init text_poke_early(void *addr, const void *opcode, size_t len)
> +{
> +    unsigned long flags;
> +
> +    local_irq_save(flags);
> +    memcpy(addr, opcode, len);
> +    sync_core();
> +    local_irq_restore(flags);
> +
> +    return addr;
> +}
> +
> +/*
> + * Replace instructions with better alternatives for this CPU type.
> + * This runs before SMP is initialized to avoid SMP problems with
> + * self modifying code. This implies that asymmetric systems where
> + * APs have less capabilities than the boot processor are not handled.
> + * Tough. Make sure you disable such features by hand.
> + */
> +

Excess newline

> +static void __init apply_alternatives(struct alt_instr *start, struct alt_instr *end)
> +{
> +    struct alt_instr *a;
> +    u8 *instr, *replacement;
> +    u8 insnbuf[MAX_PATCH_LEN];
> +
> +    printk(KERN_INFO "alt table %p -> %p\n", start, end);
> +
> +    /*
> +     * The scan order should be from start to end. A later scanned
> +     * alternative code can overwrite a previous scanned alternative code.
> +     * Some kernel functions (e.g. memcpy, memset, etc) use this order to
> +     * patch code.
> +     *
> +     * So be careful if you want to change the scan order to any other
> +     * order.
> +     */
> +    for ( a = start; a < end; a++ )
> +    {
> +        instr = (u8 *)&a->instr_offset + a->instr_offset;
> +        replacement = (u8 *)&a->repl_offset + a->repl_offset;
> +        BUG_ON(a->replacementlen > a->instrlen);
> +        BUG_ON(a->instrlen > sizeof(insnbuf));
> +        BUG_ON(a->cpuid >= NCAPINTS * 32);
> +        if ( !boot_cpu_has(a->cpuid) )
> +            continue;
> +
> +        memcpy(insnbuf, replacement, a->replacementlen);
> +
> +        /* 0xe8/0xe9 is a relative jump; fix the offset. */
> +        if ( (*insnbuf & 0xfe) == 0xe8 && a->replacementlen == 5 )
> +            *(s32 *)(insnbuf + 1) += replacement - instr;
> +
> +        add_nops(insnbuf + a->replacementlen,
> +                 a->instrlen - a->replacementlen);
> +        text_poke_early(instr, insnbuf, a->instrlen);
> +    }
> +}
> +
> +void __init alternative_instructions(void)
> +{
> +    nmi_callback_t saved_nmi_callback;
> +
> +    arch_init_ideal_nops();
> +
> +    /*
> +     * The patching is not fully atomic, so try to avoid local interruptions
> +     * that might execute the to be patched code.
> +     * Other CPUs are not running.
> +     */
> +    saved_nmi_callback = set_nmi_callback(mask_nmi_callback);

Newline here

> +    /*
> +     * Don't stop machine check exceptions while patching.
> +     * MCEs only happen when something got corrupted and in this
> +     * case we must do something about the corruption.
> +     * Ignoring it is worse than a unlikely patching race.
> +     * Also machine checks tend to be broadcast and if one CPU
> +     * goes into machine check the others follow quickly, so we don't
> +     * expect a machine check to cause undue problems during to code
> +     * patching.
> +     */
> +

but not here.

> +    apply_alternatives(__alt_instructions, __alt_instructions_end);

Possibly also here.

> +    set_nmi_callback(saved_nmi_callback);
> +}
> diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
> index 508649d..d16453a 100644
> --- a/xen/arch/x86/setup.c
> +++ b/xen/arch/x86/setup.c
> @@ -48,6 +48,7 @@
>   #include <asm/setup.h>
>   #include <xen/cpu.h>
>   #include <asm/nmi.h>
> +#include <asm/alternative.h>
>   
>   /* opt_nosmp: If true, secondary processors are ignored. */
>   static bool_t __initdata opt_nosmp;
> @@ -1288,6 +1289,8 @@ void __init noreturn __start_xen(unsigned long mbi_p)
>       if ( cpu_has_fsgsbase )
>           set_in_cr4(X86_CR4_FSGSBASE);
>   
> +    alternative_instructions();
> +

Given this ordering, it might be cleaner to have an 
ASSERT(!local_irq_enabled()) in the top of alternative_instructions(), 
and forgo the local_irq_save/restore() in text_poke_early().

If you can move this higher up before enabling MCEs in CR4, it might be 
slightly more resilient.

~Andrew

>       local_irq_enable();
>   
>       pt_pci_init();
> diff --git a/xen/arch/x86/xen.lds.S b/xen/arch/x86/xen.lds.S
> index 17db361..d4b1f1a 100644
> --- a/xen/arch/x86/xen.lds.S
> +++ b/xen/arch/x86/xen.lds.S
> @@ -105,6 +105,12 @@ SECTIONS
>     .init.text : {
>          _sinittext = .;
>          *(.init.text)
> +       /*
> +        * Here are the replacement instructions. The linker sticks them
> +        * as binary blobs. The .altinstructions has enough data to get
> +        * the address and the length of them to patch the kernel safely.
> +        */
> +       *(.altinstr_replacement)
>          _einittext = .;
>     } :text
>     .init.data : {
> @@ -120,6 +126,15 @@ SECTIONS
>          __trampoline_seg_start = .;
>          *(.trampoline_seg)
>          __trampoline_seg_stop = .;
> +       /*
> +        * struct alt_inst entries. From the header (alternative.h):
> +        * "Alternative instructions for different CPU types or capabilities"
> +        * Think locking instructions on spinlocks.
> +        */
> +       . = ALIGN(8);
> +        __alt_instructions = .;
> +        *(.altinstructions)
> +        __alt_instructions_end = .;
>   
>          . = ALIGN(8);
>          __ctors_start = .;
> diff --git a/xen/include/asm-x86/alternative.h b/xen/include/asm-x86/alternative.h
> new file mode 100644
> index 0000000..55a6604
> --- /dev/null
> +++ b/xen/include/asm-x86/alternative.h
> @@ -0,0 +1,78 @@
> +#ifndef __X86_ALTERNATIVE_H__
> +#define __X86_ALTERNATIVE_H__
> +
> +#include <asm/nops.h>
> +
> +#ifdef __ASSEMBLY__
> +.macro altinstruction_entry orig alt feature orig_len alt_len
> +        .long \orig - .
> +        .long \alt - .
> +        .word \feature
> +        .byte \orig_len
> +        .byte \alt_len
> +.endm
> +#else
> +#include <xen/types.h>
> +
> +struct alt_instr {
> +    s32 instr_offset;       /* original instruction */
> +    s32 repl_offset;        /* offset to replacement instruction */
> +    u16 cpuid;              /* cpuid bit set for replacement */
> +    u8  instrlen;           /* length of original instruction */
> +    u8  replacementlen;     /* length of new instruction, <= instrlen */
> +};
> +
> +extern void alternative_instructions(void);
> +
> +#define OLDINSTR(oldinstr)      "661:\n\t" oldinstr "\n662:\n"
> +
> +#define b_replacement(number)   "663"#number
> +#define e_replacement(number)   "664"#number
> +
> +#define alt_slen "662b-661b"
> +#define alt_rlen(number) e_replacement(number)"f-"b_replacement(number)"f"
> +
> +#define ALTINSTR_ENTRY(feature, number)                                       \
> +        " .long 661b - .\n"                             /* label           */ \
> +        " .long " b_replacement(number)"f - .\n"        /* new instruction */ \
> +        " .word " STR(feature) "\n"                     /* feature bit     */ \
> +        " .byte " alt_slen "\n"                         /* source len      */ \
> +        " .byte " alt_rlen(number) "\n"                 /* replacement len */
> +
> +#define DISCARD_ENTRY(number)                           /* rlen <= slen */    \
> +        " .byte 0xff + (" alt_rlen(number) ") - (" alt_slen ")\n"
> +
> +#define ALTINSTR_REPLACEMENT(newinstr, feature, number) /* replacement */     \
> +        b_replacement(number)":\n\t" newinstr "\n" e_replacement(number) ":\n\t"
> +
> +/* alternative assembly primitive: */
> +#define ALTERNATIVE(oldinstr, newinstr, feature)                        \
> +        OLDINSTR(oldinstr)                                              \
> +        ".pushsection .altinstructions,\"a\"\n"                         \
> +        ALTINSTR_ENTRY(feature, 1)                                      \
> +        ".popsection\n"                                                 \
> +        ".pushsection .discard,\"aw\",@progbits\n"                      \
> +        DISCARD_ENTRY(1)                                                \
> +        ".popsection\n"                                                 \
> +        ".pushsection .altinstr_replacement, \"ax\"\n"                  \
> +        ALTINSTR_REPLACEMENT(newinstr, feature, 1)                      \
> +        ".popsection"
> +
> +/*
> + * Alternative instructions for different CPU types or capabilities.
> + *
> + * This allows to use optimized instructions even on generic binary
> + * kernels.
> + *
> + * length of oldinstr must be longer or equal the length of newinstr
> + * It can be padded with nops as needed.
> + *
> + * For non barrier like inlines please define new variants
> + * without volatile and memory clobber.
> + */
> +#define alternative(oldinstr, newinstr, feature)                        \
> +        asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory")
> +
> +#endif  /*  __ASSEMBLY__  */
> +
> +#endif /* __X86_ALTERNATIVE_H__ */

  reply	other threads:[~2014-05-29  8:55 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-05-29  5:34 [PATCH v2 0/5] x86: Use alternative mechanism to define CLAC/STAC Feng Wu
2014-05-29  5:34 ` [PATCH v2 1/5] Use STR() as the only method for performing preprocessor stringificaion Feng Wu
2014-05-29  7:00   ` Andrew Cooper
2014-05-29 15:00   ` Jan Beulich
2014-05-29  5:34 ` [PATCH v2 2/5] x86: Add definitions for NOP operation Feng Wu
2014-05-29  8:43   ` Andrew Cooper
2014-05-29  5:34 ` [PATCH v2 3/5] x86: Make set_nmi_callback return the old nmi callback Feng Wu
2014-05-29  7:01   ` Andrew Cooper
2014-05-29  5:34 ` [PATCH v2 4/5] x86: Port the basic alternative mechanism from Linux to Xen Feng Wu
2014-05-29  8:55   ` Andrew Cooper [this message]
2014-05-29  9:28     ` Wu, Feng
2014-05-29 14:59       ` Andrew Cooper
2014-05-30  4:48         ` Wu, Feng
2014-05-30  7:07           ` Jan Beulich
2014-05-29  5:34 ` [PATCH v2 5/5] x86: Use alternative mechanism to define CLAC/STAC Feng Wu
2014-05-29  8:56   ` Andrew Cooper

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5386F61E.7030408@citrix.com \
    --to=andrew.cooper3@citrix.com \
    --cc=JBeulich@suse.com \
    --cc=feng.wu@intel.com \
    --cc=ian.campbell@citrix.com \
    --cc=keir.xen@gmail.com \
    --cc=stefano.stabellini@citrix.com \
    --cc=tim@xen.org \
    --cc=xen-devel@lists.xen.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.