Re: [PATCH v5 7/8] x86/microcode: Synchronize late microcode loading

From: "Roger Pau Monné" <roger.pau@citrix.com>
To: Chao Gao <chao.gao@intel.com>
Cc: Kevin Tian <kevin.tian@intel.com>, Wei Liu <wei.liu2@citrix.com>,
	Jan Beulich <jbeulich@suse.com>,
	Andrew Cooper <andrew.cooper3@citrix.com>,
	Jun Nakajima <jun.nakajima@intel.com>,
	xen-devel@lists.xenproject.org,
	Thomas Gleixner <tglx@linutronix.de>,
	Borislav Petkov <bp@suse.de>, Ashok Raj <ashok.raj@intel.com>
Subject: Re: [PATCH v5 7/8] x86/microcode: Synchronize late microcode loading
Date: Tue, 29 Jan 2019 11:37:12 +0100	[thread overview]
Message-ID: <20190129103712.6omjwjaf4sdygbye@mac> (raw)
In-Reply-To: <1548659210-16870-8-git-send-email-chao.gao@intel.com>

On Mon, Jan 28, 2019 at 03:06:49PM +0800, Chao Gao wrote:
> This patch ports microcode improvement patches from linux kernel.
> 
> Before you read any further: the early loading method is still the
> preferred one and you should always do that. The following patch is
> improving the late loading mechanism for long running jobs and cloud use
> cases.
> 
> Gather all cores and serialize the microcode update on them by doing it
> one-by-one to make the late update process as reliable as possible and
> avoid potential issues caused by the microcode update.
> 
> Signed-off-by: Chao Gao <chao.gao@intel.com>
> Tested-by: Chao Gao <chao.gao@intel.com>
> [linux commit: a5321aec6412b20b5ad15db2d6b916c05349dbff]
> [linux commit: bb8c13d61a629276a162c1d2b1a20a815cbcfbb7]
> Cc: Kevin Tian <kevin.tian@intel.com>
> Cc: Jun Nakajima <jun.nakajima@intel.com>
> Cc: Ashok Raj <ashok.raj@intel.com>
> Cc: Borislav Petkov <bp@suse.de>
> Cc: Thomas Gleixner <tglx@linutronix.de>
> Cc: Andrew Cooper <andrew.cooper3@citrix.com>
> Cc: Jan Beulich <jbeulich@suse.com>
> ---
>  xen/arch/x86/microcode.c | 125 +++++++++++++++++++++++++++++++++++++----------
>  1 file changed, 98 insertions(+), 27 deletions(-)
> 
> diff --git a/xen/arch/x86/microcode.c b/xen/arch/x86/microcode.c
> index 3c2274f..b7b20cf 100644
> --- a/xen/arch/x86/microcode.c
> +++ b/xen/arch/x86/microcode.c
> @@ -22,6 +22,7 @@
>   */
>  
>  #include <xen/cpu.h>
> +#include <xen/cpumask.h>
>  #include <xen/lib.h>
>  #include <xen/kernel.h>
>  #include <xen/init.h>
> @@ -30,18 +31,25 @@
>  #include <xen/smp.h>
>  #include <xen/softirq.h>
>  #include <xen/spinlock.h>
> +#include <xen/stop_machine.h>
>  #include <xen/tasklet.h>
>  #include <xen/guest_access.h>
>  #include <xen/earlycpio.h>
> +#include <xen/watchdog.h>
>  
> +#include <asm/delay.h>
>  #include <asm/msr.h>
>  #include <asm/processor.h>
>  #include <asm/setup.h>
>  #include <asm/microcode.h>
>  
> +/* By default, wait for 30000us */
> +#define MICROCODE_DEFAULT_TIMEOUT_US 30000
> +
>  static module_t __initdata ucode_mod;
>  static signed int __initdata ucode_mod_idx;
>  static bool_t __initdata ucode_mod_forced;
> +static unsigned int nr_cores;
>  
>  /*
>   * If we scan the initramfs.cpio for the early microcode code
> @@ -188,10 +196,11 @@ static DEFINE_SPINLOCK(microcode_mutex);
>  
>  DEFINE_PER_CPU(struct ucode_cpu_info, ucode_cpu_info);
>  
> -struct microcode_info {
> -    unsigned int cpu;
> -    int error;
> -};
> +/*
> + * Count the CPUs that have entered and exited the rendezvous
> + * during late microcode update.
> + */
> +static atomic_t cpu_in, cpu_out;
>  
>  static void microcode_fini_cpu(unsigned int cpu)
>  {
> @@ -290,30 +299,60 @@ int microcode_resume_cpu(unsigned int cpu)
>      return microcode_ops ? microcode_update_cpu() : 0;
>  }
>  
> -static long do_microcode_update(void *_info)
> +/* Wait for all CPUs to rendezvous with a timeout (us) */
> +static int wait_for_cpus(atomic_t *cnt, unsigned int timeout)
>  {
> -    struct microcode_info *info = _info;
> -    int error;
> +    unsigned int cpus = num_online_cpus();
>  
> -    BUG_ON(info->cpu != smp_processor_id());
> +    atomic_inc(cnt);
> +
> +    while ( atomic_read(cnt) != cpus )
> +    {
> +        if ( timeout <= 0 )
> +        {
> +            printk("CPU%d: Timeout when waiting for CPUs calling in\n",
> +                   smp_processor_id());
> +            return -EBUSY;
> +        }
> +        udelay(1);

udelay will call the rdtsc instruction, is it fine to use it on a
sibling thread while there's a microcode update in process on the same
core?

> +        timeout--;
> +    }
> +
> +    return 0;
> +}
> +
> +static int do_microcode_update(void *unused)
> +{
> +    unsigned int cpu = smp_processor_id();
> +    int ret;
>  
> -    error = microcode_update_cpu();
> -    if ( error )
> -        info->error = error;
> +    ret = wait_for_cpus(&cpu_in, MICROCODE_DEFAULT_TIMEOUT_US);
> +    if ( ret )
> +        return ret;
>  
> -    info->cpu = cpumask_next(info->cpu, &cpu_online_map);
> -    if ( info->cpu < nr_cpu_ids )
> -        return continue_hypercall_on_cpu(info->cpu, do_microcode_update, info);
> +    /*
> +     * Initiate an update on all processors which don't have an online sibling
> +     * thread with a lower thread id. Other sibling threads just await the
> +     * completion of microcode update.
> +     */
> +    if ( cpu == cpumask_first(per_cpu(cpu_sibling_mask, cpu)) )
> +        ret = microcode_update_cpu();

The description says "Gather all cores and serialize the microcode
update on them by doing it one-by-one" but it looks like you are doing
the update in parallel actually?

> +    /*
> +     * Increase the wait timeout to a safe value here since we're serializing
> +     * the microcode update and that could take a while on a large number of
> +     * CPUs. And that is fine as the *actual* timeout will be determined by
> +     * the last CPU finished updating and thus cut short
> +     */
> +    if ( wait_for_cpus(&cpu_out, MICROCODE_DEFAULT_TIMEOUT_US * nr_cores) )
> +        panic("Timeout when finishing updating microcode");
>  
> -    error = info->error;
> -    xfree(info);
> -    return error;
> +    return ret;
>  }
>  
>  int microcode_update(XEN_GUEST_HANDLE_PARAM(const_void) buf, unsigned long len)
>  {
>      int ret;
> -    struct microcode_info *info;
> +    unsigned int cpu;
>      void * buffer;
>  
>      if ( len != (uint32_t)len )
> @@ -334,28 +372,61 @@ int microcode_update(XEN_GUEST_HANDLE_PARAM(const_void) buf, unsigned long len)
>      if ( ret != 0 )
>          goto free;
>  
> +    /* cpu_online_map must not change during update */
> +    if ( !get_cpu_maps() )
> +    {
> +        ret = -EBUSY;
> +        goto free;
> +    }
> +
>      if ( microcode_ops->start_update )
>      {
>          ret = microcode_ops->start_update();
>          if ( ret != 0 )
> -            goto free;
> +            goto put;
>      }
>  
>      ret = parse_microcode_blob(buffer, len);
>      if ( ret <= 0 )
>      {
>          printk(XENLOG_ERR "No valid or newer ucode found. Update abort!\n");
> -        xfree(info);
> -        return -EINVAL;
> +        ret = -EINVAL;
> +        goto put;
>      }
>  
> -    info->error = 0;
> -    info->cpu = cpumask_first(&cpu_online_map);
> +    atomic_set(&cpu_in, 0);
> +    atomic_set(&cpu_out, 0);
> +
> +    /* Calculate the number of online CPU core */
> +    nr_cores = 0;
> +    for_each_online_cpu(cpu)
> +        if ( cpu == cpumask_first(per_cpu(cpu_sibling_mask, cpu)) )
> +            nr_cores++;
> +
> +    printk(XENLOG_INFO "%d cores are to update their microcode\n", nr_cores);
>  
> -    return continue_hypercall_on_cpu(info->cpu, do_microcode_update, info);
> +    /*
> +     * We intend to disable interrupt for long time, which may lead to
> +     * watchdog timeout.
> +     */
> +    watchdog_disable();
> +    /*
> +     * Late loading dance. Why the heavy-handed stop_machine effort?
> +     *
> +     * - HT siblings must be idle and not execute other code while the other
> +     *   sibling is loading microcode in order to avoid any negative
> +     *   interactions cause by the loading.
> +     *
> +     * - In addition, microcode update on the cores must be serialized until
> +     *   this requirement can be relaxed in the future. Right now, this is

As said above, I'm not sure what you are doing here could be
considered serialized, the previous method was clearly serialized
moving from one CPU to the next one.

Here you are likely updating multiple cores at the same time, which
I'm not saying it's wrong, but doesn't seem to match the commit
description or the comments in the code.

Thanks, Roger.

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xenproject.org
https://lists.xenproject.org/mailman/listinfo/xen-devel