From mboxrd@z Thu Jan  1 00:00:00 1970
From: Boris Ostrovsky <boris.ostrovsky@amd.com>
Subject: Re: [PATCH] x86/ucode: Improve error handling and
 container file processing on AMD
Date: Fri, 7 Dec 2012 08:46:28 -0500
Message-ID: <50C1F334.20802@amd.com>
References: <1354818491-9723-1-git-send-email-boris.ostrovsky@amd.com>
	<50C1E41402000078000AEED6@nat28.tlf.novell.com>
Mime-Version: 1.0
Content-Type: text/plain; charset="us-ascii"; Format="flowed"
Content-Transfer-Encoding: 7bit
Return-path: <xen-devel-bounces@lists.xen.org>
In-Reply-To: <50C1E41402000078000AEED6@nat28.tlf.novell.com>
List-Unsubscribe: <http://lists.xen.org/cgi-bin/mailman/options/xen-devel>,
	<mailto:xen-devel-request@lists.xen.org?subject=unsubscribe>
List-Post: <mailto:xen-devel@lists.xen.org>
List-Help: <mailto:xen-devel-request@lists.xen.org?subject=help>
List-Subscribe: <http://lists.xen.org/cgi-bin/mailman/listinfo/xen-devel>,
	<mailto:xen-devel-request@lists.xen.org?subject=subscribe>
Sender: xen-devel-bounces@lists.xen.org
Errors-To: xen-devel-bounces@lists.xen.org
To: Jan Beulich <JBeulich@suse.com>
Cc: Ian.Campbell@citrix.com, Christoph_Egger@gmx.de, xen-devel@lists.xen.org
List-Id: xen-devel@lists.xenproject.org

Inline.

On 12/07/2012 06:41 AM, Jan Beulich wrote:
>>>> On 06.12.12 at 19:28, Boris Ostrovsky <boris.ostrovsky@amd.com> wrote:
>> Do not report error when a patch is not appplicable to current processor,
>> simply skip it and move on to next patch in container file.
>>
>> Process container file to the end instead of stopping at the first
>> applicable patch.
>>
>> Log the fact that a patch has been applied at KERN_WARNING level, add
>> CPU number to debug messages.
>>
>> Signed-off-by: Boris Ostrovsky <boris.ostrovsky@amd.com>
>> ---
>>   xen/arch/x86/microcode_amd.c |   69 +++++++++++++++++++++++-------------------
>>   1 file changed, 38 insertions(+), 31 deletions(-)
>>
>> diff --git a/xen/arch/x86/microcode_amd.c b/xen/arch/x86/microcode_amd.c
>> index 7a54001..bb5968e 100644
>> --- a/xen/arch/x86/microcode_amd.c
>> +++ b/xen/arch/x86/microcode_amd.c
>> @@ -88,13 +88,13 @@ static int collect_cpu_info(int cpu, struct cpu_signature
>> *csig)
>>
>>       rdmsrl(MSR_AMD_PATCHLEVEL, csig->rev);
>>
>> -    printk(KERN_DEBUG "microcode: collect_cpu_info: patch_id=%#x\n",
>> -           csig->rev);
>> +    printk(KERN_DEBUG "microcode: CPU%d collect_cpu_info: patch_id=%#x\n",
>> +           cpu, csig->rev);
>>
>>       return 0;
>>   }
>>
>> -static int microcode_fits(const struct microcode_amd *mc_amd, int cpu)
>> +static bool_t microcode_fits(const struct microcode_amd *mc_amd, int cpu)
>>   {
>>       struct ucode_cpu_info *uci = &per_cpu(ucode_cpu_info, cpu);
>>       const struct microcode_header_amd *mc_header = mc_amd->mpb;
>> @@ -125,11 +125,16 @@ static int microcode_fits(const struct microcode_amd *mc_amd, int cpu)
>>           printk(KERN_DEBUG "microcode: CPU%d patch does not match "
>>                  "(patch is %x, cpu base id is %x) \n",
>>                  cpu, mc_header->processor_rev_id, equiv_cpu_id);
>> -        return -EINVAL;
>> +        return 0;
>>       }
>>
>>       if ( mc_header->patch_id <= uci->cpu_sig.rev )
>> +    {
>> +        printk(KERN_DEBUG "microcode: CPU%d patching is not needed: "
>> +               "patch provides level 0x%x, cpu is at 0x%x \n",
>
> Did you not notice that all the other messages now use %#x?
>
> Also, I'm not really convinced we need this added verbosity. I
> personally tend to run all my systems with "loglvl=all", and the
> amount of output produced by the code in this file made me
> change that for just the one AMD machine I have that is new
> enough to support microcode loading.
>
> Minimally I'd expect nothing to be printed here if the two
> versions match.

Yes, I may have gone a bit overboard with chattiness.

This is actually made worse by the fact that we are going through patch 
loading twice during boot --- once through start_secondary() (or via 
microcode_presmp_init() on CPU0) and second time when the driver is 
initialized.

Do we really need what microcode_init() does?

>
>> +               cpu, mc_header->patch_id, uci->cpu_sig.rev);
>>           return 0;
>> +    }
>>
>>       printk(KERN_DEBUG "microcode: CPU%d found a matching microcode "
>>              "update with version %#x (current=%#x)\n",
>> @@ -173,7 +178,7 @@ static int apply_microcode(int cpu)
>>           return -EIO;
>>       }
>>
>> -    printk(KERN_INFO "microcode: CPU%d updated from revision %#x to %#x\n",
>> +    printk(KERN_WARNING "microcode: CPU%d updated from revision %#x to %#x\n",
>>              cpu, uci->cpu_sig.rev, hdr->patch_id);
>>
>>       uci->cpu_sig.rev = rev;
>> @@ -181,7 +186,7 @@ static int apply_microcode(int cpu)
>>       return 0;
>>   }
>>
>> -static int get_next_ucode_from_buffer_amd(
>> +static int get_ucode_from_buffer_amd(
>>       struct microcode_amd *mc_amd,
>>       const void *buf,
>>       size_t bufsize,
>> @@ -194,8 +199,12 @@ static int get_next_ucode_from_buffer_amd(
>>       off = *offset;
>>
>>       /* No more data */
>> -    if ( off >= bufsize )
>> -        return 1;
>> +    if ( off >= bufsize )
>> +    {
>> +        printk(KERN_ERR "microcode: error! "
>> +               "ucode buffer overrun\n");
>
> All on one line please (and the "error!" probably is superfluous).
>
> Also, is printing this really correct when off == bufsize? Or can
> this not happen at all?

Prior to this patch off >= bufsize was not an error but actually loop 
termination condition for the caller ("==" was the most likely case). 
Now the loop is broken out explicitly so when off >= bufsize (including 
"==") we have an error. Having said this, it is pretty much impossible 
to get it with current code.

>
>> +        return -EINVAL;
>> +    }
>>
>>       mpbuf = (const struct mpbhdr *)&bufp[off];
>>       if ( mpbuf->type != UCODE_UCODE_TYPE )
>> @@ -205,8 +214,8 @@ static int get_next_ucode_from_buffer_amd(
>>           return -EINVAL;
>>       }
>>
>> -    printk(KERN_DEBUG "microcode: size %zu, block size %u, offset %zu\n",
>> -           bufsize, mpbuf->len, off);
>> +    printk(KERN_DEBUG "microcode: CPU%d size %zu, block size %u, offset %zu\n",
>> +           raw_smp_processor_id(), bufsize, mpbuf->len, off);
>>
>>       if ( (off + mpbuf->len) > bufsize )
>>       {
>> @@ -278,8 +287,8 @@ static int cpu_request_microcode(int cpu, const void *buf, size_t bufsize)
>>   {
>>       struct microcode_amd *mc_amd, *mc_old;
>>       size_t offset = bufsize;
>> +    size_t last_offset, applied_offset = 0;
>>       int error = 0;
>> -    int ret;
>>       struct ucode_cpu_info *uci = &per_cpu(ucode_cpu_info, cpu);
>>
>>       /* We should bind the task to the CPU */
>> @@ -321,33 +330,32 @@ static int cpu_request_microcode(int cpu, const void
>> *buf, size_t bufsize)
>>        */
>>       mc_amd->mpb = NULL;
>>       mc_amd->mpb_size = 0;
>> -    while ( (ret = get_next_ucode_from_buffer_amd(mc_amd, buf, bufsize,
>> -                                                  &offset)) == 0 )
>> +    last_offset = offset;
>> +    while ( (error = get_ucode_from_buffer_amd(mc_amd, buf, bufsize,
>> +                                               &offset)) == 0 )
>>       {
>> -        error = microcode_fits(mc_amd, cpu);
>> -        if (error <= 0)
>> -            continue;
>> +        if ( microcode_fits(mc_amd, cpu) )
>> +            if ( apply_microcode(cpu) == 0 )
>
> Fold the two if()-s into one, please.
>
> But then again you lose the return value of apply_microcode()
> here, which is wrong.

Right, I need to deal with it. Unfortunately apply_microcode() may get 
an error after a previous patch has been successfully loaded (for the 
very unlikely cases when container has multiple matches). I will 
probably still make the driver return an error in this case too.


Thanks.
-boris


>
>> +                applied_offset = last_offset;
>>
>> -        error = apply_microcode(cpu);
>> -        if (error == 0)
>> -        {
>> -            error = 1;
>> +        last_offset = offset;
>> +
>> +        if ( offset >= bufsize )
>>               break;
>> -        }
>>       }
>>
>> -    if ( ret < 0 )
>> -        error = ret;
>> -
>>       /* On success keep the microcode patch for
>>        * re-apply on resume.
>>        */
>> -    if ( error == 1 )
>> +    if ( applied_offset != 0 )
>>       {
>> -        xfree(mc_old);
>> -        error = 0;
>> +        error = get_ucode_from_buffer_amd(mc_amd, buf, bufsize,
>> +                                          &applied_offset);
>> +        if (error == 0)
>> +            xfree(mc_old);
>>       }
>> -    else
>> +
>> +    if ( applied_offset == 0 || error != 0 )
>>       {
>>           xfree(mc_amd);
>>           uci->mc.mc_amd = mc_old;
>> @@ -364,10 +372,9 @@ static int microcode_resume_match(int cpu, const void *mc)
>>       struct ucode_cpu_info *uci = &per_cpu(ucode_cpu_info, cpu);
>>       struct microcode_amd *mc_amd = uci->mc.mc_amd;
>>       const struct microcode_amd *src = mc;
>> -    int res = microcode_fits(src, cpu);
>>
>> -    if ( res <= 0 )
>> -        return res;
>> +    if ( microcode_fits(src, cpu) == 0 )
>
> microcode_fits() now returning bool_t clearly asks for using ! instead
> of == 0 here.
>
> Jan
>
>> +        return 0;
>>
>>       if ( src != mc_amd )
>>       {
>> --
>> 1.7.10.4
>
>
>