From mboxrd@z Thu Jan 1 00:00:00 1970 From: Boris Ostrovsky Subject: Re: [PATCH] x86/ucode: Improve error handling and container file processing on AMD Date: Fri, 7 Dec 2012 08:46:28 -0500 Message-ID: <50C1F334.20802@amd.com> References: <1354818491-9723-1-git-send-email-boris.ostrovsky@amd.com> <50C1E41402000078000AEED6@nat28.tlf.novell.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii"; Format="flowed" Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: <50C1E41402000078000AEED6@nat28.tlf.novell.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: Jan Beulich Cc: Ian.Campbell@citrix.com, Christoph_Egger@gmx.de, xen-devel@lists.xen.org List-Id: xen-devel@lists.xenproject.org Inline. On 12/07/2012 06:41 AM, Jan Beulich wrote: >>>> On 06.12.12 at 19:28, Boris Ostrovsky wrote: >> Do not report error when a patch is not appplicable to current processor, >> simply skip it and move on to next patch in container file. >> >> Process container file to the end instead of stopping at the first >> applicable patch. >> >> Log the fact that a patch has been applied at KERN_WARNING level, add >> CPU number to debug messages. >> >> Signed-off-by: Boris Ostrovsky >> --- >> xen/arch/x86/microcode_amd.c | 69 +++++++++++++++++++++++------------------- >> 1 file changed, 38 insertions(+), 31 deletions(-) >> >> diff --git a/xen/arch/x86/microcode_amd.c b/xen/arch/x86/microcode_amd.c >> index 7a54001..bb5968e 100644 >> --- a/xen/arch/x86/microcode_amd.c >> +++ b/xen/arch/x86/microcode_amd.c >> @@ -88,13 +88,13 @@ static int collect_cpu_info(int cpu, struct cpu_signature >> *csig) >> >> rdmsrl(MSR_AMD_PATCHLEVEL, csig->rev); >> >> - printk(KERN_DEBUG "microcode: collect_cpu_info: patch_id=%#x\n", >> - csig->rev); >> + printk(KERN_DEBUG "microcode: CPU%d collect_cpu_info: patch_id=%#x\n", >> + cpu, csig->rev); >> >> return 0; >> } >> >> -static int microcode_fits(const struct microcode_amd *mc_amd, int cpu) >> +static bool_t microcode_fits(const struct microcode_amd *mc_amd, int cpu) >> { >> struct ucode_cpu_info *uci = &per_cpu(ucode_cpu_info, cpu); >> const struct microcode_header_amd *mc_header = mc_amd->mpb; >> @@ -125,11 +125,16 @@ static int microcode_fits(const struct microcode_amd *mc_amd, int cpu) >> printk(KERN_DEBUG "microcode: CPU%d patch does not match " >> "(patch is %x, cpu base id is %x) \n", >> cpu, mc_header->processor_rev_id, equiv_cpu_id); >> - return -EINVAL; >> + return 0; >> } >> >> if ( mc_header->patch_id <= uci->cpu_sig.rev ) >> + { >> + printk(KERN_DEBUG "microcode: CPU%d patching is not needed: " >> + "patch provides level 0x%x, cpu is at 0x%x \n", > > Did you not notice that all the other messages now use %#x? > > Also, I'm not really convinced we need this added verbosity. I > personally tend to run all my systems with "loglvl=all", and the > amount of output produced by the code in this file made me > change that for just the one AMD machine I have that is new > enough to support microcode loading. > > Minimally I'd expect nothing to be printed here if the two > versions match. Yes, I may have gone a bit overboard with chattiness. This is actually made worse by the fact that we are going through patch loading twice during boot --- once through start_secondary() (or via microcode_presmp_init() on CPU0) and second time when the driver is initialized. Do we really need what microcode_init() does? > >> + cpu, mc_header->patch_id, uci->cpu_sig.rev); >> return 0; >> + } >> >> printk(KERN_DEBUG "microcode: CPU%d found a matching microcode " >> "update with version %#x (current=%#x)\n", >> @@ -173,7 +178,7 @@ static int apply_microcode(int cpu) >> return -EIO; >> } >> >> - printk(KERN_INFO "microcode: CPU%d updated from revision %#x to %#x\n", >> + printk(KERN_WARNING "microcode: CPU%d updated from revision %#x to %#x\n", >> cpu, uci->cpu_sig.rev, hdr->patch_id); >> >> uci->cpu_sig.rev = rev; >> @@ -181,7 +186,7 @@ static int apply_microcode(int cpu) >> return 0; >> } >> >> -static int get_next_ucode_from_buffer_amd( >> +static int get_ucode_from_buffer_amd( >> struct microcode_amd *mc_amd, >> const void *buf, >> size_t bufsize, >> @@ -194,8 +199,12 @@ static int get_next_ucode_from_buffer_amd( >> off = *offset; >> >> /* No more data */ >> - if ( off >= bufsize ) >> - return 1; >> + if ( off >= bufsize ) >> + { >> + printk(KERN_ERR "microcode: error! " >> + "ucode buffer overrun\n"); > > All on one line please (and the "error!" probably is superfluous). > > Also, is printing this really correct when off == bufsize? Or can > this not happen at all? Prior to this patch off >= bufsize was not an error but actually loop termination condition for the caller ("==" was the most likely case). Now the loop is broken out explicitly so when off >= bufsize (including "==") we have an error. Having said this, it is pretty much impossible to get it with current code. > >> + return -EINVAL; >> + } >> >> mpbuf = (const struct mpbhdr *)&bufp[off]; >> if ( mpbuf->type != UCODE_UCODE_TYPE ) >> @@ -205,8 +214,8 @@ static int get_next_ucode_from_buffer_amd( >> return -EINVAL; >> } >> >> - printk(KERN_DEBUG "microcode: size %zu, block size %u, offset %zu\n", >> - bufsize, mpbuf->len, off); >> + printk(KERN_DEBUG "microcode: CPU%d size %zu, block size %u, offset %zu\n", >> + raw_smp_processor_id(), bufsize, mpbuf->len, off); >> >> if ( (off + mpbuf->len) > bufsize ) >> { >> @@ -278,8 +287,8 @@ static int cpu_request_microcode(int cpu, const void *buf, size_t bufsize) >> { >> struct microcode_amd *mc_amd, *mc_old; >> size_t offset = bufsize; >> + size_t last_offset, applied_offset = 0; >> int error = 0; >> - int ret; >> struct ucode_cpu_info *uci = &per_cpu(ucode_cpu_info, cpu); >> >> /* We should bind the task to the CPU */ >> @@ -321,33 +330,32 @@ static int cpu_request_microcode(int cpu, const void >> *buf, size_t bufsize) >> */ >> mc_amd->mpb = NULL; >> mc_amd->mpb_size = 0; >> - while ( (ret = get_next_ucode_from_buffer_amd(mc_amd, buf, bufsize, >> - &offset)) == 0 ) >> + last_offset = offset; >> + while ( (error = get_ucode_from_buffer_amd(mc_amd, buf, bufsize, >> + &offset)) == 0 ) >> { >> - error = microcode_fits(mc_amd, cpu); >> - if (error <= 0) >> - continue; >> + if ( microcode_fits(mc_amd, cpu) ) >> + if ( apply_microcode(cpu) == 0 ) > > Fold the two if()-s into one, please. > > But then again you lose the return value of apply_microcode() > here, which is wrong. Right, I need to deal with it. Unfortunately apply_microcode() may get an error after a previous patch has been successfully loaded (for the very unlikely cases when container has multiple matches). I will probably still make the driver return an error in this case too. Thanks. -boris > >> + applied_offset = last_offset; >> >> - error = apply_microcode(cpu); >> - if (error == 0) >> - { >> - error = 1; >> + last_offset = offset; >> + >> + if ( offset >= bufsize ) >> break; >> - } >> } >> >> - if ( ret < 0 ) >> - error = ret; >> - >> /* On success keep the microcode patch for >> * re-apply on resume. >> */ >> - if ( error == 1 ) >> + if ( applied_offset != 0 ) >> { >> - xfree(mc_old); >> - error = 0; >> + error = get_ucode_from_buffer_amd(mc_amd, buf, bufsize, >> + &applied_offset); >> + if (error == 0) >> + xfree(mc_old); >> } >> - else >> + >> + if ( applied_offset == 0 || error != 0 ) >> { >> xfree(mc_amd); >> uci->mc.mc_amd = mc_old; >> @@ -364,10 +372,9 @@ static int microcode_resume_match(int cpu, const void *mc) >> struct ucode_cpu_info *uci = &per_cpu(ucode_cpu_info, cpu); >> struct microcode_amd *mc_amd = uci->mc.mc_amd; >> const struct microcode_amd *src = mc; >> - int res = microcode_fits(src, cpu); >> >> - if ( res <= 0 ) >> - return res; >> + if ( microcode_fits(src, cpu) == 0 ) > > microcode_fits() now returning bool_t clearly asks for using ! instead > of == 0 here. > > Jan > >> + return 0; >> >> if ( src != mc_amd ) >> { >> -- >> 1.7.10.4 > > >