From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S262318AbVFJAxi (ORCPT ); Thu, 9 Jun 2005 20:53:38 -0400 Received: (majordomo@vger.kernel.org) by vger.kernel.org id S262335AbVFJAxi (ORCPT ); Thu, 9 Jun 2005 20:53:38 -0400 Received: from gateway-1237.mvista.com ([12.44.186.158]:8182 "EHLO av.mvista.com") by vger.kernel.org with ESMTP id S262318AbVFJAxP (ORCPT ); Thu, 9 Jun 2005 20:53:15 -0400 Message-ID: <42A8E369.8080504@mvista.com> Date: Thu, 09 Jun 2005 17:48:41 -0700 From: George Anzinger Reply-To: george@mvista.com Organization: MontaVista Software User-Agent: Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.7.6) Gecko/20050323 Fedora/1.7.6-1.3.2 X-Accept-Language: en-us, en MIME-Version: 1.0 To: Parag Warudkar CC: Dominik Brodowski , john stultz , Nishanth Aravamudan , Andi Kleen , lkml , Tim Schmielau , albert@users.sourceforge.net, Ulrich Windl , Christoph Lameter , David Mosberger , Andrew Morton , paulus@samba.org, schwidefsky@de.ibm.com, keith maanthey , Chris McDermott , Max Asbock , mahuja@us.ibm.com, Darren Hart , "Darrick J. Wong" , Anton Blanchard , donf@us.ibm.com, mpm@selenic.com, benh@kernel.crashing.org Subject: Re: [PATCH 3/4] new timeofday x86-64 arch specific changes (v. B1) References: <060220051827.15835.429F4FA6000DF9D700003DDB220588617200009A9B9CD3040A029D0A05@comcast.net> <1117812275.3674.2.camel@leatherman> <20050605170511.GC12338@dominikbrodowski.de> <200506052304.35298.kernel-stuff@comcast.net> In-Reply-To: <200506052304.35298.kernel-stuff@comcast.net> Content-Type: text/plain; charset=us-ascii; format=flowed Content-Transfer-Encoding: 7bit Sender: linux-kernel-owner@vger.kernel.org X-Mailing-List: linux-kernel@vger.kernel.org Parag Warudkar wrote: > On Sunday 05 June 2005 13:05, Dominik Brodowski wrote: > >>IIRC (from the comment above) several chipsets suffer from this >>inconsistency, namely the widely used PIIX4(E) and ICH(4 only? or also >>other ICH-ones?). Therefore, we'd need at least some sort of boot-time >>check to decide which method to use... and based on the method, we can >>adjust the priority maybe? > > > To begin with, will the simple proof-of-concept patch like below work? (It > tests the chipset read in the same do{}while loop - if the loop executes only > once, it considers the chipset good - in which case it executes the faster > read_pmtmr_fast function.) Or does it need wider testing under different > circumstances to conclude that chipset is good? > > I tested the patch under Virtual PC which emulates a PIIX4 chipset. Test > passes there, meaning the do {}while loop executes only once. As I understand the problem it is that the counter is, sometimes, read while it is is rippling the carry up to the higher bits. This will only happen if the read is done just as the hardware is bumping the count and even then only if there is a carry. Exactly where in the carry sequency this happens is likely just a guess. In the HRT patch I take care of this by testing the read against a prior read (up to 1/HZ away). If the number is bigger and to too much bigger, I just do one read. Here is the full routine: quick_get_cpuctr(void) { static unsigned long last_read = 0; static int qgc_max = 0; int i; unsigned long rd_delta, rd_ans, rd = inl(acpi_pm_tmr_address); /* * This will be REALLY big if ever we move backward in time... */ rd_delta = (rd - last_read) & SIZE_MASK; last_read = rd; rd_ans = (rd - last_update) & SIZE_MASK; if (likely((rd_ans < (arch_cycles_per_jiffy << 1)) && (rd_delta < (arch_cycles_per_jiffy << 1)))) return rd_ans; for (i = 0; i < 10; i++) { rd = inl(acpi_pm_tmr_address); rd_delta = (rd - last_read) & SIZE_MASK; last_read = rd; if (unlikely(i > qgc_max)) qgc_max = i; /* * On my test machine (800MHZ dual PIII) this is always * seven. Seems long, but we will give it some slack... * We note that rd_delta (and all the vars) unsigned so * a backward movement will show as a really big number. */ if (likely(rd_delta < 20)) return (rd - last_update) & SIZE_MASK; } return (rd - last_update) & SIZE_MASK; } George -- > > Parag > > > ------------------------------------------------------------------------ > > --- linux-2.6-orig/arch/i386/kernel/timers/timer_pm.c 2005-03-02 02:37:48.000000000 -0500 > +++ linux-2.6.12-rc5/arch/i386/kernel/timers/timer_pm.c 2005-06-05 23:01:15.000000000 -0400 > @@ -35,6 +35,10 @@ > * in arch/i386/acpi/boot.c */ > u32 pmtmr_ioport = 0; > > +struct pmtmr_rd_func { > + u32 (*read_pmtmr) (void); > +}pmtmr_rd_func; > + > > /* value of the Power timer at last timer interrupt */ > static u32 offset_tick; > @@ -45,6 +49,11 @@ > > #define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */ > > +static inline u32 read_pmtmr_fast(void) > +{ > + return inl(pmtmr_ioport); > +} > + > /*helper function to safely read acpi pm timesource*/ > static inline u32 read_pmtmr(void) > { > @@ -76,14 +85,14 @@ > unsigned long count, delta; > > mach_prepare_counter(); > - value1 = read_pmtmr(); > + value1 = pmtmr_rd_func.read_pmtmr(); > mach_countup(&count); > - value2 = read_pmtmr(); > + value2 = pmtmr_rd_func.read_pmtmr(); > delta = (value2 - value1) & ACPI_PM_MASK; > > /* Check that the PMTMR delta is within 5% of what we expect */ > - if (delta < (PMTMR_EXPECTED_RATE * 19) / 20 || > - delta > (PMTMR_EXPECTED_RATE * 21) / 20) { > + if (delta < (PMTMR_EXPECTED_RATE * 18) / 20 || > + delta > (PMTMR_EXPECTED_RATE * 22) / 20) { > printk(KERN_INFO "PM-Timer running at invalid rate: %lu%% of normal - aborting.\n", 100UL * delta / PMTMR_EXPECTED_RATE); > return -1; > } > @@ -95,9 +104,16 @@ > static int init_pmtmr(char* override) > { > u32 value1, value2; > - unsigned int i; > + u32 v1=0,v2=0,v3=0; > + unsigned int i, loop_cnt=0; > > - if (override[0] && strncmp(override,"pmtmr",5)) > + /* Use slower but probably more correct read function by > + * default. This is overriden with a fast one if it is > + * suitable to do so below. > + */ > + pmtmr_rd_func.read_pmtmr = read_pmtmr; > + > + if (override[0] && strncmp(override,"pmtmr",5)) > return -ENODEV; > > if (!pmtmr_ioport) > @@ -106,9 +122,32 @@ > /* we use the TSC for delay_pmtmr, so make sure it exists */ > if (!cpu_has_tsc) > return -ENODEV; > + /* It has been reported that because of various broken > + * chipsets (ICH4, PIIX4 and PIIX4E) where the ACPI PM time > + * source is not latched, so you must read it multiple > + * times to insure a safe value is read. > + */ > + do { > + v1 = inl(pmtmr_ioport); > + v2 = inl(pmtmr_ioport); > + v3 = inl(pmtmr_ioport); > + loop_cnt++; > + } while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1) > + || (v3 > v1 && v3 < v2)); > + > + if(loop_cnt == 1) { > + /*We have a good chipset*/ > + printk(KERN_INFO "PM Timer: Chipset passes port read test\n"); > + pmtmr_rd_func.read_pmtmr = read_pmtmr_fast; > + } > + > + else { > + printk(KERN_INFO "PM Timer: Chipset fails port read test:"); > + printk(KERN_INFO "Working around it."); > + } > > /* "verify" this timing source */ > - value1 = read_pmtmr(); > + value1 = pmtmr_rd_func.read_pmtmr(); > for (i = 0; i < 10000; i++) { > value2 = read_pmtmr(); > if (value2 == value1) > @@ -156,7 +195,7 @@ > > write_seqlock(&monotonic_lock); > > - offset_tick = read_pmtmr(); > + offset_tick = pmtmr_rd_func.read_pmtmr(); > > /* calculate tick interval */ > delta = (offset_tick - last_offset) & ACPI_PM_MASK; > @@ -202,7 +241,7 @@ > } while (read_seqretry(&monotonic_lock, seq)); > > /* Read the pmtmr */ > - this_offset = read_pmtmr(); > + this_offset = pmtmr_rd_func.read_pmtmr(); > > /* convert to nanoseconds */ > ret = (this_offset - last_offset) & ACPI_PM_MASK; > @@ -232,7 +271,7 @@ > u32 now, offset, delta = 0; > > offset = offset_tick; > - now = read_pmtmr(); > + now = pmtmr_rd_func.read_pmtmr(); > delta = (now - offset)&ACPI_PM_MASK; > > return (unsigned long) offset_delay + cyc2us(delta); -- George Anzinger george@mvista.com HRT (High-res-timers): http://sourceforge.net/projects/high-res-timers/