From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1755056AbcGEMkk (ORCPT ); Tue, 5 Jul 2016 08:40:40 -0400 Received: from mail-wm0-f66.google.com ([74.125.82.66]:36648 "EHLO mail-wm0-f66.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1754519AbcGEMki (ORCPT ); Tue, 5 Jul 2016 08:40:38 -0400 Date: Tue, 5 Jul 2016 14:40:34 +0200 From: Frederic Weisbecker To: riel@redhat.com Cc: linux-kernel@vger.kernel.org, peterz@infradead.org, mingo@kernel.org, pbonzini@redhat.com, fweisbec@redhat.com, wanpeng.li@hotmail.com, efault@gmx.de, tglx@linutronix.de, rkrcmar@redhat.com Subject: Re: [PATCH 1/4] sched,time: count actually elapsed irq & softirq time Message-ID: <20160705124033.GA5332@lerouge> References: <1467315350-3152-1-git-send-email-riel@redhat.com> <1467315350-3152-2-git-send-email-riel@redhat.com> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline In-Reply-To: <1467315350-3152-2-git-send-email-riel@redhat.com> User-Agent: Mutt/1.5.24 (2015-08-30) Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org On Thu, Jun 30, 2016 at 03:35:47PM -0400, riel@redhat.com wrote: > diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c > index 3d60e5d76fdb..018bae2ada36 100644 > --- a/kernel/sched/cputime.c > +++ b/kernel/sched/cputime.c > @@ -79,40 +79,50 @@ void irqtime_account_irq(struct task_struct *curr) > } > EXPORT_SYMBOL_GPL(irqtime_account_irq); > > -static int irqtime_account_hi_update(void) > +static cputime_t irqtime_account_hi_update(cputime_t maxtime) > { > u64 *cpustat = kcpustat_this_cpu->cpustat; > unsigned long flags; > - u64 latest_ns; > - int ret = 0; > + cputime_t irq_cputime; > > local_irq_save(flags); > - latest_ns = this_cpu_read(cpu_hardirq_time); > - if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_IRQ]) > - ret = 1; > + irq_cputime = nsecs_to_cputime(this_cpu_read(cpu_hardirq_time)) - > + cpustat[CPUTIME_IRQ]; We might want to keep nsecs_to_cputime64(). If cputime_t == jiffies_t == unsigned long, we may have a problem after 49 days of interrupts. Arguably that's a lot of IRQs but lets be paranoid. > + irq_cputime = min(irq_cputime, maxtime); > + cpustat[CPUTIME_IRQ] += irq_cputime; > local_irq_restore(flags); > - return ret; > + return irq_cputime; > } > > -static int irqtime_account_si_update(void) > +static cputime_t irqtime_account_si_update(cputime_t maxtime) > { > u64 *cpustat = kcpustat_this_cpu->cpustat; > unsigned long flags; > - u64 latest_ns; > - int ret = 0; > + cputime_t softirq_cputime; > > local_irq_save(flags); > - latest_ns = this_cpu_read(cpu_softirq_time); > - if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_SOFTIRQ]) > - ret = 1; > + softirq_cputime = nsecs_to_cputime(this_cpu_read(cpu_softirq_time)) - Ditto. > + cpustat[CPUTIME_SOFTIRQ]; > + softirq_cputime = min(softirq_cputime, maxtime); > + cpustat[CPUTIME_SOFTIRQ] += softirq_cputime; > local_irq_restore(flags); > - return ret; > + return softirq_cputime; > } > > #else /* CONFIG_IRQ_TIME_ACCOUNTING */ > > #define sched_clock_irqtime (0) > > +static cputime_t irqtime_account_hi_update(cputime_t dummy) > +{ > + return 0; > +} > + > +static cputime_t irqtime_account_si_update(cputime_t dummy) > +{ > + return 0; > +} > + > #endif /* !CONFIG_IRQ_TIME_ACCOUNTING */ > > static inline void task_group_account_field(struct task_struct *p, int index, > @@ -257,32 +267,45 @@ void account_idle_time(cputime_t cputime) > cpustat[CPUTIME_IDLE] += (__force u64) cputime; > } > > -static __always_inline unsigned long steal_account_process_tick(unsigned long max_jiffies) > +static __always_inline cputime_t steal_account_process_time(cputime_t maxtime) > { > #ifdef CONFIG_PARAVIRT > if (static_key_false(¶virt_steal_enabled)) { > + cputime_t steal_cputime; > u64 steal; > - unsigned long steal_jiffies; > > steal = paravirt_steal_clock(smp_processor_id()); > steal -= this_rq()->prev_steal_time; > + this_rq()->prev_steal_time += steal; We are accounting steal_cputime but you make it remember steal_nsecs. This is leaking quite some steal time in the way. Imagine that cputime_t == jiffies_t and HZ=100. paravirt_steal_clock() returns 199 nsecs. prev_steal_time gets added those 199. nsecs_to_cputime() return 1 jiffy (we are one nsec off the next jiffy). So account_steal_time() is accounting 1 jiffy and the 99 remaining nsecs are leaked. If some more steal time is to be accounted on the next tick, the 99 previous nsecs are forgotten. A non-leaking sequence would rather be: steal = paravirt_steal_clock(smp_processor_id()); steal -= this_rq()->prev_steal_time; steal_cputime = min(nsecs_to_cputime(steal), maxtime); account_steal_time(steal_cputime); this_rq()->prev_steal_time += cputime_to_nsecs(steal_cputime); Thanks! > > - /* > - * steal is in nsecs but our caller is expecting steal > - * time in jiffies. Lets cast the result to jiffies > - * granularity and account the rest on the next rounds. > - */ > - steal_jiffies = min(nsecs_to_jiffies(steal), max_jiffies); > - this_rq()->prev_steal_time += jiffies_to_nsecs(steal_jiffies); > + steal_cputime = min(nsecs_to_cputime(steal), maxtime); > + account_steal_time(steal_cputime); > > - account_steal_time(jiffies_to_cputime(steal_jiffies)); > - return steal_jiffies; > + return steal_cputime;