Date: Thu, 21 Jan 2016 10:02:57 +0100
From: Peter Zijlstra
To: Huang Rui
Cc: Borislav Petkov, Ingo Molnar, Andy Lutomirski, Thomas Gleixner,
	Robert Richter, Jacob Shin, John Stultz, Frédéric Weisbecker,
	linux-kernel@vger.kernel.org, spg_linux_kernel@amd.com, x86@kernel.org,
	Guenter Roeck, Andreas Herrmann, Suravee Suthikulpanit,
	Aravind Gopalakrishnan, Borislav Petkov, Fengguang Wu, Aaron Lu
Subject: Re: [PATCH v2 5/5] perf/x86/amd/power: Add AMD accumulated power reporting mechanism
Message-ID: <20160121090257.GC6357@twins.programming.kicks-ass.net>
References: <1452739808-11871-1-git-send-email-ray.huang@amd.com>
 <1452739808-11871-6-git-send-email-ray.huang@amd.com>
 <20160119121250.GA6344@twins.programming.kicks-ass.net>
 <20160120044823.GA13477@hr-amur2>
 <20160120092244.GH6357@twins.programming.kicks-ass.net>
 <20160121070437.GA15130@hr-amur2>
In-Reply-To: <20160121070437.GA15130@hr-amur2>
User-Agent: Mutt/1.5.21 (2012-12-30)

On Thu, Jan 21, 2016 at 03:04:38PM +0800, Huang Rui wrote:
> I just took a quick look at the spinlocks in -rt mode. The realtime
> Linux kernel provides two kinds of spinlock: the original spinlock_t
> is replaced by one that is able to sleep, much like a mutex, while
> the other one (the raw_spinlock_t you mentioned here) keeps the
> non-sleeping behaviour, i.e. it is a real spinlock.
>
> And my lock here will also be nested under perf_event_context::lock,
> right?

Yep.

> > I have a lockdep patch somewhere that checks these ordering things; I
> > should rebase and post that again.
>
> Can you CC me when you post that patch next time?

Sure.

> > One should not use GFP_ATOMIC if at all possible, also no, -rt cannot do
> > _any_ allocations from this site.
>
> OK, that's because the allocation might sleep while IRQs are
> disabled, which is incorrect.

Right. It's related to the above: the allocator locks are spinlock_t,
and as a consequence of them becoming blocking locks, spin_lock_irq()
will also no longer disable IRQs. The CPU_STARTING notifier, however,
will still be called with IRQs disabled, because it is CPU bringup.

So on -rt even GFP_ATOMIC will no longer work here.
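To make that concrete, here is a minimal sketch (illustrative only; the
names are made up, not from your patch) of what a CPU_STARTING-time
callback is restricted to if it also wants to work on -rt:

#include <linux/spinlock.h>
#include <linux/percpu.h>
#include <linux/types.h>

/* A raw_spinlock_t stays a real spinning lock on -rt. */
static DEFINE_RAW_SPINLOCK(example_lock);
static DEFINE_PER_CPU(u64, example_counter);

/* Runs from CPU_STARTING: hard IRQs are off, so no allocations at all. */
static int example_cpu_starting(int cpu)
{
	/* No _irqsave needed; IRQs are already disabled in this path. */
	raw_spin_lock(&example_lock);
	this_cpu_write(example_counter, 0);
	raw_spin_unlock(&example_lock);

	/*
	 * Not allowed here, not even with GFP_ATOMIC: on -rt the
	 * allocator takes spinlock_t locks, which may sleep.
	 *
	 *	buf = kmalloc(size, GFP_ATOMIC);
	 */
	return 0;
}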
> I drafted an update diff based on the original patch; please take a
> look.
>
> 8<--------------------------------------------------------------------------
>
> diff --git a/arch/x86/kernel/cpu/perf_event_amd_power.c b/arch/x86/kernel/cpu/perf_event_amd_power.c
> index 69ef234..e71d993 100644
> --- a/arch/x86/kernel/cpu/perf_event_amd_power.c
> +++ b/arch/x86/kernel/cpu/perf_event_amd_power.c
> @@ -46,10 +46,17 @@ static unsigned int cu_num;
>  static u64 max_cu_acc_power;
>
>  struct power_pmu {
> -	spinlock_t lock;
> +	raw_spinlock_t lock;
>  	struct list_head active_list;
>  	struct pmu *pmu; /* pointer to power_pmu_class */
>  	local64_t cpu_sw_pwr_ptsc;
> +	/*
> +	 * These two cpumasks are used to avoid allocations in the
> +	 * CPU_STARTING phase, which runs with IRQs disabled; they are
> +	 * allocated in power_cpu_prepare() instead.
> +	 */
> +	cpumask_var_t mask;
> +	cpumask_var_t tmp_mask;
>  };
>
>  static struct pmu pmu_class;
>
> @@ -126,9 +133,9 @@ static void pmu_event_start(struct perf_event *event, int mode)
>  	struct power_pmu *pmu = __this_cpu_read(amd_power_pmu);
>  	unsigned long flags;
>
> -	spin_lock_irqsave(&pmu->lock, flags);
> +	raw_spin_lock_irqsave(&pmu->lock, flags);
>  	__pmu_event_start(pmu, event);
> -	spin_unlock_irqrestore(&pmu->lock, flags);
> +	raw_spin_unlock_irqrestore(&pmu->lock, flags);
>  }
>
>  static void pmu_event_stop(struct perf_event *event, int mode)
> @@ -137,7 +144,7 @@ static void pmu_event_stop(struct perf_event *event, int mode)
>  	struct hw_perf_event *hwc = &event->hw;
>  	unsigned long flags;
>
> -	spin_lock_irqsave(&pmu->lock, flags);
> +	raw_spin_lock_irqsave(&pmu->lock, flags);
>
>  	/* mark event as deactivated and stopped */
>  	if (!(hwc->state & PERF_HES_STOPPED)) {
> @@ -155,7 +162,7 @@ static void pmu_event_stop(struct perf_event *event, int mode)
>  		hwc->state |= PERF_HES_UPTODATE;
>  	}
>
> -	spin_unlock_irqrestore(&pmu->lock, flags);
> +	raw_spin_unlock_irqrestore(&pmu->lock, flags);
>  }
>
>  static int pmu_event_add(struct perf_event *event, int mode)
> @@ -164,14 +171,14 @@ static int pmu_event_add(struct perf_event *event, int mode)
>  	struct hw_perf_event *hwc = &event->hw;
>  	unsigned long flags;
>
> -	spin_lock_irqsave(&pmu->lock, flags);
> +	raw_spin_lock_irqsave(&pmu->lock, flags);
>
>  	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
>
>  	if (mode & PERF_EF_START)
>  		__pmu_event_start(pmu, event);
>
> -	spin_unlock_irqrestore(&pmu->lock, flags);
> +	raw_spin_unlock_irqrestore(&pmu->lock, flags);
>
>  	return 0;
>  }

So for these four {start,stop,add,del} methods you can drop the
irqsave/irqrestore thing, as it is guaranteed that IRQs will be
disabled there (see the sketch at the end of this mail).

> +	cpumask_clear(pmu->mask);
> +	cpumask_clear(pmu->tmp_mask);
>
>  	for (i = 0; i < cores_per_cu; i++)
> +		cpumask_set_cpu(i, pmu->mask);
>
> +	cpumask_shift_left(pmu->mask, pmu->mask, cu * cores_per_cu);

Couldn't you simply use topology_sibling_cpumask(cpu) instead?

>  static int power_cpu_init(int cpu)
>  {
> +	struct power_pmu *pmu = per_cpu(amd_power_pmu, cpu);
> +	int i, cu;
>
> +	if (pmu)
> +		return 0;
>
> +	cu = cpu / cores_per_cu;
>
>  	for (i = 0; i < cores_per_cu; i++)
> +		cpumask_set_cpu(i, pmu->mask);
>
> +	cpumask_shift_left(pmu->mask, pmu->mask, cu * cores_per_cu);

topology_sibling_cpumask(cpu) again?
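That is, something like the below (just a sketch reusing your patch
context; it assumes the compute-unit siblings are exactly what
topology_sibling_cpumask() reports on these parts):

	/* Instead of building the CU mask by hand ... */
	cpumask_clear(pmu->mask);
	for (i = 0; i < cores_per_cu; i++)
		cpumask_set_cpu(i, pmu->mask);
	cpumask_shift_left(pmu->mask, pmu->mask, cu * cores_per_cu);

	/* ... simply copy the mask the topology code already maintains: */
	cpumask_copy(pmu->mask, topology_sibling_cpumask(cpu));

That also gets rid of the cores_per_cu arithmetic entirely.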
>
> +	if (!cpumask_and(pmu->tmp_mask, pmu->mask, &cpu_mask))
>  		cpumask_set_cpu(cpu, &cpu_mask);
>
> +	return 0;
>  }
>
>  static int power_cpu_prepare(int cpu)
>  {
>  	struct power_pmu *pmu = per_cpu(amd_power_pmu, cpu);
>  	int phys_id = topology_physical_package_id(cpu);
> +	int ret = 0;
>
>  	if (pmu)
>  		return 0;
> @@ -391,7 +380,17 @@ static int power_cpu_prepare(int cpu)
>  	if (!pmu)
>  		return -ENOMEM;
>
> +	if (!zalloc_cpumask_var(&pmu->mask, GFP_KERNEL)) {
> +		ret = -ENOMEM;
> +		goto out;
> +	}
> +
> +	if (!zalloc_cpumask_var(&pmu->tmp_mask, GFP_KERNEL)) {
> +		ret = -ENOMEM;
> +		goto out1;
> +	}
> +
> +	raw_spin_lock_init(&pmu->lock);
>
>  	INIT_LIST_HEAD(&pmu->active_list);
>
> @@ -400,12 +399,21 @@ static int power_cpu_prepare(int cpu)
>  	per_cpu(amd_power_pmu, cpu) = pmu;
>
>  	return 0;
> +
> +out1:
> +	free_cpumask_var(pmu->mask);
> +out:
> +	kfree(pmu);
> +
> +	return ret;
>  }
>
>  static void power_cpu_kfree(int cpu)
>  {
>  	struct power_pmu *pmu = per_cpu(amd_power_pmu, cpu);
>
> +	free_cpumask_var(pmu->mask);
> +	free_cpumask_var(pmu->tmp_mask);
>  	kfree(pmu);
>  }

Yes, this should work I think.
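For completeness, a sketch of the {start,stop,add,del} simplification
mentioned above, using pmu_event_start() as the example; the other
three follow the same pattern:

static void pmu_event_start(struct perf_event *event, int mode)
{
	struct power_pmu *pmu = __this_cpu_read(amd_power_pmu);

	/*
	 * perf core invokes the pmu::{start,stop,add,del} methods with
	 * IRQs already disabled, so the flags dance can go away.
	 */
	raw_spin_lock(&pmu->lock);
	__pmu_event_start(pmu, event);
	raw_spin_unlock(&pmu->lock);
}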