Date: Tue, 19 Jan 2016 13:12:50 +0100
From: Peter Zijlstra
To: Huang Rui
Cc: Borislav Petkov, Ingo Molnar, Andy Lutomirski, Thomas Gleixner,
	Robert Richter, Jacob Shin, John Stultz, Frédéric Weisbecker,
	linux-kernel@vger.kernel.org, spg_linux_kernel@amd.com, x86@kernel.org,
	Guenter Roeck, Andreas Herrmann, Suravee Suthikulpanit,
	Aravind Gopalakrishnan, Fengguang Wu, Aaron Lu
Subject: Re: [PATCH v2 5/5] perf/x86/amd/power: Add AMD accumulated power
	reporting mechanism
Message-ID: <20160119121250.GA6344@twins.programming.kicks-ass.net>
References: <1452739808-11871-1-git-send-email-ray.huang@amd.com>
	<1452739808-11871-6-git-send-email-ray.huang@amd.com>
In-Reply-To: <1452739808-11871-6-git-send-email-ray.huang@amd.com>

On Thu, Jan 14, 2016 at 10:50:08AM +0800, Huang Rui wrote:

> +struct power_pmu {
> +	spinlock_t	lock;

This should be a raw_spinlock_t, as it'll be nested under other
raw_spinlock_t's.

> +	struct list_head	active_list;
> +	struct pmu		*pmu; /* pointer to power_pmu_class */
> +	local64_t		cpu_sw_pwr_ptsc;
> +};

> +static void pmu_event_start(struct perf_event *event, int mode)
> +{
> +	struct power_pmu *pmu = __this_cpu_read(amd_power_pmu);
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&pmu->lock, flags);

IRQs will already be disabled here: perf core invokes pmu::start()
(like ::stop(), ::add() and ::del()) with IRQs off, so the
irqsave/irqrestore dance is redundant.
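
Something like this, perhaps (untested sketch; it reuses the identifiers
from your patch, and only the raw_ conversion and the dropped irqsave
are new):

struct power_pmu {
	raw_spinlock_t		lock;
	struct list_head	active_list;
	struct pmu		*pmu;	/* pointer to power_pmu_class */
	local64_t		cpu_sw_pwr_ptsc;
};

static void pmu_event_start(struct perf_event *event, int mode)
{
	struct power_pmu *pmu = __this_cpu_read(amd_power_pmu);

	/*
	 * perf core calls ::start() with IRQs disabled, so a plain
	 * raw_spin_lock() suffices; the raw_ variant also stays a real
	 * spinning lock on -rt, where spinlock_t becomes sleeping.
	 */
	raw_spin_lock(&pmu->lock);
	__pmu_event_start(pmu, event);
	raw_spin_unlock(&pmu->lock);
}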
> +	__pmu_event_start(pmu, event);
> +	spin_unlock_irqrestore(&pmu->lock, flags);
> +}
> +
> +static void pmu_event_stop(struct perf_event *event, int mode)
> +{
> +	struct power_pmu *pmu = __this_cpu_read(amd_power_pmu);
> +	struct hw_perf_event *hwc = &event->hw;
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&pmu->lock, flags);

idem

> +
> +	/* mark event as deactivated and stopped */
> +	if (!(hwc->state & PERF_HES_STOPPED)) {
> +		list_del(&event->active_entry);
> +		hwc->state |= PERF_HES_STOPPED;
> +	}
> +
> +	/* check if update of sw counter is necessary */
> +	if ((mode & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
> +		/*
> +		 * Drain the remaining delta count out of an event
> +		 * that we are disabling:
> +		 */
> +		event_update(event, pmu);
> +		hwc->state |= PERF_HES_UPTODATE;
> +	}
> +
> +	spin_unlock_irqrestore(&pmu->lock, flags);
> +}
> +
> +static int pmu_event_add(struct perf_event *event, int mode)
> +{
> +	struct power_pmu *pmu = __this_cpu_read(amd_power_pmu);
> +	struct hw_perf_event *hwc = &event->hw;
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&pmu->lock, flags);
> +

idem

> +	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
> +
> +	if (mode & PERF_EF_START)
> +		__pmu_event_start(pmu, event);
> +
> +	spin_unlock_irqrestore(&pmu->lock, flags);
> +
> +	return 0;
> +}

> +static int power_cpu_init(int cpu)
> +{
> +	int i, cu, ret = 0;
> +	cpumask_var_t mask, dummy_mask;
> +
> +	cu = cpu / cores_per_cu;
> +
> +	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
> +		return -ENOMEM;
> +
> +	if (!zalloc_cpumask_var(&dummy_mask, GFP_KERNEL)) {
> +		ret = -ENOMEM;
> +		goto out;
> +	}
> +
> +	for (i = 0; i < cores_per_cu; i++)
> +		cpumask_set_cpu(i, mask);
> +
> +	cpumask_shift_left(mask, mask, cu * cores_per_cu);
> +
> +	if (!cpumask_and(dummy_mask, mask, &cpu_mask))
> +		cpumask_set_cpu(cpu, &cpu_mask);
> +
> +	free_cpumask_var(dummy_mask);
> +out:
> +	free_cpumask_var(mask);
> +
> +	return ret;
> +}

> +static int power_cpu_notifier(struct notifier_block *self,
> +			      unsigned long action, void *hcpu)
> +{
> +	unsigned int cpu = (long)hcpu;
> +
> +	switch (action & ~CPU_TASKS_FROZEN) {
> +	case CPU_UP_PREPARE:
> +		if (power_cpu_prepare(cpu))
> +			return NOTIFY_BAD;
> +		break;
> +	case CPU_STARTING:
> +		if (power_cpu_init(cpu))
> +			return NOTIFY_BAD;

This is called with IRQs disabled, which makes those GFP_KERNEL
allocations in power_cpu_init() above a pretty bad idea.

Also note that -rt cannot do _any_ allocations/frees from the STARTING
notifier. Please move the allocations/frees to PREPARE/ONLINE; see the
sketch at the bottom of this mail.

> +		break;
> +	case CPU_ONLINE:
> +	case CPU_DEAD:
> +		power_cpu_kfree(cpu);
> +		break;
> +	case CPU_DOWN_PREPARE:
> +		if (power_cpu_exit(cpu))
> +			return NOTIFY_BAD;
> +		break;
> +	default:
> +		break;
> +	}
> +
> +	return NOTIFY_OK;
> +}
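
Untested sketch of what I mean; the per-cpu power_cu_mask (and its
DEFINE_PER_CPU) is a made-up name here, everything else reuses the
identifiers from your patch. Using cpumask_intersects() also lets you
drop dummy_mask, and with it the second allocation, entirely:

/* hypothetical per-cpu scratch mask, allocated at UP_PREPARE time */
static DEFINE_PER_CPU(cpumask_var_t, power_cu_mask);

static int power_cpu_prepare(unsigned int cpu)
{
	/* UP_PREPARE runs in sleepable context; GFP_KERNEL is fine here */
	if (!zalloc_cpumask_var(&per_cpu(power_cu_mask, cpu), GFP_KERNEL))
		return -ENOMEM;

	return 0;
}

static int power_cpu_init(int cpu)
{
	/* decays to a plain pointer for both cpumask_var_t flavours */
	struct cpumask *mask = per_cpu(power_cu_mask, cpu);
	int i, cu = cpu / cores_per_cu;

	/* STARTING context: IRQs are off, so no allocations in here */
	cpumask_clear(mask);
	for (i = 0; i < cores_per_cu; i++)
		cpumask_set_cpu(i, mask);

	cpumask_shift_left(mask, mask, cu * cores_per_cu);

	if (!cpumask_intersects(mask, &cpu_mask))
		cpumask_set_cpu(cpu, &cpu_mask);

	return 0;
}

static void power_cpu_kfree(unsigned int cpu)
{
	/* ONLINE/DEAD are sleepable contexts; freeing is fine here */
	free_cpumask_var(per_cpu(power_cu_mask, cpu));
}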