Re: [PATCH 2/5] perf: generic intel uncore support

The Linux Kernel Mailing List
 help / color / mirror / Atom feed

From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: "Yan, Zheng" <zheng.z.yan@intel.com>
Cc: mingo@elte.hu, andi@firstfloor.org, eranian@google.com,
	linux-kernel@vger.kernel.org, ming.m.lin@intel.com
Subject: Re: [PATCH 2/5] perf: generic intel uncore support
Date: Sat, 31 Mar 2012 05:18:22 +0200	[thread overview]
Message-ID: <1333163902.2960.63.camel@laptop> (raw)
In-Reply-To: <1332916998-10628-3-git-send-email-zheng.z.yan@intel.com>

On Wed, 2012-03-28 at 14:43 +0800, Yan, Zheng wrote:
> diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
> new file mode 100644
> index 0000000..d159e3e
> --- /dev/null
> +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
> @@ -0,0 +1,814 @@
> +#include "perf_event_intel_uncore.h"
> +
> +static struct intel_uncore_type *empty_uncore[] = { NULL, };
> +static struct intel_uncore_type **msr_uncores = empty_uncore;
> +
> +/* constraint for box with 2 counters */
> +static struct event_constraint unconstrained_2 =
> +       EVENT_CONSTRAINT(0, 0x3, 0);
> +/* constraint for box with 3 counters */
> +static struct event_constraint unconstrained_3 =
> +       EVENT_CONSTRAINT(0, 0x7, 0);
> +/* constraint for box with 4 counters */
> +static struct event_constraint unconstrained_4 =
> +       EVENT_CONSTRAINT(0, 0xf, 0);
> +/* constraint for box with 8 counters */
> +static struct event_constraint unconstrained_8 =
> +       EVENT_CONSTRAINT(0, 0xff, 0);
> +/* constraint for the fixed countesr */
> +static struct event_constraint constraint_fixed =
> +       EVENT_CONSTRAINT((u64)-1, 1 << UNCORE_PMC_IDX_FIXED, (u64)-1);

Since they're all different, why not have a struct event_constraint
unconstrained member in your struct intel_uncore_pmu and fill it out
whenever you create the pmu?

> +static DEFINE_SPINLOCK(uncore_box_lock);

> +/*
> + * The overflow interrupt is unavailable for SandyBridge-EP, is broken
> + * for SandyBridge. So we use hrtimer to periodically poll the counter
> + */

To avoid overflow and accumulate into the software u64, right? Not to
actually sample anything.

Might also want to say it is broken for everything else too, since afaik
uncore PMI has been broken for everything with an uncore.


> +static struct intel_uncore_box *
> +__uncore_pmu_find_box(struct intel_uncore_pmu *pmu, int phyid)
> +{
> +       struct intel_uncore_box *box;
> +       struct hlist_head *head;
> +       struct hlist_node *node;
> +
> +       head = &pmu->box_hash[phyid % UNCORE_BOX_HASH_SIZE];
> +
> +       hlist_for_each_entry_rcu(box, node, head, hlist) {
> +               if (box->phy_id == phyid)
> +                       return box;
> +       }
> +
> +       return NULL;
> +}
> +
> +static struct intel_uncore_box *
> +uncore_pmu_find_box(struct intel_uncore_pmu *pmu, int phyid)
> +{
> +       struct intel_uncore_box *box;
> +
> +       rcu_read_lock();
> +       box = __uncore_pmu_find_box(pmu, phyid);
> +       rcu_read_unlock();
> +
> +       return box;
> +}
> +
> +/* caller should hold the uncore_box_lock */
> +static void uncore_pmu_add_box(struct intel_uncore_pmu *pmu,
> +                               struct intel_uncore_box *box)
> +{
> +       struct hlist_head *head;
> +
> +       head = &pmu->box_hash[box->phy_id % UNCORE_BOX_HASH_SIZE];
> +       hlist_add_head_rcu(&box->hlist, head);
> +}
> +
> +static struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event)
> +{
> +       return container_of(event->pmu, struct intel_uncore_pmu, pmu);
> +}
> +
> +static struct intel_uncore_box *uncore_event_to_box(struct perf_event *event)
> +{
> +       int phyid = topology_physical_package_id(smp_processor_id());

Who says that this event has anything to do with the current cpu?

> +       return uncore_pmu_find_box(uncore_event_to_pmu(event), phyid);
> +}

So why not simply use a per-cpu allocation and have something like:

struct intel_uncore_pmu {
	...
	struct intel_uncore_box * __percpu box;
};

static inline
struct intel_uncore_box *uncore_event_to_box(struct perf_event *event)
{
	return per_cpu_ptr(event->pmu->box, event->cpu);
}

And be done with it?

> +static int uncore_collect_events(struct intel_uncore_box *box,
> +                         struct perf_event *leader, bool dogrp)
> +{
> +       struct perf_event *event;
> +       int n, max_count;
> +
> +       max_count = box->pmu->type->num_counters;
> +       if (box->pmu->type->fixed_ctl)
> +               max_count++;
> +
> +       if (box->n_events >= max_count)
> +               return -EINVAL;
> +
> +       /*
> +        * adding the same events twice to the uncore PMU may cause
> +        * general protection fault
> +        */

Is that an erratum or a 'feature' of some specific box types, or what?

> +       for (n = 0; n < box->n_events; n++) {
> +               event = box->event_list[n];
> +               if (event->hw.config == leader->hw.config)
> +                       return -EINVAL;
> +       }
> +
> +       n = box->n_events;
> +       box->event_list[n] = leader;
> +       n++;
> +       if (!dogrp)
> +               return n;
> +
> +       list_for_each_entry(event, &leader->sibling_list, group_entry) {
> +               if (event->state <= PERF_EVENT_STATE_OFF)
> +                       continue;
> +
> +               if (n >= max_count)
> +                       return -EINVAL;
> +
> +               box->event_list[n] = event;
> +               n++;
> +       }
> +       return n;
> +}
> +
> +static struct event_constraint *
> +uncore_event_constraint(struct intel_uncore_type *type,
> +                       struct perf_event *event)
> +{
> +       struct event_constraint *c;
> +
> +       if (event->hw.config == (u64)-1)
> +               return &constraint_fixed;
> +
> +       if (type->constraints) {
> +               for_each_event_constraint(c, type->constraints) {
> +                       if ((event->hw.config & c->cmask) == c->code)
> +                               return c;
> +               }
> +       }
> +
> +       if (type->num_counters == 2)
> +               return &unconstrained_2;
> +       if (type->num_counters == 3)
> +               return &unconstrained_3;
> +       if (type->num_counters == 4)
> +               return &unconstrained_4;
> +       if (type->num_counters == 8)
> +               return &unconstrained_8;
> +
> +       WARN_ON_ONCE(1);
> +       return &unconstrained_2;

	return event->pmu->unconstrained;

seems much saner to me..

> +}
> +
> +static int uncore_assign_events(struct intel_uncore_box *box,
> +                               int assign[], int n)
> +{
> +       struct event_constraint *c, *constraints[UNCORE_PMC_IDX_MAX];
> +       int i, ret, wmin, wmax;
> +
> +       for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
> +               c = uncore_event_constraint(box->pmu->type,
> +                                       box->event_list[i]);
> +               constraints[i] = c;
> +               wmin = min(wmin, c->weight);
> +               wmax = max(wmax, c->weight);
> +       }

No fast path then?

> +       ret = perf_assign_events(constraints, n, wmin, wmax, assign);
> +       return ret ? -EINVAL : 0;
> +}


> +static void uncore_pmu_event_start(struct perf_event *event, int flags)
> +{
> +       struct intel_uncore_box *box = uncore_event_to_box(event);
> +
> +       raw_spin_lock(&box->lock);
> +       __uncore_pmu_event_start(box, event, flags);
> +       raw_spin_unlock(&box->lock);
> +}

> +static void uncore_pmu_event_stop(struct perf_event *event, int flags)
> +{
> +       struct intel_uncore_box *box = uncore_event_to_box(event);
> +
> +       raw_spin_lock(&box->lock);
> +       __uncore_pmu_event_stop(box, event, flags);
> +       raw_spin_unlock(&box->lock);
> +}

> +static int uncore_pmu_event_add(struct perf_event *event, int flags)
> +{
> +       struct intel_uncore_box *box = uncore_event_to_box(event);
> +       struct hw_perf_event *hwc = &event->hw;
> +       int assign[UNCORE_PMC_IDX_MAX];
> +       int i, n, ret;
> +
> +       if (!box)
> +               return -ENODEV;
> +
> +       raw_spin_lock(&box->lock);

> +       raw_spin_unlock(&box->lock);
> +       return ret;
> +}
> +
> +static void uncore_pmu_event_del(struct perf_event *event, int flags)
> +{
> +       struct intel_uncore_box *box = uncore_event_to_box(event);
> +       int i;
> +
> +       raw_spin_lock(&box->lock);

> +       raw_spin_unlock(&box->lock);
> +}

So what's up with all this box->lock business.. why does that lock
exist?

> +static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu)
> +{
> +       int ret;
> +
> +       pmu->pmu.attr_groups    = pmu->type->attr_groups;
> +       pmu->pmu.task_ctx_nr    = perf_invalid_context;
> +       pmu->pmu.event_init     = uncore_pmu_event_init;
> +       pmu->pmu.add            = uncore_pmu_event_add;
> +       pmu->pmu.del            = uncore_pmu_event_del;
> +       pmu->pmu.start          = uncore_pmu_event_start;
> +       pmu->pmu.stop           = uncore_pmu_event_stop;
> +       pmu->pmu.read           = uncore_pmu_event_read;

Won't this look better as a C99 struct init? Something like:

	pmu->pmu = (struct pmu){
		.attr_groups	= pmu->type->attr_groups,
		.task_ctx_nr	= perf_invalid_context,
		.event_init	= uncore_pmu_event_init,
		...
	};

> +       if (pmu->type->num_boxes == 1)
> +               sprintf(pmu->name, "uncore_%s", pmu->type->name);
> +       else
> +               sprintf(pmu->name, "uncore_%s%d", pmu->type->name,
> +                       pmu->pmu_idx);
> +
> +       ret = perf_pmu_register(&pmu->pmu, pmu->name, -1);
> +       return ret;
> +}


> +static int __init uncore_type_init(struct intel_uncore_type *type)
> +{
> +       struct intel_uncore_pmu *pmus;
> +       struct attribute_group *events_group;
> +       struct attribute **attrs;
> +       int i, j;
> +
> +       pmus = kzalloc(sizeof(*pmus) * type->num_boxes, GFP_KERNEL);
> +       if (!pmus)
> +               return -ENOMEM;

Hmm, but if you have a pmu for each box, then what do you need
that pmu->box reference for?

> +
> +       for (i = 0; i < type->num_boxes; i++) {
> +               pmus[i].func_id = -1;
> +               pmus[i].pmu_idx = i;
> +               pmus[i].type = type;
> +
> +               for (j = 0; j < ARRAY_SIZE(pmus[0].box_hash); j++)
> +                       INIT_HLIST_HEAD(&pmus[i].box_hash[j]);
> +       }
> +
> +       if (type->event_descs) {
> +               for (i = 0; ; i++) {
> +                       if (!type->event_descs[i].attr.attr.name)
> +                               break;
> +               }
> +
> +               events_group = kzalloc(sizeof(struct attribute *) * (i + 1) +
> +                               sizeof(*events_group), GFP_KERNEL);
> +               if (!events_group)
> +                       goto fail;
> +
> +               attrs = (struct attribute **)(events_group + 1);
> +               events_group->name = "events";
> +               events_group->attrs = attrs;
> +
> +               for (j = 0; j < i; j++)
> +                       attrs[j] = &type->event_descs[j].attr.attr;
> +
> +               type->attr_groups[1] = events_group;
> +       }
> +       type->pmus = pmus;
> +       return 0;
> +fail:
> +       uncore_type_exit(type);
> +       return -ENOMEM;
> +}
> +


Aside from all this, there's still the problem that you don't place all
events for a particular phys_id onto a single cpu. It doesn't matter
which cpu in that package it is, but all events should go to the same one.

This means that on unplug of that cpu, you have to migrate all these
events, etc.

I suspect doing this will also allow you to get rid of that box->lock
thing.

next prev parent reply	other threads:[~2012-03-31 17:25 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-03-28  6:43 [RFC PATCH 0/5] perf: Intel uncore pmu counting support Yan, Zheng
2012-03-28  6:43 ` [PATCH 1/5] perf: Export perf_assign_events Yan, Zheng
2012-03-28  6:43 ` [PATCH 2/5] perf: generic intel uncore support Yan, Zheng
2012-03-28  9:24   ` Andi Kleen
2012-03-28  9:38     ` Peter Zijlstra
2012-03-28 11:24     ` Yan, Zheng
2012-03-31  3:18   ` Peter Zijlstra [this message]
2012-04-01  3:11     ` Yan, Zheng
2012-04-02 22:10       ` Peter Zijlstra
2012-04-02 22:11       ` Peter Zijlstra
2012-04-03  8:28         ` Yan, Zheng
2012-04-03 14:29           ` Peter Zijlstra
2012-04-04  1:47             ` Yan, Zheng
2012-04-10  0:48             ` Yan, Zheng
2012-04-16 12:11               ` Peter Zijlstra
2012-04-02 22:16       ` Peter Zijlstra
2012-04-02 22:24       ` Peter Zijlstra
2012-04-16 12:07       ` Peter Zijlstra
2012-04-17  6:56         ` Yan, Zheng
2012-03-28  6:43 ` [PATCH 3/5] perf: Add Nehalem and Sandy Bridge " Yan, Zheng
2012-03-28  6:43 ` [PATCH 4/5] perf: Generic pci uncore device support Yan, Zheng
2012-03-28  6:43 ` [PATCH 5/5] perf: Add Sandy Bridge-EP uncore support Yan, Zheng
2012-03-28  6:49 ` [RFC PATCH 0/5] perf: Intel uncore pmu counting support Ingo Molnar
2012-03-28  8:49   ` Peter Zijlstra
2012-03-28  9:02     ` Yan, Zheng
2012-03-28  8:57   ` Andi Kleen
2012-03-28  9:30     ` Ingo Molnar
2012-03-28 10:58     ` Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1333163902.2960.63.camel@laptop \
    --to=a.p.zijlstra@chello.nl \
    --cc=andi@firstfloor.org \
    --cc=eranian@google.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=ming.m.lin@intel.com \
    --cc=mingo@elte.hu \
    --cc=zheng.z.yan@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox