* [RFC 0/2] Support perf stat with i915 PMU
@ 2017-08-15 17:56 Dmitry Rogozhkin
2017-08-15 17:56 ` [RFC 1/2] drm/i915/pmu: reorder function to suite next patch Dmitry Rogozhkin
2017-08-15 17:56 ` [RFC 2/2] drm/i915/pmu: serve global events and support perf stat Dmitry Rogozhkin
From: Dmitry Rogozhkin @ 2017-08-15 17:56 UTC
To: intel-gfx; +Cc: Peter Zijlstra
These patches depend on the RFC patches enabling i915 PMU from Tvrtko:
https://patchwork.freedesktop.org/series/27488/
Thus, a CI failure to build them is expected. I think my patches should
actually be squashed into Tvrtko's.
The first patch simply reorders functions and changes nothing compared to
Tvrtko's patches. The second patch adds fixes based on comments on the PMU
API and clarifications from PMU-aware engineers.
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Dmitry Rogozhkin (2):
drm/i915/pmu: reorder function to suite next patch
drm/i915/pmu: serve global events and support perf stat
drivers/gpu/drm/i915/i915_drv.h | 1 +
drivers/gpu/drm/i915/i915_pmu.c | 222 +++++++++++++++++++++-------------------
2 files changed, 118 insertions(+), 105 deletions(-)
--
1.8.3.1
* [RFC 1/2] drm/i915/pmu: reorder function to suite next patch
2017-08-15 17:56 [RFC 0/2] Support perf stat with i915 PMU Dmitry Rogozhkin
@ 2017-08-15 17:56 ` Dmitry Rogozhkin
2017-08-15 17:56 ` [RFC 2/2] drm/i915/pmu: serve global events and support perf stat Dmitry Rogozhkin
From: Dmitry Rogozhkin @ 2017-08-15 17:56 UTC
To: intel-gfx; +Cc: Peter Zijlstra
This patch does nothing except reorder functions, to highlight the
changes in the next patch.
Change-Id: I0cd298780503ae8f6f8035b86c59fc8b5191356b
Signed-off-by: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
---
drivers/gpu/drm/i915/i915_pmu.c | 180 ++++++++++++++++++++--------------------
1 file changed, 90 insertions(+), 90 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 3272ec0..bcdf2bc 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -363,6 +363,88 @@ static bool engine_needs_busy_stats(struct intel_engine_cs *engine)
(engine->pmu.enable & BIT(I915_SAMPLE_BUSY));
}
+static u64 count_interrupts(struct drm_i915_private *i915)
+{
+ /* open-coded kstat_irqs() */
+ struct irq_desc *desc = irq_to_desc(i915->drm.pdev->irq);
+ u64 sum = 0;
+ int cpu;
+
+ if (!desc || !desc->kstat_irqs)
+ return 0;
+
+ for_each_possible_cpu(cpu)
+ sum += *per_cpu_ptr(desc->kstat_irqs, cpu);
+
+ return sum;
+}
+
+static void i915_pmu_event_read(struct perf_event *event)
+{
+ struct drm_i915_private *i915 =
+ container_of(event->pmu, typeof(*i915), pmu.base);
+ u64 val = 0;
+
+ if (is_engine_event(event)) {
+ u8 sample = engine_event_sample(event);
+ struct intel_engine_cs *engine;
+
+ engine = intel_engine_lookup_user(i915,
+ engine_event_class(event),
+ engine_event_instance(event));
+
+ if (WARN_ON_ONCE(!engine)) {
+ /* Do nothing */
+ } else if (sample == I915_SAMPLE_BUSY &&
+ engine->pmu.busy_stats) {
+ val = ktime_to_ns(intel_engine_get_busy_time(engine));
+ } else {
+ val = engine->pmu.sample[sample];
+ }
+ } else switch (event->attr.config) {
+ case I915_PMU_ACTUAL_FREQUENCY:
+ val = i915->pmu.sample[__I915_SAMPLE_FREQ_ACT];
+ break;
+ case I915_PMU_REQUESTED_FREQUENCY:
+ val = i915->pmu.sample[__I915_SAMPLE_FREQ_REQ];
+ break;
+ case I915_PMU_ENERGY:
+ val = intel_energy_uJ(i915);
+ break;
+ case I915_PMU_INTERRUPTS:
+ val = count_interrupts(i915);
+ break;
+
+ case I915_PMU_RC6_RESIDENCY:
+ if (!i915->gt.awake)
+ return;
+
+ val = intel_rc6_residency_ns(i915,
+ IS_VALLEYVIEW(i915) ?
+ VLV_GT_RENDER_RC6 :
+ GEN6_GT_GFX_RC6);
+ break;
+
+ case I915_PMU_RC6p_RESIDENCY:
+ if (!i915->gt.awake)
+ return;
+
+ if (!IS_VALLEYVIEW(i915))
+ val = intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
+ break;
+
+ case I915_PMU_RC6pp_RESIDENCY:
+ if (!i915->gt.awake)
+ return;
+
+ if (!IS_VALLEYVIEW(i915))
+ val = intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
+ break;
+ }
+
+ local64_set(&event->count, val);
+}
+
static void i915_pmu_enable(struct perf_event *event)
{
struct drm_i915_private *i915 =
@@ -440,23 +522,6 @@ static void i915_pmu_disable(struct perf_event *event)
i915_pmu_timer_cancel(event);
}
-static int i915_pmu_event_add(struct perf_event *event, int flags)
-{
- struct hw_perf_event *hwc = &event->hw;
-
- if (flags & PERF_EF_START)
- i915_pmu_enable(event);
-
- hwc->state = !(flags & PERF_EF_START);
-
- return 0;
-}
-
-static void i915_pmu_event_del(struct perf_event *event, int flags)
-{
- i915_pmu_disable(event);
-}
-
static void i915_pmu_event_start(struct perf_event *event, int flags)
{
i915_pmu_enable(event);
@@ -467,86 +532,21 @@ static void i915_pmu_event_stop(struct perf_event *event, int flags)
i915_pmu_disable(event);
}
-static u64 count_interrupts(struct drm_i915_private *i915)
+static int i915_pmu_event_add(struct perf_event *event, int flags)
{
- /* open-coded kstat_irqs() */
- struct irq_desc *desc = irq_to_desc(i915->drm.pdev->irq);
- u64 sum = 0;
- int cpu;
+ struct hw_perf_event *hwc = &event->hw;
- if (!desc || !desc->kstat_irqs)
- return 0;
+ if (flags & PERF_EF_START)
+ i915_pmu_enable(event);
- for_each_possible_cpu(cpu)
- sum += *per_cpu_ptr(desc->kstat_irqs, cpu);
+ hwc->state = !(flags & PERF_EF_START);
- return sum;
+ return 0;
}
-static void i915_pmu_event_read(struct perf_event *event)
+static void i915_pmu_event_del(struct perf_event *event, int flags)
{
- struct drm_i915_private *i915 =
- container_of(event->pmu, typeof(*i915), pmu.base);
- u64 val = 0;
-
- if (is_engine_event(event)) {
- u8 sample = engine_event_sample(event);
- struct intel_engine_cs *engine;
-
- engine = intel_engine_lookup_user(i915,
- engine_event_class(event),
- engine_event_instance(event));
-
- if (WARN_ON_ONCE(!engine)) {
- /* Do nothing */
- } else if (sample == I915_SAMPLE_BUSY &&
- engine->pmu.busy_stats) {
- val = ktime_to_ns(intel_engine_get_busy_time(engine));
- } else {
- val = engine->pmu.sample[sample];
- }
- } else switch (event->attr.config) {
- case I915_PMU_ACTUAL_FREQUENCY:
- val = i915->pmu.sample[__I915_SAMPLE_FREQ_ACT];
- break;
- case I915_PMU_REQUESTED_FREQUENCY:
- val = i915->pmu.sample[__I915_SAMPLE_FREQ_REQ];
- break;
- case I915_PMU_ENERGY:
- val = intel_energy_uJ(i915);
- break;
- case I915_PMU_INTERRUPTS:
- val = count_interrupts(i915);
- break;
-
- case I915_PMU_RC6_RESIDENCY:
- if (!i915->gt.awake)
- return;
-
- val = intel_rc6_residency_ns(i915,
- IS_VALLEYVIEW(i915) ?
- VLV_GT_RENDER_RC6 :
- GEN6_GT_GFX_RC6);
- break;
-
- case I915_PMU_RC6p_RESIDENCY:
- if (!i915->gt.awake)
- return;
-
- if (!IS_VALLEYVIEW(i915))
- val = intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
- break;
-
- case I915_PMU_RC6pp_RESIDENCY:
- if (!i915->gt.awake)
- return;
-
- if (!IS_VALLEYVIEW(i915))
- val = intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
- break;
- }
-
- local64_set(&event->count, val);
+ i915_pmu_disable(event);
}
static int i915_pmu_event_event_idx(struct perf_event *event)
--
1.8.3.1
* [RFC 2/2] drm/i915/pmu: serve global events and support perf stat
2017-08-15 17:56 [RFC 0/2] Support perf stat with i915 PMU Dmitry Rogozhkin
2017-08-15 17:56 ` [RFC 1/2] drm/i915/pmu: reorder function to suite next patch Dmitry Rogozhkin
@ 2017-08-15 17:56 ` Dmitry Rogozhkin
2017-08-18 0:19 ` Rogozhkin, Dmitry V
From: Dmitry Rogozhkin @ 2017-08-15 17:56 UTC
To: intel-gfx; +Cc: Peter Zijlstra
This patch should probably be squashed with Tvrtko's PMU enabling
patch...
i915 events don't have a CPU context to work with, so the i915
PMU should work in global mode, i.e. expose perf_invalid_context.
This makes the following perf invocation:
perf stat workload.sh
error out with "failed to read counter". The correct usage is:
perf stat -a -C0 workload.sh
which produces the expected counter output.
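For example, assuming the series exposes the render engine busy counter
as i915/rcs0-busy/ (the event name is an assumption on my side, not taken
from Tvrtko's patches), a single counter can be requested explicitly:
perf stat -a -C0 -e i915/rcs0-busy/ workload.sh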
Another change required to make perf stat happy is to properly support
pmu->del(): the comment on the del() declaration says that del() is
required to call stop(event, PERF_EF_UPDATE), and perf stat implicitly
gets the event count through this.
Finally, if perf stat is run as follows:
perf stat -a workload.sh
i.e. without the '-C0' option, then the event will be initialized as many
times as there are CPUs. Thus, the patch adds PMU refcounting support to
avoid multiple init/close of internal state. The issue I did not overcome
is that in this case the counters are multiplied by the number of CPUs.
I am not sure whether it is possible to expose some event-enabling CPU
mask (a sketch follows below), or whether I need to simply error out.
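If such a mask is workable, one option would be an uncore-style "cpumask"
attribute, which tells perf to open system-wide events only on the listed
CPU(s). A minimal sketch, assuming it would be wired into the existing
i915_pmu_attr_groups (names below are illustrative, not code from this
series):

static cpumask_t i915_pmu_cpumask = CPU_MASK_CPU0;

static ssize_t i915_pmu_get_attr_cpumask(struct device *dev,
					 struct device_attribute *attr,
					 char *buf)
{
	/* perf reads this file and restricts "-a" to the listed CPUs. */
	return cpumap_print_to_pagebuf(true, buf, &i915_pmu_cpumask);
}

static DEVICE_ATTR(cpumask, S_IRUGO, i915_pmu_get_attr_cpumask, NULL);

static struct attribute *i915_cpumask_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL,
};

static const struct attribute_group i915_pmu_cpumask_attr_group = {
	.attrs = i915_cpumask_attrs,
};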
Change-Id: I7d1abe747a4399196e72253f7b66441a6528dbee
Signed-off-by: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
---
drivers/gpu/drm/i915/i915_drv.h | 1 +
drivers/gpu/drm/i915/i915_pmu.c | 106 ++++++++++++++++++++++------------------
2 files changed, 60 insertions(+), 47 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b59da2c..215d47b 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2663,6 +2663,7 @@ struct drm_i915_private {
struct {
struct pmu base;
spinlock_t lock;
+ unsigned int ref;
struct hrtimer timer;
bool timer_enabled;
u64 enable;
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index bcdf2bc..0972203 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -451,34 +451,39 @@ static void i915_pmu_enable(struct perf_event *event)
container_of(event->pmu, typeof(*i915), pmu.base);
struct intel_engine_cs *engine = NULL;
unsigned long flags;
+ bool start_timer = false;
spin_lock_irqsave(&i915->pmu.lock, flags);
- if (is_engine_event(event)) {
- engine = intel_engine_lookup_user(i915,
- engine_event_class(event),
- engine_event_instance(event));
- GEM_BUG_ON(!engine);
- engine->pmu.enable |= BIT(engine_event_sample(event));
- }
-
- i915->pmu.enable |= event_enabled_mask(event);
+ if (i915->pmu.ref++ == 0) {
+ if (is_engine_event(event)) {
+ engine = intel_engine_lookup_user(i915,
+ engine_event_class(event),
+ engine_event_instance(event));
+ GEM_BUG_ON(!engine);
+ engine->pmu.enable |= BIT(engine_event_sample(event));
+ }
- if (pmu_needs_timer(i915, true) && !i915->pmu.timer_enabled) {
- hrtimer_start_range_ns(&i915->pmu.timer,
- ns_to_ktime(PERIOD), 0,
- HRTIMER_MODE_REL_PINNED);
- i915->pmu.timer_enabled = true;
- } else if (is_engine_event(event) && engine_needs_busy_stats(engine) &&
- !engine->pmu.busy_stats) {
- engine->pmu.busy_stats = true;
- if (!cancel_delayed_work(&engine->pmu.disable_busy_stats))
- queue_work(i915->wq, &engine->pmu.enable_busy_stats);
+ i915->pmu.enable |= event_enabled_mask(event);
+
+ if (pmu_needs_timer(i915, true) && !i915->pmu.timer_enabled) {
+ hrtimer_start_range_ns(&i915->pmu.timer,
+ ns_to_ktime(PERIOD), 0,
+ HRTIMER_MODE_REL_PINNED);
+ i915->pmu.timer_enabled = true;
+ } else if (is_engine_event(event) && engine_needs_busy_stats(engine) &&
+ !engine->pmu.busy_stats) {
+ engine->pmu.busy_stats = true;
+ if (!cancel_delayed_work(&engine->pmu.disable_busy_stats))
+ queue_work(i915->wq, &engine->pmu.enable_busy_stats);
+ }
+ start_timer = true;
}
spin_unlock_irqrestore(&i915->pmu.lock, flags);
- i915_pmu_timer_start(event);
+ if (start_timer)
+ i915_pmu_timer_start(event);
}
static void i915_pmu_disable(struct perf_event *event)
@@ -486,40 +491,45 @@ static void i915_pmu_disable(struct perf_event *event)
struct drm_i915_private *i915 =
container_of(event->pmu, typeof(*i915), pmu.base);
unsigned long flags;
+ bool timer_cancel = true;
u64 mask;
spin_lock_irqsave(&i915->pmu.lock, flags);
- if (is_engine_event(event)) {
- struct intel_engine_cs *engine;
- enum intel_engine_id id;
-
- engine = intel_engine_lookup_user(i915,
- engine_event_class(event),
- engine_event_instance(event));
- GEM_BUG_ON(!engine);
- engine->pmu.enable &= ~BIT(engine_event_sample(event));
- if (engine->pmu.busy_stats &&
- !engine_needs_busy_stats(engine)) {
- engine->pmu.busy_stats = false;
- queue_delayed_work(i915->wq,
- &engine->pmu.disable_busy_stats,
- round_jiffies_up_relative(2 * HZ));
+ if (i915->pmu.ref && --i915->pmu.ref == 0) {
+ if (is_engine_event(event)) {
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+
+ engine = intel_engine_lookup_user(i915,
+ engine_event_class(event),
+ engine_event_instance(event));
+ GEM_BUG_ON(!engine);
+ engine->pmu.enable &= ~BIT(engine_event_sample(event));
+ if (engine->pmu.busy_stats &&
+ !engine_needs_busy_stats(engine)) {
+ engine->pmu.busy_stats = false;
+ queue_delayed_work(i915->wq,
+ &engine->pmu.disable_busy_stats,
+ round_jiffies_up_relative(2 * HZ));
+ }
+ mask = 0;
+ for_each_engine(engine, i915, id)
+ mask |= engine->pmu.enable;
+ mask = (~mask) & ENGINE_SAMPLE_MASK;
+ } else {
+ mask = event_enabled_mask(event);
}
- mask = 0;
- for_each_engine(engine, i915, id)
- mask |= engine->pmu.enable;
- mask = (~mask) & ENGINE_SAMPLE_MASK;
- } else {
- mask = event_enabled_mask(event);
- }
- i915->pmu.enable &= ~mask;
- i915->pmu.timer_enabled &= pmu_needs_timer(i915, true);
+ i915->pmu.enable &= ~mask;
+ i915->pmu.timer_enabled &= pmu_needs_timer(i915, true);
+ timer_cancel = true;
+ }
spin_unlock_irqrestore(&i915->pmu.lock, flags);
- i915_pmu_timer_cancel(event);
+ if (timer_cancel)
+ i915_pmu_timer_cancel(event);
}
static void i915_pmu_event_start(struct perf_event *event, int flags)
@@ -529,6 +539,8 @@ static void i915_pmu_event_start(struct perf_event *event, int flags)
static void i915_pmu_event_stop(struct perf_event *event, int flags)
{
+ if (flags & PERF_EF_UPDATE)
+ i915_pmu_event_read(event);
i915_pmu_disable(event);
}
@@ -546,7 +558,7 @@ static int i915_pmu_event_add(struct perf_event *event, int flags)
static void i915_pmu_event_del(struct perf_event *event, int flags)
{
- i915_pmu_disable(event);
+ i915_pmu_event_stop(event, PERF_EF_UPDATE);
}
static int i915_pmu_event_event_idx(struct perf_event *event)
@@ -687,7 +699,7 @@ void i915_pmu_register(struct drm_i915_private *i915)
return;
i915->pmu.base.attr_groups = i915_pmu_attr_groups;
- i915->pmu.base.task_ctx_nr = perf_sw_context;
+ i915->pmu.base.task_ctx_nr = perf_invalid_context;
i915->pmu.base.event_init = i915_pmu_event_init;
i915->pmu.base.add = i915_pmu_event_add;
i915->pmu.base.del = i915_pmu_event_del;
--
1.8.3.1
* Re: [RFC 2/2] drm/i915/pmu: serve global events and support perf stat
2017-08-15 17:56 ` [RFC 2/2] drm/i915/pmu: serve global events and support perf stat Dmitry Rogozhkin
@ 2017-08-18 0:19 ` Rogozhkin, Dmitry V
From: Rogozhkin, Dmitry V @ 2017-08-18 0:19 UTC
To: intel-gfx@lists.freedesktop.org; +Cc: peterz@infradead.org
On Tue, 2017-08-15 at 10:56 -0700, Dmitry Rogozhkin wrote:
> This patch should probably be squashed with Tvrtko's PMU enabling
> patch...
>
> i915 events don't have a CPU context to work with, so the i915
> PMU should work in global mode, i.e. expose perf_invalid_context.
> This makes the following perf invocation:
> perf stat workload.sh
> error out with "failed to read counter". The correct usage is:
> perf stat -a -C0 workload.sh
> which produces the expected counter output.
>
> Another change required to make perf stat happy is to properly support
> pmu->del(): the comment on the del() declaration says that del() is
> required to call stop(event, PERF_EF_UPDATE), and perf stat implicitly
> gets the event count through this.
>
> Finally, if perf stat is run as follows:
> perf stat -a workload.sh
> i.e. without the '-C0' option, then the event will be initialized as many
> times as there are CPUs. Thus, the patch adds PMU refcounting support to
> avoid multiple init/close of internal state. The issue I did not overcome
> is that in this case the counters are multiplied by the number of CPUs.
> I am not sure whether it is possible to expose some event-enabling CPU
> mask, or whether I need to simply error out.
>
> Change-Id: I7d1abe747a4399196e72253f7b66441a6528dbee
> Signed-off-by: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Peter Zijlstra <peterz@infradead.org>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 1 +
> drivers/gpu/drm/i915/i915_pmu.c | 106 ++++++++++++++++++++++------------------
> 2 files changed, 60 insertions(+), 47 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index b59da2c..215d47b 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2663,6 +2663,7 @@ struct drm_i915_private {
> struct {
> struct pmu base;
> spinlock_t lock;
> + unsigned int ref;
> struct hrtimer timer;
> bool timer_enabled;
> u64 enable;
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> index bcdf2bc..0972203 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -451,34 +451,39 @@ static void i915_pmu_enable(struct perf_event *event)
> container_of(event->pmu, typeof(*i915), pmu.base);
> struct intel_engine_cs *engine = NULL;
> unsigned long flags;
> + bool start_timer = false;
>
> spin_lock_irqsave(&i915->pmu.lock, flags);
>
> - if (is_engine_event(event)) {
> - engine = intel_engine_lookup_user(i915,
> - engine_event_class(event),
> - engine_event_instance(event));
> - GEM_BUG_ON(!engine);
> - engine->pmu.enable |= BIT(engine_event_sample(event));
> - }
> -
> - i915->pmu.enable |= event_enabled_mask(event);
> + if (i915->pmu.ref++ == 0) {
This was stupid of me: with this I support only a single busy-stats event.
The correct way to do this is to make busy_stats a refcount, which I did
in the updated patch I just sent. I wonder whether the timer stuff should
be updated in a similar way, but I have not yet tried that part of the
code enough to judge.
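A minimal sketch of that direction, assuming a busy_stats_ref field in
place of the current bool (illustrative only, not the code from the
updated patch):

static void engine_busy_stats_get(struct intel_engine_cs *engine)
{
	/* Caller holds i915->pmu.lock; the first user enables stats. */
	if (engine->pmu.busy_stats_ref++ == 0)
		if (!cancel_delayed_work(&engine->pmu.disable_busy_stats))
			queue_work(engine->i915->wq,
				   &engine->pmu.enable_busy_stats);
}

static void engine_busy_stats_put(struct intel_engine_cs *engine)
{
	/* The last user schedules the delayed disable, as before. */
	GEM_BUG_ON(!engine->pmu.busy_stats_ref);
	if (--engine->pmu.busy_stats_ref == 0)
		queue_delayed_work(engine->i915->wq,
				   &engine->pmu.disable_busy_stats,
				   round_jiffies_up_relative(2 * HZ));
}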
> + if (is_engine_event(event)) {
> + engine = intel_engine_lookup_user(i915,
> + engine_event_class(event),
> + engine_event_instance(event));
> + GEM_BUG_ON(!engine);
> + engine->pmu.enable |= BIT(engine_event_sample(event));
> + }
>
> - if (pmu_needs_timer(i915, true) && !i915->pmu.timer_enabled) {
> - hrtimer_start_range_ns(&i915->pmu.timer,
> - ns_to_ktime(PERIOD), 0,
> - HRTIMER_MODE_REL_PINNED);
> - i915->pmu.timer_enabled = true;
> - } else if (is_engine_event(event) && engine_needs_busy_stats(engine) &&
> - !engine->pmu.busy_stats) {
> - engine->pmu.busy_stats = true;
> - if (!cancel_delayed_work(&engine->pmu.disable_busy_stats))
> - queue_work(i915->wq, &engine->pmu.enable_busy_stats);
> + i915->pmu.enable |= event_enabled_mask(event);
> +
> + if (pmu_needs_timer(i915, true) && !i915->pmu.timer_enabled) {
> + hrtimer_start_range_ns(&i915->pmu.timer,
> + ns_to_ktime(PERIOD), 0,
> + HRTIMER_MODE_REL_PINNED);
> + i915->pmu.timer_enabled = true;
> + } else if (is_engine_event(event) && engine_needs_busy_stats(engine) &&
> + !engine->pmu.busy_stats) {
> + engine->pmu.busy_stats = true;
> + if (!cancel_delayed_work(&engine->pmu.disable_busy_stats))
> + queue_work(i915->wq, &engine->pmu.enable_busy_stats);
> + }
> + start_timer = true;
> }
>
> spin_unlock_irqrestore(&i915->pmu.lock, flags);
>
> - i915_pmu_timer_start(event);
> + if (start_timer)
> + i915_pmu_timer_start(event);
> }
>
> static void i915_pmu_disable(struct perf_event *event)
> @@ -486,40 +491,45 @@ static void i915_pmu_disable(struct perf_event *event)
> struct drm_i915_private *i915 =
> container_of(event->pmu, typeof(*i915), pmu.base);
> unsigned long flags;
> + bool timer_cancel = true;
> u64 mask;
>
> spin_lock_irqsave(&i915->pmu.lock, flags);
>
> - if (is_engine_event(event)) {
> - struct intel_engine_cs *engine;
> - enum intel_engine_id id;
> -
> - engine = intel_engine_lookup_user(i915,
> - engine_event_class(event),
> - engine_event_instance(event));
> - GEM_BUG_ON(!engine);
> - engine->pmu.enable &= ~BIT(engine_event_sample(event));
> - if (engine->pmu.busy_stats &&
> - !engine_needs_busy_stats(engine)) {
> - engine->pmu.busy_stats = false;
> - queue_delayed_work(i915->wq,
> - &engine->pmu.disable_busy_stats,
> - round_jiffies_up_relative(2 * HZ));
> + if (i915->pmu.ref && --i915->pmu.ref == 0) {
> + if (is_engine_event(event)) {
> + struct intel_engine_cs *engine;
> + enum intel_engine_id id;
> +
> + engine = intel_engine_lookup_user(i915,
> + engine_event_class(event),
> + engine_event_instance(event));
> + GEM_BUG_ON(!engine);
> + engine->pmu.enable &= ~BIT(engine_event_sample(event));
> + if (engine->pmu.busy_stats &&
> + !engine_needs_busy_stats(engine)) {
> + engine->pmu.busy_stats = false;
> + queue_delayed_work(i915->wq,
> + &engine->pmu.disable_busy_stats,
> + round_jiffies_up_relative(2 * HZ));
> + }
> + mask = 0;
> + for_each_engine(engine, i915, id)
> + mask |= engine->pmu.enable;
> + mask = (~mask) & ENGINE_SAMPLE_MASK;
> + } else {
> + mask = event_enabled_mask(event);
> }
> - mask = 0;
> - for_each_engine(engine, i915, id)
> - mask |= engine->pmu.enable;
> - mask = (~mask) & ENGINE_SAMPLE_MASK;
> - } else {
> - mask = event_enabled_mask(event);
> - }
>
> - i915->pmu.enable &= ~mask;
> - i915->pmu.timer_enabled &= pmu_needs_timer(i915, true);
> + i915->pmu.enable &= ~mask;
> + i915->pmu.timer_enabled &= pmu_needs_timer(i915, true);
> + timer_cancel = true;
> + }
>
> spin_unlock_irqrestore(&i915->pmu.lock, flags);
>
> - i915_pmu_timer_cancel(event);
> + if (timer_cancel)
> + i915_pmu_timer_cancel(event);
> }
>
> static void i915_pmu_event_start(struct perf_event *event, int flags)
> @@ -529,6 +539,8 @@ static void i915_pmu_event_start(struct perf_event *event, int flags)
>
> static void i915_pmu_event_stop(struct perf_event *event, int flags)
> {
> + if (flags & PERF_EF_UPDATE)
> + i915_pmu_event_read(event);
> i915_pmu_disable(event);
> }
>
> @@ -546,7 +558,7 @@ static int i915_pmu_event_add(struct perf_event *event, int flags)
>
> static void i915_pmu_event_del(struct perf_event *event, int flags)
> {
> - i915_pmu_disable(event);
> + i915_pmu_event_stop(event, PERF_EF_UPDATE);
> }
>
> static int i915_pmu_event_event_idx(struct perf_event *event)
> @@ -687,7 +699,7 @@ void i915_pmu_register(struct drm_i915_private *i915)
> return;
>
> i915->pmu.base.attr_groups = i915_pmu_attr_groups;
> - i915->pmu.base.task_ctx_nr = perf_sw_context;
> + i915->pmu.base.task_ctx_nr = perf_invalid_context;
> i915->pmu.base.event_init = i915_pmu_event_init;
> i915->pmu.base.add = i915_pmu_event_add;
> i915->pmu.base.del = i915_pmu_event_del;