From: namhyung@kernel.org
To: Peter Zijlstra <peterz@infradead.org>, Ingo Molnar <mingo@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>,
Alexander Shishkin <alexander.shishkin@linux.intel.com>,
Arnaldo Carvalho de Melo <acme@kernel.org>,
LKML <linux-kernel@vger.kernel.org>,
Mingwei Zhang <mizhang@google.com>,
Namhyung Kim <namhyung@kernel.org>,
Ian Rogers <irogers@google.com>,
Kan Liang <kan.liang@linux.intel.com>
Subject: [PATCH v2 1/2] perf/core: Update perf_adjust_freq_unthr_context()
Date: Thu, 11 Jan 2024 12:43:47 -0800 [thread overview]
Message-ID: <20240111204348.669673-1-namhyung@kernel.org> (raw)
From: Namhyung Kim <namhyung@kernel.org>
It was unnecessarily disabling and enabling PMUs for each event. It
should be done at PMU level, otherwise it'd increase the interrupt
latency when there are slow (uncore or virtual) PMUs and lots of events
on the PMUs.
Add pmu_ctx->nr_freq counter to check it at each PMU. As pmu context
has separate active lists for pinned group and flexible group, factor
out a new function to do the job. Also it can skip pmu contexts with
no active events (due to multiplexing or hybrid PMUs).
Another minor optimization is that it can skip PMUs w/ CAP_NO_INTERRUPT
even if it needs to unthrottle sampling events.
Reviewed-by: Ian Rogers <irogers@google.com>
Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
Tested-by: Mingwei Zhang <mizhang@google.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
v2: Check if pmu context is active (Mark)
include/linux/perf_event.h | 6 ++++
kernel/events/core.c | 70 ++++++++++++++++++++++++--------------
2 files changed, 50 insertions(+), 26 deletions(-)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index d2a15c0c6f8a..3e33b366347a 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -883,6 +883,7 @@ struct perf_event_pmu_context {
unsigned int nr_events;
unsigned int nr_cgroups;
+ unsigned int nr_freq;
atomic_t refcount; /* event <-> epc */
struct rcu_head rcu_head;
@@ -897,6 +898,11 @@ struct perf_event_pmu_context {
int rotate_necessary;
};
+static inline bool perf_pmu_ctx_is_active(struct perf_event_pmu_context *epc)
+{
+ return !list_empty(&epc->flexible_active) || !list_empty(&epc->pinned_active);
+}
+
struct perf_event_groups {
struct rb_root tree;
u64 index;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 59b332cce9e7..e9ce79c8c145 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2277,8 +2277,10 @@ event_sched_out(struct perf_event *event, struct perf_event_context *ctx)
if (!is_software_event(event))
cpc->active_oncpu--;
- if (event->attr.freq && event->attr.sample_freq)
+ if (event->attr.freq && event->attr.sample_freq) {
ctx->nr_freq--;
+ epc->nr_freq--;
+ }
if (event->attr.exclusive || !cpc->active_oncpu)
cpc->exclusive = 0;
@@ -2533,9 +2535,10 @@ event_sched_in(struct perf_event *event, struct perf_event_context *ctx)
if (!is_software_event(event))
cpc->active_oncpu++;
- if (event->attr.freq && event->attr.sample_freq)
+ if (event->attr.freq && event->attr.sample_freq) {
ctx->nr_freq++;
-
+ epc->nr_freq++;
+ }
if (event->attr.exclusive)
cpc->exclusive = 1;
@@ -4098,30 +4101,14 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count, bo
}
}
-/*
- * combine freq adjustment with unthrottling to avoid two passes over the
- * events. At the same time, make sure, having freq events does not change
- * the rate of unthrottling as that would introduce bias.
- */
-static void
-perf_adjust_freq_unthr_context(struct perf_event_context *ctx, bool unthrottle)
+static void perf_adjust_freq_unthr_events(struct list_head *event_list)
{
struct perf_event *event;
struct hw_perf_event *hwc;
u64 now, period = TICK_NSEC;
s64 delta;
- /*
- * only need to iterate over all events iff:
- * - context have events in frequency mode (needs freq adjust)
- * - there are events to unthrottle on this cpu
- */
- if (!(ctx->nr_freq || unthrottle))
- return;
-
- raw_spin_lock(&ctx->lock);
-
- list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
+ list_for_each_entry(event, event_list, active_list) {
if (event->state != PERF_EVENT_STATE_ACTIVE)
continue;
@@ -4129,8 +4116,6 @@ perf_adjust_freq_unthr_context(struct perf_event_context *ctx, bool unthrottle)
if (!event_filter_match(event))
continue;
- perf_pmu_disable(event->pmu);
-
hwc = &event->hw;
if (hwc->interrupts == MAX_INTERRUPTS) {
@@ -4140,7 +4125,7 @@ perf_adjust_freq_unthr_context(struct perf_event_context *ctx, bool unthrottle)
}
if (!event->attr.freq || !event->attr.sample_freq)
- goto next;
+ continue;
/*
* stop the event and update event->count
@@ -4162,8 +4147,41 @@ perf_adjust_freq_unthr_context(struct perf_event_context *ctx, bool unthrottle)
perf_adjust_period(event, period, delta, false);
event->pmu->start(event, delta > 0 ? PERF_EF_RELOAD : 0);
- next:
- perf_pmu_enable(event->pmu);
+ }
+}
+
+/*
+ * combine freq adjustment with unthrottling to avoid two passes over the
+ * events. At the same time, make sure, having freq events does not change
+ * the rate of unthrottling as that would introduce bias.
+ */
+static void
+perf_adjust_freq_unthr_context(struct perf_event_context *ctx, bool unthrottle)
+{
+ struct perf_event_pmu_context *pmu_ctx;
+
+ /*
+ * only need to iterate over all events iff:
+ * - context have events in frequency mode (needs freq adjust)
+ * - there are events to unthrottle on this cpu
+ */
+ if (!(ctx->nr_freq || unthrottle))
+ return;
+
+ raw_spin_lock(&ctx->lock);
+
+ list_for_each_entry(pmu_ctx, &ctx->pmu_ctx_list, pmu_ctx_entry) {
+ if (!(pmu_ctx->nr_freq || unthrottle))
+ continue;
+ if (!perf_pmu_ctx_is_active(pmu_ctx))
+ continue;
+ if (pmu_ctx->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT)
+ continue;
+
+ perf_pmu_disable(pmu_ctx->pmu);
+ perf_adjust_freq_unthr_events(&pmu_ctx->pinned_active);
+ perf_adjust_freq_unthr_events(&pmu_ctx->flexible_active);
+ perf_pmu_enable(pmu_ctx->pmu);
}
raw_spin_unlock(&ctx->lock);
--
2.43.0.275.g3460e3d667-goog
next reply other threads:[~2024-01-11 20:43 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-01-11 20:43 namhyung [this message]
2024-01-11 20:43 ` [PATCH v2 2/2] perf/core: Reduce PMU access to adjust sample freq namhyung
2024-01-19 2:41 ` kernel test robot
2024-01-19 21:01 ` Namhyung Kim
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240111204348.669673-1-namhyung@kernel.org \
--to=namhyung@kernel.org \
--cc=acme@kernel.org \
--cc=alexander.shishkin@linux.intel.com \
--cc=irogers@google.com \
--cc=kan.liang@linux.intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mark.rutland@arm.com \
--cc=mingo@kernel.org \
--cc=mizhang@google.com \
--cc=peterz@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox