All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: x86@kernel.org
Cc: peterz@infradead.org, linux-kernel@vger.kernel.org,
	rostedt@goodmis.org, mhiramat@kernel.org, bristot@redhat.com,
	jbaron@akamai.com, torvalds@linux-foundation.org,
	tglx@linutronix.de, mingo@kernel.org, namit@vmware.com,
	hpa@zytor.com, luto@kernel.org, ard.biesheuvel@linaro.org,
	jpoimboe@redhat.com, pbonzini@redhat.com,
	mathieu.desnoyers@efficios.com
Subject: [PATCH v4 17/18] x86/perf, static_call: Optimize x86_pmu methods
Date: Fri, 01 May 2020 22:29:06 +0200	[thread overview]
Message-ID: <20200501202944.763303401@infradead.org> (raw)
In-Reply-To: 20200501202849.647891881@infradead.org

Replace many of the indirect calls with static_call().

XXX run performance numbers

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/events/core.c |  140 +++++++++++++++++++++++++++++++++++--------------
 1 file changed, 100 insertions(+), 40 deletions(-)

--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -28,6 +28,7 @@
 #include <linux/bitops.h>
 #include <linux/device.h>
 #include <linux/nospec.h>
+#include <linux/static_call.h>
 
 #include <asm/apic.h>
 #include <asm/stacktrace.h>
@@ -52,6 +53,40 @@ DEFINE_PER_CPU(struct cpu_hw_events, cpu
 DEFINE_STATIC_KEY_FALSE(rdpmc_never_available_key);
 DEFINE_STATIC_KEY_FALSE(rdpmc_always_available_key);
 
+/*
+ * This here 'abuses' DEFINE_STATIC_COND_CALL() to get a static_call defined
+ * from just a typename, as opposed to an actual function. While some of these
+ * will indeed be used with static_cond_call(), many of them will not.
+ *
+ * Just like you can unconditionally call a function pointer that's initialized
+ * to NULL, as long as you make sure to set it to a valid function _before_
+ * use, so too can you use static_call() on a DEFINE_STATIC_COND_CALL(), as
+ * long as you use static_call_update() to set it before use.
+ */
+DEFINE_STATIC_COND_CALL(x86_pmu_handle_irq,  *x86_pmu.handle_irq);
+DEFINE_STATIC_COND_CALL(x86_pmu_disable_all, *x86_pmu.disable_all);
+DEFINE_STATIC_COND_CALL(x86_pmu_enable_all,  *x86_pmu.enable_all);
+DEFINE_STATIC_COND_CALL(x86_pmu_enable,	     *x86_pmu.enable);
+DEFINE_STATIC_COND_CALL(x86_pmu_disable,     *x86_pmu.disable);
+
+DEFINE_STATIC_COND_CALL(x86_pmu_add,  *x86_pmu.add);
+DEFINE_STATIC_COND_CALL(x86_pmu_del,  *x86_pmu.del);
+DEFINE_STATIC_COND_CALL(x86_pmu_read, *x86_pmu.read);
+
+DEFINE_STATIC_COND_CALL(x86_pmu_schedule_events,       *x86_pmu.schedule_events);
+DEFINE_STATIC_COND_CALL(x86_pmu_get_event_constraints, *x86_pmu.get_event_constraints);
+DEFINE_STATIC_COND_CALL(x86_pmu_put_event_constraints, *x86_pmu.put_event_constraints);
+
+DEFINE_STATIC_COND_CALL(x86_pmu_start_scheduling,  *x86_pmu.start_scheduling);
+DEFINE_STATIC_COND_CALL(x86_pmu_commit_scheduling, *x86_pmu.commit_scheduling);
+DEFINE_STATIC_COND_CALL(x86_pmu_stop_scheduling,   *x86_pmu.stop_scheduling);
+
+DEFINE_STATIC_COND_CALL(x86_pmu_sched_task,    *x86_pmu.sched_task);
+DEFINE_STATIC_COND_CALL(x86_pmu_swap_task_ctx, *x86_pmu.swap_task_ctx);
+
+DEFINE_STATIC_COND_CALL(x86_pmu_drain_pebs,   *x86_pmu.drain_pebs);
+DEFINE_STATIC_COND_CALL(x86_pmu_pebs_aliases, *x86_pmu.pebs_aliases);
+
 u64 __read_mostly hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -660,7 +695,7 @@ static void x86_pmu_disable(struct pmu *
 	cpuc->enabled = 0;
 	barrier();
 
-	x86_pmu.disable_all();
+	static_call(x86_pmu_disable_all)();
 }
 
 void x86_pmu_enable_all(int added)
@@ -907,8 +942,7 @@ int x86_schedule_events(struct cpu_hw_ev
 	if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
 		n0 -= cpuc->n_txn;
 
-	if (x86_pmu.start_scheduling)
-		x86_pmu.start_scheduling(cpuc);
+	static_cond_call(x86_pmu_start_scheduling)(cpuc);
 
 	for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
 		c = cpuc->event_constraint[i];
@@ -925,7 +959,7 @@ int x86_schedule_events(struct cpu_hw_ev
 		 * change due to external factors (sibling state, allow_tfa).
 		 */
 		if (!c || (c->flags & PERF_X86_EVENT_DYNAMIC)) {
-			c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]);
+			c = static_call(x86_pmu_get_event_constraints)(cpuc, i, cpuc->event_list[i]);
 			cpuc->event_constraint[i] = c;
 		}
 
@@ -1008,8 +1042,7 @@ int x86_schedule_events(struct cpu_hw_ev
 	if (!unsched && assign) {
 		for (i = 0; i < n; i++) {
 			e = cpuc->event_list[i];
-			if (x86_pmu.commit_scheduling)
-				x86_pmu.commit_scheduling(cpuc, i, assign[i]);
+			static_cond_call(x86_pmu_commit_scheduling)(cpuc, i, assign[i]);
 		}
 	} else {
 		for (i = n0; i < n; i++) {
@@ -1018,15 +1051,13 @@ int x86_schedule_events(struct cpu_hw_ev
 			/*
 			 * release events that failed scheduling
 			 */
-			if (x86_pmu.put_event_constraints)
-				x86_pmu.put_event_constraints(cpuc, e);
+			static_cond_call(x86_pmu_put_event_constraints)(cpuc, e);
 
 			cpuc->event_constraint[i] = NULL;
 		}
 	}
 
-	if (x86_pmu.stop_scheduling)
-		x86_pmu.stop_scheduling(cpuc);
+	static_cond_call(x86_pmu_stop_scheduling)(cpuc);
 
 	return unsched ? -EINVAL : 0;
 }
@@ -1217,7 +1248,7 @@ static void x86_pmu_enable(struct pmu *p
 	cpuc->enabled = 1;
 	barrier();
 
-	x86_pmu.enable_all(added);
+	static_call(x86_pmu_enable_all)(added);
 }
 
 static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
@@ -1338,7 +1369,7 @@ static int x86_pmu_add(struct perf_event
 	if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
 		goto done_collect;
 
-	ret = x86_pmu.schedule_events(cpuc, n, assign);
+	ret = static_call(x86_pmu_schedule_events)(cpuc, n, assign);
 	if (ret)
 		goto out;
 	/*
@@ -1356,13 +1387,11 @@ static int x86_pmu_add(struct perf_event
 	cpuc->n_added += n - n0;
 	cpuc->n_txn += n - n0;
 
-	if (x86_pmu.add) {
-		/*
-		 * This is before x86_pmu_enable() will call x86_pmu_start(),
-		 * so we enable LBRs before an event needs them etc..
-		 */
-		x86_pmu.add(event);
-	}
+	/*
+	 * This is before x86_pmu_enable() will call x86_pmu_start(),
+	 * so we enable LBRs before an event needs them etc..
+	 */
+	static_cond_call(x86_pmu_add)(event);
 
 	ret = 0;
 out:
@@ -1390,7 +1419,7 @@ static void x86_pmu_start(struct perf_ev
 	cpuc->events[idx] = event;
 	__set_bit(idx, cpuc->active_mask);
 	__set_bit(idx, cpuc->running);
-	x86_pmu.enable(event);
+	static_call(x86_pmu_enable)(event);
 	perf_event_update_userpage(event);
 }
 
@@ -1460,7 +1489,7 @@ void x86_pmu_stop(struct perf_event *eve
 	struct hw_perf_event *hwc = &event->hw;
 
 	if (test_bit(hwc->idx, cpuc->active_mask)) {
-		x86_pmu.disable(event);
+		static_call(x86_pmu_disable)(event);
 		__clear_bit(hwc->idx, cpuc->active_mask);
 		cpuc->events[hwc->idx] = NULL;
 		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
@@ -1510,8 +1539,7 @@ static void x86_pmu_del(struct perf_even
 	if (i >= cpuc->n_events - cpuc->n_added)
 		--cpuc->n_added;
 
-	if (x86_pmu.put_event_constraints)
-		x86_pmu.put_event_constraints(cpuc, event);
+	static_cond_call(x86_pmu_put_event_constraints)(cpuc, event);
 
 	/* Delete the array entry. */
 	while (++i < cpuc->n_events) {
@@ -1524,13 +1552,12 @@ static void x86_pmu_del(struct perf_even
 	perf_event_update_userpage(event);
 
 do_del:
-	if (x86_pmu.del) {
-		/*
-		 * This is after x86_pmu_stop(); so we disable LBRs after any
-		 * event can need them etc..
-		 */
-		x86_pmu.del(event);
-	}
+
+	/*
+	 * This is after x86_pmu_stop(); so we disable LBRs after any
+	 * event can need them etc..
+	 */
+	static_cond_call(x86_pmu_del)(event);
 }
 
 int x86_pmu_handle_irq(struct pt_regs *regs)
@@ -1608,7 +1635,7 @@ perf_event_nmi_handler(unsigned int cmd,
 		return NMI_DONE;
 
 	start_clock = sched_clock();
-	ret = x86_pmu.handle_irq(regs);
+	ret = static_call(x86_pmu_handle_irq)(regs);
 	finish_clock = sched_clock();
 
 	perf_sample_event_took(finish_clock - start_clock);
@@ -1821,6 +1848,38 @@ ssize_t x86_event_sysfs_show(char *page,
 static struct attribute_group x86_pmu_attr_group;
 static struct attribute_group x86_pmu_caps_group;
 
+static void x86_pmu_static_call_update(void)
+{
+	static_call_update(x86_pmu_handle_irq, x86_pmu.handle_irq);
+	static_call_update(x86_pmu_disable_all, x86_pmu.disable_all);
+	static_call_update(x86_pmu_enable_all, x86_pmu.enable_all);
+	static_call_update(x86_pmu_enable, x86_pmu.enable);
+	static_call_update(x86_pmu_disable, x86_pmu.disable);
+
+	static_call_update(x86_pmu_add, x86_pmu.add);
+	static_call_update(x86_pmu_del, x86_pmu.del);
+	static_call_update(x86_pmu_read, x86_pmu.read);
+
+	static_call_update(x86_pmu_schedule_events, x86_pmu.schedule_events);
+	static_call_update(x86_pmu_get_event_constraints, x86_pmu.get_event_constraints);
+	static_call_update(x86_pmu_put_event_constraints, x86_pmu.put_event_constraints);
+
+	static_call_update(x86_pmu_start_scheduling, x86_pmu.start_scheduling);
+	static_call_update(x86_pmu_commit_scheduling, x86_pmu.commit_scheduling);
+	static_call_update(x86_pmu_stop_scheduling, x86_pmu.stop_scheduling);
+
+	static_call_update(x86_pmu_sched_task, x86_pmu.sched_task);
+	static_call_update(x86_pmu_swap_task_ctx, x86_pmu.swap_task_ctx);
+
+	static_call_update(x86_pmu_drain_pebs, x86_pmu.drain_pebs);
+	static_call_update(x86_pmu_pebs_aliases, x86_pmu.pebs_aliases);
+}
+
+static void _x86_pmu_read(struct perf_event *event)
+{
+	x86_perf_event_update(event);
+}
+
 static int __init init_hw_perf_events(void)
 {
 	struct x86_pmu_quirk *quirk;
@@ -1889,6 +1948,11 @@ static int __init init_hw_perf_events(vo
 	pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_counters_fixed);
 	pr_info("... event mask:             %016Lx\n", x86_pmu.intel_ctrl);
 
+	if (!x86_pmu.read)
+		x86_pmu.read = _x86_pmu_read;
+
+	x86_pmu_static_call_update();
+
 	/*
 	 * Install callbacks. Core will call them for each online
 	 * cpu.
@@ -1925,11 +1989,9 @@ static int __init init_hw_perf_events(vo
 }
 early_initcall(init_hw_perf_events);
 
-static inline void x86_pmu_read(struct perf_event *event)
+static void x86_pmu_read(struct perf_event *event)
 {
-	if (x86_pmu.read)
-		return x86_pmu.read(event);
-	x86_perf_event_update(event);
+	static_call(x86_pmu_read)(event);
 }
 
 /*
@@ -2006,7 +2068,7 @@ static int x86_pmu_commit_txn(struct pmu
 	if (!x86_pmu_initialized())
 		return -EAGAIN;
 
-	ret = x86_pmu.schedule_events(cpuc, n, assign);
+	ret = static_call(x86_pmu_schedule_events)(cpuc, n, assign);
 	if (ret)
 		return ret;
 
@@ -2299,15 +2361,13 @@ static const struct attribute_group *x86
 
 static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
 {
-	if (x86_pmu.sched_task)
-		x86_pmu.sched_task(ctx, sched_in);
+	static_cond_call(x86_pmu_sched_task)(ctx, sched_in);
 }
 
 static void x86_pmu_swap_task_ctx(struct perf_event_context *prev,
 				  struct perf_event_context *next)
 {
-	if (x86_pmu.swap_task_ctx)
-		x86_pmu.swap_task_ctx(prev, next);
+	static_cond_call(x86_pmu_swap_task_ctx)(prev, next);
 }
 
 void perf_check_microcode(void)



  parent reply	other threads:[~2020-05-01 20:32 UTC|newest]

Thread overview: 62+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-05-01 20:28 [PATCH v4 00/18] Add static_call() Peter Zijlstra
2020-05-01 20:28 ` [PATCH v4 01/18] notifier: Fix broken error handling pattern Peter Zijlstra
2020-05-01 23:35   ` Steven Rostedt
2020-05-01 20:28 ` [PATCH v4 02/18] module: Fix up module_notifier return values Peter Zijlstra
2020-05-01 20:28 ` [PATCH v4 03/18] module: Properly propagate MODULE_STATE_COMING failure Peter Zijlstra
2020-05-01 20:28 ` [PATCH v4 04/18] jump_label,module: Fix module lifetime for __jump_label_mod_text_reserved Peter Zijlstra
2020-05-01 20:28 ` [PATCH v4 05/18] compiler.h: Make __ADDRESSABLE() symbol truly unique Peter Zijlstra
2020-05-01 20:28 ` [PATCH v4 06/18] static_call: Add basic static call infrastructure Peter Zijlstra
2020-05-05 21:21   ` Josh Poimboeuf
2020-05-01 20:28 ` [PATCH v4 07/18] static_call: Add inline " Peter Zijlstra
2020-05-05 22:10   ` Josh Poimboeuf
2020-05-06 16:15     ` Peter Zijlstra
2020-05-01 20:28 ` [PATCH v4 08/18] static_call: Avoid kprobes on inline static_call()s Peter Zijlstra
2020-05-01 20:28 ` [PATCH v4 09/18] x86/static_call: Add out-of-line static call implementation Peter Zijlstra
2020-05-06 16:16   ` Peter Zijlstra
2020-05-01 20:28 ` [PATCH v4 10/18] x86/static_call: Add inline static call implementation for x86-64 Peter Zijlstra
2020-05-01 20:29 ` [PATCH v4 11/18] static_call: Simple self-test Peter Zijlstra
2020-05-01 20:29 ` [PATCH v4 12/18] tracepoint: Optimize using static_call() Peter Zijlstra
2020-05-13  8:48   ` [tracepoint] 01edfaf177: WARNING:at_kernel/static_call.c:#__static_call_update kernel test robot
2020-05-13  8:48     ` kernel test robot
2020-05-15 17:13     ` Peter Zijlstra
2020-05-15 17:13       ` Peter Zijlstra
2020-05-01 20:29 ` [PATCH v4 13/18] x86/alternatives: Teach text_poke_bp() to emulate RET Peter Zijlstra
2020-05-01 20:29 ` [PATCH v4 14/18] static_call: Add static_cond_call() Peter Zijlstra
2020-05-02 13:08   ` Rasmus Villemoes
2020-05-03 12:58     ` Peter Zijlstra
2020-05-04  7:20       ` Rasmus Villemoes
2020-05-04 20:14         ` Peter Zijlstra
2020-05-05  7:50           ` Rasmus Villemoes
2020-05-05  9:38             ` Peter Zijlstra
2020-05-05  9:36           ` Peter Zijlstra
2020-05-05 18:13             ` Nick Desaulniers
2020-05-05 18:27               ` Nick Desaulniers
2020-05-05 18:48                 ` Linus Torvalds
2020-05-05 19:00                   ` Mathieu Desnoyers
2020-05-05 19:57                     ` Nick Desaulniers
2020-05-05 20:27                       ` Mathieu Desnoyers
2020-05-06 13:55                         ` Peter Zijlstra
2020-05-06 14:01                           ` Mathieu Desnoyers
2020-05-06 16:18                             ` Peter Zijlstra
2020-05-06 13:51               ` Peter Zijlstra
2020-05-06 16:00                 ` Peter Zijlstra
2020-05-06 17:16                 ` Linus Torvalds
2020-05-06 19:57                   ` Andy Lutomirski
2020-05-06 17:24   ` Josh Poimboeuf
2020-05-06 17:58     ` Peter Zijlstra
2020-05-06 18:09       ` Josh Poimboeuf
2020-05-06 18:16         ` Peter Zijlstra
2020-05-08 15:27     ` Peter Zijlstra
2020-05-08 15:47       ` Josh Poimboeuf
2020-05-08 16:17         ` Peter Zijlstra
2020-05-01 20:29 ` [PATCH v4 15/18] static_call: Handle tail-calls Peter Zijlstra
2020-05-06 18:10   ` Peter Zijlstra
2020-05-01 20:29 ` [PATCH v4 16/18] static_call: Allow early init Peter Zijlstra
2020-05-06 21:15   ` Josh Poimboeuf
2020-05-08 13:31     ` Peter Zijlstra
2020-05-08 14:27       ` Josh Poimboeuf
2020-05-08 15:30         ` Peter Zijlstra
2020-05-01 20:29 ` Peter Zijlstra [this message]
2020-05-01 20:29 ` [PATCH v4 18/18] objtool: Clean up elf_write() condition Peter Zijlstra
2020-05-06 17:32 ` [PATCH v4 00/18] Add static_call() Josh Poimboeuf
2020-05-06 18:05   ` Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200501202944.763303401@infradead.org \
    --to=peterz@infradead.org \
    --cc=ard.biesheuvel@linaro.org \
    --cc=bristot@redhat.com \
    --cc=hpa@zytor.com \
    --cc=jbaron@akamai.com \
    --cc=jpoimboe@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@kernel.org \
    --cc=mathieu.desnoyers@efficios.com \
    --cc=mhiramat@kernel.org \
    --cc=mingo@kernel.org \
    --cc=namit@vmware.com \
    --cc=pbonzini@redhat.com \
    --cc=rostedt@goodmis.org \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.