virtualization.lists.linux-foundation.org archive mirror
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: mingo@kernel.org
Cc: mark.rutland@arm.com, juri.lelli@redhat.com,
	daniel.lezcano@linaro.org, wanpengli@tencent.com,
	kvm@vger.kernel.org, rafael@kernel.org, peterz@infradead.org,
	dave.hansen@linux.intel.com,
	virtualization@lists.linux-foundation.org, bsegall@google.com,
	amakhalov@vmware.com, will@kernel.org, tglx@linutronix.de,
	vschneid@redhat.com, hpa@zytor.com, x86@kernel.org,
	pv-drivers@vmware.com, mgorman@suse.de,
	linux-trace-kernel@vger.kernel.org, linux-pm@vger.kernel.org,
	boqun.feng@gmail.com, rostedt@goodmis.org, bp@alien8.de,
	vincent.guittot@linaro.org, boris.ostrovsky@oracle.com,
	dietmar.eggemann@arm.com, jgross@suse.com, seanjc@google.com,
	linux-kernel@vger.kernel.org, mhiramat@kernel.org,
	pbonzini@redhat.com, bristot@redhat.com
Subject: [PATCH 4/6] x86: Mark sched_clock() noinstr
Date: Mon, 23 Jan 2023 21:50:13 +0100	[thread overview]
Message-ID: <20230123205515.118011340@infradead.org> (raw)
In-Reply-To: 20230123205009.790550642@infradead.org

In order to use sched_clock() from noinstr code, mark it and all its
implementations noinstr.

The whole pvclock thing (used by KVM/Xen) is a bit of a pain,
since it calls out to watchdogs; create a
pvclock_clocksource_read_nowd() variant that doesn't do that and can
be noinstr.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/include/asm/kvmclock.h |    2 +-
 arch/x86/include/asm/paravirt.h |    2 +-
 arch/x86/include/asm/pvclock.h  |    3 ++-
 arch/x86/kernel/cpu/vmware.c    |    2 +-
 arch/x86/kernel/kvmclock.c      |    6 +++---
 arch/x86/kernel/pvclock.c       |   19 +++++++++++++++----
 arch/x86/kernel/tsc.c           |    7 +++----
 arch/x86/xen/time.c             |   12 ++++++++++--
 include/linux/math64.h          |    4 ++--
 9 files changed, 38 insertions(+), 19 deletions(-)

--- a/arch/x86/include/asm/kvmclock.h
+++ b/arch/x86/include/asm/kvmclock.h
@@ -8,7 +8,7 @@ extern struct clocksource kvm_clock;
 
 DECLARE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu);
 
-static inline struct pvclock_vcpu_time_info *this_cpu_pvti(void)
+static __always_inline struct pvclock_vcpu_time_info *this_cpu_pvti(void)
 {
 	return &this_cpu_read(hv_clock_per_cpu)->pvti;
 }
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -26,7 +26,7 @@ DECLARE_STATIC_CALL(pv_sched_clock, dumm
 
 void paravirt_set_sched_clock(u64 (*func)(void));
 
-static inline u64 paravirt_sched_clock(void)
+static __always_inline u64 paravirt_sched_clock(void)
 {
 	return static_call(pv_sched_clock)();
 }
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -7,6 +7,7 @@
 
 /* some helper functions for xen and kvm pv clock sources */
 u64 pvclock_clocksource_read(struct pvclock_vcpu_time_info *src);
+u64 pvclock_clocksource_read_nowd(struct pvclock_vcpu_time_info *src);
 u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src);
 void pvclock_set_flags(u8 flags);
 unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src);
@@ -39,7 +40,7 @@ bool pvclock_read_retry(const struct pvc
  * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
  * yielding a 64-bit result.
  */
-static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift)
+static __always_inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift)
 {
 	u64 product;
 #ifdef __i386__
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -143,7 +143,7 @@ static __init int parse_no_stealacc(char
 }
 early_param("no-steal-acc", parse_no_stealacc);
 
-static unsigned long long notrace vmware_sched_clock(void)
+static noinstr u64 vmware_sched_clock(void)
 {
 	unsigned long long ns;
 
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -71,12 +71,12 @@ static int kvm_set_wallclock(const struc
 	return -ENODEV;
 }
 
-static u64 kvm_clock_read(void)
+static noinstr u64 kvm_clock_read(void)
 {
 	u64 ret;
 
 	preempt_disable_notrace();
-	ret = pvclock_clocksource_read(this_cpu_pvti());
+	ret = pvclock_clocksource_read_nowd(this_cpu_pvti());
 	preempt_enable_notrace();
 	return ret;
 }
@@ -86,7 +86,7 @@ static u64 kvm_clock_get_cycles(struct c
 	return kvm_clock_read();
 }
 
-static u64 kvm_sched_clock_read(void)
+static noinstr u64 kvm_sched_clock_read(void)
 {
 	return kvm_clock_read() - kvm_sched_clock_offset;
 }
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -64,7 +64,8 @@ u8 pvclock_read_flags(struct pvclock_vcp
 	return flags & valid_flags;
 }
 
-u64 pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
+static __always_inline
+u64 __pvclock_clocksource_read(struct pvclock_vcpu_time_info *src, bool dowd)
 {
 	unsigned version;
 	u64 ret;
@@ -77,7 +78,7 @@ u64 pvclock_clocksource_read(struct pvcl
 		flags = src->flags;
 	} while (pvclock_read_retry(src, version));
 
-	if (unlikely((flags & PVCLOCK_GUEST_STOPPED) != 0)) {
+	if (dowd && unlikely((flags & PVCLOCK_GUEST_STOPPED) != 0)) {
 		src->flags &= ~PVCLOCK_GUEST_STOPPED;
 		pvclock_touch_watchdogs();
 	}
@@ -100,15 +101,25 @@ u64 pvclock_clocksource_read(struct pvcl
 	 * updating at the same time, and one of them could be slightly behind,
 	 * making the assumption that last_value always go forward fail to hold.
 	 */
-	last = atomic64_read(&last_value);
+	last = arch_atomic64_read(&last_value);
 	do {
 		if (ret <= last)
 			return last;
-	} while (!atomic64_try_cmpxchg(&last_value, &last, ret));
+	} while (!arch_atomic64_try_cmpxchg(&last_value, &last, ret));
 
 	return ret;
 }
 
+u64 pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
+{
+	return __pvclock_clocksource_read(src, true);
+}
+
+noinstr u64 pvclock_clocksource_read_nowd(struct pvclock_vcpu_time_info *src)
+{
+	return __pvclock_clocksource_read(src, false);
+}
+
 void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
 			    struct pvclock_vcpu_time_info *vcpu_time,
 			    struct timespec64 *ts)
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -215,7 +215,7 @@ static void __init cyc2ns_init_secondary
 /*
  * Scheduler clock - returns current time in nanosec units.
  */
-u64 native_sched_clock(void)
+noinstr u64 native_sched_clock(void)
 {
 	if (static_branch_likely(&__use_tsc)) {
 		u64 tsc_now = rdtsc();
@@ -248,7 +248,7 @@ u64 native_sched_clock_from_tsc(u64 tsc)
 /* We need to define a real function for sched_clock, to override the
    weak default version */
 #ifdef CONFIG_PARAVIRT
-unsigned long long sched_clock(void)
+noinstr u64 sched_clock(void)
 {
 	return paravirt_sched_clock();
 }
@@ -258,8 +258,7 @@ bool using_native_sched_clock(void)
 	return static_call_query(pv_sched_clock) == native_sched_clock;
 }
 #else
-unsigned long long
-sched_clock(void) __attribute__((alias("native_sched_clock")));
+u64 sched_clock(void) __attribute__((alias("native_sched_clock")));
 
 bool using_native_sched_clock(void) { return true; }
 #endif
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -60,9 +60,17 @@ static u64 xen_clocksource_get_cycles(st
 	return xen_clocksource_read();
 }
 
-static u64 xen_sched_clock(void)
+static noinstr u64 xen_sched_clock(void)
 {
-	return xen_clocksource_read() - xen_sched_clock_offset;
+        struct pvclock_vcpu_time_info *src;
+	u64 ret;
+
+	preempt_disable_notrace();
+	src = &__this_cpu_read(xen_vcpu)->time;
+	ret = pvclock_clocksource_read_nowd(src);
+	ret -= xen_sched_clock_offset;
+	preempt_enable_notrace();
+	return ret;
 }
 
 static void xen_read_wallclock(struct timespec64 *ts)
--- a/include/linux/math64.h
+++ b/include/linux/math64.h
@@ -161,7 +161,7 @@ static inline u64 mul_u32_u32(u32 a, u32
 #if defined(CONFIG_ARCH_SUPPORTS_INT128) && defined(__SIZEOF_INT128__)
 
 #ifndef mul_u64_u32_shr
-static inline u64 mul_u64_u32_shr(u64 a, u32 mul, unsigned int shift)
+static __always_inline u64 mul_u64_u32_shr(u64 a, u32 mul, unsigned int shift)
 {
 	return (u64)(((unsigned __int128)a * mul) >> shift);
 }
@@ -177,7 +177,7 @@ static inline u64 mul_u64_u64_shr(u64 a,
 #else
 
 #ifndef mul_u64_u32_shr
-static inline u64 mul_u64_u32_shr(u64 a, u32 mul, unsigned int shift)
+static __always_inline u64 mul_u64_u32_shr(u64 a, u32 mul, unsigned int shift)
 {
 	u32 ah, al;
 	u64 ret;


_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

  parent reply	other threads:[~2023-01-23 20:58 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-01-23 20:50 [PATCH 0/6] A few cpuidle vs rcu fixes Peter Zijlstra
2023-01-23 20:50 ` [PATCH 1/6] x86: Always inline arch_atomic64 Peter Zijlstra
2023-01-23 20:50 ` [PATCH 2/6] x86/pvclock: improve atomic update of last_value in pvclock_clocksource_read Peter Zijlstra
2023-01-23 20:50 ` [PATCH 3/6] ftrace/x86: Warn and ignore graph tracing when RCU is disabled Peter Zijlstra
2023-01-23 21:53   ` Steven Rostedt
2023-01-23 22:07     ` Steven Rostedt
2023-01-24 14:44       ` Peter Zijlstra
2023-01-24 17:12         ` Mark Rutland
2023-01-25  9:37           ` Peter Zijlstra
2023-01-25 10:47           ` Peter Zijlstra
2023-01-25 11:32             ` Mark Rutland
     [not found]             ` <20230125184658.GL2948950@paulmck-ThinkPad-P17-Gen-1>
2023-01-26  9:28               ` Peter Zijlstra
2023-01-23 20:50 ` Peter Zijlstra [this message]
2023-01-23 20:50 ` [PATCH 5/6] sched/clock: Make local_clock() noinstr Peter Zijlstra
2023-01-23 20:50 ` [PATCH 6/6] cpuidle: Fix poll_idle() noinstr annotation Peter Zijlstra
2023-01-24 14:24   ` Rafael J. Wysocki
2023-01-24 16:34 ` [PATCH 0/6] A few cpuidle vs rcu fixes Mark Rutland
2023-01-24 17:30   ` Mark Rutland
2023-01-24 18:39     ` Mark Rutland
2023-01-25  9:35       ` Peter Zijlstra
2023-01-25  9:40         ` Peter Zijlstra
2023-01-25 10:23           ` Mark Rutland
2023-01-25  9:31   ` Peter Zijlstra
2023-01-25  9:36     ` Mark Rutland
2023-01-25 15:20 ` Mark Rutland

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230123205515.118011340@infradead.org \
    --to=peterz@infradead.org \
    --cc=amakhalov@vmware.com \
    --cc=boqun.feng@gmail.com \
    --cc=boris.ostrovsky@oracle.com \
    --cc=bp@alien8.de \
    --cc=bristot@redhat.com \
    --cc=bsegall@google.com \
    --cc=daniel.lezcano@linaro.org \
    --cc=dave.hansen@linux.intel.com \
    --cc=dietmar.eggemann@arm.com \
    --cc=hpa@zytor.com \
    --cc=jgross@suse.com \
    --cc=juri.lelli@redhat.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-pm@vger.kernel.org \
    --cc=linux-trace-kernel@vger.kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=mgorman@suse.de \
    --cc=mhiramat@kernel.org \
    --cc=mingo@kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=pv-drivers@vmware.com \
    --cc=rafael@kernel.org \
    --cc=rostedt@goodmis.org \
    --cc=seanjc@google.com \
    --cc=tglx@linutronix.de \
    --cc=vincent.guittot@linaro.org \
    --cc=virtualization@lists.linux-foundation.org \
    --cc=vschneid@redhat.com \
    --cc=wanpengli@tencent.com \
    --cc=will@kernel.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).