linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Like Xu <like.xu.linux@gmail.com>
To: Paolo Bonzini <pbonzini@redhat.com>,
	Sean Christopherson <seanjc@google.com>
Cc: Jim Mattson <jmattson@google.com>,
	linux-kernel@vger.kernel.org, kvm@vger.kernel.org,
	Wanpeng Li <wanpengli@tencent.com>
Subject: [PATCH v2 7/7] KVM: x86/pmu: Defer counter emulated overflow via pmc->stale_counter
Date: Thu, 21 Jul 2022 18:35:48 +0800	[thread overview]
Message-ID: <20220721103549.49543-8-likexu@tencent.com> (raw)
In-Reply-To: <20220721103549.49543-1-likexu@tencent.com>

From: Like Xu <likexu@tencent.com>

There are contextual restrictions on the functions that can be called
in the *_exit_handlers_fastpath path, for example calling
pmc_reprogram_counter() brings up a host complaint like:

 [*] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:580
 [*] in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 2981888, name: CPU 15/KVM
 [*] preempt_count: 1, expected: 0
 [*] RCU nest depth: 0, expected: 0
 [*] INFO: lockdep is turned off.
 [*] irq event stamp: 0
 [*] hardirqs last  enabled at (0): [<0000000000000000>] 0x0
 [*] hardirqs last disabled at (0): [<ffffffff8121222a>] copy_process+0x146a/0x62d0
 [*] softirqs last  enabled at (0): [<ffffffff81212269>] copy_process+0x14a9/0x62d0
 [*] softirqs last disabled at (0): [<0000000000000000>] 0x0
 [*] Preemption disabled at:
 [*] [<ffffffffc2063fc1>] vcpu_enter_guest+0x1001/0x3dc0 [kvm]
 [*] CPU: 17 PID: 2981888 Comm: CPU 15/KVM Kdump: 5.19.0-rc1-g239111db364c-dirty #2
 [*] Call Trace:
 [*]  <TASK>
 [*]  dump_stack_lvl+0x6c/0x9b
 [*]  __might_resched.cold+0x22e/0x297
 [*]  __mutex_lock+0xc0/0x23b0
 [*]  perf_event_ctx_lock_nested+0x18f/0x340
 [*]  perf_event_pause+0x1a/0x110
 [*]  reprogram_counter+0x2af/0x1490 [kvm]
 [*]  kvm_pmu_trigger_event+0x429/0x950 [kvm]
 [*]  kvm_skip_emulated_instruction+0x48/0x90 [kvm]
 [*]  handle_fastpath_set_msr_irqoff+0x349/0x3b0 [kvm]
 [*]  vmx_vcpu_run+0x268e/0x3b80 [kvm_intel]
 [*]  vcpu_enter_guest+0x1d22/0x3dc0 [kvm]

A new stale_counter field is introduced to keep this part of the semantics
invariant. It records the current counter value and it's used to determine
whether to inject an emulated overflow interrupt in the later
kvm_pmu_handle_event(), given that the internal count value from its
perf_event has not been added to pmc->counter in time, or the guest
will update the value of a running counter directly.

Opportunistically shrink sizeof(struct kvm_pmc) a bit.

Suggested-by: Wanpeng Li <wanpengli@tencent.com>
Fixes: 9cd803d496e7 ("KVM: x86: Update vPMCs when retiring instructions")
Signed-off-by: Like Xu <likexu@tencent.com>
---
 arch/x86/include/asm/kvm_host.h |  5 +++--
 arch/x86/kvm/pmu.c              | 15 ++++++++-------
 arch/x86/kvm/svm/pmu.c          |  2 +-
 arch/x86/kvm/vmx/pmu_intel.c    |  4 ++--
 4 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 0295b763bd14..92f397361e3f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -488,7 +488,10 @@ enum pmc_type {
 struct kvm_pmc {
 	enum pmc_type type;
 	u8 idx;
+	bool is_paused;
+	bool intr;
 	u64 counter;
+	u64 stale_counter;
 	u64 eventsel;
 	struct perf_event *perf_event;
 	struct kvm_vcpu *vcpu;
@@ -498,8 +501,6 @@ struct kvm_pmc {
 	 * ctrl value for fixed counters.
 	 */
 	u64 current_config;
-	bool is_paused;
-	bool intr;
 };
 
 #define KVM_PMC_MAX_FIXED	3
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 6940cbeee54d..45d062cb1dd5 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -350,6 +350,12 @@ void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
 		}
 
 		__reprogram_counter(pmc);
+
+		if (pmc->stale_counter) {
+			if (pmc->counter < pmc->stale_counter)
+				__kvm_perf_overflow(pmc, false);
+			pmc->stale_counter = 0;
+		}
 	}
 
 	/*
@@ -522,14 +528,9 @@ void kvm_pmu_destroy(struct kvm_vcpu *vcpu)
 
 static void kvm_pmu_incr_counter(struct kvm_pmc *pmc)
 {
-	u64 prev_count;
-
-	prev_count = pmc->counter;
+	pmc->stale_counter = pmc->counter;
 	pmc->counter = (pmc->counter + 1) & pmc_bitmask(pmc);
-
-	__reprogram_counter(pmc);
-	if (pmc->counter < prev_count)
-		__kvm_perf_overflow(pmc, false);
+	reprogram_counter(pmc);
 }
 
 static inline bool eventsel_match_perf_hw_id(struct kvm_pmc *pmc,
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index f24613a108c5..e9c66dd659a6 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -290,7 +290,7 @@ static void amd_pmu_reset(struct kvm_vcpu *vcpu)
 		struct kvm_pmc *pmc = &pmu->gp_counters[i];
 
 		pmc_stop_counter(pmc);
-		pmc->counter = pmc->eventsel = 0;
+		pmc->counter = pmc->stale_counter = pmc->eventsel = 0;
 	}
 }
 
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 97236b6cbe04..e3d9f2876081 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -654,14 +654,14 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu)
 		pmc = &pmu->gp_counters[i];
 
 		pmc_stop_counter(pmc);
-		pmc->counter = pmc->eventsel = 0;
+		pmc->counter = pmc->stale_counter = pmc->eventsel = 0;
 	}
 
 	for (i = 0; i < KVM_PMC_MAX_FIXED; i++) {
 		pmc = &pmu->fixed_counters[i];
 
 		pmc_stop_counter(pmc);
-		pmc->counter = 0;
+		pmc->counter = pmc->stale_counter = 0;
 	}
 
 	pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status = 0;
-- 
2.37.1


  parent reply	other threads:[~2022-07-21 10:36 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-07-21 10:35 [PATCH v2 0/7] KVM: x86/pmu: Fix some corner cases including Intel PEBS Like Xu
2022-07-21 10:35 ` [PATCH v2 1/7] perf/x86/core: Update x86_pmu.pebs_capable for ICELAKE_{X,D} Like Xu
2022-08-12  7:52   ` Paolo Bonzini
2022-08-15  9:31     ` Peter Zijlstra
2022-08-15  9:43       ` Like Xu
2022-08-15 11:51         ` Peter Zijlstra
2022-08-15 12:48           ` Like Xu
2022-08-15 13:14             ` Peter Zijlstra
2022-08-15 13:06           ` Liang, Kan
2022-08-15 14:30             ` Peter Zijlstra
2022-08-16 11:57               ` Like Xu
2022-07-21 10:35 ` [PATCH v2 2/7] perf/x86/core: Completely disable guest PEBS via guest's global_ctrl Like Xu
2022-07-21 10:35 ` [PATCH v2 3/7] KVM: x86/pmu: Avoid setting BIT_ULL(-1) to pmu->host_cross_mapped_mask Like Xu
2022-07-21 10:35 ` [PATCH v2 4/7] KVM: x86/pmu: Don't generate PEBS records for emulated instructions Like Xu
2022-07-21 10:35 ` [PATCH v2 5/7] KVM: x86/pmu: Avoid using PEBS perf_events for normal counters Like Xu
2022-07-21 10:35 ` [PATCH v2 6/7] KVM: x86/pmu: Defer reprogram_counter() to kvm_pmu_handle_event() Like Xu
2022-07-21 10:35 ` Like Xu [this message]
2022-07-21 10:35 ` [kvm-unit-tests PATCH] x86: Add tests for Guest Processor Event Based Sampling (PEBS) Like Xu
2022-07-27 22:42   ` Sean Christopherson
2022-07-28 11:31     ` Like Xu
2022-08-02 11:07 ` [PATCH v2 0/7] KVM: x86/pmu: Fix some corner cases including Intel PEBS Like Xu
2022-08-11  7:10   ` Wanpeng Li

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220721103549.49543-8-likexu@tencent.com \
    --to=like.xu.linux@gmail.com \
    --cc=jmattson@google.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=seanjc@google.com \
    --cc=wanpengli@tencent.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).