From: Like Xu <like.xu@linux.intel.com>
To: Peter Zijlstra <peterz@infradead.org>,
Paolo Bonzini <pbonzini@redhat.com>,
kvm@vger.kernel.org
Cc: Sean Christopherson <sean.j.christopherson@intel.com>,
Vitaly Kuznetsov <vkuznets@redhat.com>,
Wanpeng Li <wanpengli@tencent.com>,
Jim Mattson <jmattson@google.com>, Joerg Roedel <joro@8bytes.org>,
Kan Liang <kan.liang@linux.intel.com>,
luwei.kang@intel.com, Thomas Gleixner <tglx@linutronix.de>,
wei.w.wang@intel.com, Tony Luck <tony.luck@intel.com>,
Stephane Eranian <eranian@google.com>,
Mark Gross <mgross@linux.intel.com>,
Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>,
linux-kernel@vger.kernel.org
Subject: [PATCH RFC v2 14/17] KVM: vmx/pmu: Limit pebs_interrupt_threshold in the guest DS area
Date: Mon, 9 Nov 2020 10:12:51 +0800 [thread overview]
Message-ID: <20201109021254.79755-15-like.xu@linux.intel.com> (raw)
In-Reply-To: <20201109021254.79755-1-like.xu@linux.intel.com>
If host counter X is scheduled to back guest PEBS counter Y, the
pebs_interrupt_threshold field in the guest DS area is rewritten before
VM-entry so that a single PEBS record is enough to raise the overflow PMI.
This lets KVM handle the cross-mapping emulation more easily and more
accurately when the PEBS overflow PMI is generated.
In most cases the guest counters are not scheduled in a cross-mapped way,
so there is no need to change the guest DS pebs_interrupt_threshold, and
the applicable_counters fields in the guest PEBS records are naturally
correct. The PEBS facility then writes multiple PEBS records into the guest
DS area without interception, and the performance is good.
AFAIK, we don't expect that changing the pebs_interrupt_threshold value
from the KVM side will break any guest PEBS drivers.
Signed-off-by: Like Xu <like.xu@linux.intel.com>
---
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/kvm/pmu.c | 17 +++-----
arch/x86/kvm/pmu.h | 11 +++++
arch/x86/kvm/vmx/pmu_intel.c | 71 +++++++++++++++++++++++++++++++++
arch/x86/kvm/x86.c | 1 +
5 files changed, 90 insertions(+), 11 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index bffb384485da..77b529b8c16a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -456,6 +456,7 @@ struct kvm_pmu {
u64 pebs_data_cfg_mask;
bool counter_cross_mapped;
+ bool need_rewrite_ds_pebs_interrupt_threshold;
/*
* The gate to release perf_events not marked in
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index f87be3c2140e..7c8e3ca5b7ad 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -471,17 +471,6 @@ void kvm_pmu_init(struct kvm_vcpu *vcpu)
kvm_pmu_refresh(vcpu);
}
-static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
-{
- struct kvm_pmu *pmu = pmc_to_pmu(pmc);
-
- if (pmc_is_fixed(pmc))
- return fixed_ctrl_field(pmu->fixed_ctr_ctrl,
- pmc->idx - INTEL_PMC_IDX_FIXED) & 0x3;
-
- return pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE;
-}
-
/* Release perf_events for vPMCs that have been unused for a full time slice. */
void kvm_pmu_cleanup(struct kvm_vcpu *vcpu)
{
@@ -576,4 +565,10 @@ void kvm_pmu_counter_cross_mapped_check(struct kvm_vcpu *vcpu)
break;
}
}
+
+ if (!pmu->counter_cross_mapped)
+ return;
+
+ if (pmu->need_rewrite_ds_pebs_interrupt_threshold)
+ kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
}
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index b1e52e33f08c..6cdc9fd03195 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -147,6 +147,17 @@ static inline u64 get_sample_period(struct kvm_pmc *pmc, u64 counter_value)
return sample_period;
}
+/*
+ * Report whether @pmc appears enabled according to the guest-programmed
+ * control state cached in the vPMU: for a fixed counter, the low two bits
+ * of its field in fixed_ctr_ctrl; otherwise the ENABLE bit of its eventsel.
+ */
+static inline bool pmc_speculative_in_use(struct kvm_pmc *pmc)
+{
+	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
+	int fixed_idx;
+
+	/* General-purpose counters carry their own enable bit. */
+	if (!pmc_is_fixed(pmc))
+		return pmc->eventsel & ARCH_PERFMON_EVENTSEL_ENABLE;
+
+	fixed_idx = pmc->idx - INTEL_PMC_IDX_FIXED;
+	return fixed_ctrl_field(pmu->fixed_ctr_ctrl, fixed_idx) & 0x3;
+}
+
+
void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel);
void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int fixed_idx);
void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx);
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 2917105e584e..346b1104e674 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -211,6 +211,23 @@ static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr)
return pmc;
}
+/*
+ * Recompute pmu->need_rewrite_ds_pebs_interrupt_threshold: the flag is
+ * cleared, then set again if any counter whose bit is set in
+ * pmu->pebs_enable resolves to a pmc that is speculatively in use.
+ */
+static void intel_pmu_pebs_setup(struct kvm_pmu *pmu)
+{
+	unsigned long *pebs_bitmap = (unsigned long *)&pmu->pebs_enable;
+	int idx;
+
+	pmu->need_rewrite_ds_pebs_interrupt_threshold = false;
+
+	for_each_set_bit(idx, pebs_bitmap, X86_PMC_IDX_MAX) {
+		struct kvm_pmc *pmc = kvm_x86_ops.pmu_ops->pmc_idx_to_pmc(pmu, idx);
+
+		/* Skip bits with no backing pmc or whose counter is not enabled. */
+		if (!pmc || !pmc_speculative_in_use(pmc))
+			continue;
+
+		/* One active PEBS counter is enough; stop scanning. */
+		pmu->need_rewrite_ds_pebs_interrupt_threshold = true;
+		break;
+	}
+}
+
+
static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
@@ -287,6 +304,8 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 0;
if (kvm_valid_perf_global_ctrl(pmu, data)) {
global_ctrl_changed(pmu, data);
+ if (pmu->global_ctrl & pmu->pebs_enable)
+ intel_pmu_pebs_setup(pmu);
return 0;
}
break;
@@ -491,12 +510,64 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu)
pmu->global_ovf_ctrl = 0;
}
+/*
+ * Rewrite pebs_interrupt_threshold in the guest DS area (located at the
+ * guest virtual address pmu->ds_area) to
+ * pebs_buffer_base + sizeof(struct pebs_basic).
+ *
+ * Only the two fields involved are accessed, each through its own GVA->GPA
+ * translation and a stack variable. No bounce buffer for the whole
+ * struct debug_store is allocated, so the function cannot fail with
+ * -ENOMEM, and no guest access spans more than one field.
+ * NOTE(review): assumes pebs_buffer_base and pebs_interrupt_threshold are
+ * u64 fields that do not straddle a guest page - confirm against the
+ * struct debug_store definition.
+ *
+ * Return: 0 on success, -EFAULT if a field address does not translate or
+ * the guest memory read/write fails.
+ */
+static int rewrite_ds_pebs_interrupt_threshold(struct kvm_vcpu *vcpu)
+{
+	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+	u64 pebs_buffer_base, new_threshold;
+	int srcu_idx, ret = -EFAULT;
+	gpa_t gpa;
+
+	/* The translation and guest accesses are done under kvm->srcu. */
+	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+	gpa = kvm_mmu_gva_to_gpa_system(vcpu, pmu->ds_area +
+			offsetof(struct debug_store, pebs_buffer_base), NULL);
+	if (gpa == UNMAPPED_GVA)
+		goto unlock_out;
+
+	if (kvm_read_guest(vcpu->kvm, gpa, &pebs_buffer_base,
+			   sizeof(pebs_buffer_base)))
+		goto unlock_out;
+
+	/* Adding sizeof(struct pebs_basic) offset is enough to generate PMI. */
+	new_threshold = pebs_buffer_base + sizeof(struct pebs_basic);
+
+	gpa = kvm_mmu_gva_to_gpa_system(vcpu, pmu->ds_area +
+			offsetof(struct debug_store, pebs_interrupt_threshold), NULL);
+	if (gpa == UNMAPPED_GVA)
+		goto unlock_out;
+
+	if (kvm_write_guest(vcpu->kvm, gpa, &new_threshold,
+			    sizeof(new_threshold)))
+		goto unlock_out;
+
+	ret = 0;
+
+unlock_out:
+	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
+	return ret;
+}
+
+
void intel_pmu_handle_event(struct kvm_vcpu *vcpu)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+ int ret;
if (!(pmu->global_ctrl & pmu->pebs_enable))
return;
+
+ if (pmu->counter_cross_mapped && pmu->need_rewrite_ds_pebs_interrupt_threshold) {
+ ret = rewrite_ds_pebs_interrupt_threshold(vcpu);
+ pmu->need_rewrite_ds_pebs_interrupt_threshold = false;
+ }
+
+ if (ret == -ENOMEM)
+ pr_debug_ratelimited("%s: Fail to emulate guest PEBS due to OOM.", __func__);
+ else if (ret == -EFAULT)
+ pr_debug_ratelimited("%s: Fail to emulate guest PEBS due to GPA fault.", __func__);
}
struct kvm_pmu_ops intel_pmu_ops = {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 88a544e6379f..8db0811c1dd3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5856,6 +5856,7 @@ gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
{
return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, 0, exception);
}
+EXPORT_SYMBOL_GPL(kvm_mmu_gva_to_gpa_system);
static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
struct kvm_vcpu *vcpu, u32 access,
--
2.21.3
next prev parent reply other threads:[~2020-11-09 2:18 UTC|newest]
Thread overview: 38+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-11-09 2:12 [PATCH RFC v2 00/17] KVM: x86/pmu: Add support to enable Guest PEBS via DS Like Xu
2020-11-09 2:12 ` [PATCH v2 01/17] KVM: x86/pmu: Set MSR_IA32_MISC_ENABLE_EMON bit when vPMU is enabled Like Xu
2020-11-09 2:12 ` [PATCH v2 02/17] KVM: vmx/pmu: Use IA32_PERF_CAPABILITIES to adjust features visibility Like Xu
2020-11-09 2:12 ` [PATCH v2 03/17] KVM: x86/pmu: Introduce the ctrl_mask value for fixed counter Like Xu
2020-11-09 2:12 ` [PATCH v2 04/17] perf: x86/ds: Handle guest PEBS overflow PMI and inject it to guest Like Xu
2020-11-17 14:35 ` Peter Zijlstra
2020-11-18 16:15 ` Like Xu
2020-11-18 18:07 ` Peter Zijlstra
2020-11-19 1:36 ` Xu, Like
2020-11-27 2:14 ` Xu, Like
2020-11-30 10:49 ` Peter Zijlstra
2020-12-01 1:25 ` Xu, Like
2020-11-09 2:12 ` [PATCH v2 05/17] KVM: x86/pmu: Reprogram guest PEBS event to emulate guest PEBS counter Like Xu
2020-11-17 14:41 ` Peter Zijlstra
2020-11-18 16:18 ` Like Xu
2020-11-09 2:12 ` [PATCH v2 06/17] KVM: x86/pmu: Add IA32_PEBS_ENABLE MSR emulation for extended PEBS Like Xu
2020-11-09 2:12 ` [PATCH v2 07/17] KVM: x86/pmu: Add IA32_DS_AREA MSR emulation to manage guest DS buffer Like Xu
2020-11-09 2:12 ` [PATCH v2 08/17] KVM: x86/pmu: Add PEBS_DATA_CFG MSR emulation to support adaptive PEBS Like Xu
2020-11-09 2:12 ` [PATCH v2 09/17] KVM: x86: Set PEBS_UNAVAIL in IA32_MISC_ENABLE when PEBS is enabled Like Xu
2020-11-09 2:12 ` [PATCH v2 10/17] KVM: x86/pmu: Expose CPUIDs feature bits PDCM, DS, DTES64 Like Xu
2020-11-09 2:12 ` [PATCH v2 11/17] KVM: x86/pmu: Adjust precise_ip to emulate Ice Lake guest PDIR counter Like Xu
2020-11-09 2:12 ` [PATCH v2 12/17] KVM: x86/pmu: Disable guest PEBS when counters are cross-mapped Like Xu
2020-11-09 2:12 ` [PATCH RFC v2 13/17] KVM: x86/pmu: Add hook to emulate pebs for cross-mapped counters Like Xu
2020-11-19 14:25 ` kernel test robot
2020-11-09 2:12 ` Like Xu [this message]
2020-11-09 2:12 ` [PATCH RFC v2 15/17] KVM: vmx/pmu: Rewrite applicable_counters field in the guest PEBS record Like Xu
2020-11-09 2:12 ` [PATCH RFC v2 16/17] KVM: x86/pmu: Save guest pebs reset value when a pebs counter is configured Like Xu
2020-11-09 2:12 ` [PATCH RFC v2 17/17] KVM: x86/pmu: Adjust guest DS pebs reset counter values for mapped counter Like Xu
2020-11-10 15:12 ` [PATCH RFC v2 00/17] KVM: x86/pmu: Add support to enable Guest PEBS via DS Peter Zijlstra
2020-11-10 15:37 ` [PATCH] perf/intel: Remove Perfmon-v4 counter_freezing support Peter Zijlstra
2020-11-10 20:52 ` Stephane Eranian
2020-11-11 2:42 ` Xu, Like
2021-01-26 9:51 ` Paolo Bonzini
2021-01-26 10:36 ` Peter Zijlstra
2021-01-26 11:35 ` Xu, Like
2021-01-26 11:59 ` Paolo Bonzini
2020-11-11 8:38 ` Peter Zijlstra
2020-11-16 3:22 ` Like Xu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20201109021254.79755-15-like.xu@linux.intel.com \
--to=like.xu@linux.intel.com \
--cc=eranian@google.com \
--cc=jmattson@google.com \
--cc=joro@8bytes.org \
--cc=kan.liang@linux.intel.com \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=luwei.kang@intel.com \
--cc=mgross@linux.intel.com \
--cc=pbonzini@redhat.com \
--cc=peterz@infradead.org \
--cc=sean.j.christopherson@intel.com \
--cc=srinivas.pandruvada@linux.intel.com \
--cc=tglx@linutronix.de \
--cc=tony.luck@intel.com \
--cc=vkuznets@redhat.com \
--cc=wanpengli@tencent.com \
--cc=wei.w.wang@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.