From mboxrd@z Thu Jan 1 00:00:00 1970 From: Wei Huang Subject: Re: [PATCH V1 3/4] KVM: x86/vPMU: Implement AMD PMU support for KVM Date: Tue, 04 Nov 2014 12:20:24 -0600 Message-ID: <545918E8.20400@redhat.com> References: <1414771534-29411-1-git-send-email-wei@redhat.com> <1414771534-29411-4-git-send-email-wei@redhat.com> <20141103181719.GA2909@potion.brq.redhat.com> Mime-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: QUOTED-PRINTABLE Cc: kvm@vger.kernel.org, pbonzini@redhat.com, gleb@kernel.org To: =?UTF-8?B?UmFkaW0gS3LEjW3DocWZ?= Return-path: Received: from mx1.redhat.com ([209.132.183.28]:32953 "EHLO mx1.redhat.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751017AbaKDSUa (ORCPT ); Tue, 4 Nov 2014 13:20:30 -0500 In-Reply-To: <20141103181719.GA2909@potion.brq.redhat.com> Sender: kvm-owner@vger.kernel.org List-ID: On 11/03/2014 12:17 PM, Radim Kr=C4=8Dm=C3=A1=C5=99 wrote: > 2014-10-31 12:05-0400, Wei Huang: >> This patch implemented vPMU for AMD platform. The design piggybacks >> on the existing Intel structs (kvm_pmu and kvm_pmc), but only uses >> the parts of generic counters. The kvm_pmu_ops interface is also >> initialized in this patch. >> >> Signed-off-by: Wei Huang >> --- >> arch/x86/kvm/pmu_amd.c | 332 ++++++++++++++++++++++++++++++++++++++= ++++++++--- >> 1 file changed, 318 insertions(+), 14 deletions(-) >> >> diff --git a/arch/x86/kvm/pmu_amd.c b/arch/x86/kvm/pmu_amd.c >> index 6d6e1c3..19cfe26 100644 >> --- a/arch/x86/kvm/pmu_amd.c >> +++ b/arch/x86/kvm/pmu_amd.c >> @@ -16,58 +16,362 @@ >> #include >> #include >> #include >> -#include >> #include "x86.h" >> #include "cpuid.h" >> #include "lapic.h" >> =20 >> -void amd_pmu_cpuid_update(struct kvm_vcpu *vcpu) >> +/* duplicated from amd_perfmon_event_map, K7 and above should work = */ >> +static struct kvm_event_hw_type_mapping amd_event_mapping[] =3D { >> + [0] =3D { 0x76, 0x00, PERF_COUNT_HW_CPU_CYCLES }, >> + [1] =3D { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS }, >> + [2] =3D { 0x80, 0x00, PERF_COUNT_HW_CACHE_REFERENCES }, >> + [3] =3D { 0x81, 0x00, PERF_COUNT_HW_CACHE_MISSES }, >=20 >> + [4] =3D { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, >> + [5] =3D { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES }, >=20 > amd_perfmon_event_map has 0xc2 and 0xc3. Sorry, will fix. >=20 >> + [6] =3D { 0xd0, 0x00, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, >> + [7] =3D { 0xd1, 0x00, PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, >> +}; >> + >> +static inline bool pmc_is_gp(struct kvm_pmc *pmc) >> { >> + return pmc->type =3D=3D KVM_PMC_GP; >> } >=20 > What is the benefit of having the same implementation in both files? >=20 No benefits. In fact I can remove it from AMD code because it is always true for AMD. Will fix. > I'd prefer to have it in pmu.h and I'll try to note all functions tha= t > look identical. As always, you can ignore anything in parentheses, > there are only few real issues with this patch. >=20 >> =20 >> -int amd_pmu_check_pmc(struct kvm_vcpu *vcpu, unsigned pmc) >> +static inline struct kvm_pmc *get_gp_pmc(struct kvm_pmu *pmu, u32 m= sr, >> + u32 base) >=20 > (pmu.h) I will try to merge common ones into pmu.h in next re-spin. This applie= s to the rest (pmu.h and pmu.c) in your replies. >=20 >> { >> - return 1; >> + if (msr >=3D base && msr < base + pmu->nr_arch_gp_counters) >> + return &pmu->gp_counters[msr - base]; >> + >> + return NULL; >> } >> =20 >> -int amd_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data= ) >> +static inline u64 pmc_bitmask(struct kvm_pmc *pmc) >=20 > (pmu.h) >=20 >> { >> - return 1; >> + struct kvm_pmu *pmu =3D &pmc->vcpu->arch.pmu; >> + >> + return pmu->counter_bitmask[pmc->type]; >> } >> =20 >> -int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_inf= o) >> +static struct kvm_pmc *global_idx_to_pmc(struct kvm_pmu *pmu, int i= dx) >> { >> - return 1; >> + return get_gp_pmc(pmu, MSR_K7_EVNTSEL0 + idx, MSR_K7_EVNTSEL0); >> } >> =20 >> -int amd_pmu_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data) >> +void amd_deliver_pmi(struct kvm_vcpu *vcpu) >=20 > static. >=20 > (pmu.c, rename to deliver_pmi and reuse for intel.) >=20 Same here. I will try to merge them as much as I can. >> { >> - return 1; >> + if (vcpu->arch.apic) >> + kvm_apic_local_deliver(vcpu->arch.apic, APIC_LVTPC); >> } >> =20 >> -bool amd_is_pmu_msr(struct kvm_vcpu *vcpu, u32 msr) >> +static void trigger_pmi(struct irq_work *irq_work) >=20 > (pmu.c) >=20 >> { >> - return 0; >> + struct kvm_pmu *pmu =3D container_of(irq_work, struct kvm_pmu, >> + irq_work); >> + struct kvm_vcpu *vcpu =3D container_of(pmu, struct kvm_vcpu, >> + arch.pmu); >> + >> + amd_deliver_pmi(vcpu); >> } >> =20 >> -void amd_deliver_pmi(struct kvm_vcpu *vcpu) >> +static u64 read_pmc(struct kvm_pmc *pmc) >=20 > (pmu.c) >=20 >> +{ >> + u64 counter, enabled, running, result, delta, prev; >> + >> + counter =3D pmc->counter; >> + prev =3D pmc->counter; >> + >> + if (pmc->perf_event) { >> + delta =3D perf_event_read_value(pmc->perf_event, >> + &enabled, &running); >> + counter +=3D delta; >> + } >> + >> + result =3D counter & pmc_bitmask(pmc); >> + >> + return result; >> +} >=20 > No. The one intel uses is better. > (This one looks like a relic from experimenting.) Yes, I inserted some debug message using "result" variable. Will fix. >=20 >> + >> +static void stop_counter(struct kvm_pmc *pmc) >=20 > (pmu.c) >=20 >> +{ >> + if (pmc->perf_event) { >> + pmc->counter =3D read_pmc(pmc); >> + perf_event_release_kernel(pmc->perf_event); >> + pmc->perf_event =3D NULL; >> + } >> +} >> + >> +static unsigned find_hw_type_event(struct kvm_pmu *pmu, u8 event_se= lect, >> + u8 unit_mask) >> +{ >=20 > (Intel calls this find_arch_event.) I don't quite agree with existing naming, find_arch_event, in pmu_intel.c. This function isn't directly related to arch event. It is for mapping from perf_counters to linux_perf_event. But I only changed the version pmu_amd.c though. >=20 >> + int i; >> + >> + for (i =3D 0; i < ARRAY_SIZE(amd_event_mapping); i++) >> + if (amd_event_mapping[i].eventsel =3D=3D event_select >> + && amd_event_mapping[i].unit_mask =3D=3D unit_mask) >=20 > (And it could be less extensible, but shorter: > return amd_event_mapping[i].event_type; > } > return PERF_COUNT_HW_MAX; > } > ) Will fix >=20 >> + break; >> + >> + if (i =3D=3D ARRAY_SIZE(amd_event_mapping)) >> + return PERF_COUNT_HW_MAX; >> + >> + return amd_event_mapping[i].event_type; >> +} >> + >> +static void kvm_perf_overflow_intr(struct perf_event *perf_event, >> + struct perf_sample_data *data, struct pt_regs *regs) >> +{ >=20 > (pmu.c) >=20 >> + struct kvm_pmc *pmc =3D perf_event->overflow_handler_context; >> + struct kvm_pmu *pmu =3D &pmc->vcpu->arch.pmu; >> + >> + if (!test_and_set_bit(pmc->idx, >> + (unsigned long *)&pmu->reprogram_pmi)) { >=20 > (The useless set_bit for intel is not that bad to keep them separate; > please mind my radicality when it comes to abstracting.) >=20 >> + kvm_make_request(KVM_REQ_PMU, pmc->vcpu); >> + >> + if (!kvm_is_in_guest()) >> + irq_work_queue(&pmc->vcpu->arch.pmu.irq_work); >> + else >> + kvm_make_request(KVM_REQ_PMI, pmc->vcpu); >> + } >> +} >> + >> +static void kvm_perf_overflow(struct perf_event *perf_event, >> + struct perf_sample_data *data, >> + struct pt_regs *regs) >> +{ >=20 > (pmu.c, ditto.) >=20 >> + struct kvm_pmc *pmc =3D perf_event->overflow_handler_context; >> + struct kvm_pmu *pmu =3D &pmc->vcpu->arch.pmu; >> + >> + if (!test_and_set_bit(pmc->idx, >> + (unsigned long *)&pmu->reprogram_pmi)) { >> + kvm_make_request(KVM_REQ_PMU, pmc->vcpu); >> + } >> +} >> + >> +static void reprogram_counter(struct kvm_pmc *pmc, u32 type, >> + unsigned config, bool exclude_user, bool exclude_kernel, >> + bool intr) >=20 > (Could be merged in my world, I'll comment on previous patch.) Will try. >=20 >> +{ >> + struct perf_event *event; >> + struct perf_event_attr attr =3D { >> + .type =3D type, >> + .size =3D sizeof(attr), >> + .pinned =3D true, >> + .exclude_idle =3D true, >> + .exclude_host =3D 1, >> + .exclude_user =3D exclude_user, >> + .exclude_kernel =3D exclude_kernel, >> + .config =3D config, >> + }; >> + >> + attr.sample_period =3D (-pmc->counter) & pmc_bitmask(pmc); >> + >> + event =3D perf_event_create_kernel_counter(&attr, -1, current, >> + intr?kvm_perf_overflow_intr : >> + kvm_perf_overflow, pmc); >> + if (IS_ERR(event)) { >> + printk_once("kvm: pmu event creation failed %ld\n", >> + PTR_ERR(event)); >> + return; >> + } >> + >> + pmc->perf_event =3D event; >> + clear_bit(pmc->idx, (unsigned long *)&pmc->vcpu->arch.pmu.reprogra= m_pmi); >> +} >> + >> + >> +static void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) >> +{ >=20 > (Ditto.) >=20 >> + unsigned config, type =3D PERF_TYPE_RAW; >> + u8 event_select, unit_mask; >> + >> + if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL) >> + printk_once("kvm pmu: pin control bit is ignored\n"); >> + >> + pmc->eventsel =3D eventsel; >> + >> + stop_counter(pmc); >> + >> + if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE)) >> + return; >> + >> + event_select =3D eventsel & ARCH_PERFMON_EVENTSEL_EVENT; >> + unit_mask =3D (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8; >> + >> + if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE | >> + ARCH_PERFMON_EVENTSEL_INV | >> + ARCH_PERFMON_EVENTSEL_CMASK | >> + HSW_IN_TX | >> + HSW_IN_TX_CHECKPOINTED))) { >> + config =3D find_hw_type_event(&pmc->vcpu->arch.pmu, event_select, >> + unit_mask); >> + if (config !=3D PERF_COUNT_HW_MAX) >> + type =3D PERF_TYPE_HARDWARE; >> + } >> + >> + if (type =3D=3D PERF_TYPE_RAW) >> + config =3D eventsel & X86_RAW_EVENT_MASK; >> + >> + reprogram_counter(pmc, type, config, >> + !(eventsel & ARCH_PERFMON_EVENTSEL_USR), >> + !(eventsel & ARCH_PERFMON_EVENTSEL_OS), >> + eventsel & ARCH_PERFMON_EVENTSEL_INT); >> +} >> + >> +static void reprogram_idx(struct kvm_pmu *pmu, int idx) >> { >> + struct kvm_pmc *pmc =3D global_idx_to_pmc(pmu, idx); >> + >> + if (!pmc || !pmc_is_gp(pmc)) >> + return; >> + reprogram_gp_counter(pmc, pmc->eventsel); >> } >> =20 >> void amd_handle_pmu_event(struct kvm_vcpu *vcpu) >> { >=20 > static. >=20 > (pmu.c) >=20 >> + struct kvm_pmu *pmu =3D &vcpu->arch.pmu; >> + u64 bitmask; >> + int bit; >> + >> + bitmask =3D pmu->reprogram_pmi; >> + >> + for_each_set_bit(bit, (unsigned long *)&bitmask, X86_PMC_IDX_MAX) = { >> + struct kvm_pmc *pmc =3D global_idx_to_pmc(pmu, bit); >> + >> + if (unlikely(!pmc || !pmc->perf_event)) { >> + clear_bit(bit, (unsigned long *)&pmu->reprogram_pmi); >> + continue; >> + } >> + >> + reprogram_idx(pmu, bit); >> + } >> } >> =20 >> void amd_pmu_reset(struct kvm_vcpu *vcpu) >=20 > static. >=20 >> { >> + struct kvm_pmu *pmu =3D &vcpu->arch.pmu; >> + int i; >> + >> + irq_work_sync(&pmu->irq_work); >> + >> + for (i =3D 0; i < AMD64_NUM_COUNTERS; i++) { >> + struct kvm_pmc *pmc =3D &pmu->gp_counters[i]; >> + >> + stop_counter(pmc); >> + pmc->counter =3D pmc->eventsel =3D 0; >> + } >> +} >> + >> +void amd_pmu_destroy(struct kvm_vcpu *vcpu) >=20 > static. >=20 >> +{ >> + amd_pmu_reset(vcpu); >> +} >> + >> +int amd_pmu_check_pmc(struct kvm_vcpu *vcpu, unsigned pmc) >=20 > static. >=20 >> +{ >> + struct kvm_pmu *pmu =3D &vcpu->arch.pmu; >> + >> + pmc &=3D ~(3u << 30); >=20 > (-1u >> 2.) >=20 >> + return (pmc >=3D pmu->nr_arch_gp_counters); >> +} >> + >> +int amd_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data= ) >=20 > static. >=20 >> +{ >> + struct kvm_pmu *pmu =3D &vcpu->arch.pmu; >> + struct kvm_pmc *counters; >> + u64 ctr; >> + >> + pmc &=3D ~(3u << 30); >> + if (pmc >=3D pmu->nr_arch_gp_counters) >> + return 1; >> + >> + counters =3D pmu->gp_counters; >> + ctr =3D read_pmc(&counters[pmc]); >> + *data =3D ctr; >> + >> + return 0; >> +} >> + >> +int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_inf= o) >=20 > static. >=20 >> +{ >> + struct kvm_pmu *pmu =3D &vcpu->arch.pmu; >> + struct kvm_pmc *pmc; >> + u32 index =3D msr_info->index; >> + u64 data =3D msr_info->data; >> + >> + if ((pmc =3D get_gp_pmc(pmu, index, MSR_K7_PERFCTR0))) { >> + if (!msr_info->host_initiated) >> + data =3D (s64)data; >> + pmc->counter +=3D data - read_pmc(pmc); >> + return 0; >> + } else if ((pmc =3D get_gp_pmc(pmu, index, MSR_K7_EVNTSEL0))) { >> + if (data =3D=3D pmc->eventsel) >> + return 0; >> + if (!(data & pmu->reserved_bits)) { >> + reprogram_gp_counter(pmc, data); >> + return 0; >> + } >> + } >> + >> + return 1; >> +} >> + >> +int amd_pmu_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data) >=20 > static. >=20 >> +{ >> + struct kvm_pmu *pmu =3D &vcpu->arch.pmu; >> + struct kvm_pmc *pmc; >> + >> + if ((pmc =3D get_gp_pmc(pmu, index, MSR_K7_PERFCTR0))) { >> + *data =3D read_pmc(pmc); >> + return 0; >> + } else if ((pmc =3D get_gp_pmc(pmu, index, MSR_K7_EVNTSEL0))) { >> + *data =3D pmc->eventsel; >> + return 0; >> + } >> + >> + return 1; >> +} >> + >> +void amd_pmu_cpuid_update(struct kvm_vcpu *vcpu) >=20 > static. >=20 >> +{ >> + struct kvm_pmu *pmu =3D &vcpu->arch.pmu; >> + >> + pmu->nr_arch_gp_counters =3D 0; >> + pmu->nr_arch_fixed_counters =3D 0; >> + pmu->counter_bitmask[KVM_PMC_GP] =3D 0; >> + pmu->version =3D 0; >> + pmu->reserved_bits =3D 0xffffffff00200000ull; >> + >=20 >> + /* configuration */ >> + pmu->nr_arch_gp_counters =3D 4; >=20 > AMD64_NUM_COUNTERS? Yes, will fix. >=20 >> + pmu->nr_arch_fixed_counters =3D 0; >> + pmu->counter_bitmask[KVM_PMC_GP] =3D ((u64)1 << 48) - 1; >=20 > amd_pmu_cpuid_update doesn't depend on cpuid at all. > (I'd keep this function empty.) >=20 >> } >> =20 >> void amd_pmu_init(struct kvm_vcpu *vcpu) >=20 > static. >=20 >> { >> + int i; >> + struct kvm_pmu *pmu =3D &vcpu->arch.pmu; >> + >> + memset(pmu, 0, sizeof(*pmu)); >> + for (i =3D 0; i < AMD64_NUM_COUNTERS ; i++) { >> + pmu->gp_counters[i].type =3D KVM_PMC_GP; >> + pmu->gp_counters[i].vcpu =3D vcpu; >> + pmu->gp_counters[i].idx =3D i; >> + } >> + >> + init_irq_work(&pmu->irq_work, trigger_pmi); >> + amd_pmu_cpuid_update(vcpu); >> } >> =20 >> -void amd_pmu_destroy(struct kvm_vcpu *vcpu) >> +bool amd_is_pmu_msr(struct kvm_vcpu *vcpu, u32 msr) >=20 > static. >=20 >> { >> + struct kvm_pmu *pmu =3D &vcpu->arch.pmu; >> + int ret; >> + >> + ret =3D get_gp_pmc(pmu, msr, MSR_K7_PERFCTR0) || >> + get_gp_pmc(pmu, msr, MSR_K7_EVNTSEL0); >> + >> + return ret; >> } >> =20 >> struct kvm_pmu_ops amd_pmu_ops =3D { >> --=20 >> 1.8.3.1 >>