From mboxrd@z Thu Jan  1 00:00:00 1970
From: Wei Huang <wei@redhat.com>
Subject: Re: [PATCH V1 3/4] KVM: x86/vPMU: Implement AMD PMU support for KVM
Date: Tue, 04 Nov 2014 12:20:24 -0600
Message-ID: <545918E8.20400@redhat.com>
References: <1414771534-29411-1-git-send-email-wei@redhat.com> <1414771534-29411-4-git-send-email-wei@redhat.com> <20141103181719.GA2909@potion.brq.redhat.com>
Mime-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: QUOTED-PRINTABLE
Cc: kvm@vger.kernel.org, pbonzini@redhat.com, gleb@kernel.org
To: =?UTF-8?B?UmFkaW0gS3LEjW3DocWZ?= <rkrcmar@redhat.com>
Return-path: <kvm-owner@vger.kernel.org>
Received: from mx1.redhat.com ([209.132.183.28]:32953 "EHLO mx1.redhat.com"
	rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP
	id S1751017AbaKDSUa (ORCPT <rfc822;kvm@vger.kernel.org>);
	Tue, 4 Nov 2014 13:20:30 -0500
In-Reply-To: <20141103181719.GA2909@potion.brq.redhat.com>
Sender: kvm-owner@vger.kernel.org
List-ID: <kvm.vger.kernel.org>

On 11/03/2014 12:17 PM, Radim Kr=C4=8Dm=C3=A1=C5=99 wrote:
> 2014-10-31 12:05-0400, Wei Huang:
>> This patch implemented vPMU for AMD platform. The design piggybacks
>> on the existing Intel structs (kvm_pmu and kvm_pmc), but only uses
>> the parts of generic counters. The kvm_pmu_ops interface is also
>> initialized in this patch.
>>
>> Signed-off-by: Wei Huang <wei@redhat.com>
>> ---
>>  arch/x86/kvm/pmu_amd.c | 332 ++++++++++++++++++++++++++++++++++++++=
++++++++---
>>  1 file changed, 318 insertions(+), 14 deletions(-)
>>
>> diff --git a/arch/x86/kvm/pmu_amd.c b/arch/x86/kvm/pmu_amd.c
>> index 6d6e1c3..19cfe26 100644
>> --- a/arch/x86/kvm/pmu_amd.c
>> +++ b/arch/x86/kvm/pmu_amd.c
>> @@ -16,58 +16,362 @@
>>  #include <linux/types.h>
>>  #include <linux/kvm_host.h>
>>  #include <linux/perf_event.h>
>> -#include <asm/perf_event.h>
>>  #include "x86.h"
>>  #include "cpuid.h"
>>  #include "lapic.h"
>> =20
>> -void amd_pmu_cpuid_update(struct kvm_vcpu *vcpu)
>> +/* duplicated from amd_perfmon_event_map, K7 and above should work =
*/
>> +static struct kvm_event_hw_type_mapping amd_event_mapping[] =3D {
>> +	[0] =3D { 0x76, 0x00, PERF_COUNT_HW_CPU_CYCLES },
>> +	[1] =3D { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS },
>> +	[2] =3D { 0x80, 0x00, PERF_COUNT_HW_CACHE_REFERENCES },
>> +	[3] =3D { 0x81, 0x00, PERF_COUNT_HW_CACHE_MISSES },
>=20
>> +	[4] =3D { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
>> +	[5] =3D { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES },
>=20
> amd_perfmon_event_map has 0xc2 and 0xc3.
Sorry, will fix.
>=20
>> +	[6] =3D { 0xd0, 0x00, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
>> +	[7] =3D { 0xd1, 0x00, PERF_COUNT_HW_STALLED_CYCLES_BACKEND },
>> +};
>> +
>> +static inline bool pmc_is_gp(struct kvm_pmc *pmc)
>>  {
>> +	return pmc->type =3D=3D KVM_PMC_GP;
>>  }
>=20
> What is the benefit of having the same implementation in both files?
>=20

No benefits. In fact I can remove it from AMD code because it is always
true for AMD. Will fix.

> I'd prefer to have it in pmu.h and I'll try to note all functions tha=
t
> look identical.  As always, you can ignore anything in parentheses,
> there are only few real issues with this patch.
>=20
>> =20
>> -int amd_pmu_check_pmc(struct kvm_vcpu *vcpu, unsigned pmc)
>> +static inline struct kvm_pmc *get_gp_pmc(struct kvm_pmu *pmu, u32 m=
sr,
>> +					 u32 base)
>=20
> (pmu.h)

I will try to merge common ones into pmu.h in next re-spin. This applie=
s
to the rest (pmu.h and pmu.c) in your replies.

>=20
>>  {
>> -	return 1;
>> +	if (msr >=3D base && msr < base + pmu->nr_arch_gp_counters)
>> +		return &pmu->gp_counters[msr - base];
>> +
>> +	return NULL;
>>  }
>> =20
>> -int amd_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data=
)
>> +static inline u64 pmc_bitmask(struct kvm_pmc *pmc)
>=20
> (pmu.h)
>=20
>>  {
>> -	return 1;
>> +	struct kvm_pmu *pmu =3D &pmc->vcpu->arch.pmu;
>> +
>> +	return pmu->counter_bitmask[pmc->type];
>>  }
>> =20
>> -int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_inf=
o)
>> +static struct kvm_pmc *global_idx_to_pmc(struct kvm_pmu *pmu, int i=
dx)
>>  {
>> -	return 1;
>> +	return get_gp_pmc(pmu, MSR_K7_EVNTSEL0 + idx, MSR_K7_EVNTSEL0);
>>  }
>> =20
>> -int amd_pmu_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data)
>> +void amd_deliver_pmi(struct kvm_vcpu *vcpu)
>=20
> static.
>=20
> (pmu.c, rename to deliver_pmi and reuse for intel.)
>=20

Same here. I will try to merge them as much as I can.

>>  {
>> -	return 1;
>> +	if (vcpu->arch.apic)
>> +		kvm_apic_local_deliver(vcpu->arch.apic, APIC_LVTPC);
>>  }
>> =20
>> -bool amd_is_pmu_msr(struct kvm_vcpu *vcpu, u32 msr)
>> +static void trigger_pmi(struct irq_work *irq_work)
>=20
> (pmu.c)
>=20
>>  {
>> -	return 0;
>> +	struct kvm_pmu *pmu =3D container_of(irq_work, struct kvm_pmu,
>> +			irq_work);
>> +	struct kvm_vcpu *vcpu =3D container_of(pmu, struct kvm_vcpu,
>> +			arch.pmu);
>> +
>> +	amd_deliver_pmi(vcpu);
>>  }
>> =20
>> -void amd_deliver_pmi(struct kvm_vcpu *vcpu)
>> +static u64 read_pmc(struct kvm_pmc *pmc)
>=20
> (pmu.c)
>=20
>> +{
>> +	u64 counter, enabled, running, result, delta, prev;
>> +
>> +	counter =3D pmc->counter;
>> +	prev =3D pmc->counter;
>> +
>> +	if (pmc->perf_event) {
>> +		delta =3D perf_event_read_value(pmc->perf_event,
>> +					      &enabled, &running);
>> +		counter +=3D delta;
>> +	}
>> +
>> +	result =3D counter & pmc_bitmask(pmc);
>> +
>> +	return result;
>> +}
>=20
> No.  The one intel uses is better.
> (This one looks like a relic from experimenting.)

Yes, I inserted some debug message using "result" variable. Will fix.

>=20
>> +
>> +static void stop_counter(struct kvm_pmc *pmc)
>=20
> (pmu.c)
>=20
>> +{
>> +	if (pmc->perf_event) {
>> +		pmc->counter =3D read_pmc(pmc);
>> +		perf_event_release_kernel(pmc->perf_event);
>> +		pmc->perf_event =3D NULL;
>> +	}
>> +}
>> +
>> +static unsigned find_hw_type_event(struct kvm_pmu *pmu, u8 event_se=
lect,
>> +				u8 unit_mask)
>> +{
>=20
> (Intel calls this find_arch_event.)

I don't quite agree with existing naming, find_arch_event, in
pmu_intel.c. This function isn't directly related to arch event. It is
for mapping from perf_counters to linux_perf_event. But I only changed
the version pmu_amd.c though.

>=20
>> +	int i;
>> +
>> +	for (i =3D 0; i < ARRAY_SIZE(amd_event_mapping); i++)
>> +		if (amd_event_mapping[i].eventsel =3D=3D event_select
>> +		    && amd_event_mapping[i].unit_mask =3D=3D unit_mask)
>=20
> (And it could be less extensible, but shorter:
>   			return amd_event_mapping[i].event_type;
>   	}
>   	return PERF_COUNT_HW_MAX;
>   }
> )

Will fix

>=20
>> +			break;
>> +
>> +	if (i =3D=3D ARRAY_SIZE(amd_event_mapping))
>> +		return PERF_COUNT_HW_MAX;
>> +
>> +	return amd_event_mapping[i].event_type;
>> +}
>> +
>> +static void kvm_perf_overflow_intr(struct perf_event *perf_event,
>> +		struct perf_sample_data *data, struct pt_regs *regs)
>> +{
>=20
> (pmu.c)
>=20
>> +	struct kvm_pmc *pmc =3D perf_event->overflow_handler_context;
>> +	struct kvm_pmu *pmu =3D &pmc->vcpu->arch.pmu;
>> +
>> +	if (!test_and_set_bit(pmc->idx,
>> +			      (unsigned long *)&pmu->reprogram_pmi)) {
>=20
> (The useless set_bit for intel is not that bad to keep them separate;
>  please mind my radicality when it comes to abstracting.)
>=20
>> +		kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
>> +
>> +		if (!kvm_is_in_guest())
>> +			irq_work_queue(&pmc->vcpu->arch.pmu.irq_work);
>> +		else
>> +			kvm_make_request(KVM_REQ_PMI, pmc->vcpu);
>> +	}
>> +}
>> +
>> +static void kvm_perf_overflow(struct perf_event *perf_event,
>> +			      struct perf_sample_data *data,
>> +			      struct pt_regs *regs)
>> +{
>=20
> (pmu.c, ditto.)
>=20
>> +	struct kvm_pmc *pmc =3D perf_event->overflow_handler_context;
>> +	struct kvm_pmu *pmu =3D &pmc->vcpu->arch.pmu;
>> +
>> +	if (!test_and_set_bit(pmc->idx,
>> +			      (unsigned long *)&pmu->reprogram_pmi)) {
>> +		kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
>> +	}
>> +}
>> +
>> +static void reprogram_counter(struct kvm_pmc *pmc, u32 type,
>> +		unsigned config, bool exclude_user, bool exclude_kernel,
>> +		bool intr)
>=20
> (Could be merged in my world, I'll comment on previous patch.)

Will try.

>=20
>> +{
>> +	struct perf_event *event;
>> +	struct perf_event_attr attr =3D {
>> +		.type =3D type,
>> +		.size =3D sizeof(attr),
>> +		.pinned =3D true,
>> +		.exclude_idle =3D true,
>> +		.exclude_host =3D 1,
>> +		.exclude_user =3D exclude_user,
>> +		.exclude_kernel =3D exclude_kernel,
>> +		.config =3D config,
>> +	};
>> +
>> +	attr.sample_period =3D (-pmc->counter) & pmc_bitmask(pmc);
>> +
>> +	event =3D perf_event_create_kernel_counter(&attr, -1, current,
>> +						 intr?kvm_perf_overflow_intr :
>> +						 kvm_perf_overflow, pmc);
>> +	if (IS_ERR(event)) {
>> +		printk_once("kvm: pmu event creation failed %ld\n",
>> +			    PTR_ERR(event));
>> +		return;
>> +	}
>> +
>> +	pmc->perf_event =3D event;
>> +	clear_bit(pmc->idx, (unsigned long *)&pmc->vcpu->arch.pmu.reprogra=
m_pmi);
>> +}
>> +
>> +
>> +static void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
>> +{
>=20
> (Ditto.)
>=20
>> +	unsigned config, type =3D PERF_TYPE_RAW;
>> +	u8 event_select, unit_mask;
>> +
>> +	if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
>> +		printk_once("kvm pmu: pin control bit is ignored\n");
>> +
>> +	pmc->eventsel =3D eventsel;
>> +
>> +	stop_counter(pmc);
>> +
>> +	if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE))
>> +		return;
>> +
>> +	event_select =3D eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
>> +	unit_mask =3D (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
>> +
>> +	if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
>> +			  ARCH_PERFMON_EVENTSEL_INV |
>> +			  ARCH_PERFMON_EVENTSEL_CMASK |
>> +			  HSW_IN_TX |
>> +			  HSW_IN_TX_CHECKPOINTED))) {
>> +		config =3D find_hw_type_event(&pmc->vcpu->arch.pmu, event_select,
>> +					 unit_mask);
>> +		if (config !=3D PERF_COUNT_HW_MAX)
>> +			type =3D PERF_TYPE_HARDWARE;
>> +	}
>> +
>> +	if (type =3D=3D PERF_TYPE_RAW)
>> +		config =3D eventsel & X86_RAW_EVENT_MASK;
>> +
>> +	reprogram_counter(pmc, type, config,
>> +			  !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
>> +			  !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
>> +			  eventsel & ARCH_PERFMON_EVENTSEL_INT);
>> +}
>> +
>> +static void reprogram_idx(struct kvm_pmu *pmu, int idx)
>>  {
>> +	struct kvm_pmc *pmc =3D global_idx_to_pmc(pmu, idx);
>> +
>> +	if (!pmc || !pmc_is_gp(pmc))
>> +		return;
>> +	reprogram_gp_counter(pmc, pmc->eventsel);
>>  }
>> =20
>>  void amd_handle_pmu_event(struct kvm_vcpu *vcpu)
>>  {
>=20
> static.
>=20
> (pmu.c)
>=20
>> +	struct kvm_pmu *pmu =3D &vcpu->arch.pmu;
>> +	u64 bitmask;
>> +	int bit;
>> +
>> +	bitmask =3D pmu->reprogram_pmi;
>> +
>> +	for_each_set_bit(bit, (unsigned long *)&bitmask, X86_PMC_IDX_MAX) =
{
>> +		struct kvm_pmc *pmc =3D global_idx_to_pmc(pmu, bit);
>> +
>> +		if (unlikely(!pmc || !pmc->perf_event)) {
>> +			clear_bit(bit, (unsigned long *)&pmu->reprogram_pmi);
>> +			continue;
>> +		}
>> +
>> +		reprogram_idx(pmu, bit);
>> +	}
>>  }
>> =20
>>  void amd_pmu_reset(struct kvm_vcpu *vcpu)
>=20
> static.
>=20
>>  {
>> +	struct kvm_pmu *pmu =3D &vcpu->arch.pmu;
>> +	int i;
>> +
>> +	irq_work_sync(&pmu->irq_work);
>> +
>> +	for (i =3D 0; i < AMD64_NUM_COUNTERS; i++) {
>> +		struct kvm_pmc *pmc =3D &pmu->gp_counters[i];
>> +
>> +		stop_counter(pmc);
>> +		pmc->counter =3D pmc->eventsel =3D 0;
>> +	}
>> +}
>> +
>> +void amd_pmu_destroy(struct kvm_vcpu *vcpu)
>=20
> static.
>=20
>> +{
>> +	amd_pmu_reset(vcpu);
>> +}
>> +
>> +int amd_pmu_check_pmc(struct kvm_vcpu *vcpu, unsigned pmc)
>=20
> static.
>=20
>> +{
>> +	struct kvm_pmu *pmu =3D &vcpu->arch.pmu;
>> +
>> +	pmc &=3D ~(3u << 30);
>=20
> (-1u >> 2.)
>=20
>> +	return (pmc >=3D pmu->nr_arch_gp_counters);
>> +}
>> +
>> +int amd_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data=
)
>=20
> static.
>=20
>> +{
>> +	struct kvm_pmu *pmu =3D &vcpu->arch.pmu;
>> +	struct kvm_pmc *counters;
>> +	u64 ctr;
>> +
>> +	pmc &=3D ~(3u << 30);
>> +	if (pmc >=3D pmu->nr_arch_gp_counters)
>> +		return 1;
>> +
>> +	counters =3D pmu->gp_counters;
>> +	ctr =3D read_pmc(&counters[pmc]);
>> +	*data =3D ctr;
>> +
>> +	return 0;
>> +}
>> +
>> +int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_inf=
o)
>=20
> static.
>=20
>> +{
>> +	struct kvm_pmu *pmu =3D &vcpu->arch.pmu;
>> +	struct kvm_pmc *pmc;
>> +	u32 index =3D msr_info->index;
>> +	u64 data =3D msr_info->data;
>> +
>> +	if ((pmc =3D get_gp_pmc(pmu, index, MSR_K7_PERFCTR0))) {
>> +		if (!msr_info->host_initiated)
>> +			data =3D (s64)data;
>> +		pmc->counter +=3D data - read_pmc(pmc);
>> +		return 0;
>> +	} else if ((pmc =3D get_gp_pmc(pmu, index, MSR_K7_EVNTSEL0))) {
>> +		if (data =3D=3D pmc->eventsel)
>> +			return 0;
>> +		if (!(data & pmu->reserved_bits)) {
>> +			reprogram_gp_counter(pmc, data);
>> +			return 0;
>> +		}
>> +	}
>> +
>> +	return 1;
>> +}
>> +
>> +int amd_pmu_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data)
>=20
> static.
>=20
>> +{
>> +	struct kvm_pmu *pmu =3D &vcpu->arch.pmu;
>> +	struct kvm_pmc *pmc;
>> +
>> +	if ((pmc =3D get_gp_pmc(pmu, index, MSR_K7_PERFCTR0))) {
>> +		*data =3D read_pmc(pmc);
>> +		return 0;
>> +	} else if ((pmc =3D get_gp_pmc(pmu, index, MSR_K7_EVNTSEL0))) {
>> +		*data =3D pmc->eventsel;
>> +		return 0;
>> +	}
>> +
>> +	return 1;
>> +}
>> +
>> +void amd_pmu_cpuid_update(struct kvm_vcpu *vcpu)
>=20
> static.
>=20
>> +{
>> +	struct kvm_pmu *pmu =3D &vcpu->arch.pmu;
>> +
>> +	pmu->nr_arch_gp_counters =3D 0;
>> +	pmu->nr_arch_fixed_counters =3D 0;
>> +	pmu->counter_bitmask[KVM_PMC_GP] =3D 0;
>> +	pmu->version =3D 0;
>> +	pmu->reserved_bits =3D 0xffffffff00200000ull;
>> +
>=20
>> +	/* configuration */
>> +	pmu->nr_arch_gp_counters =3D 4;
>=20
> AMD64_NUM_COUNTERS?

Yes, will fix.

>=20
>> +	pmu->nr_arch_fixed_counters =3D 0;
>> +	pmu->counter_bitmask[KVM_PMC_GP] =3D ((u64)1 << 48) - 1;
>=20
> amd_pmu_cpuid_update doesn't depend on cpuid at all.
> (I'd keep this function empty.)
>=20
>>  }
>> =20
>>  void amd_pmu_init(struct kvm_vcpu *vcpu)
>=20
> static.
>=20
>>  {
>> +	int i;
>> +	struct kvm_pmu *pmu =3D &vcpu->arch.pmu;
>> +
>> +	memset(pmu, 0, sizeof(*pmu));
>> +	for (i =3D 0; i < AMD64_NUM_COUNTERS ; i++) {
>> +		pmu->gp_counters[i].type =3D KVM_PMC_GP;
>> +		pmu->gp_counters[i].vcpu =3D vcpu;
>> +		pmu->gp_counters[i].idx =3D i;
>> +	}
>> +
>> +	init_irq_work(&pmu->irq_work, trigger_pmi);
>> +	amd_pmu_cpuid_update(vcpu);
>>  }
>> =20
>> -void amd_pmu_destroy(struct kvm_vcpu *vcpu)
>> +bool amd_is_pmu_msr(struct kvm_vcpu *vcpu, u32 msr)
>=20
> static.
>=20
>>  {
>> +	struct kvm_pmu *pmu =3D &vcpu->arch.pmu;
>> +	int ret;
>> +
>> +	ret =3D get_gp_pmc(pmu, msr, MSR_K7_PERFCTR0) ||
>> +		get_gp_pmc(pmu, msr, MSR_K7_EVNTSEL0);
>> +
>> +	return ret;
>>  }
>> =20
>>  struct kvm_pmu_ops amd_pmu_ops =3D {
>> --=20
>> 1.8.3.1
>>