Re: [Qemu-devel] [PATCH v1 RFC 6/6] KVM: s390: add cpu model support

qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed

From: Alexander Graf <agraf@suse.de>
To: Michael Mueller <mimu@linux.vnet.ibm.com>
Cc: linux-s390@vger.kernel.org, kvm@vger.kernel.org,
	Gleb Natapov <gleb@kernel.org>,
	qemu-devel@nongnu.org, linux-kernel@vger.kernel.org,
	Christian Borntraeger <borntraeger@de.ibm.com>,
	"Jason J. Herne" <jjherne@linux.vnet.ibm.com>,
	Cornelia Huck <cornelia.huck@de.ibm.com>,
	Paolo Bonzini <pbonzini@redhat.com>,
	Andreas Faerber <afaerber@suse.de>,
	Richard Henderson <rth@twiddle.net>
Subject: Re: [Qemu-devel] [PATCH v1 RFC 6/6] KVM: s390: add cpu model support
Date: Mon, 19 May 2014 13:48:08 +0200	[thread overview]
Message-ID: <5379EF78.9040209@suse.de> (raw)
In-Reply-To: <20140519125339.09840b9e@bee>


On 19.05.14 12:53, Michael Mueller wrote:
> On Fri, 16 May 2014 22:31:12 +0200
> Alexander Graf <agraf@suse.de> wrote:
>
>> On 16.05.14 17:39, Michael Mueller wrote:
>>> On Fri, 16 May 2014 14:08:24 +0200
>>> Alexander Graf <agraf@suse.de> wrote:
>>>
>>>> On 13.05.14 16:58, Michael Mueller wrote:
>>>>> This patch enables cpu model support in kvm/s390 via the vm attribute
>>>>> interface.
>>>>>
>>>>> During KVM initialization, the host properties cpuid, IBC value and the
>>>>> facility list are stored in the architecture specific cpu model structure.
>>>>>
>>>>> During vcpu setup, these properties are taken to initialize the related SIE
>>>>> state. This mechanism allows to adjust the properties from user space and thus
>>>>> to implement different selectable cpu models.
>>>>>
>>>>> This patch uses the IBC functionality to block instructions that have not
>>>>> been implemented at the requested CPU type and GA level compared to the
>>>>> full host capability.
>>>>>
>>>>> Userspace has to initialize the cpu model before vcpu creation. A cpu model
>>>>> change of running vcpus is currently not possible.
>>>> Why is this VM global? It usually fits a lot better modeling wise when
>>>> CPU types are vcpu properties.
>>> It simplifies the code substantially because it inherently guarantees that the vcpus are
>>> configured identically. In addition, there is no S390 hardware implementation containing
>>> inhomogeneous processor types. Thus I consider the properties to be machine specific.
>>>
>>>>> Signed-off-by: Michael Mueller <mimu@linux.vnet.ibm.com>
>>>>> ---
>>>>>     arch/s390/include/asm/kvm_host.h |   4 +-
>>>>>     arch/s390/include/uapi/asm/kvm.h |  23 ++++++
>>>>>     arch/s390/kvm/kvm-s390.c         | 146 ++++++++++++++++++++++++++++++++++++++-
>>>>>     arch/s390/kvm/kvm-s390.h         |   1 +
>>>>>     4 files changed, 172 insertions(+), 2 deletions(-)
>>>>>
>>>>> diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
>>>>> index b4751ba..6b826cb 100644
>>>>> --- a/arch/s390/include/asm/kvm_host.h
>>>>> +++ b/arch/s390/include/asm/kvm_host.h
>>>>> @@ -84,7 +84,8 @@ struct kvm_s390_sie_block {
>>>>>     	atomic_t cpuflags;		/* 0x0000 */
>>>>>     	__u32 : 1;			/* 0x0004 */
>>>>>     	__u32 prefix : 18;
>>>>> -	__u32 : 13;
>>>>> +	__u32 : 1;
>>>>> +	__u32 ibc : 12;
>>>>>     	__u8	reserved08[4];		/* 0x0008 */
>>>>>     #define PROG_IN_SIE (1<<0)
>>>>>     	__u32	prog0c;			/* 0x000c */
>>>>> @@ -418,6 +419,7 @@ struct kvm_s390_cpu_model {
>>>>>     	unsigned long *sie_fac;
>>>>>     	struct cpuid cpu_id;
>>>>>     	unsigned long *fac_list;
>>>>> +	unsigned short ibc;
>>>>>     };
>>>>>     
>>>>>     struct kvm_arch{
>>>>> diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
>>>>> index 313100a..82ef1b5 100644
>>>>> --- a/arch/s390/include/uapi/asm/kvm.h
>>>>> +++ b/arch/s390/include/uapi/asm/kvm.h
>>>>> @@ -58,12 +58,35 @@ struct kvm_s390_io_adapter_req {
>>>>>     
>>>>>     /* kvm attr_group  on vm fd */
>>>>>     #define KVM_S390_VM_MEM_CTRL		0
>>>>> +#define KVM_S390_VM_CPU_MODEL		1
>>>>>     
>>>>>     /* kvm attributes for mem_ctrl */
>>>>>     #define KVM_S390_VM_MEM_ENABLE_CMMA	0
>>>>>     #define KVM_S390_VM_MEM_CLR_CMMA	1
>>>>>     #define KVM_S390_VM_MEM_CLR_PAGES	2
>>>>>     
>>>>> +/* kvm attributes for cpu_model */
>>>>> +
>>>>> +/* the s390 processor related attributes are r/w */
>>>>> +#define KVM_S390_VM_CPU_PROCESSOR	0
>>>>> +struct kvm_s390_vm_cpu_processor {
>>>>> +	__u64 cpuid;
>>>>> +	__u16 ibc;
>>>>> +	__u8  pad[6];
>>>>> +	__u64 fac_list[256];
>>>>> +};
>>>>> +
>>>>> +/* the machine related attributes are read only */
>>>>> +#define KVM_S390_VM_CPU_MACHINE		1
>>>>> +struct kvm_s390_vm_cpu_machine {
>>>>> +	__u64 cpuid;
>>>>> +	__u32 ibc_range;
>>>>> +	__u8  pad[4];
>>>>> +	__u64 fac_mask[256];
>>>>> +	__u64 hard_fac_list[256];
>>>>> +	__u64 soft_fac_list[256];
>>>>> +};
>>>>> +
>>>>>     /* for KVM_GET_REGS and KVM_SET_REGS */
>>>>>     struct kvm_regs {
>>>>>     	/* general purpose regs for s390 */
>>>>> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
>>>>> index a53652f..9965d8b 100644
>>>>> --- a/arch/s390/kvm/kvm-s390.c
>>>>> +++ b/arch/s390/kvm/kvm-s390.c
>>>>> @@ -369,6 +369,110 @@ static int kvm_s390_mem_control(struct kvm *kvm, struct
>>>>> kvm_device_attr *attr) return ret;
>>>>>     }
>>>>>     
>>>>> +static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
>>>>> +{
>>>>> +	struct kvm_s390_vm_cpu_processor *proc;
>>>>> +
>>>>> +	if (atomic_read(&kvm->online_vcpus))
>>>>> +		return -EBUSY;
>>>>> +
>>>>> +	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
>>>>> +	if (!proc)
>>>>> +		return -ENOMEM;
>>>>> +
>>>>> +	if (copy_from_user(proc, (void __user *)attr->addr,
>>>>> +			   sizeof(*proc))) {
>>>>> +		kfree(proc);
>>>>> +		return -EFAULT;
>>>>> +	}
>>>>> +
>>>>> +	mutex_lock(&kvm->lock);
>>>>> +	memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
>>>>> +	       sizeof(struct cpuid));
>>>>> +	kvm->arch.model.ibc = proc->ibc;
>>>>> +	kvm_s390_apply_fac_list_mask((long unsigned *)proc->fac_list);
>>>>> +	memcpy(kvm->arch.model.fac_list, proc->fac_list,
>>>>> +	       S390_ARCH_FAC_LIST_SIZE_BYTE);
>>>>> +	mutex_unlock(&kvm->lock);
>>>>> +	kfree(proc);
>>>>> +
>>>>> +	return 0;
>>>>> +}
>>>>> +
>>>>> +static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
>>>>> +{
>>>>> +	int ret = -ENXIO;
>>>>> +
>>>>> +	switch (attr->attr) {
>>>>> +	case KVM_S390_VM_CPU_PROCESSOR:
>>>>> +		ret = kvm_s390_set_processor(kvm, attr);
>>>>> +		break;
>>>>> +	}
>>>>> +	return ret;
>>>>> +}
>>>>> +
>>>>> +static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
>>>>> +{
>>>>> +	struct kvm_s390_vm_cpu_processor *proc;
>>>>> +	int rc = 0;
>>>>> +
>>>>> +	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
>>>>> +	if (!proc) {
>>>>> +		rc = -ENOMEM;
>>>>> +		goto out;
>>>>> +	}
>>>>> +	memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
>>>>> +	proc->ibc = kvm->arch.model.ibc;
>>>>> +	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
>>>>> +	       S390_ARCH_FAC_LIST_SIZE_BYTE);
>>>>> +	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
>>>>> +		rc = -EFAULT;
>>>>> +	kfree(proc);
>>>>> +out:
>>>>> +	return rc;
>>>>> +}
>>>>> +
>>>>> +static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
>>>>> +{
>>>>> +	struct kvm_s390_vm_cpu_machine *mach;
>>>>> +	int rc = 0;
>>>>> +
>>>>> +	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
>>>>> +	if (!mach) {
>>>>> +		rc = -ENOMEM;
>>>>> +		goto out;
>>>>> +	}
>>>>> +	get_cpu_id((struct cpuid *) &mach->cpuid);
>>>>> +	mach->ibc_range = kvm_s390_lowest_ibc() << 16;
>>>>> +	mach->ibc_range |= kvm_s390_latest_ibc();
>>>>> +	memcpy(&mach->fac_mask, kvm_s390_fac_list_mask,
>>>>> +	       kvm_s390_fac_list_mask_size() * sizeof(u64));
>>>>> +	kvm_s390_get_hard_fac_list((long unsigned int *) &mach->hard_fac_list,
>>>>> +				   S390_ARCH_FAC_LIST_SIZE_U64);
>>>>> +	kvm_s390_get_soft_fac_list((long unsigned int *) &mach->soft_fac_list,
>>>>> +				   S390_ARCH_FAC_LIST_SIZE_U64);
>>>> I really have a hard time grasping what hard and soft means.
>>> Hard facilities are those that are implemented by the CPU itself, either through processor
>>> logic or by means of firmware microcode. That's the list returned by the STFL/STFLE
>>> instruction. In addition to that, one can imagine that in the future some of those features
>>> will be emulated on the KVM side. These will be placed in the soft facility list and can
>>> optionally be requested by user space.
>> I don't see why we would have to differentiate between the two. User
>> space wants features enabled. Whether they are done in hardware or in
>> software doesn't matter.
> I've tried to make my point on that in my last answer to patch 3/6. It's a mistake
> to think that user space just wants to have features; they come with different
> qualities!

So? If I want to run a z9 compatible guest, I do -cpu z9. I can either

   a) run it with emulation of a facility or
   b) not run it

which one would the user choose?

>
>> So all we need is a list of "features the guest sees available" which is
>> the same as "features user space wants the guest to see" which then gets
>> masked through "features the host can do in hardware".
>>
>> For emulation we can just check on the global feature availability on
>> whether we should emulate them or not.
>>
>>>> Also, if user space wants to make sure that its feature list is actually
>>>> workable on the host kernel, it needs to set and get the features again
>>>> and then compare that with the ones it set? That's different from x86's
>>>> cpuid implementation but probably workable.
>>> User space will probe what facilities are available and match them with the predefined cpu
>>> model set. Only those models which use a partial or full subset of the hard/host facility
>>> list are selectable.
>> Why?
> If a host does not offer the features required for a model it is not able to
> run efficiently.
>
>> Please take a look at how x86 does cpuid masking :).
>>
>> In fact, I'm not 100% convinced that it's a good idea to link cpuid /
>> feature list exposure to the guest and actual feature implementation
>> inside the guest together. On POWER there is a patch set pending that
>> implements these two things separately - admittedly mostly because
>> hardware sucks and we can't change the PVR.
> That is maybe the big difference with s390. The cpuid in the S390 case is not
> directly comparable with the processor version register of POWER.
>
> In the S390 world we have a well defined CPU model room spanned by the machine
> type and its GA count. Thus we can define a bijective mapping between
> (type, ga) <-> (cpuid, ibc, facility set). From type and ga we form the model
> name which BTW is meaningful also for a human user.

Same thing as POWER.

>
> By means of this name, a management interface (libvirt) will decide whether
> migration to a remote hypervisor is a good idea or not. For that it just needs
> to check whether the current model of the guest on the source hypervisor
> ("query-cpu-model") is contained in the supported model list of the target
> hypervisor ("query-cpu-definitions").

I don't think this works, since QEMU should always return all the cpu 
definitions it's aware of on query-cpu-definitions, not just the ones 
that it thinks may be compatible with the host at a random point in time.

Please check with the x86 people to find out how they do this.

>
>>>> I also don't quite grasp what the story behind IBC is. Do you actually
>>>> block instructions? Where do you match instructions that have to get
>>>> blocked with instructions that user space wants to see exposed?
>>>>
>>> Instruction Blocking Control is a feature that was first introduced with the 2097 (IBM System
>>> z10.) The IBC value is part of the SIE state. Just consider it as a kind of parameter that
>>> allows only instructions that have been implemented up to a certain cpu type and GA level to
>>> be executed; all other opcodes will end in an illegal opcode abort. E.g. take the
>>> "Transactional Memory" instructions, they are implemented since type 2827, GA1
>>> (IBM zEnterprise EC12.). The IBC value has 12 bits: 8 for the type and 4 for the GA level.
>>> 0x001 means it's a z10, GA1. The value 0x021 means it's a 2827 (CMOS generation 12 is 0x02) and
>>> GA1 and so forth. A guest running with IBC value 0x012 (z196 GA2) will not be able to use
>>> TE instructions in contrast to a guest running with IBC value 0x022 given the host supports
>>> it.
>> That sounds very similar to the "compat" cpu property that Alexey is
>> introducing for POWER. Maybe we can model it identically?
> I think it is something different. With "compat" one might be able to express some kind
> of compatibility between two processors of different generations, upon which
> the management interface can draw conclusions if migration makes sense or not.
>
> The IBC works totally differently. It enforces the instruction set defined for TYPE-GA.

Yes, which is the same as the PCR register on POWER which the "compat" 
option controls. I think we can simplify s390x because it's not as 
broken as POWER in what we can fake to the guest, but I think you should 
at least be aware of the concepts that are around.


Alex

next prev parent reply	other threads:[~2014-05-19 11:48 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-05-13 14:58 [Qemu-devel] [PATCH v1 RFC 0/6] KVM: s390: cpu model implementation Michael Mueller
2014-05-13 14:58 ` [Qemu-devel] [PATCH v1 RFC 1/6] s390/sclp: add sclp_get_ibc function Michael Mueller
2014-05-13 14:58 ` [Qemu-devel] [PATCH v1 RFC 2/6] KVM: s390: split SIE state guest prefix field Michael Mueller
2014-05-13 14:58 ` [Qemu-devel] [PATCH v1 RFC 3/6] KVM: s390: use facilities and cpu_id per KVM Michael Mueller
2014-05-16 11:55   ` Alexander Graf
2014-05-16 14:46     ` Michael Mueller
2014-05-16 14:49       ` Alexander Graf
2014-05-16 16:09         ` Michael Mueller
2014-05-16 20:35           ` Alexander Graf
2014-05-19 10:13             ` Michael Mueller
2014-05-19 10:41               ` Alexander Graf
2014-05-19 11:29                 ` Michael Mueller
2014-05-19 11:35                   ` Alexander Graf
2014-05-13 14:58 ` [Qemu-devel] [PATCH v1 RFC 4/6] KVM: s390: add ibc api Michael Mueller
2014-05-13 14:58 ` [Qemu-devel] [PATCH v1 RFC 5/6] KVM: s390: initial implementation of soft facilities Michael Mueller
2014-05-13 14:58 ` [Qemu-devel] [PATCH v1 RFC 6/6] KVM: s390: add cpu model support Michael Mueller
2014-05-16 12:08   ` Alexander Graf
2014-05-16 15:39     ` Michael Mueller
2014-05-16 20:31       ` Alexander Graf
2014-05-19 10:53         ` Michael Mueller
2014-05-19 11:48           ` Alexander Graf [this message]
2014-05-19 14:18             ` Michael Mueller
2014-05-19 14:49               ` Alexander Graf
2014-05-19 17:03                 ` Michael Mueller
2014-05-19 20:14                   ` Alexander Graf
2014-05-20 10:02                     ` Michael Mueller
2014-05-20 10:10                       ` Alexander Graf
2014-05-21 12:56                         ` Michael Mueller
2014-05-21 13:22                           ` Alexander Graf
2014-05-22  8:23                             ` Michael Mueller
2014-05-22  8:53                               ` Paolo Bonzini
2014-05-22 12:29                                 ` Michael Mueller
2014-05-22 20:36                                 ` Christian Borntraeger
2014-05-22 22:39                                   ` Alexander Graf
2014-05-16 11:32 ` [Qemu-devel] [PATCH v1 RFC 0/6] KVM: s390: cpu model implementation Christian Borntraeger
2014-05-16 14:49   ` Michael Mueller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5379EF78.9040209@suse.de \
    --to=agraf@suse.de \
    --cc=afaerber@suse.de \
    --cc=borntraeger@de.ibm.com \
    --cc=cornelia.huck@de.ibm.com \
    --cc=gleb@kernel.org \
    --cc=jjherne@linux.vnet.ibm.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-s390@vger.kernel.org \
    --cc=mimu@linux.vnet.ibm.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=rth@twiddle.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).