From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:55529) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1VGmCC-0004gh-Qc for qemu-devel@nongnu.org; Tue, 03 Sep 2013 04:42:50 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1VGmC6-0004B7-Fi for qemu-devel@nongnu.org; Tue, 03 Sep 2013 04:42:44 -0400 Message-ID: <5225A0F6.40506@suse.de> Date: Tue, 03 Sep 2013 10:42:30 +0200 From: =?ISO-8859-15?Q?Andreas_F=E4rber?= MIME-Version: 1.0 References: <1378193502-4968-1-git-send-email-aik@ozlabs.ru> In-Reply-To: <1378193502-4968-1-git-send-email-aik@ozlabs.ru> Content-Type: text/plain; charset=ISO-8859-15 Content-Transfer-Encoding: quoted-printable Subject: Re: [Qemu-devel] [RFC PATCH] spapr: support time base offset migration List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Alexey Kardashevskiy Cc: Alexander Graf , qemu-devel@nongnu.org, qemu-ppc@nongnu.org, Paolo Bonzini , Paul Mackerras , David Gibson Am 03.09.2013 09:31, schrieb Alexey Kardashevskiy: > This allows guests to have a different timebase origin from the host. >=20 > This is needed for migration, where a guest can migrate from one host > to another and the two hosts might have a different timebase origin. > However, the timebase seen by the guest must not go backwards, and > should go forwards only by a small amount corresponding to the time > taken for the migration. >=20 > This is only supported for recent POWER hardware which has the TBU40 > (timebase upper 40 bits) register. That includes POWER6, 7, 8 but not > 970. >=20 > This adds kvm_access_one_reg() to access a special register which is no= t > in env->spr. >=20 > The feature must be present in the host kernel. >=20 > Signed-off-by: Alexey Kardashevskiy > --- >=20 > This is an RFC but not a final patch. Can break something but I just do= not see what. >=20 > --- > hw/ppc/ppc.c | 49 ++++++++++++++++++++++++++++++++++++++++++++= +++++ > include/hw/ppc/ppc.h | 4 ++++ > target-ppc/kvm.c | 23 +++++++++++++++++++++++ > target-ppc/machine.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ > trace-events | 3 +++ > 5 files changed, 123 insertions(+) >=20 > diff --git a/hw/ppc/ppc.c b/hw/ppc/ppc.c > index 1e3cab3..7d08c9a 100644 > --- a/hw/ppc/ppc.c > +++ b/hw/ppc/ppc.c > @@ -31,6 +31,7 @@ > #include "hw/loader.h" > #include "sysemu/kvm.h" > #include "kvm_ppc.h" > +#include "trace.h" > =20 > //#define PPC_DEBUG_IRQ > #define PPC_DEBUG_TB > @@ -796,6 +797,54 @@ static void cpu_ppc_set_tb_clk (void *opaque, uint= 32_t freq) > cpu_ppc_store_purr(cpu, 0x0000000000000000ULL); > } > =20 > +/* > + * Calculate timebase on the destination side of migration > + * > + * We calculate new timebase offset as shown below: > + * 1) Gtb2 =3D Gtb1 + max(tod2 - tod1, 0) > + * Gtb2 =3D tb2 + off2 > + * Gtb1 =3D tb1 + off1 > + * 2) tb2 + off2 =3D tb1 + off1 + max(tod2 - tod1, 0) > + * 3) off2 =3D tb1 - tb2 + off1 + max(tod2 - tod1, 0) > + * > + * where: > + * Gtb2 - destination guest timebase > + * tb2 - destination host timebase > + * off2 - destination timebase offset > + * tod2 - destination time of the day > + * Gtb1 - source guest timebase > + * tb1 - source host timebase > + * off1 - source timebase offset > + * tod1 - source time of the day > + * > + * The result we want is in @off2 > + * > + * Two conditions must be met for @off2: > + * 1) off2 must be multiple of 2^24 ticks as it will be set via TBU40 = SPR > + * 2) Gtb2 >=3D Gtb1 > + */ > +void cpu_ppc_adjust_tb_offset(ppc_tb_t *tb_env) > +{ > + uint64_t tb2, tod2, off2; > + int ratio =3D tb_env->tb_freq / 1000000; > + struct timeval tv; > + > + tb2 =3D cpu_get_real_ticks(); > + gettimeofday(&tv, NULL); > + tod2 =3D tv.tv_sec * 1000000 + tv.tv_usec; > + > + off2 =3D tb_env->timebase - tb2 + tb_env->tb_offset; > + if (tod2 > tb_env->time_of_the_day) { > + off2 +=3D (tod2 - tb_env->time_of_the_day) * ratio; > + } > + off2 =3D ROUND_UP(off2, 1 << 24); > + > + trace_ppc_tb_adjust(tb_env->tb_offset, off2, > + (int64_t)off2 - tb_env->tb_offset); > + > + tb_env->tb_offset =3D off2; > +} > + > /* Set up (once) timebase frequency (in Hz) */ > clk_setup_cb cpu_ppc_tb_init (CPUPPCState *env, uint32_t freq) > { > diff --git a/include/hw/ppc/ppc.h b/include/hw/ppc/ppc.h > index 132ab97..235871c 100644 > --- a/include/hw/ppc/ppc.h > +++ b/include/hw/ppc/ppc.h > @@ -32,6 +32,9 @@ struct ppc_tb_t { > uint64_t purr_start; > void *opaque; > uint32_t flags; > + /* Cached values for live migration purposes */ > + uint64_t timebase; > + uint64_t time_of_the_day; > }; > =20 > /* PPC Timers flags */ > @@ -46,6 +49,7 @@ struct ppc_tb_t { > */ > =20 > uint64_t cpu_ppc_get_tb(ppc_tb_t *tb_env, uint64_t vmclk, int64_t tb_o= ffset); > +void cpu_ppc_adjust_tb_offset(ppc_tb_t *tb_env); > clk_setup_cb cpu_ppc_tb_init (CPUPPCState *env, uint32_t freq); > /* Embedded PowerPC DCR management */ > typedef uint32_t (*dcr_read_cb)(void *opaque, int dcrn); > diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c > index 7af9e3d..93df955 100644 > --- a/target-ppc/kvm.c > +++ b/target-ppc/kvm.c > @@ -35,6 +35,7 @@ > #include "hw/sysbus.h" > #include "hw/ppc/spapr.h" > #include "hw/ppc/spapr_vio.h" > +#include "hw/ppc/ppc.h" > #include "sysemu/watchdog.h" > =20 > //#define DEBUG_KVM > @@ -761,6 +762,22 @@ static int kvm_put_vpa(CPUState *cs) > } > #endif /* TARGET_PPC64 */ > =20 > +static int kvm_access_one_reg(CPUState *cs, bool set, __u64 id, void *= addr) > +{ > + struct kvm_one_reg reg =3D { > + .id =3D id, > + .addr =3D (uintptr_t)addr, > + }; > + int ret =3D kvm_vcpu_ioctl(cs, set ? KVM_SET_ONE_REG : KVM_GET_ONE= _REG, ®); > + > + if (ret) { > + DPRINTF("Unable to %s time base offset to KVM: %s\n", > + set ? "set" : "get", strerror(errno)); > + } > + > + return ret; > +} > + > int kvm_arch_put_registers(CPUState *cs, int level) > { > PowerPCCPU *cpu =3D POWERPC_CPU(cs); > @@ -873,6 +890,9 @@ int kvm_arch_put_registers(CPUState *cs, int level) > DPRINTF("Warning: Unable to set VPA information to KVM= \n"); > } > } > + > + kvm_access_one_reg(cs, 1, KVM_REG_PPC_TB_OFFSET, > + &env->tb_env->tb_offset); > #endif /* TARGET_PPC64 */ > } > =20 > @@ -1082,6 +1102,9 @@ int kvm_arch_get_registers(CPUState *cs) > DPRINTF("Warning: Unable to get VPA information from K= VM\n"); > } > } > + > + kvm_access_one_reg(cs, 0, KVM_REG_PPC_TB_OFFSET, > + &env->tb_env->tb_offset); > #endif > } > =20 > diff --git a/target-ppc/machine.c b/target-ppc/machine.c > index 12e1512..d1ffc7f 100644 > --- a/target-ppc/machine.c > +++ b/target-ppc/machine.c > @@ -1,5 +1,6 @@ > #include "hw/hw.h" > #include "hw/boards.h" > +#include "hw/ppc/ppc.h" > #include "sysemu/kvm.h" > #include "helper_regs.h" > =20 > @@ -459,6 +460,45 @@ static const VMStateDescription vmstate_tlbmas =3D= { > } > }; > =20 > +static void timebase_pre_save(void *opaque) > +{ > + ppc_tb_t *tb_env =3D opaque; > + struct timeval tv; > + > + gettimeofday(&tv, NULL); > + tb_env->time_of_the_day =3D tv.tv_sec * 1000000 + tv.tv_usec; > + tb_env->timebase =3D cpu_get_real_ticks(); > +} > + > +static int timebase_post_load(void *opaque, int version_id) > +{ > + ppc_tb_t *tb_env =3D opaque; > + > + if (!tb_env) { > + printf("NO TB!\n"); > + return -1; > + } > + cpu_ppc_adjust_tb_offset(tb_env); > + > + return 0; > +} > + > +static const VMStateDescription vmstate_timebase =3D { > + .name =3D "cpu/timebase", > + .version_id =3D 1, > + .minimum_version_id =3D 1, > + .minimum_version_id_old =3D 1, > + .pre_save =3D timebase_pre_save, > + .post_load =3D timebase_post_load, > + .fields =3D (VMStateField []) { > + VMSTATE_UINT64(timebase, ppc_tb_t), > + VMSTATE_INT64(tb_offset, ppc_tb_t), > + VMSTATE_UINT64(time_of_the_day, ppc_tb_t), > + VMSTATE_UINT32_EQUAL(tb_freq, ppc_tb_t), > + VMSTATE_END_OF_LIST() > + }, > +}; > + > const VMStateDescription vmstate_ppc_cpu =3D { > .name =3D "cpu", > .version_id =3D 5, > @@ -498,6 +538,10 @@ const VMStateDescription vmstate_ppc_cpu =3D { > VMSTATE_UINT64_EQUAL(env.insns_flags, PowerPCCPU), > VMSTATE_UINT64_EQUAL(env.insns_flags2, PowerPCCPU), > VMSTATE_UINT32_EQUAL(env.nb_BATs, PowerPCCPU), > + > + /* Time offset */ > + VMSTATE_STRUCT_POINTER(env.tb_env, PowerPCCPU, > + vmstate_timebase, ppc_tb_t *), > VMSTATE_END_OF_LIST() > }, > .subsections =3D (VMStateSubsection []) { Breaks the migration format. ;) You need to bump version_id and use a macro that accepts the version the field was added in as argument. Andreas > diff --git a/trace-events b/trace-events > index 935b953..24cf4d2 100644 > --- a/trace-events > +++ b/trace-events > @@ -1141,6 +1141,9 @@ spapr_iommu_stuff(uint64_t liobn, uint64_t ioba, = uint64_t tce, uint64_t ret) "li > spapr_iommu_xlate(uint64_t liobn, uint64_t ioba, uint64_t tce, unsigne= d perm, unsigned pgsize) "liobn=3D%"PRIx64" 0x%"PRIx64" -> 0x%"PRIx64" pe= rm=3D%u mask=3D%x" > spapr_iommu_new_table(uint64_t liobn, void *tcet, void *table, int fd)= "liobn=3D%"PRIx64" tcet=3D%p table=3D%p fd=3D%d" > =20 > +# hw/ppc/ppc.c > +ppc_tb_adjust(uint64_t offs1, uint64_t offs2, int64_t diff) "adjusted = from %"PRIx64" to %"PRIx64", diff %"PRId64 > + > # util/hbitmap.c > hbitmap_iter_skip_words(const void *hb, void *hbi, uint64_t pos, unsig= ned long cur) "hb %p hbi %p pos %"PRId64" cur 0x%lx" > hbitmap_reset(void *hb, uint64_t start, uint64_t count, uint64_t sbit,= uint64_t ebit) "hb %p items %"PRIu64",%"PRIu64" bits %"PRIu64"..%"PRIu64 >=20 --=20 SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 N=FCrnberg, Germany GF: Jeff Hawn, Jennifer Guild, Felix Imend=F6rffer; HRB 16746 AG N=FCrnbe= rg