From mboxrd@z Thu Jan 1 00:00:00 1970 From: Christian Ehrhardt Date: Wed, 24 Sep 2008 09:24:23 +0000 Subject: exit timing analysis v1 - comments&discussions welcome Message-Id: <48DA0747.3020004@linux.vnet.ibm.com> MIME-Version: 1 Content-Type: multipart/mixed; boundary="------------070001070505000401020905" List-Id: To: kvm-ppc@vger.kernel.org This is a multi-part message in MIME format. --------------070001070505000401020905 Content-Type: text/plain; charset="iso-8859-1"; format="flowed" Content-Transfer-Encoding: quoted-printable Hi everyone, I wrote a patch that tries to get some data where we spent our time.=20 This should give us some data to think about when we talk about=20 virtualization on non hardware assisted platforms like 440. A good example what this data might help us to discuss is the comparison=20 between paravirtualized and non paravirtualized guest (using the magic=20 page mechanism I posted recently). You see that the pv extension reduces=20 the number of exits for instruction emulation by 50%, but unfortunately=20 that only reduced the time we spend for the emulation by ~20%. This data=20 is for you information and comments/discussions are welcome (I expect=20 that we need to trace the data of the emulation per instr. to identify=20 the slow ones and think about their optimization). The basic approach is to read TB[U&L] when entering the guest and when=20 exiting it. The time from exit->enter is the time we spent to handle the=20 exit and accounted per exit type. The time enter->exit is the time we=20 spent in the guest and accounted too. For details I attached the patch I used to measure the timing. I=20 verified it using with serveral assertions I now removed in the final=20 version like "testing for double accounting of an exit" or "testing if=20 we missed to account an exit". Additionally I verified the exit counts=20 with the data reported by our kvm_stat support and that matches. 
Also=20 the sum of accounted time matches the real time spent to run the test=20 (Test was booting a ubuntu guest, logon as root, executing a ls, kill=20 guest). I think this pretty much explains why the guest is sometimes feeling=20 slow :-). Handle this data as semi-experimental and let me know what you think. NO PV support unit is usec traced duration 202,244,233 usec count min max =20 sum avg stddev time% EMULINST 6,423,699 12 247,582 91,732,705 =20 14.2804 241.200 45.36 ITLBVIRT 1,777,242 21 264,257 47,116,557 =20 26.5111 286.040 23.30 DTLBVIRT 1,544,241 22 263,947 41,765,447 =20 27.0459 218.997 20.65 TIMEINGUEST 10,000,000 0 3,673 10,861,792 =20 1.0862 4.790 5.37 MMIO 9,318 47 10,686 3,544,934 =20 380.4394 507.292 1.75 EXTINT 3,523 33 4,475 2,557,183 =20 725.8538 619.026 1.26 DEC 16,937 50 4,866 1,441,084 =20 85.0850 203.856 0.71 DTLBREAL 95,497 13 105 1,349,255 =20 14.1288 2.811 0.67 ITLBREAL 80,622 13 146 1,079,052 =20 13.3841 2.870 0.53 DSI 22,498 11 99 249,554 =20 11.0923 1.975 0.12 DCR 4,854 41 3,758 232,657 =20 47.9310 55.802 0.12 ISI 11,167 11 67 124,210 =20 11.1230 2.242 0.06 SYSCALL 9,459 11 100 113,972 =20 12.0491 2.763 0.06 SIGNAL 635 66 6,025 72,438 =20 114.0756 335.235 0.04 FP_UNAVAIL 308 11 12 3,393 =20 11.0162 0.126 0.00 *never hit USR_PR_INST, HCALL, HALT, DTLBPV, DEBUG With PV Support unit is usec traced duration 136,640,181 usec count min max =20 sum avg stddev time% EMULINST 3,160,786 12 247,587 50,808,972 =20 16.0748 205.445 37.18 ITLBVIRT 1,333,725 21 264,336 35,094,300 =20 26.3130 235.204 25.68 DTLBVIRT 1,212,505 22 16,753 32,382,302 =20 26.7069 51.475 23.70 TIMEINGUEST 6,000,000 0 3,846 7259,180 =20 1.2099 9.317 5.31 MMIO 9,955 47 12,289 3570,654 =20 358.6795 539.759 2.61 EXTINT 2,380 31 5,285 1791,705 =20 752.8172 665.021 1.31 ITLBREAL 80,690 13 247,582 1320,621 =20 16.3666 871.535 0.97 DTLBREAL 93,213 13 108 1315,333 =20 14.1110 2.799 0.96 DTLBPV 46,941 20 3,696 1100,399 =20 23.4422 50.127 0.81 DEC 10,376 50 3,672 928,878 =20 89.5218 
217.384 0.68 SIGNAL 658 66 264,544 339,911 =20 516.5821 10303.957 0.25 DSI 21,799 11 87 242,343 =20 11.1172 2.227 0.18 DCR 5,042 41 258 233,095 =20 46.2307 6.465 0.17 ISI 11,582 11 66 128,625 =20 11.1056 2.133 0.09 SYSCALL 10,052 11 59 120,597 =20 11.9973 2.415 0.09 FP_UNAVAIL 295 11 12 3,247 =20 11.0068 0.082 0.00 HCALL 1 19 19 19 =20 19.0000 0.000 0.00 *never hit USR_PR_INST, HALT, DEBUG --=20 Gr=FCsse / regards,=20 Christian Ehrhardt IBM Linux Technology Center, Open Virtualization --------------070001070505000401020905 Content-Type: text/x-diff; name="kvmppc-exit-timing.diff" Content-Transfer-Encoding: 8bit Content-Disposition: inline; filename="kvmppc-exit-timing.diff" diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -346,6 +346,11 @@ DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear)); DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr)); + + DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu, arch.timing_exit.tv32.tbu)); + DEFINE(VCPU_TIMING_EXIT_TBL, offsetof(struct kvm_vcpu, arch.timing_exit.tv32.tbl)); + DEFINE(VCPU_TIMING_LAST_ENTER_TBU, offsetof(struct kvm_vcpu, arch.timing_last_enter.tv32.tbu)); + DEFINE(VCPU_TIMING_LAST_ENTER_TBL, offsetof(struct kvm_vcpu, arch.timing_last_enter.tv32.tbl)); #endif return 0; diff --git a/arch/powerpc/kvm/booke_guest.c b/arch/powerpc/kvm/booke_guest.c --- a/arch/powerpc/kvm/booke_guest.c +++ b/arch/powerpc/kvm/booke_guest.c @@ -56,6 +56,14 @@ { "halt_wakeup", VCPU_STAT(halt_wakeup) }, { NULL } }; + +u32 last_exit_type; +u32 timing_count_type[__NUMBER_OF_KVM_EXIT_TYPES]; +u64 timing_sum_duration[__NUMBER_OF_KVM_EXIT_TYPES]; +u64 timing_sum_quad_duration[__NUMBER_OF_KVM_EXIT_TYPES]; +u64 timing_min_duration[__NUMBER_OF_KVM_EXIT_TYPES]; +u64 timing_max_duration[__NUMBER_OF_KVM_EXIT_TYPES]; +u64 
timing_last_exit; static const u32 interrupt_msr_mask[16] = { [BOOKE_INTERRUPT_CRITICAL] = MSR_ME, @@ -261,6 +269,9 @@ enum emulation_result er; int r = RESUME_HOST; + /* update before a new last_exit_type is written */ + update_exit_timing(&(vcpu->arch)); + local_irq_enable(); run->exit_reason = KVM_EXIT_UNKNOWN; @@ -285,10 +296,13 @@ * misses before ceding control. */ if (need_resched()) cond_resched(); - if (exit_nr == BOOKE_INTERRUPT_DECREMENTER) + if (exit_nr == BOOKE_INTERRUPT_DECREMENTER) { vcpu->stat.dec_exits++; - else + set_last_exit_type(DEC_EXITS); + } else { vcpu->stat.ext_intr_exits++; + set_last_exit_type(EXT_INTR_EXITS); + } r = RESUME_GUEST; break; @@ -299,6 +313,7 @@ vcpu->arch.esr = vcpu->arch.fault_esr; kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM); r = RESUME_GUEST; + set_last_exit_type(USR_PR_INST); break; } @@ -308,10 +323,12 @@ /* Future optimization: only reload non-volatiles if * they were actually modified by emulation. */ vcpu->stat.emulated_inst_exits++; + set_last_exit_type(EMULATED_INST_EXITS); r = RESUME_GUEST_NV; break; case EMULATE_DO_DCR: run->exit_reason = KVM_EXIT_DCR; + set_last_exit_type(DCR_EXITS); r = RESUME_HOST; break; case EMULATE_FAIL: @@ -331,6 +348,7 @@ case BOOKE_INTERRUPT_FP_UNAVAIL: kvmppc_queue_exception(vcpu, exit_nr); + set_last_exit_type(FP_UNAVAIL); r = RESUME_GUEST; break; @@ -339,6 +357,7 @@ vcpu->arch.esr = vcpu->arch.fault_esr; kvmppc_queue_exception(vcpu, exit_nr); vcpu->stat.dsi_exits++; + set_last_exit_type(DSI_EXITS); r = RESUME_GUEST; break; @@ -346,6 +365,7 @@ vcpu->arch.esr = vcpu->arch.fault_esr; kvmppc_queue_exception(vcpu, exit_nr); vcpu->stat.isi_exits++; + set_last_exit_type(ISI_EXITS); r = RESUME_GUEST; break; @@ -353,9 +373,11 @@ if (vcpu->arch.last_inst == KVM_HYPERCALL_BIN) { kvmppc_do_hypercall(vcpu); vcpu->stat.hcall_exits++; + set_last_exit_type(HCALL_EXITS); } else { kvmppc_queue_exception(vcpu, exit_nr); vcpu->stat.syscall_exits++; + set_last_exit_type(SYSCALL_EXITS); } r = 
RESUME_GUEST; break; @@ -370,6 +392,7 @@ vcpu->arch.pvmem_gpaddr >> KVM_PPCPV_MAGIC_PAGE_SHIFT, 0, KVM_PPCPV_MAGIC_PAGE_FLAGS); vcpu->stat.dtlb_pvmem_miss_exits++; + set_last_exit_type(DTLB_PVMEM_MISS_EXITS); r = RESUME_GUEST; break; } @@ -382,6 +405,7 @@ vcpu->arch.dear = vcpu->arch.fault_dear; vcpu->arch.esr = vcpu->arch.fault_esr; vcpu->stat.dtlb_real_miss_exits++; + set_last_exit_type(DTLB_REAL_MISS_EXITS); r = RESUME_GUEST; break; } @@ -399,11 +423,13 @@ kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid, gtlbe->word2); vcpu->stat.dtlb_virt_miss_exits++; + set_last_exit_type(DTLB_VIRT_MISS_EXITS); r = RESUME_GUEST; } else { /* Guest has mapped and accessed a page which is not * actually RAM. */ r = kvmppc_emulate_mmio(run, vcpu); + set_last_exit_type(MMIO_EXITS); } break; @@ -422,10 +448,12 @@ /* The guest didn't have a mapping for it. */ kvmppc_queue_exception(vcpu, exit_nr); vcpu->stat.itlb_real_miss_exits++; + set_last_exit_type(ITLB_REAL_MISS_EXITS); break; } vcpu->stat.itlb_virt_miss_exits++; + set_last_exit_type(ITLB_VIRT_MISS_EXITS); gfn = tlb_xlate(gtlbe, eaddr) >> PAGE_SHIFT; @@ -458,6 +486,7 @@ mtspr(SPRN_DBSR, dbsr); run->exit_reason = KVM_EXIT_DEBUG; + set_last_exit_type(DEBUG_EXITS); r = RESUME_HOST; break; } @@ -482,6 +511,7 @@ r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV); vcpu->stat.signal_exits++; + set_last_exit_type(SIGNAL_EXITS); } else { vcpu->stat.light_exits++; } @@ -495,6 +525,7 @@ break; case KVM_EXIT_INTR: vcpu->stat.signal_exits++; + set_last_exit_type(SIGNAL_EXITS); break; } } @@ -535,6 +566,8 @@ * real timebase frequency. Accordingly, it must see the state of * CCR1[TCS]. 
*/ vcpu->arch.ccr1 = mfspr(SPRN_CCR1); + + init_exit_timing(&(vcpu->arch)); return 0; } diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -109,6 +109,16 @@ li r6, 1 slw r6, r6, r5 + + /* save exit time */ +..exit_tbu_overflow_loop: + mfspr r7, SPRN_TBRU + mfspr r8, SPRN_TBRL + mfspr r9, SPRN_TBRU + cmpw r9, r7 + bne ..exit_tbu_overflow_loop + stw r8, VCPU_TIMING_EXIT_TBL(r4) + stw r9, VCPU_TIMING_EXIT_TBU(r4) /* Save the faulting instruction and all GPRs for emulation. */ andi. r7, r6, NEED_INST_MASK @@ -415,6 +425,17 @@ lwz r3, VCPU_SPRG7(r4) mtspr SPRN_SPRG7, r3 + /* save enter time */ + +..enter_tbu_overflow_loop: + mfspr r6, SPRN_TBRU + mfspr r7, SPRN_TBRL + mfspr r8, SPRN_TBRU + cmpw r8, r6 + bne ..enter_tbu_overflow_loop + stw r7, VCPU_TIMING_LAST_ENTER_TBL(r4) + stw r8, VCPU_TIMING_LAST_ENTER_TBU(r4) + /* Finish loading guest volatiles and jump to guest. */ lwz r3, VCPU_CTR(r4) mtctr r3 diff --git a/include/asm-powerpc/kvm_host.h b/include/asm-powerpc/kvm_host.h --- a/include/asm-powerpc/kvm_host.h +++ b/include/asm-powerpc/kvm_host.h @@ -25,6 +25,8 @@ #include #include #include +#include +#include #define KVM_MAX_VCPUS 1 #define KVM_MEMORY_SLOTS 32 @@ -66,6 +68,53 @@ u32 halt_wakeup; }; +enum kvm_exit_types { + MMIO_EXITS, + DCR_EXITS, + SIGNAL_EXITS, + ITLB_REAL_MISS_EXITS, + ITLB_VIRT_MISS_EXITS, + DTLB_REAL_MISS_EXITS, + DTLB_PVMEM_MISS_EXITS, + DTLB_VIRT_MISS_EXITS, + SYSCALL_EXITS, + HCALL_EXITS, + ISI_EXITS, + DSI_EXITS, + EMULATED_INST_EXITS, + DEC_EXITS, + EXT_INTR_EXITS, + HALT_WAKEUP, + USR_PR_INST, + FP_UNAVAIL, + DEBUG_EXITS, + TIMEINGUEST, + __NUMBER_OF_KVM_EXIT_TYPES +}; + +static const char* kvm_exit_names[__NUMBER_OF_KVM_EXIT_TYPES] = { + "MMIO", + "DCR", + "SIGNAL", + "ITLBREAL", + "ITLBVIRT", + "DTLBREAL", + "DTLBPV", + "DTLBVIRT", + "SYSCALL", + "HCALL", + "ISI", + "DSI", + "EMULINST", + "DEC", + "EXTINT", + "HALT", 
+ "USR_PR_INST", + "FP_UNAVAIL", + "DEBUG", + "TIMEINGUEST" +}; + struct tlbe { u32 tid; /* Only the low 8 bits are used. */ u32 word0; @@ -75,6 +124,14 @@ struct kvm_arch { }; + +/* allow access to big endian 32bit upper/lower parts and 64bit var */ +typedef union { + u64 tv64; + struct { + u32 tbu, tbl; + } tv32; +} exit_timing_t; struct kvm_vcpu_arch { /* Unmodified copy of the guest's TLB. */ @@ -139,6 +196,10 @@ u32 dbcr0; u32 dbcr1; + /* to trace exit timing */ + exit_timing_t timing_exit; + exit_timing_t timing_last_enter; + u32 last_inst; u32 fault_dear; u32 fault_esr; @@ -165,4 +226,121 @@ int singlestep; }; +/* helpers for instruction timing, allocated in arch/powerpc/kvm/powerpc.c */ +extern u32 last_exit_type; +extern u32 timing_count_type[__NUMBER_OF_KVM_EXIT_TYPES]; +extern u64 timing_sum_duration[__NUMBER_OF_KVM_EXIT_TYPES]; +extern u64 timing_sum_quad_duration[__NUMBER_OF_KVM_EXIT_TYPES]; +extern u64 timing_min_duration[__NUMBER_OF_KVM_EXIT_TYPES]; +extern u64 timing_max_duration[__NUMBER_OF_KVM_EXIT_TYPES]; +extern u64 timing_last_exit; + +static inline void init_exit_timing(struct kvm_vcpu_arch* vcpu_arch) +{ + int i; + last_exit_type=0xDEAD; + for (i=0; i<__NUMBER_OF_KVM_EXIT_TYPES; i++) { + timing_count_type[i]=0; + timing_max_duration[i]=0; + timing_min_duration[i]=0xFFFFFFFF; + timing_sum_duration[i]=0; + timing_sum_quad_duration[i]=0; + } + vcpu_arch->timing_exit.tv64 = 0; + timing_last_exit = 0; + vcpu_arch->timing_last_enter.tv64 = 0; + printk(KERN_ERR"### initialized exit timing (print start timestamp to log) ###\n"); +} + +static inline void set_last_exit_type(int type) +{ + last_exit_type=type; +} + +static inline void print_exit_timing(void) +{ + int i; + u64 min, max; + + printk(KERN_ERR"### start exit timing statistics ###\n"); + for (i=0; i<__NUMBER_OF_KVM_EXIT_TYPES; i++) { + if (timing_min_duration[i]==0xFFFFFFFF) + min = 0; + else + min = timing_min_duration[i]; + if (timing_max_duration[i]==0) + max = 0; + else + max = 
timing_max_duration[i]; + + printk(KERN_ERR"exit type %2d (%12s) count %10d min %10lld max %10lld sum %20lld sum_quad %20lld\n", + i, kvm_exit_names[i], timing_count_type[i], timing_min_duration[i], timing_max_duration[i], + timing_sum_duration[i], timing_sum_quad_duration[i]); + } + printk(KERN_ERR"please post calc: avg = sum/count; stddev = sqrt((sum_quad/count)-(sum_dur/count)²)\n"); + printk(KERN_ERR"### end exit timing statistics ###\n"); +} + +static inline void add_exit_timing(u64 duration, int type) +{ + u64 old; + + do_div(duration, tb_ticks_per_usec); + if (unlikely(duration > 0xFFFFFFFF)) { + printk(KERN_ERR"%s - duration too big, **2 would overflow duration %lld max 4294967295 type %d exit # of type %d (tb_ticks_per_usec=%ld)\n", + __func__, duration, type, timing_count_type[type], tb_ticks_per_usec); + print_exit_timing(); + } + + + timing_count_type[type]++; + + /* sum */ + old = timing_sum_duration[type]; + timing_sum_duration[type] += duration; + if (unlikely(old > timing_sum_duration[type])) { + printk(KERN_ERR"%s - wraparound adding sum of durations old %lld new %lld type %d exit # of type %d\n", + __func__, old, timing_sum_duration[type], type, timing_count_type[type]); + print_exit_timing(); + } + + /* square sum */ + old = timing_sum_quad_duration[type]; + timing_sum_quad_duration[type] += (duration*duration); + if (unlikely(old > timing_sum_quad_duration[type])) { + printk(KERN_ERR"%s - wraparound adding sum of squared durations old %lld new %lld type %d exit # of type %d\n", + __func__, old, timing_sum_quad_duration[type], type, timing_count_type[type]); + print_exit_timing(); + } + + /* set min/max */ + if (unlikely(duration < timing_min_duration[type])) + timing_min_duration[type] = duration; + if (unlikely(duration > timing_max_duration[type])) + timing_max_duration[type] = duration; +} + +static inline void update_exit_timing(struct kvm_vcpu_arch* vcpu_arch) +{ + u64 exit = timing_last_exit; + u64 enter = 
vcpu_arch->timing_last_enter.tv64; + + /* save exit time to use it next exit when the related enter time is known */ + timing_last_exit = vcpu_arch->timing_exit.tv64; + + if (unlikely(last_exit_type==0xDEAD)) + return; /* skip first incomplete enter/exit cycle */ + + /* update statistics for average and standard deviation */ + add_exit_timing((enter - exit), last_exit_type); + /* enter -> timing_last_exit is time spent in guest - log this too */ + add_exit_timing((timing_last_exit - enter), TIMEINGUEST); + + /* report regularly every 1000000 exits*/ + if (unlikely(timing_count_type[TIMEINGUEST] % 1000000 == 0)) { + printk(KERN_ERR"%s - %d exits report timing statistics\n", __func__,timing_count_type[TIMEINGUEST]); + print_exit_timing(); + } +} + #endif /* __POWERPC_KVM_HOST_H__ */ --------------070001070505000401020905--