From mboxrd@z Thu Jan 1 00:00:00 1970
Return-Path: 
Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand
	id S932878AbcBXNoB (ORCPT );
	Wed, 24 Feb 2016 08:44:01 -0500
Received: from mx1.redhat.com ([209.132.183.28]:33176 "EHLO mx1.redhat.com"
	rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP
	id S932774AbcBXNn6 (ORCPT );
	Wed, 24 Feb 2016 08:43:58 -0500
Subject: Re: [PATCH 01/12] KVM: MMU: Fix ubsan warnings
To: Mike Krinkin 
References: <1456319873-34182-1-git-send-email-pbonzini@redhat.com>
 <1456319873-34182-2-git-send-email-pbonzini@redhat.com>
 <20160224134205.GA8175@kmu-ThinkPad-X230>
Cc: linux-kernel@vger.kernel.org, kvm@vger.kernel.org,
	yoshikawa_takuya_b1@lab.ntt.co.jp, guangrong.xiao@linux.intel.com,
	mtosatti@redhat.com
From: Paolo Bonzini 
Message-ID: <56CDB39A.50808@redhat.com>
Date: Wed, 24 Feb 2016 14:43:54 +0100
User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101
 Thunderbird/38.5.0
MIME-Version: 1.0
In-Reply-To: <20160224134205.GA8175@kmu-ThinkPad-X230>
Content-Type: text/plain; charset=windows-1252
Content-Transfer-Encoding: 7bit
Sender: linux-kernel-owner@vger.kernel.org
List-ID: 
X-Mailing-List: linux-kernel@vger.kernel.org

On 24/02/2016 14:42, Mike Krinkin wrote:
> Could you also merge a fix for the following ubsan warning (I
> reported this one too, but it attracted a bit less attention):
> 
> [ 168.791851] ================================================================================
> [ 168.791862] UBSAN: Undefined behaviour in arch/x86/kvm/paging_tmpl.h:252:15
> [ 168.791866] index 4 is out of range for type 'u64 [4]'
> [ 168.791871] CPU: 0 PID: 2950 Comm: qemu-system-x86 Tainted: G O L 4.5.0-rc5-next-20160222 #7
> [ 168.791873] Hardware name: LENOVO 23205NG/23205NG, BIOS G2ET95WW (2.55 ) 07/09/2013
> [ 168.791876] 0000000000000000 ffff8801cfcaf208 ffffffff81c9f780 0000000041b58ab3
> [ 168.791882] ffffffff82eb2cc1 ffffffff81c9f6b4 ffff8801cfcaf230 ffff8801cfcaf1e0
> [ 168.791886] 0000000000000004 0000000000000001 0000000000000000 ffffffffa1981600
> [ 168.791891] Call Trace:
> [ 168.791899] [] dump_stack+0xcc/0x12c
> [ 168.791904] [] ? _atomic_dec_and_lock+0xc4/0xc4
> [ 168.791910] [] ubsan_epilogue+0xd/0x8a
> [ 168.791914] [] __ubsan_handle_out_of_bounds+0x15c/0x1a3
> [ 168.791918] [] ? __ubsan_handle_shift_out_of_bounds+0x2bd/0x2bd
> [ 168.791922] [] ? get_user_pages_fast+0x2bf/0x360
> [ 168.791954] [] ? kvm_largepages_enabled+0x30/0x30 [kvm]
> [ 168.791958] [] ? __get_user_pages_fast+0x360/0x360
> [ 168.791987] [] paging64_walk_addr_generic+0x1b28/0x2600 [kvm]
> [ 168.792014] [] ? init_kvm_mmu+0x1100/0x1100 [kvm]
> [ 168.792019] [] ? debug_check_no_locks_freed+0x350/0x350
> [ 168.792044] [] ? init_kvm_mmu+0x1100/0x1100 [kvm]
> [ 168.792076] [] paging64_gva_to_gpa+0x7d/0x110 [kvm]
> [ 168.792121] [] ? paging64_walk_addr_generic+0x2600/0x2600 [kvm]
> [ 168.792130] [] ? debug_lockdep_rcu_enabled+0x7b/0x90
> [ 168.792178] [] emulator_read_write_onepage+0x27a/0x1150 [kvm]
> [ 168.792208] [] ? __kvm_read_guest_page+0x54/0x70 [kvm]
> [ 168.792234] [] ? kvm_task_switch+0x160/0x160 [kvm]
> [ 168.792238] [] ? debug_lockdep_rcu_enabled+0x7b/0x90
> [ 168.792263] [] emulator_read_write+0xe7/0x6d0 [kvm]
> [ 168.792290] [] ? em_cr_write+0x230/0x230 [kvm]
> [ 168.792314] [] emulator_write_emulated+0x15/0x20 [kvm]
> [ 168.792340] [] segmented_write+0xf8/0x130 [kvm]
> [ 168.792367] [] ? em_lgdt+0x20/0x20 [kvm]
> [ 168.792374] [] ? vmx_read_guest_seg_ar+0x42/0x1e0 [kvm_intel]
> [ 168.792400] [] writeback+0x3f2/0x700 [kvm]
> [ 168.792424] [] ? em_sidt+0xa0/0xa0 [kvm]
> [ 168.792449] [] ? x86_decode_insn+0x1b3d/0x4f70 [kvm]
> [ 168.792474] [] x86_emulate_insn+0x572/0x3010 [kvm]
> [ 168.792499] [] x86_emulate_instruction+0x3bd/0x2110 [kvm]
> [ 168.792524] [] ? reexecute_instruction.part.110+0x2e0/0x2e0 [kvm]
> [ 168.792532] [] handle_ept_misconfig+0x61/0x460 [kvm_intel]
> [ 168.792539] [] ? handle_pause+0x450/0x450 [kvm_intel]
> [ 168.792546] [] vmx_handle_exit+0xd6a/0x1ad0 [kvm_intel]
> [ 168.792572] [] ? kvm_arch_vcpu_ioctl_run+0xbdc/0x6090 [kvm]
> [ 168.792597] [] kvm_arch_vcpu_ioctl_run+0xd3d/0x6090 [kvm]
> [ 168.792621] [] ? kvm_arch_vcpu_ioctl_run+0xbdc/0x6090 [kvm]
> [ 168.792627] [] ? __ww_mutex_lock_interruptible+0x1630/0x1630
> [ 168.792651] [] ? kvm_arch_vcpu_runnable+0x4f0/0x4f0 [kvm]
> [ 168.792656] [] ? preempt_notifier_unregister+0x190/0x190
> [ 168.792681] [] ? kvm_arch_vcpu_load+0x127/0x650 [kvm]
> [ 168.792704] [] kvm_vcpu_ioctl+0x553/0xda0 [kvm]
> [ 168.792727] [] ? vcpu_put+0x40/0x40 [kvm]
> [ 168.792732] [] ? debug_check_no_locks_freed+0x350/0x350
> [ 168.792735] [] ? _raw_spin_unlock+0x27/0x40
> [ 168.792740] [] ? handle_mm_fault+0x1673/0x2e40
> [ 168.792744] [] ? trace_hardirqs_on_caller+0x478/0x6c0
> [ 168.792747] [] ? trace_hardirqs_on+0xd/0x10
> [ 168.792751] [] ? debug_lockdep_rcu_enabled+0x7b/0x90
> [ 168.792756] [] do_vfs_ioctl+0x1b0/0x12b0
> [ 168.792759] [] ? ioctl_preallocate+0x210/0x210
> [ 168.792763] [] ? __fget+0x273/0x4a0
> [ 168.792766] [] ? __fget+0x50/0x4a0
> [ 168.792770] [] ? __fget_light+0x96/0x2b0
> [ 168.792773] [] SyS_ioctl+0x79/0x90
> [ 168.792777] [] entry_SYSCALL_64_fastpath+0x23/0xc1
> [ 168.792780] ================================================================================
> 
> This seems to be a typo, and the following fix solves the problem for me:
> 
> diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
> index 6c9fed9..2ce4f05 100644
> --- a/arch/x86/kvm/paging_tmpl.h
> +++ b/arch/x86/kvm/paging_tmpl.h
> @@ -249,7 +249,7 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
>  			return ret;
>  
>  		kvm_vcpu_mark_page_dirty(vcpu, table_gfn);
> -		walker->ptes[level] = pte;
> +		walker->ptes[level - 1] = pte;
>  	}
>  	return 0;
>  }

This patch is correct---good catch!  Can you resend it with a
"Signed-off-by: Mike Krinkin " line?

Thanks,

Paolo

> On Wed, Feb 24, 2016 at 02:17:42PM +0100, Paolo Bonzini wrote:
>> kvm_mmu_pages_init is doing some really yucky stuff.  It is setting
>> up a sentinel for mmu_pages_clear_parents; however, because of a) the
>> way levels are numbered starting from 1 and b) the way mmu_page_path
>> sizes its arrays with PT64_ROOT_LEVEL-1 elements, the access can be
>> out of bounds.  This is harmless because the code overwrites up to the
>> first two elements of parents->idx and these are initialized, and
>> because the sentinel is not needed in this case---mmu_pages_clear_parents
>> exits anyway when it gets to the end of the array.  However ubsan
>> complains, and everyone else should too.
>>
>> This fix does three things.  First it makes the mmu_page_path arrays
>> PT64_ROOT_LEVEL elements in size, so that we can write to them without
>> checking the level in advance.  Second it disintegrates kvm_mmu_pages_init
>> between mmu_unsync_walk (to reset the struct kvm_mmu_pages) and
>> for_each_sp (to place the NULL sentinel at the end of the current path).
>> This is okay because the mmu_page_path is only used in
>> mmu_pages_clear_parents; mmu_pages_clear_parents itself is called within
>> a for_each_sp iterator, and hence always after a call to mmu_pages_next.
>> Third it changes mmu_pages_clear_parents to just use the sentinel to
>> stop iteration, without checking the bounds on level.
>>
>> Reported-by: Sasha Levin 
>> Reported-by: Mike Krinkin 
>> Signed-off-by: Paolo Bonzini 
>> ---
>>  arch/x86/kvm/mmu.c | 57 +++++++++++++++++++++++++++++++-----------------------
>>  1 file changed, 33 insertions(+), 24 deletions(-)
>>
>> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
>> index 07f4c26a10d3..4dee855897cf 100644
>> --- a/arch/x86/kvm/mmu.c
>> +++ b/arch/x86/kvm/mmu.c
>> @@ -1843,6 +1843,7 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
>>  static int mmu_unsync_walk(struct kvm_mmu_page *sp,
>>  			   struct kvm_mmu_pages *pvec)
>>  {
>> +	pvec->nr = 0;
>>  	if (!sp->unsync_children)
>>  		return 0;
>>
>> @@ -1956,13 +1957,12 @@ static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn)
>>  }
>>
>>  struct mmu_page_path {
>> -	struct kvm_mmu_page *parent[PT64_ROOT_LEVEL-1];
>> -	unsigned int idx[PT64_ROOT_LEVEL-1];
>> +	struct kvm_mmu_page *parent[PT64_ROOT_LEVEL];
>> +	unsigned int idx[PT64_ROOT_LEVEL];
>>  };
>>
>>  #define for_each_sp(pvec, sp, parents, i)			\
>> -		for (i = mmu_pages_next(&pvec, &parents, -1),	\
>> -			sp = pvec.page[i].sp;			\
>> +		for (i = mmu_pages_first(&pvec, &parents);	\
>>  			i < pvec.nr && ({ sp = pvec.page[i].sp; 1;});	\
>>  			i = mmu_pages_next(&pvec, &parents, i))
>>
>> @@ -1974,19 +1974,41 @@ static int mmu_pages_next(struct kvm_mmu_pages *pvec,
>>
>>  	for (n = i+1; n < pvec->nr; n++) {
>>  		struct kvm_mmu_page *sp = pvec->page[n].sp;
>> +		unsigned idx = pvec->page[n].idx;
>> +		int level = sp->role.level;
>>
>> -		if (sp->role.level == PT_PAGE_TABLE_LEVEL) {
>> -			parents->idx[0] = pvec->page[n].idx;
>> -			return n;
>> -		}
>> +		parents->idx[level-1] = idx;
>> +		if (level == PT_PAGE_TABLE_LEVEL)
>> +			break;
>>
>> -		parents->parent[sp->role.level-2] = sp;
>> -		parents->idx[sp->role.level-1] = pvec->page[n].idx;
>> +		parents->parent[level-2] = sp;
>>  	}
>>
>>  	return n;
>>  }
>>
>> +static int mmu_pages_first(struct kvm_mmu_pages *pvec,
>> +			   struct mmu_page_path *parents)
>> +{
>> +	struct kvm_mmu_page *sp;
>> +	int level;
>> +
>> +	if (pvec->nr == 0)
>> +		return 0;
>> +
>> +	sp = pvec->page[0].sp;
>> +	level = sp->role.level;
>> +	WARN_ON(level == PT_PAGE_TABLE_LEVEL);
>> +
>> +	parents->parent[level-2] = sp;
>> +
>> +	/* Also set up a sentinel.  Further entries in pvec are all
>> +	 * children of sp, so this element is never overwritten.
>> +	 */
>> +	parents->parent[level-1] = NULL;
>> +	return mmu_pages_next(pvec, parents, 0);
>> +}
>> +
>>  static void mmu_pages_clear_parents(struct mmu_page_path *parents)
>>  {
>>  	struct kvm_mmu_page *sp;
>> @@ -1994,22 +2016,13 @@ static void mmu_pages_clear_parents(struct mmu_page_path *parents)
>>
>>  	do {
>>  		unsigned int idx = parents->idx[level];
>> -
>>  		sp = parents->parent[level];
>>  		if (!sp)
>>  			return;
>>
>>  		clear_unsync_child_bit(sp, idx);
>>  		level++;
>> -	} while (level < PT64_ROOT_LEVEL-1 && !sp->unsync_children);
>> -}
>> -
>> -static void kvm_mmu_pages_init(struct kvm_mmu_page *parent,
>> -			       struct mmu_page_path *parents,
>> -			       struct kvm_mmu_pages *pvec)
>> -{
>> -	parents->parent[parent->role.level-1] = NULL;
>> -	pvec->nr = 0;
>> +	} while (!sp->unsync_children);
>>  }
>>
>>  static void mmu_sync_children(struct kvm_vcpu *vcpu,
>> @@ -2021,7 +2034,6 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu,
>>  	struct kvm_mmu_pages pages;
>>  	LIST_HEAD(invalid_list);
>>
>> -	kvm_mmu_pages_init(parent, &parents, &pages);
>>  	while (mmu_unsync_walk(parent, &pages)) {
>>  		bool protected = false;
>>
>> @@ -2037,7 +2049,6 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu,
>>  		}
>>  		kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
>>  		cond_resched_lock(&vcpu->kvm->mmu_lock);
>> -		kvm_mmu_pages_init(parent, &parents, &pages);
>>  	}
>>  }
>>
>> @@ -2269,7 +2280,6 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
>>  	if (parent->role.level == PT_PAGE_TABLE_LEVEL)
>>  		return 0;
>>
>> -	kvm_mmu_pages_init(parent, &parents, &pages);
>>  	while (mmu_unsync_walk(parent, &pages)) {
>>  		struct kvm_mmu_page *sp;
>>
>> @@ -2278,7 +2288,6 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
>>  			mmu_pages_clear_parents(&parents);
>>  			zapped++;
>>  		}
>> -		kvm_mmu_pages_init(parent, &parents, &pages);
>>  	}
>>
>>  	return zapped;
>> --
>> 1.8.3.1
>>
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe kvm" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at http://vger.kernel.org/majordomo-info.html
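
The sentinel scheme the patch introduces can also be illustrated outside the
kernel.  Below is a minimal, self-contained C sketch (illustration only:
struct page_node, struct page_path, clear_parents and the sample index values
are invented stand-ins, while PT64_ROOT_LEVEL, the parent[level-2]/idx[level-1]
layout and the NULL sentinel at parent[level-1] mirror the quoted diff).
Because the arrays have PT64_ROOT_LEVEL entries rather than PT64_ROOT_LEVEL-1,
the topmost page can always store a NULL one slot above itself, and the upward
walk stops at that sentinel instead of checking bounds on level.  The same
levels-start-at-1, arrays-start-at-0 convention is why the paging_tmpl.h fix
above indexes walker->ptes[level - 1].

#include <stdio.h>

#define PT64_ROOT_LEVEL     4
#define PT_PAGE_TABLE_LEVEL 1

struct page_node {
	int level;                /* 1 = leaf page table, up to PT64_ROOT_LEVEL = root */
	unsigned unsync_children; /* simplified stand-in for the kernel's counter */
};

struct page_path {
	/* Sized PT64_ROOT_LEVEL, not PT64_ROOT_LEVEL-1, so parent[level-1]
	 * is always a valid slot for the NULL sentinel. */
	struct page_node *parent[PT64_ROOT_LEVEL];
	unsigned int idx[PT64_ROOT_LEVEL];
};

/* Walk upward from the leaf, clearing one "unsync" mark per parent; stop at
 * the NULL sentinel instead of testing level against the array size. */
static void clear_parents(struct page_path *parents)
{
	struct page_node *sp;
	int level = PT_PAGE_TABLE_LEVEL - 1;

	do {
		unsigned int idx = parents->idx[level];

		sp = parents->parent[level];
		if (!sp)
			return;		/* hit the sentinel */

		printf("clear child %u of the level-%d page\n", idx, sp->level);
		sp->unsync_children--;
		level++;
	} while (!sp->unsync_children);
}

int main(void)
{
	/* A three-level path: root (level 3) -> dir (level 2) -> leaf (level 1). */
	struct page_node root = { 3, 1 };
	struct page_node dir  = { 2, 1 };
	struct page_path parents = { { 0 }, { 0 } };

	/* As in mmu_pages_first above: the topmost page goes in parent[level-2],
	 * and parent[level-1] becomes the NULL sentinel that ends the walk. */
	parents.parent[root.level - 2] = &root;
	parents.parent[root.level - 1] = NULL;

	/* As in mmu_pages_next above: idx[level-1] records a page's slot in its
	 * parent, and parent[level-2] records the page itself. */
	parents.parent[dir.level - 2] = &dir;
	parents.idx[dir.level - 1] = 7;			/* dir's slot in root (made up) */
	parents.idx[PT_PAGE_TABLE_LEVEL - 1] = 5;	/* leaf's slot in dir (made up) */

	clear_parents(&parents);
	return 0;
}

Compiled and run, this prints one "clear" line per parent level and then stops
when it reaches the NULL entry, which is the same termination condition the
reworked mmu_pages_clear_parents relies on.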