Re: [PATCH v3 2/2] KVM: x86/mmu: Recover TDP MMU NX huge pages using MMU read lock

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Sean Christopherson <seanjc@google.com>
To: Vipin Sharma <vipinsh@google.com>
Cc: pbonzini@redhat.com, dmatlack@google.com, kvm@vger.kernel.org,
	 linux-kernel@vger.kernel.org
Subject: Re: [PATCH v3 2/2] KVM: x86/mmu: Recover TDP MMU NX huge pages using MMU read lock
Date: Wed, 30 Oct 2024 07:28:28 -0700	[thread overview]
Message-ID: <ZyJCjJx2lxnEnDwa@google.com> (raw)
In-Reply-To: <20240906204515.3276696-3-vipinsh@google.com>

On Fri, Sep 06, 2024, Vipin Sharma wrote:
> diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
> index 455caaaa04f5..fc597f66aa11 100644
> --- a/arch/x86/kvm/mmu/mmu.c
> +++ b/arch/x86/kvm/mmu/mmu.c
> @@ -7317,8 +7317,8 @@ static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel
>  	return err;
>  }
>  
> -void kvm_recover_nx_huge_pages(struct kvm *kvm, struct list_head *pages,
> -			       unsigned long nr_pages)
> +void kvm_recover_nx_huge_pages(struct kvm *kvm, bool shared,
> +			       struct list_head *pages, unsigned long nr_pages)
>  {
>  	struct kvm_memory_slot *slot;
>  	int rcu_idx;
> @@ -7329,7 +7329,10 @@ void kvm_recover_nx_huge_pages(struct kvm *kvm, struct list_head *pages,
>  	ulong to_zap;
>  
>  	rcu_idx = srcu_read_lock(&kvm->srcu);
> -	write_lock(&kvm->mmu_lock);
> +	if (shared)


Hmm, what if we do this?

enum kvm_mmu_types {
	KVM_SHADOW_MMU,
#ifdef CONFIG_X86_64
	KVM_TDP_MMU,
#endif
	KVM_NR_MMU_TYPES,
};

#ifndef CONFIG_X86_64
#define KVM_TDP_MMU -1
#endif

And then this becomes:

	if (mmu_type == KVM_TDP_MMU)
		
> +		read_lock(&kvm->mmu_lock);
> +	else
> +		write_lock(&kvm->mmu_lock);
>  
>  	/*
>  	 * Zapping TDP MMU shadow pages, including the remote TLB flush, must
> @@ -7341,8 +7344,13 @@ void kvm_recover_nx_huge_pages(struct kvm *kvm, struct list_head *pages,
>  	ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
>  	to_zap = ratio ? DIV_ROUND_UP(nr_pages, ratio) : 0;
>  	for ( ; to_zap; --to_zap) {
> -		if (list_empty(pages))
> +		if (tdp_mmu_enabled)

Shouldn't this be?

		if (shared)

Or if we do the above

		if (mmu_type == KVM_TDP_MMU)
			 
Actually, better idea (sans comments)

	if (mmu_type == KVM_TDP_MMU) {
		read_lock(&kvm->mmu_lock);
		kvm_tdp_mmu_pages_lock(kvm);
	} else {
		write_lock(&kvm->mmu_lock);
	}

	rcu_read_lock();

	ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
	to_zap = ratio ? DIV_ROUND_UP(possible_nx->nr_pages, ratio) : 0;
	for ( ; to_zap; --to_zap) {
		if (list_empty(possible_nx->pages))
			break;

                ...

		/* Blah blah blah. */
		if (mmu_type == KVM_TDP_MMU)
			kvm_tdp_mmu_pages_unlock(kvm);

                ...

		/* Blah blah blah. */
		if (mmu_type == KVM_TDP_MMU)
			kvm_tdp_mmu_pages_lock(kvm);
	}
	kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);

	rcu_read_unlock();

	if (mmu_type == KVM_TDP_MMU) {
		kvm_tdp_mmu_pages_unlock(kvm);
		read_unlock(&kvm->mmu_lock);
	} else {
		write_unlock(&kvm->mmu_lock);
	}
	srcu_read_unlock(&kvm->srcu, rcu_idx);

> @@ -825,23 +835,51 @@ static void tdp_mmu_zap_root(struct kvm *kvm, struct kvm_mmu_page *root,
>  	rcu_read_unlock();
>  }
>  
> -bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
> +bool kvm_tdp_mmu_zap_possible_nx_huge_page(struct kvm *kvm,

This rename, and any refactoring that is associated with said rename, e.g. comments,
belongs in a separate patch.

> +					   struct kvm_mmu_page *sp)
>  {
> -	u64 old_spte;
> +	struct tdp_iter iter = {
> +		.old_spte = sp->ptep ? kvm_tdp_mmu_read_spte(sp->ptep) : 0,
> +		.sptep = sp->ptep,
> +		.level = sp->role.level + 1,
> +		.gfn = sp->gfn,
> +		.as_id = kvm_mmu_page_as_id(sp),
> +	};
> +
> +	lockdep_assert_held_read(&kvm->mmu_lock);

Newline here, to isolate the lockdep assertion from the functional code.

> +	if (WARN_ON_ONCE(!is_tdp_mmu_page(sp)))
> +		return false;
>  
>  	/*
> -	 * This helper intentionally doesn't allow zapping a root shadow page,
> -	 * which doesn't have a parent page table and thus no associated entry.
> +	 * Root shadow pages don't a parent page table and thus no associated

Missed a word or three.

> +	 * entry, but they can never be possible NX huge pages.
>  	 */
>  	if (WARN_ON_ONCE(!sp->ptep))
>  		return false;
>  
> -	old_spte = kvm_tdp_mmu_read_spte(sp->ptep);
> -	if (WARN_ON_ONCE(!is_shadow_present_pte(old_spte)))
> +	/*
> +	 * Since mmu_lock is held in read mode, it's possible another task has
> +	 * already modified the SPTE. Zap the SPTE if and only if the SPTE
> +	 * points at the SP's page table, as checking  shadow-present isn't
> +	 * sufficient, e.g. the SPTE could be replaced by a leaf SPTE, or even
> +	 * another SP. Note, spte_to_child_pt() also checks that the SPTE is
> +	 * shadow-present, i.e. guards against zapping a frozen SPTE.
> +	 */
> +	if ((tdp_ptep_t)sp->spt != spte_to_child_pt(iter.old_spte, iter.level))
>  		return false;
>  
> -	tdp_mmu_set_spte(kvm, kvm_mmu_page_as_id(sp), sp->ptep, old_spte,
> -			 SHADOW_NONPRESENT_VALUE, sp->gfn, sp->role.level + 1);
> +	/*
> +	 * If a different task modified the SPTE, then it should be impossible
> +	 * for the SPTE to still be used for the to-be-zapped SP. Non-leaf
> +	 * SPTEs don't have Dirty bits, KVM always sets the Accessed bit when
> +	 * creating non-leaf SPTEs, and all other bits are immutable for non-
> +	 * leaf SPTEs, i.e. the only legal operations for non-leaf SPTEs are
> +	 * zapping and replacement.
> +	 */
> +	if (tdp_mmu_set_spte_atomic(kvm, &iter, SHADOW_NONPRESENT_VALUE)) {
> +		WARN_ON_ONCE((tdp_ptep_t)sp->spt == spte_to_child_pt(iter.old_spte, iter.level));
> +		return false;
> +	}
>  
>  	return true;
>  }

     prev parent reply	other threads:[~2024-10-30 14:28 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-09-06 20:45 [PATCH v3 0/2] KVM: x86/mmu: Run NX huge page recovery under MMU read lock Vipin Sharma
2024-09-06 20:45 ` [PATCH v3 1/2] KVM: x86/mmu: Track TDP MMU NX huge pages separately Vipin Sharma
2024-10-30 14:12   ` Sean Christopherson
2024-10-30 14:25     ` Sean Christopherson
2024-09-06 20:45 ` [PATCH v3 2/2] KVM: x86/mmu: Recover TDP MMU NX huge pages using MMU read lock Vipin Sharma
2024-09-08 23:29   ` kernel test robot
2024-09-09 16:37     ` Vipin Sharma
2024-10-30 14:28   ` Sean Christopherson [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ZyJCjJx2lxnEnDwa@google.com \
    --to=seanjc@google.com \
    --cc=dmatlack@google.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=vipinsh@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.