All of lore.kernel.org
 help / color / mirror / Atom feed
From: David Matlack <dmatlack@google.com>
To: James Houghton <jthoughton@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>,
	Paolo Bonzini <pbonzini@redhat.com>,
	Ankit Agrawal <ankita@nvidia.com>,
	Axel Rasmussen <axelrasmussen@google.com>,
	Catalin Marinas <catalin.marinas@arm.com>,
	David Rientjes <rientjes@google.com>,
	James Morse <james.morse@arm.com>, Jason Gunthorpe <jgg@ziepe.ca>,
	Jonathan Corbet <corbet@lwn.net>, Marc Zyngier <maz@kernel.org>,
	Oliver Upton <oliver.upton@linux.dev>,
	Raghavendra Rao Ananta <rananta@google.com>,
	Ryan Roberts <ryan.roberts@arm.com>,
	Sean Christopherson <seanjc@google.com>,
	Shaoqin Huang <shahuang@redhat.com>,
	Suzuki K Poulose <suzuki.poulose@arm.com>,
	Wei Xu <weixugc@google.com>, Will Deacon <will@kernel.org>,
	Yu Zhao <yuzhao@google.com>, Zenghui Yu <yuzenghui@huawei.com>,
	kvmarm@lists.linux.dev, kvm@vger.kernel.org,
	linux-arm-kernel@lists.infradead.org, linux-doc@vger.kernel.org,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org
Subject: Re: [PATCH v6 01/11] KVM: Add lockless memslot walk to KVM
Date: Thu, 25 Jul 2024 09:39:32 -0700	[thread overview]
Message-ID: <ZqJ_xANKf3bNcaHM@google.com> (raw)
In-Reply-To: <20240724011037.3671523-2-jthoughton@google.com>

On 2024-07-24 01:10 AM, James Houghton wrote:
> Provide flexibility to the architecture to synchronize as optimally as
> they can instead of always taking the MMU lock for writing.
> 
> Architectures that do their own locking must select
> CONFIG_KVM_MMU_NOTIFIER_YOUNG_LOCKLESS.
> 
> The immediate application is to allow architectures to implement the
> test/clear_young MMU notifiers more cheaply.
> 
> Suggested-by: Yu Zhao <yuzhao@google.com>
> Signed-off-by: James Houghton <jthoughton@google.com>

Aside from the cleanup suggestion (which should be in separate patches
anyway):

Reviewed-by: David Matlack <dmatlack@google.com>

> ---
>  include/linux/kvm_host.h |  1 +
>  virt/kvm/Kconfig         |  3 +++
>  virt/kvm/kvm_main.c      | 26 +++++++++++++++++++-------
>  3 files changed, 23 insertions(+), 7 deletions(-)
> 
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 689e8be873a7..8cd80f969cff 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -266,6 +266,7 @@ struct kvm_gfn_range {
>  	gfn_t end;
>  	union kvm_mmu_notifier_arg arg;
>  	bool may_block;
> +	bool lockless;
>  };
>  bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range);
>  bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range);
> diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
> index b14e14cdbfb9..632334861001 100644
> --- a/virt/kvm/Kconfig
> +++ b/virt/kvm/Kconfig
> @@ -100,6 +100,9 @@ config KVM_GENERIC_MMU_NOTIFIER
>         select MMU_NOTIFIER
>         bool
>  
> +config KVM_MMU_NOTIFIER_YOUNG_LOCKLESS
> +       bool
> +
>  config KVM_GENERIC_MEMORY_ATTRIBUTES
>         depends on KVM_GENERIC_MMU_NOTIFIER
>         bool
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index d0788d0a72cc..33f8997a5c29 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -555,6 +555,7 @@ struct kvm_mmu_notifier_range {
>  	on_lock_fn_t on_lock;
>  	bool flush_on_ret;
>  	bool may_block;
> +	bool lockless;
>  };
>  
>  /*
> @@ -609,6 +610,10 @@ static __always_inline kvm_mn_ret_t __kvm_handle_hva_range(struct kvm *kvm,
>  			 IS_KVM_NULL_FN(range->handler)))
>  		return r;
>  
> +	/* on_lock will never be called for lockless walks */
> +	if (WARN_ON_ONCE(range->lockless && !IS_KVM_NULL_FN(range->on_lock)))
> +		return r;
> +
>  	idx = srcu_read_lock(&kvm->srcu);
>  
>  	for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
> @@ -640,15 +645,18 @@ static __always_inline kvm_mn_ret_t __kvm_handle_hva_range(struct kvm *kvm,
>  			gfn_range.start = hva_to_gfn_memslot(hva_start, slot);
>  			gfn_range.end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, slot);
>  			gfn_range.slot = slot;
> +			gfn_range.lockless = range->lockless;
>  
>  			if (!r.found_memslot) {
>  				r.found_memslot = true;
> -				KVM_MMU_LOCK(kvm);
> -				if (!IS_KVM_NULL_FN(range->on_lock))
> -					range->on_lock(kvm);
> -
> -				if (IS_KVM_NULL_FN(range->handler))
> -					goto mmu_unlock;
> +				if (!range->lockless) {
> +					KVM_MMU_LOCK(kvm);
> +					if (!IS_KVM_NULL_FN(range->on_lock))
> +						range->on_lock(kvm);
> +
> +					if (IS_KVM_NULL_FN(range->handler))
> +						goto mmu_unlock;
> +				}
>  			}
>  			r.ret |= range->handler(kvm, &gfn_range);
>  		}
> @@ -658,7 +666,7 @@ static __always_inline kvm_mn_ret_t __kvm_handle_hva_range(struct kvm *kvm,
>  		kvm_flush_remote_tlbs(kvm);
>  
>  mmu_unlock:
> -	if (r.found_memslot)
> +	if (r.found_memslot && !range->lockless)
>  		KVM_MMU_UNLOCK(kvm);
>  
>  	srcu_read_unlock(&kvm->srcu, idx);
> @@ -679,6 +687,8 @@ static __always_inline int kvm_handle_hva_range(struct mmu_notifier *mn,
>  		.on_lock	= (void *)kvm_null_fn,
>  		.flush_on_ret	= true,
>  		.may_block	= false,
> +		.lockless	=
> +			IS_ENABLED(CONFIG_KVM_MMU_NOTIFIER_YOUNG_LOCKLESS),
>  	};
>  
>  	return __kvm_handle_hva_range(kvm, &range).ret;
> @@ -697,6 +707,8 @@ static __always_inline int kvm_handle_hva_range_no_flush(struct mmu_notifier *mn
>  		.on_lock	= (void *)kvm_null_fn,
>  		.flush_on_ret	= false,
>  		.may_block	= false,
> +		.lockless	=
> +			IS_ENABLED(CONFIG_KVM_MMU_NOTIFIER_YOUNG_LOCKLESS),

kvm_handle_hva_range{,_no_flush}() have very generic names but
they're intimately tied to the "young" notifiers. Whereas
__kvm_handle_hva_range() is the truly generic handler function.

This is arguably a pre-existing issue, but adding
CONFIG_KVM_MMU_NOTIFIER_YOUNG_LOCKLESS makes these functions even more
intamtely tied to the "young" notifiers.

We could rename kvm_handle_hva_range{,_no_flush}() but I think the
cleanest thing to do might be to just drop them entirely and move their
contents into their callers (there are only 2 callers of these 3
functions). That will create a little duplication but IMO will make the
code easier to read.

And then we can also rename __kvm_handle_hva_range() to
kvm_handle_hva_range().

e.g. Something like this as the end result:


diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 86fb2b560d98..0146c83e24bd 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -590,8 +590,8 @@ static void kvm_null_fn(void)
 	     node;							     \
 	     node = interval_tree_iter_next(node, start, last))	     \
 
-static __always_inline kvm_mn_ret_t __kvm_handle_hva_range(struct kvm *kvm,
-							   const struct kvm_mmu_notifier_range *range)
+static __always_inline kvm_mn_ret_t kvm_handle_hva_range(struct kvm *kvm,
+							 const struct kvm_mmu_notifier_range *range)
 {
 	struct kvm_mmu_notifier_return r = {
 		.ret = false,
@@ -674,48 +674,6 @@ static __always_inline kvm_mn_ret_t __kvm_handle_hva_range(struct kvm *kvm,
 	return r;
 }
 
-static __always_inline int kvm_handle_hva_range(struct mmu_notifier *mn,
-						unsigned long start,
-						unsigned long end,
-						gfn_handler_t handler)
-{
-	struct kvm *kvm = mmu_notifier_to_kvm(mn);
-	const struct kvm_mmu_notifier_range range = {
-		.start		= start,
-		.end		= end,
-		.handler	= handler,
-		.on_lock	= (void *)kvm_null_fn,
-		.flush_on_ret	= true,
-		.may_block	= false,
-		.lockless	=
-			IS_ENABLED(CONFIG_KVM_MMU_NOTIFIER_YOUNG_LOCKLESS),
-	};
-
-	return __kvm_handle_hva_range(kvm, &range).ret;
-}
-
-static __always_inline int kvm_handle_hva_range_no_flush(struct mmu_notifier *mn,
-							 unsigned long start,
-							 unsigned long end,
-							 gfn_handler_t handler,
-							 bool fast_only)
-{
-	struct kvm *kvm = mmu_notifier_to_kvm(mn);
-	const struct kvm_mmu_notifier_range range = {
-		.start			= start,
-		.end			= end,
-		.handler		= handler,
-		.on_lock		= (void *)kvm_null_fn,
-		.flush_on_ret		= false,
-		.may_block		= false,
-		.lockless		=
-			IS_ENABLED(CONFIG_KVM_MMU_NOTIFIER_YOUNG_LOCKLESS),
-		.arg.fast_only		= fast_only,
-	};
-
-	return __kvm_handle_hva_range(kvm, &range).ret;
-}
-
 void kvm_mmu_invalidate_begin(struct kvm *kvm)
 {
 	lockdep_assert_held_write(&kvm->mmu_lock);
@@ -808,7 +766,7 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
 	 * that guest memory has been reclaimed.  This needs to be done *after*
 	 * dropping mmu_lock, as x86's reclaim path is slooooow.
 	 */
-	if (__kvm_handle_hva_range(kvm, &hva_range).found_memslot)
+	if (kvm_handle_hva_range(kvm, &hva_range).found_memslot)
 		kvm_arch_guest_memory_reclaimed(kvm);
 
 	return 0;
@@ -854,7 +812,7 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
 	};
 	bool wake;
 
-	__kvm_handle_hva_range(kvm, &hva_range);
+	kvm_handle_hva_range(kvm, &hva_range);
 
 	/* Pairs with the increment in range_start(). */
 	spin_lock(&kvm->mn_invalidate_lock);
@@ -876,6 +834,17 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
 					      unsigned long start,
 					      unsigned long end)
 {
+	struct kvm *kvm = mmu_notifier_to_kvm(mn);
+	const struct kvm_mmu_notifier_range range = {
+		.start		= start,
+		.end		= end,
+		.handler	= kvm_age_gfn,
+		.on_lock	= (void *)kvm_null_fn,
+		.flush_on_ret	= true,
+		.may_block	= false,
+		.lockless	= IS_ENABLED(CONFIG_KVM_MMU_NOTIFIER_YOUNG_LOCKLESS),
+	};
+
 	trace_kvm_age_hva(start, end, false);
 
 	return kvm_handle_hva_range(mn, start, end, kvm_age_gfn);
@@ -887,6 +856,18 @@ static int kvm_mmu_notifier_clear_young(struct mmu_notifier *mn,
 					unsigned long end,
 					bool fast_only)
 {
+	struct kvm *kvm = mmu_notifier_to_kvm(mn);
+	const struct kvm_mmu_notifier_range range = {
+		.start		= start,
+		.end		= end,
+		.handler	= kvm_age_gfn,
+		.on_lock	= (void *)kvm_null_fn,
+		.flush_on_ret	= false,
+		.may_block	= false,
+		.lockless	= IS_ENABLED(CONFIG_KVM_MMU_NOTIFIER_YOUNG_LOCKLESS),
+		.arg.fast_only	= fast_only,
+	};
+
 	trace_kvm_age_hva(start, end, fast_only);
 
 	/*
@@ -902,8 +883,7 @@ static int kvm_mmu_notifier_clear_young(struct mmu_notifier *mn,
 	 * cadence. If we find this inaccurate, we might come up with a
 	 * more sophisticated heuristic later.
 	 */
-	return kvm_handle_hva_range_no_flush(mn, start, end, kvm_age_gfn,
-					     fast_only);
+	return kvm_handle_hva_range(kvm, &range).ret;
 }
 
 static int kvm_mmu_notifier_test_young(struct mmu_notifier *mn,
@@ -911,6 +891,18 @@ static int kvm_mmu_notifier_test_young(struct mmu_notifier *mn,
 				       unsigned long address,
 				       bool fast_only)
 {
+	struct kvm *kvm = mmu_notifier_to_kvm(mn);
+	const struct kvm_mmu_notifier_range range = {
+		.start		= address,
+		.end		= address + 1,
+		.handler	= kvm_test_age_gfn,
+		.on_lock	= (void *)kvm_null_fn,
+		.flush_on_ret	= false,
+		.may_block	= false,
+		.lockless	= IS_ENABLED(CONFIG_KVM_MMU_NOTIFIER_YOUNG_LOCKLESS),
+		.arg.fast_only	= fast_only,
+	};
+
 	trace_kvm_test_age_hva(address, fast_only);
 
 	return kvm_handle_hva_range_no_flush(mn, address, address + 1,

>  	};
>  
>  	return __kvm_handle_hva_range(kvm, &range).ret;
> -- 
> 2.46.0.rc1.232.g9752f9e123-goog
> 

  reply	other threads:[~2024-07-25 16:39 UTC|newest]

Thread overview: 44+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-07-24  1:10 [PATCH v6 00/11] mm: multi-gen LRU: Walk secondary MMU page tables while aging James Houghton
2024-07-24  1:10 ` [PATCH v6 01/11] KVM: Add lockless memslot walk to KVM James Houghton
2024-07-25 16:39   ` David Matlack [this message]
2024-07-26  0:28     ` James Houghton
2024-07-24  1:10 ` [PATCH v6 02/11] KVM: x86: Relax locking for kvm_test_age_gfn and kvm_age_gfn James Houghton
2024-07-25 18:07   ` David Matlack
2024-07-26  0:34     ` James Houghton
2024-08-17  1:05   ` Sean Christopherson
2024-08-30  0:35     ` James Houghton
2024-08-30  3:47       ` Sean Christopherson
2024-08-30 12:47         ` Jason Gunthorpe
2024-08-30 17:09           ` Sean Christopherson
2024-08-30 20:22             ` Jason Gunthorpe
2024-07-24  1:10 ` [PATCH v6 03/11] KVM: arm64: " James Houghton
2024-07-25 21:55   ` James Houghton
2024-08-17  0:46     ` Sean Christopherson
2024-08-17  1:03       ` Yu Zhao
2024-08-19 20:41         ` Oliver Upton
2024-08-19 22:47           ` Sean Christopherson
2024-08-30  0:33           ` James Houghton
2024-08-30  0:48             ` Oliver Upton
2024-08-30 15:33               ` David Matlack
2024-08-30 17:38                 ` Oliver Upton
2024-07-24  1:10 ` [PATCH v6 04/11] mm: Add missing mmu_notifier_clear_young for !MMU_NOTIFIER James Houghton
2024-08-01  9:34   ` David Hildenbrand
2024-08-06 17:24   ` Jason Gunthorpe
2024-07-24  1:10 ` [PATCH v6 05/11] mm: Add fast_only bool to test_young and clear_young MMU notifiers James Houghton
2024-08-01  9:36   ` David Hildenbrand
2024-08-01 23:13     ` James Houghton
2024-08-02 15:57       ` David Hildenbrand
2024-08-05 16:54         ` James Houghton
2024-08-06 17:23       ` Jason Gunthorpe
2024-08-07 15:02         ` James Houghton
2024-08-07 18:26           ` Jason Gunthorpe
2024-07-24  1:10 ` [PATCH v6 06/11] mm: Add has_fast_aging to struct mmu_notifier James Houghton
2024-07-24  1:10 ` [PATCH v6 07/11] KVM: Pass fast_only to kvm_{test_,}age_gfn James Houghton
2024-07-24  1:10 ` [PATCH v6 08/11] KVM: x86: Optimize kvm_{test_,}age_gfn a little bit James Houghton
2024-07-25 18:17   ` David Matlack
2024-08-17  1:00     ` Sean Christopherson
2024-08-30  0:34       ` James Houghton
2024-07-24  1:10 ` [PATCH v6 09/11] KVM: x86: Implement fast_only versions of kvm_{test_,}age_gfn James Houghton
2024-07-25 18:24   ` David Matlack
2024-07-24  1:10 ` [PATCH v6 10/11] mm: multi-gen LRU: Have secondary MMUs participate in aging James Houghton
2024-07-24  1:10 ` [PATCH v6 11/11] KVM: selftests: Add multi-gen LRU aging to access_tracking_perf_test James Houghton

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ZqJ_xANKf3bNcaHM@google.com \
    --to=dmatlack@google.com \
    --cc=akpm@linux-foundation.org \
    --cc=ankita@nvidia.com \
    --cc=axelrasmussen@google.com \
    --cc=catalin.marinas@arm.com \
    --cc=corbet@lwn.net \
    --cc=james.morse@arm.com \
    --cc=jgg@ziepe.ca \
    --cc=jthoughton@google.com \
    --cc=kvm@vger.kernel.org \
    --cc=kvmarm@lists.linux.dev \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=maz@kernel.org \
    --cc=oliver.upton@linux.dev \
    --cc=pbonzini@redhat.com \
    --cc=rananta@google.com \
    --cc=rientjes@google.com \
    --cc=ryan.roberts@arm.com \
    --cc=seanjc@google.com \
    --cc=shahuang@redhat.com \
    --cc=suzuki.poulose@arm.com \
    --cc=weixugc@google.com \
    --cc=will@kernel.org \
    --cc=yuzenghui@huawei.com \
    --cc=yuzhao@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.