public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/3] KVM: s390: Couple of small cmpxchg() optimizations
@ 2024-11-25 11:50 Heiko Carstens
  2024-11-25 11:50 ` [PATCH 1/3] KVM: s390: Use try_cmpxchg() instead of cmpxchg() loops Heiko Carstens
                   ` (2 more replies)
  0 siblings, 3 replies; 11+ messages in thread
From: Heiko Carstens @ 2024-11-25 11:50 UTC (permalink / raw)
  To: Christian Borntraeger, Janosch Frank, Claudio Imbrenda,
	David Hildenbrand
  Cc: kvm, linux-s390, linux-kernel

Use try_cmpxchg() instead of cmpxchg() so compilers with flag output
operand support (gcc 14 and newer) can generate slightly better code.

Also get rid of two cmpxchg() usages on one/two byte memory areas
which generate inefficient code.

bloat-o-meter statistics of the kvm module:

add/remove: 0/0 grow/shrink: 0/11 up/down: 0/-318 (-318)
Function                                     old     new   delta
kvm_s390_handle_wait                         886     880      -6
kvm_s390_gisa_destroy                        226     220      -6
kvm_s390_gisa_clear                           96      90      -6
ipte_unlock                                  380     372      -8
kvm_s390_gisc_unregister                     270     260     -10
kvm_s390_gisc_register                       290     280     -10
gisa_vcpu_kicker                             200     190     -10
account_mem                                  250     232     -18
ipte_lock                                    416     368     -48
kvm_s390_update_topology_change_report       174     122     -52
kvm_s390_clear_local_irqs                    420     276    -144
Total: Before=316521, After=316203, chg -0.10%

Heiko Carstens (3):
  KVM: s390: Use try_cmpxchg() instead of cmpxchg() loops
  KVM: s390: Remove one byte cmpxchg() usage
  KVM: s390: Increase size of union sca_utility to four bytes

 arch/s390/include/asm/kvm_host.h | 10 +++++-----
 arch/s390/kvm/gaccess.c          | 16 ++++++++--------
 arch/s390/kvm/interrupt.c        | 25 ++++++++-----------------
 arch/s390/kvm/kvm-s390.c         |  4 ++--
 arch/s390/kvm/pci.c              |  5 ++---
 5 files changed, 25 insertions(+), 35 deletions(-)


base-commit: 9f16d5e6f220661f73b36a4be1b21575651d8833
-- 
2.45.2


^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 1/3] KVM: s390: Use try_cmpxchg() instead of cmpxchg() loops
  2024-11-25 11:50 [PATCH 0/3] KVM: s390: Couple of small cmpxchg() optimizations Heiko Carstens
@ 2024-11-25 11:50 ` Heiko Carstens
  2024-11-25 12:18   ` Claudio Imbrenda
  2024-11-25 11:50 ` [PATCH 2/3] KVM: s390: Remove one byte cmpxchg() usage Heiko Carstens
  2024-11-25 11:50 ` [PATCH 3/3] KVM: s390: Increase size of union sca_utility to four bytes Heiko Carstens
  2 siblings, 1 reply; 11+ messages in thread
From: Heiko Carstens @ 2024-11-25 11:50 UTC (permalink / raw)
  To: Christian Borntraeger, Janosch Frank, Claudio Imbrenda,
	David Hildenbrand
  Cc: kvm, linux-s390, linux-kernel

Convert all cmpxchg() loops to try_cmpxchg() loops. With gcc 14 and the
usage of flag output operands in try_cmpxchg() this allows the compiler to
generate slightly better code.

Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/kvm/gaccess.c   | 16 ++++++++--------
 arch/s390/kvm/interrupt.c | 12 ++++++------
 arch/s390/kvm/kvm-s390.c  |  4 ++--
 arch/s390/kvm/pci.c       |  5 ++---
 4 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index a688351f4ab5..9816b0060fbe 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -129,8 +129,8 @@ static void ipte_lock_simple(struct kvm *kvm)
 retry:
 	read_lock(&kvm->arch.sca_lock);
 	ic = kvm_s390_get_ipte_control(kvm);
+	old = READ_ONCE(*ic);
 	do {
-		old = READ_ONCE(*ic);
 		if (old.k) {
 			read_unlock(&kvm->arch.sca_lock);
 			cond_resched();
@@ -138,7 +138,7 @@ static void ipte_lock_simple(struct kvm *kvm)
 		}
 		new = old;
 		new.k = 1;
-	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+	} while (!try_cmpxchg(&ic->val, &old.val, new.val));
 	read_unlock(&kvm->arch.sca_lock);
 out:
 	mutex_unlock(&kvm->arch.ipte_mutex);
@@ -154,11 +154,11 @@ static void ipte_unlock_simple(struct kvm *kvm)
 		goto out;
 	read_lock(&kvm->arch.sca_lock);
 	ic = kvm_s390_get_ipte_control(kvm);
+	old = READ_ONCE(*ic);
 	do {
-		old = READ_ONCE(*ic);
 		new = old;
 		new.k = 0;
-	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+	} while (!try_cmpxchg(&ic->val, &old.val, new.val));
 	read_unlock(&kvm->arch.sca_lock);
 	wake_up(&kvm->arch.ipte_wq);
 out:
@@ -172,8 +172,8 @@ static void ipte_lock_siif(struct kvm *kvm)
 retry:
 	read_lock(&kvm->arch.sca_lock);
 	ic = kvm_s390_get_ipte_control(kvm);
+	old = READ_ONCE(*ic);
 	do {
-		old = READ_ONCE(*ic);
 		if (old.kg) {
 			read_unlock(&kvm->arch.sca_lock);
 			cond_resched();
@@ -182,7 +182,7 @@ static void ipte_lock_siif(struct kvm *kvm)
 		new = old;
 		new.k = 1;
 		new.kh++;
-	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+	} while (!try_cmpxchg(&ic->val, &old.val, new.val));
 	read_unlock(&kvm->arch.sca_lock);
 }
 
@@ -192,13 +192,13 @@ static void ipte_unlock_siif(struct kvm *kvm)
 
 	read_lock(&kvm->arch.sca_lock);
 	ic = kvm_s390_get_ipte_control(kvm);
+	old = READ_ONCE(*ic);
 	do {
-		old = READ_ONCE(*ic);
 		new = old;
 		new.kh--;
 		if (!new.kh)
 			new.k = 0;
-	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+	} while (!try_cmpxchg(&ic->val, &old.val, new.val));
 	read_unlock(&kvm->arch.sca_lock);
 	if (!new.kh)
 		wake_up(&kvm->arch.ipte_wq);
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 4f0e7f61edf7..eff69018cbeb 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -247,12 +247,12 @@ static inline int gisa_set_iam(struct kvm_s390_gisa *gisa, u8 iam)
 {
 	u64 word, _word;
 
+	word = READ_ONCE(gisa->u64.word[0]);
 	do {
-		word = READ_ONCE(gisa->u64.word[0]);
 		if ((u64)gisa != word >> 32)
 			return -EBUSY;
 		_word = (word & ~0xffUL) | iam;
-	} while (cmpxchg(&gisa->u64.word[0], word, _word) != word);
+	} while (!try_cmpxchg(&gisa->u64.word[0], &word, _word));
 
 	return 0;
 }
@@ -270,10 +270,10 @@ static inline void gisa_clear_ipm(struct kvm_s390_gisa *gisa)
 {
 	u64 word, _word;
 
+	word = READ_ONCE(gisa->u64.word[0]);
 	do {
-		word = READ_ONCE(gisa->u64.word[0]);
 		_word = word & ~(0xffUL << 24);
-	} while (cmpxchg(&gisa->u64.word[0], word, _word) != word);
+	} while (!try_cmpxchg(&gisa->u64.word[0], &word, _word));
 }
 
 /**
@@ -291,14 +291,14 @@ static inline u8 gisa_get_ipm_or_restore_iam(struct kvm_s390_gisa_interrupt *gi)
 	u8 pending_mask, alert_mask;
 	u64 word, _word;
 
+	word = READ_ONCE(gi->origin->u64.word[0]);
 	do {
-		word = READ_ONCE(gi->origin->u64.word[0]);
 		alert_mask = READ_ONCE(gi->alert.mask);
 		pending_mask = (u8)(word >> 24) & alert_mask;
 		if (pending_mask)
 			return pending_mask;
 		_word = (word & ~0xffUL) | alert_mask;
-	} while (cmpxchg(&gi->origin->u64.word[0], word, _word) != word);
+	} while (!try_cmpxchg(&gi->origin->u64.word[0], &word, _word));
 
 	return 0;
 }
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 442d4a227c0e..d8080c27d45b 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1937,11 +1937,11 @@ static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val)
 
 	read_lock(&kvm->arch.sca_lock);
 	sca = kvm->arch.sca;
+	old = READ_ONCE(sca->utility);
 	do {
-		old = READ_ONCE(sca->utility);
 		new = old;
 		new.mtcr = val;
-	} while (cmpxchg(&sca->utility.val, old.val, new.val) != old.val);
+	} while (!try_cmpxchg(&sca->utility.val, &old.val, new.val));
 	read_unlock(&kvm->arch.sca_lock);
 }
 
diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c
index a61518b549f0..9b9e7fdd5380 100644
--- a/arch/s390/kvm/pci.c
+++ b/arch/s390/kvm/pci.c
@@ -208,13 +208,12 @@ static inline int account_mem(unsigned long nr_pages)
 
 	page_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 
+	cur_pages = atomic_long_read(&user->locked_vm);
 	do {
-		cur_pages = atomic_long_read(&user->locked_vm);
 		new_pages = cur_pages + nr_pages;
 		if (new_pages > page_limit)
 			return -ENOMEM;
-	} while (atomic_long_cmpxchg(&user->locked_vm, cur_pages,
-					new_pages) != cur_pages);
+	} while (!atomic_long_try_cmpxchg(&user->locked_vm, &cur_pages, new_pages));
 
 	atomic64_add(nr_pages, &current->mm->pinned_vm);
 
-- 
2.45.2


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 2/3] KVM: s390: Remove one byte cmpxchg() usage
  2024-11-25 11:50 [PATCH 0/3] KVM: s390: Couple of small cmpxchg() optimizations Heiko Carstens
  2024-11-25 11:50 ` [PATCH 1/3] KVM: s390: Use try_cmpxchg() instead of cmpxchg() loops Heiko Carstens
@ 2024-11-25 11:50 ` Heiko Carstens
  2024-11-25 12:16   ` Claudio Imbrenda
  2024-11-25 11:50 ` [PATCH 3/3] KVM: s390: Increase size of union sca_utility to four bytes Heiko Carstens
  2 siblings, 1 reply; 11+ messages in thread
From: Heiko Carstens @ 2024-11-25 11:50 UTC (permalink / raw)
  To: Christian Borntraeger, Janosch Frank, Claudio Imbrenda,
	David Hildenbrand
  Cc: kvm, linux-s390, linux-kernel

Within sca_clear_ext_call() cmpxchg() is used to clear one or two bytes
(depending on sca format). The cmpxchg() calls are not supposed to fail; if
they did, that would be a bug. Given that cmpxchg() usage on one and two byte
areas generates very inefficient code, replace them with block concurrent
WRITE_ONCE() calls, and remove the WARN_ON().

Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/kvm/interrupt.c | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index eff69018cbeb..3fd21037479f 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -118,8 +118,6 @@ static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id)
 
 static void sca_clear_ext_call(struct kvm_vcpu *vcpu)
 {
-	int rc, expect;
-
 	if (!kvm_s390_use_sca_entries())
 		return;
 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_ECALL_PEND);
@@ -128,23 +126,16 @@ static void sca_clear_ext_call(struct kvm_vcpu *vcpu)
 		struct esca_block *sca = vcpu->kvm->arch.sca;
 		union esca_sigp_ctrl *sigp_ctrl =
 			&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
-		union esca_sigp_ctrl old;
 
-		old = READ_ONCE(*sigp_ctrl);
-		expect = old.value;
-		rc = cmpxchg(&sigp_ctrl->value, old.value, 0);
+		WRITE_ONCE(sigp_ctrl->value, 9);
 	} else {
 		struct bsca_block *sca = vcpu->kvm->arch.sca;
 		union bsca_sigp_ctrl *sigp_ctrl =
 			&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
-		union bsca_sigp_ctrl old;
 
-		old = READ_ONCE(*sigp_ctrl);
-		expect = old.value;
-		rc = cmpxchg(&sigp_ctrl->value, old.value, 0);
+		WRITE_ONCE(sigp_ctrl->value, 0);
 	}
 	read_unlock(&vcpu->kvm->arch.sca_lock);
-	WARN_ON(rc != expect); /* cannot clear? */
 }
 
 int psw_extint_disabled(struct kvm_vcpu *vcpu)
-- 
2.45.2


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 3/3] KVM: s390: Increase size of union sca_utility to four bytes
  2024-11-25 11:50 [PATCH 0/3] KVM: s390: Couple of small cmpxchg() optimizations Heiko Carstens
  2024-11-25 11:50 ` [PATCH 1/3] KVM: s390: Use try_cmpxchg() instead of cmpxchg() loops Heiko Carstens
  2024-11-25 11:50 ` [PATCH 2/3] KVM: s390: Remove one byte cmpxchg() usage Heiko Carstens
@ 2024-11-25 11:50 ` Heiko Carstens
  2024-11-25 12:20   ` Claudio Imbrenda
  2 siblings, 1 reply; 11+ messages in thread
From: Heiko Carstens @ 2024-11-25 11:50 UTC (permalink / raw)
  To: Christian Borntraeger, Janosch Frank, Claudio Imbrenda,
	David Hildenbrand
  Cc: kvm, linux-s390, linux-kernel

kvm_s390_update_topology_change_report() modifies a single bit within
sca_utility using cmpxchg(). Given that the size of the sca_utility union
is two bytes this generates very inefficient code. Change the size to four
bytes, so better code can be generated.

Even though the size of sca_utility doesn't reflect architecture anymore
this seems to be the easiest and most pragmatic approach to avoid
inefficient code.

Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/include/asm/kvm_host.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 1cd8eaebd3c0..1cb1de232b9e 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -95,10 +95,10 @@ union ipte_control {
 };
 
 union sca_utility {
-	__u16 val;
+	__u32 val;
 	struct {
-		__u16 mtcr : 1;
-		__u16 reserved : 15;
+		__u32 mtcr : 1;
+		__u32	   : 31;
 	};
 };
 
@@ -107,7 +107,7 @@ struct bsca_block {
 	__u64	reserved[5];
 	__u64	mcn;
 	union sca_utility utility;
-	__u8	reserved2[6];
+	__u8	reserved2[4];
 	struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS];
 };
 
@@ -115,7 +115,7 @@ struct esca_block {
 	union ipte_control ipte_control;
 	__u64   reserved1[6];
 	union sca_utility utility;
-	__u8	reserved2[6];
+	__u8	reserved2[4];
 	__u64   mcn[4];
 	__u64   reserved3[20];
 	struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS];
-- 
2.45.2


^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/3] KVM: s390: Remove one byte cmpxchg() usage
  2024-11-25 11:50 ` [PATCH 2/3] KVM: s390: Remove one byte cmpxchg() usage Heiko Carstens
@ 2024-11-25 12:16   ` Claudio Imbrenda
  2024-11-25 13:37     ` Heiko Carstens
  0 siblings, 1 reply; 11+ messages in thread
From: Claudio Imbrenda @ 2024-11-25 12:16 UTC (permalink / raw)
  To: Heiko Carstens
  Cc: Christian Borntraeger, Janosch Frank, David Hildenbrand, kvm,
	linux-s390, linux-kernel

On Mon, 25 Nov 2024 12:50:38 +0100
Heiko Carstens <hca@linux.ibm.com> wrote:

> Within sca_clear_ext_call() cmpxchg() is used to clear one or two bytes
> (depending on sca format). The cmpxchg() calls are not supposed to fail; if
> so that would be a bug. Given that cmpxchg() usage on one and two byte
> areas generates very inefficient code, replace them with block concurrent
> WRITE_ONCE() calls, and remove the WARN_ON().
> 
> Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
> ---
>  arch/s390/kvm/interrupt.c | 13 ++-----------
>  1 file changed, 2 insertions(+), 11 deletions(-)
> 
> diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
> index eff69018cbeb..3fd21037479f 100644
> --- a/arch/s390/kvm/interrupt.c
> +++ b/arch/s390/kvm/interrupt.c
> @@ -118,8 +118,6 @@ static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id)
>  
>  static void sca_clear_ext_call(struct kvm_vcpu *vcpu)
>  {
> -	int rc, expect;
> -
>  	if (!kvm_s390_use_sca_entries())
>  		return;
>  	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_ECALL_PEND);
> @@ -128,23 +126,16 @@ static void sca_clear_ext_call(struct kvm_vcpu *vcpu)
>  		struct esca_block *sca = vcpu->kvm->arch.sca;
>  		union esca_sigp_ctrl *sigp_ctrl =
>  			&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
> -		union esca_sigp_ctrl old;
>  
> -		old = READ_ONCE(*sigp_ctrl);
> -		expect = old.value;
> -		rc = cmpxchg(&sigp_ctrl->value, old.value, 0);
> +		WRITE_ONCE(sigp_ctrl->value, 9);

that's supposed to be a 0, right?

>  	} else {
>  		struct bsca_block *sca = vcpu->kvm->arch.sca;
>  		union bsca_sigp_ctrl *sigp_ctrl =
>  			&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
> -		union bsca_sigp_ctrl old;
>  
> -		old = READ_ONCE(*sigp_ctrl);
> -		expect = old.value;
> -		rc = cmpxchg(&sigp_ctrl->value, old.value, 0);
> +		WRITE_ONCE(sigp_ctrl->value, 0);
>  	}
>  	read_unlock(&vcpu->kvm->arch.sca_lock);
> -	WARN_ON(rc != expect); /* cannot clear? */
>  }
>  
>  int psw_extint_disabled(struct kvm_vcpu *vcpu)


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/3] KVM: s390: Use try_cmpxchg() instead of cmpxchg() loops
  2024-11-25 11:50 ` [PATCH 1/3] KVM: s390: Use try_cmpxchg() instead of cmpxchg() loops Heiko Carstens
@ 2024-11-25 12:18   ` Claudio Imbrenda
  0 siblings, 0 replies; 11+ messages in thread
From: Claudio Imbrenda @ 2024-11-25 12:18 UTC (permalink / raw)
  To: Heiko Carstens
  Cc: Christian Borntraeger, Janosch Frank, David Hildenbrand, kvm,
	linux-s390, linux-kernel

On Mon, 25 Nov 2024 12:50:37 +0100
Heiko Carstens <hca@linux.ibm.com> wrote:

> Convert all cmpxchg() loops to try_cmpxchg() loops. With gcc 14 and the
> usage of flag output operands in try_cmpxchg() this allows the compiler to
> generate slightly better code.
> 
> Signed-off-by: Heiko Carstens <hca@linux.ibm.com>

Looks straightforward

Acked-by: Claudio Imbrenda <imbrenda@linux.ibm.com>

> ---
>  arch/s390/kvm/gaccess.c   | 16 ++++++++--------
>  arch/s390/kvm/interrupt.c | 12 ++++++------
>  arch/s390/kvm/kvm-s390.c  |  4 ++--
>  arch/s390/kvm/pci.c       |  5 ++---
>  4 files changed, 18 insertions(+), 19 deletions(-)
> 
> diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
> index a688351f4ab5..9816b0060fbe 100644
> --- a/arch/s390/kvm/gaccess.c
> +++ b/arch/s390/kvm/gaccess.c
> @@ -129,8 +129,8 @@ static void ipte_lock_simple(struct kvm *kvm)
>  retry:
>  	read_lock(&kvm->arch.sca_lock);
>  	ic = kvm_s390_get_ipte_control(kvm);
> +	old = READ_ONCE(*ic);
>  	do {
> -		old = READ_ONCE(*ic);
>  		if (old.k) {
>  			read_unlock(&kvm->arch.sca_lock);
>  			cond_resched();
> @@ -138,7 +138,7 @@ static void ipte_lock_simple(struct kvm *kvm)
>  		}
>  		new = old;
>  		new.k = 1;
> -	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
> +	} while (!try_cmpxchg(&ic->val, &old.val, new.val));
>  	read_unlock(&kvm->arch.sca_lock);
>  out:
>  	mutex_unlock(&kvm->arch.ipte_mutex);
> @@ -154,11 +154,11 @@ static void ipte_unlock_simple(struct kvm *kvm)
>  		goto out;
>  	read_lock(&kvm->arch.sca_lock);
>  	ic = kvm_s390_get_ipte_control(kvm);
> +	old = READ_ONCE(*ic);
>  	do {
> -		old = READ_ONCE(*ic);
>  		new = old;
>  		new.k = 0;
> -	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
> +	} while (!try_cmpxchg(&ic->val, &old.val, new.val));
>  	read_unlock(&kvm->arch.sca_lock);
>  	wake_up(&kvm->arch.ipte_wq);
>  out:
> @@ -172,8 +172,8 @@ static void ipte_lock_siif(struct kvm *kvm)
>  retry:
>  	read_lock(&kvm->arch.sca_lock);
>  	ic = kvm_s390_get_ipte_control(kvm);
> +	old = READ_ONCE(*ic);
>  	do {
> -		old = READ_ONCE(*ic);
>  		if (old.kg) {
>  			read_unlock(&kvm->arch.sca_lock);
>  			cond_resched();
> @@ -182,7 +182,7 @@ static void ipte_lock_siif(struct kvm *kvm)
>  		new = old;
>  		new.k = 1;
>  		new.kh++;
> -	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
> +	} while (!try_cmpxchg(&ic->val, &old.val, new.val));
>  	read_unlock(&kvm->arch.sca_lock);
>  }
>  
> @@ -192,13 +192,13 @@ static void ipte_unlock_siif(struct kvm *kvm)
>  
>  	read_lock(&kvm->arch.sca_lock);
>  	ic = kvm_s390_get_ipte_control(kvm);
> +	old = READ_ONCE(*ic);
>  	do {
> -		old = READ_ONCE(*ic);
>  		new = old;
>  		new.kh--;
>  		if (!new.kh)
>  			new.k = 0;
> -	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
> +	} while (!try_cmpxchg(&ic->val, &old.val, new.val));
>  	read_unlock(&kvm->arch.sca_lock);
>  	if (!new.kh)
>  		wake_up(&kvm->arch.ipte_wq);
> diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
> index 4f0e7f61edf7..eff69018cbeb 100644
> --- a/arch/s390/kvm/interrupt.c
> +++ b/arch/s390/kvm/interrupt.c
> @@ -247,12 +247,12 @@ static inline int gisa_set_iam(struct kvm_s390_gisa *gisa, u8 iam)
>  {
>  	u64 word, _word;
>  
> +	word = READ_ONCE(gisa->u64.word[0]);
>  	do {
> -		word = READ_ONCE(gisa->u64.word[0]);
>  		if ((u64)gisa != word >> 32)
>  			return -EBUSY;
>  		_word = (word & ~0xffUL) | iam;
> -	} while (cmpxchg(&gisa->u64.word[0], word, _word) != word);
> +	} while (!try_cmpxchg(&gisa->u64.word[0], &word, _word));
>  
>  	return 0;
>  }
> @@ -270,10 +270,10 @@ static inline void gisa_clear_ipm(struct kvm_s390_gisa *gisa)
>  {
>  	u64 word, _word;
>  
> +	word = READ_ONCE(gisa->u64.word[0]);
>  	do {
> -		word = READ_ONCE(gisa->u64.word[0]);
>  		_word = word & ~(0xffUL << 24);
> -	} while (cmpxchg(&gisa->u64.word[0], word, _word) != word);
> +	} while (!try_cmpxchg(&gisa->u64.word[0], &word, _word));
>  }
>  
>  /**
> @@ -291,14 +291,14 @@ static inline u8 gisa_get_ipm_or_restore_iam(struct kvm_s390_gisa_interrupt *gi)
>  	u8 pending_mask, alert_mask;
>  	u64 word, _word;
>  
> +	word = READ_ONCE(gi->origin->u64.word[0]);
>  	do {
> -		word = READ_ONCE(gi->origin->u64.word[0]);
>  		alert_mask = READ_ONCE(gi->alert.mask);
>  		pending_mask = (u8)(word >> 24) & alert_mask;
>  		if (pending_mask)
>  			return pending_mask;
>  		_word = (word & ~0xffUL) | alert_mask;
> -	} while (cmpxchg(&gi->origin->u64.word[0], word, _word) != word);
> +	} while (!try_cmpxchg(&gi->origin->u64.word[0], &word, _word));
>  
>  	return 0;
>  }
> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
> index 442d4a227c0e..d8080c27d45b 100644
> --- a/arch/s390/kvm/kvm-s390.c
> +++ b/arch/s390/kvm/kvm-s390.c
> @@ -1937,11 +1937,11 @@ static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val)
>  
>  	read_lock(&kvm->arch.sca_lock);
>  	sca = kvm->arch.sca;
> +	old = READ_ONCE(sca->utility);
>  	do {
> -		old = READ_ONCE(sca->utility);
>  		new = old;
>  		new.mtcr = val;
> -	} while (cmpxchg(&sca->utility.val, old.val, new.val) != old.val);
> +	} while (!try_cmpxchg(&sca->utility.val, &old.val, new.val));
>  	read_unlock(&kvm->arch.sca_lock);
>  }
>  
> diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c
> index a61518b549f0..9b9e7fdd5380 100644
> --- a/arch/s390/kvm/pci.c
> +++ b/arch/s390/kvm/pci.c
> @@ -208,13 +208,12 @@ static inline int account_mem(unsigned long nr_pages)
>  
>  	page_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
>  
> +	cur_pages = atomic_long_read(&user->locked_vm);
>  	do {
> -		cur_pages = atomic_long_read(&user->locked_vm);
>  		new_pages = cur_pages + nr_pages;
>  		if (new_pages > page_limit)
>  			return -ENOMEM;
> -	} while (atomic_long_cmpxchg(&user->locked_vm, cur_pages,
> -					new_pages) != cur_pages);
> +	} while (!atomic_long_try_cmpxchg(&user->locked_vm, &cur_pages, new_pages));
>  
>  	atomic64_add(nr_pages, &current->mm->pinned_vm);
>  


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 3/3] KVM: s390: Increase size of union sca_utility to four bytes
  2024-11-25 11:50 ` [PATCH 3/3] KVM: s390: Increase size of union sca_utility to four bytes Heiko Carstens
@ 2024-11-25 12:20   ` Claudio Imbrenda
  2024-11-25 13:40     ` Heiko Carstens
  0 siblings, 1 reply; 11+ messages in thread
From: Claudio Imbrenda @ 2024-11-25 12:20 UTC (permalink / raw)
  To: Heiko Carstens
  Cc: Christian Borntraeger, Janosch Frank, David Hildenbrand, kvm,
	linux-s390, linux-kernel

On Mon, 25 Nov 2024 12:50:39 +0100
Heiko Carstens <hca@linux.ibm.com> wrote:

> kvm_s390_update_topology_change_report() modifies a single bit within
> sca_utility using cmpxchg(). Given that the size of the sca_utility union
> is two bytes this generates very inefficient code. Change the size to four
> bytes, so better code can be generated.
> 
> Even though the size of sca_utility doesn't reflect architecture anymore
> this seems to be the easiest and most pragmatic approach to avoid
> inefficient code.

wouldn't an atomic bit_op be better in that case?

> 
> Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
> ---
>  arch/s390/include/asm/kvm_host.h | 10 +++++-----
>  1 file changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
> index 1cd8eaebd3c0..1cb1de232b9e 100644
> --- a/arch/s390/include/asm/kvm_host.h
> +++ b/arch/s390/include/asm/kvm_host.h
> @@ -95,10 +95,10 @@ union ipte_control {
>  };
>  
>  union sca_utility {
> -	__u16 val;
> +	__u32 val;
>  	struct {
> -		__u16 mtcr : 1;
> -		__u16 reserved : 15;
> +		__u32 mtcr : 1;
> +		__u32	   : 31;
>  	};
>  };
>  
> @@ -107,7 +107,7 @@ struct bsca_block {
>  	__u64	reserved[5];
>  	__u64	mcn;
>  	union sca_utility utility;
> -	__u8	reserved2[6];
> +	__u8	reserved2[4];
>  	struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS];
>  };
>  
> @@ -115,7 +115,7 @@ struct esca_block {
>  	union ipte_control ipte_control;
>  	__u64   reserved1[6];
>  	union sca_utility utility;
> -	__u8	reserved2[6];
> +	__u8	reserved2[4];
>  	__u64   mcn[4];
>  	__u64   reserved3[20];
>  	struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS];


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/3] KVM: s390: Remove one byte cmpxchg() usage
  2024-11-25 12:16   ` Claudio Imbrenda
@ 2024-11-25 13:37     ` Heiko Carstens
  2024-11-25 16:20       ` Claudio Imbrenda
  0 siblings, 1 reply; 11+ messages in thread
From: Heiko Carstens @ 2024-11-25 13:37 UTC (permalink / raw)
  To: Claudio Imbrenda
  Cc: Christian Borntraeger, Janosch Frank, David Hildenbrand, kvm,
	linux-s390, linux-kernel

On Mon, Nov 25, 2024 at 01:16:17PM +0100, Claudio Imbrenda wrote:
> On Mon, 25 Nov 2024 12:50:38 +0100
> Heiko Carstens <hca@linux.ibm.com> wrote:
> > @@ -128,23 +126,16 @@ static void sca_clear_ext_call(struct kvm_vcpu *vcpu)
> >  		struct esca_block *sca = vcpu->kvm->arch.sca;
> >  		union esca_sigp_ctrl *sigp_ctrl =
> >  			&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
> > -		union esca_sigp_ctrl old;
> >  
> > -		old = READ_ONCE(*sigp_ctrl);
> > -		expect = old.value;
> > -		rc = cmpxchg(&sigp_ctrl->value, old.value, 0);
> > +		WRITE_ONCE(sigp_ctrl->value, 9);
> 
> that's supposed to be a 0, right?

Duh... yes, of course. I added the "9" to better find the corresponding
code in assembly, and obviously forgot to replace it with 0 again.
Thanks for pointing this out!

Strangely enough, this still worked. Hmm.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 3/3] KVM: s390: Increase size of union sca_utility to four bytes
  2024-11-25 12:20   ` Claudio Imbrenda
@ 2024-11-25 13:40     ` Heiko Carstens
  2024-11-25 16:17       ` Claudio Imbrenda
  0 siblings, 1 reply; 11+ messages in thread
From: Heiko Carstens @ 2024-11-25 13:40 UTC (permalink / raw)
  To: Claudio Imbrenda
  Cc: Christian Borntraeger, Janosch Frank, David Hildenbrand, kvm,
	linux-s390, linux-kernel

On Mon, Nov 25, 2024 at 01:20:42PM +0100, Claudio Imbrenda wrote:
> On Mon, 25 Nov 2024 12:50:39 +0100
> Heiko Carstens <hca@linux.ibm.com> wrote:
> 
> > kvm_s390_update_topology_change_report() modifies a single bit within
> > sca_utility using cmpxchg(). Given that the size of the sca_utility union
> > is two bytes this generates very inefficient code. Change the size to four
> > bytes, so better code can be generated.
> > 
> > Even though the size of sca_utility doesn't reflect architecture anymore
> > this seems to be the easiest and most pragmatic approach to avoid
> > inefficient code.
> 
> wouldn't an atomic bit_op be better in that case?

I had that, but decided against it, since the generated code isn't shorter.
And it would require an unsigned long type within the union, or a cast,
which I also both disliked.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 3/3] KVM: s390: Increase size of union sca_utility to four bytes
  2024-11-25 13:40     ` Heiko Carstens
@ 2024-11-25 16:17       ` Claudio Imbrenda
  0 siblings, 0 replies; 11+ messages in thread
From: Claudio Imbrenda @ 2024-11-25 16:17 UTC (permalink / raw)
  To: Heiko Carstens
  Cc: Christian Borntraeger, Janosch Frank, David Hildenbrand, kvm,
	linux-s390, linux-kernel

On Mon, 25 Nov 2024 14:40:22 +0100
Heiko Carstens <hca@linux.ibm.com> wrote:

> On Mon, Nov 25, 2024 at 01:20:42PM +0100, Claudio Imbrenda wrote:
> > On Mon, 25 Nov 2024 12:50:39 +0100
> > Heiko Carstens <hca@linux.ibm.com> wrote:
> >   
> > > kvm_s390_update_topology_change_report() modifies a single bit within
> > > sca_utility using cmpxchg(). Given that the size of the sca_utility union
> > > is two bytes this generates very inefficient code. Change the size to four
> > > bytes, so better code can be generated.
> > > 
> > > Even though the size of sca_utility doesn't reflect architecture anymore
> > > this seems to be the easiest and most pragmatic approach to avoid
> > > inefficient code.  
> > 
> > wouldn't an atomic bit_op be better in that case?  
> 
> I had that, but decided against it, since the generated code isn't shorter.
> And it would require an unsigned long type within the union, or a cast,
> which I also both disliked.

fair enough

Acked-by: Claudio Imbrenda <imbrenda@linux.ibm.com>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 2/3] KVM: s390: Remove one byte cmpxchg() usage
  2024-11-25 13:37     ` Heiko Carstens
@ 2024-11-25 16:20       ` Claudio Imbrenda
  0 siblings, 0 replies; 11+ messages in thread
From: Claudio Imbrenda @ 2024-11-25 16:20 UTC (permalink / raw)
  To: Heiko Carstens
  Cc: Christian Borntraeger, Janosch Frank, David Hildenbrand, kvm,
	linux-s390, linux-kernel

On Mon, 25 Nov 2024 14:37:55 +0100
Heiko Carstens <hca@linux.ibm.com> wrote:

> On Mon, Nov 25, 2024 at 01:16:17PM +0100, Claudio Imbrenda wrote:
> > On Mon, 25 Nov 2024 12:50:38 +0100
> > Heiko Carstens <hca@linux.ibm.com> wrote:  
> > > @@ -128,23 +126,16 @@ static void sca_clear_ext_call(struct kvm_vcpu *vcpu)
> > >  		struct esca_block *sca = vcpu->kvm->arch.sca;
> > >  		union esca_sigp_ctrl *sigp_ctrl =
> > >  			&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
> > > -		union esca_sigp_ctrl old;
> > >  
> > > -		old = READ_ONCE(*sigp_ctrl);
> > > -		expect = old.value;
> > > -		rc = cmpxchg(&sigp_ctrl->value, old.value, 0);
> > > +		WRITE_ONCE(sigp_ctrl->value, 9);  
> > 
> > that's supposed to be a 0, right?  
> 
> Duh... yes, of course. I added the "9" to better find the corresponding
> code in assembly, and obviously forgot to replace it with 0 again.
> Thanks for pointing this out!
> 
> Strange enough this still worked. Hmm.

with that fixed:

Acked-by: Claudio Imbrenda <imbrenda@linux.ibm.com>

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2024-11-25 16:21 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-11-25 11:50 [PATCH 0/3] KVM: s390: Couple of small cmpxchg() optimizations Heiko Carstens
2024-11-25 11:50 ` [PATCH 1/3] KVM: s390: Use try_cmpxchg() instead of cmpxchg() loops Heiko Carstens
2024-11-25 12:18   ` Claudio Imbrenda
2024-11-25 11:50 ` [PATCH 2/3] KVM: s390: Remove one byte cmpxchg() usage Heiko Carstens
2024-11-25 12:16   ` Claudio Imbrenda
2024-11-25 13:37     ` Heiko Carstens
2024-11-25 16:20       ` Claudio Imbrenda
2024-11-25 11:50 ` [PATCH 3/3] KVM: s390: Increase size of union sca_utility to four bytes Heiko Carstens
2024-11-25 12:20   ` Claudio Imbrenda
2024-11-25 13:40     ` Heiko Carstens
2024-11-25 16:17       ` Claudio Imbrenda

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox