public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 0/3] KVM: s390: Couple of small cmpxchg() optimizations
@ 2024-11-26 10:25 Heiko Carstens
  2024-11-26 10:25 ` [PATCH v2 1/3] KVM: s390: Use try_cmpxchg() instead of cmpxchg() loops Heiko Carstens
                   ` (4 more replies)
  0 siblings, 5 replies; 10+ messages in thread
From: Heiko Carstens @ 2024-11-26 10:25 UTC (permalink / raw)
  To: Christian Borntraeger, Janosch Frank, Claudio Imbrenda,
	David Hildenbrand
  Cc: kvm, linux-s390, linux-kernel

v2:
- Replace broken WRITE_ONCE(..., 9) with intended WRITE_ONCE(..., 0).

v1:
Use try_cmpxchg() instead of cmpxchg() so compilers with flag output
operand support (gcc 14 and newer) can generate slightly better code.

Also get rid of two cmpxchg() usages on one/two byte memory areas,
which generate inefficient code.

bloat-o-meter statistics of the kvm module:

add/remove: 0/0 grow/shrink: 0/11 up/down: 0/-318 (-318)
Function                                     old     new   delta
kvm_s390_handle_wait                         886     880      -6
kvm_s390_gisa_destroy                        226     220      -6
kvm_s390_gisa_clear                           96      90      -6
ipte_unlock                                  380     372      -8
kvm_s390_gisc_unregister                     270     260     -10
kvm_s390_gisc_register                       290     280     -10
gisa_vcpu_kicker                             200     190     -10
account_mem                                  250     232     -18
ipte_lock                                    416     368     -48
kvm_s390_update_topology_change_report       174     122     -52
kvm_s390_clear_local_irqs                    420     276    -144
Total: Before=316521, After=316203, chg -0.10%

Heiko Carstens (3):
  KVM: s390: Use try_cmpxchg() instead of cmpxchg() loops
  KVM: s390: Remove one byte cmpxchg() usage
  KVM: s390: Increase size of union sca_utility to four bytes

 arch/s390/include/asm/kvm_host.h | 10 +++++-----
 arch/s390/kvm/gaccess.c          | 16 ++++++++--------
 arch/s390/kvm/interrupt.c        | 25 ++++++++-----------------
 arch/s390/kvm/kvm-s390.c         |  4 ++--
 arch/s390/kvm/pci.c              |  5 ++---
 5 files changed, 25 insertions(+), 35 deletions(-)

-- 
2.45.2


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH v2 1/3] KVM: s390: Use try_cmpxchg() instead of cmpxchg() loops
  2024-11-26 10:25 [PATCH v2 0/3] KVM: s390: Couple of small cmpxchg() optimizations Heiko Carstens
@ 2024-11-26 10:25 ` Heiko Carstens
  2024-11-26 10:25 ` [PATCH v2 2/3] KVM: s390: Remove one byte cmpxchg() usage Heiko Carstens
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 10+ messages in thread
From: Heiko Carstens @ 2024-11-26 10:25 UTC (permalink / raw)
  To: Christian Borntraeger, Janosch Frank, Claudio Imbrenda,
	David Hildenbrand
  Cc: kvm, linux-s390, linux-kernel

Convert all cmpxchg() loops to try_cmpxchg() loops. With gcc 14 and the
usage of flag output operands in try_cmpxchg() this allows the compiler to
generate slightly better code.

Acked-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/kvm/gaccess.c   | 16 ++++++++--------
 arch/s390/kvm/interrupt.c | 12 ++++++------
 arch/s390/kvm/kvm-s390.c  |  4 ++--
 arch/s390/kvm/pci.c       |  5 ++---
 4 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index a688351f4ab5..9816b0060fbe 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -129,8 +129,8 @@ static void ipte_lock_simple(struct kvm *kvm)
 retry:
 	read_lock(&kvm->arch.sca_lock);
 	ic = kvm_s390_get_ipte_control(kvm);
+	old = READ_ONCE(*ic);
 	do {
-		old = READ_ONCE(*ic);
 		if (old.k) {
 			read_unlock(&kvm->arch.sca_lock);
 			cond_resched();
@@ -138,7 +138,7 @@ static void ipte_lock_simple(struct kvm *kvm)
 		}
 		new = old;
 		new.k = 1;
-	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+	} while (!try_cmpxchg(&ic->val, &old.val, new.val));
 	read_unlock(&kvm->arch.sca_lock);
 out:
 	mutex_unlock(&kvm->arch.ipte_mutex);
@@ -154,11 +154,11 @@ static void ipte_unlock_simple(struct kvm *kvm)
 		goto out;
 	read_lock(&kvm->arch.sca_lock);
 	ic = kvm_s390_get_ipte_control(kvm);
+	old = READ_ONCE(*ic);
 	do {
-		old = READ_ONCE(*ic);
 		new = old;
 		new.k = 0;
-	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+	} while (!try_cmpxchg(&ic->val, &old.val, new.val));
 	read_unlock(&kvm->arch.sca_lock);
 	wake_up(&kvm->arch.ipte_wq);
 out:
@@ -172,8 +172,8 @@ static void ipte_lock_siif(struct kvm *kvm)
 retry:
 	read_lock(&kvm->arch.sca_lock);
 	ic = kvm_s390_get_ipte_control(kvm);
+	old = READ_ONCE(*ic);
 	do {
-		old = READ_ONCE(*ic);
 		if (old.kg) {
 			read_unlock(&kvm->arch.sca_lock);
 			cond_resched();
@@ -182,7 +182,7 @@ static void ipte_lock_siif(struct kvm *kvm)
 		new = old;
 		new.k = 1;
 		new.kh++;
-	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+	} while (!try_cmpxchg(&ic->val, &old.val, new.val));
 	read_unlock(&kvm->arch.sca_lock);
 }
 
@@ -192,13 +192,13 @@ static void ipte_unlock_siif(struct kvm *kvm)
 
 	read_lock(&kvm->arch.sca_lock);
 	ic = kvm_s390_get_ipte_control(kvm);
+	old = READ_ONCE(*ic);
 	do {
-		old = READ_ONCE(*ic);
 		new = old;
 		new.kh--;
 		if (!new.kh)
 			new.k = 0;
-	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+	} while (!try_cmpxchg(&ic->val, &old.val, new.val));
 	read_unlock(&kvm->arch.sca_lock);
 	if (!new.kh)
 		wake_up(&kvm->arch.ipte_wq);
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 4f0e7f61edf7..eff69018cbeb 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -247,12 +247,12 @@ static inline int gisa_set_iam(struct kvm_s390_gisa *gisa, u8 iam)
 {
 	u64 word, _word;
 
+	word = READ_ONCE(gisa->u64.word[0]);
 	do {
-		word = READ_ONCE(gisa->u64.word[0]);
 		if ((u64)gisa != word >> 32)
 			return -EBUSY;
 		_word = (word & ~0xffUL) | iam;
-	} while (cmpxchg(&gisa->u64.word[0], word, _word) != word);
+	} while (!try_cmpxchg(&gisa->u64.word[0], &word, _word));
 
 	return 0;
 }
@@ -270,10 +270,10 @@ static inline void gisa_clear_ipm(struct kvm_s390_gisa *gisa)
 {
 	u64 word, _word;
 
+	word = READ_ONCE(gisa->u64.word[0]);
 	do {
-		word = READ_ONCE(gisa->u64.word[0]);
 		_word = word & ~(0xffUL << 24);
-	} while (cmpxchg(&gisa->u64.word[0], word, _word) != word);
+	} while (!try_cmpxchg(&gisa->u64.word[0], &word, _word));
 }
 
 /**
@@ -291,14 +291,14 @@ static inline u8 gisa_get_ipm_or_restore_iam(struct kvm_s390_gisa_interrupt *gi)
 	u8 pending_mask, alert_mask;
 	u64 word, _word;
 
+	word = READ_ONCE(gi->origin->u64.word[0]);
 	do {
-		word = READ_ONCE(gi->origin->u64.word[0]);
 		alert_mask = READ_ONCE(gi->alert.mask);
 		pending_mask = (u8)(word >> 24) & alert_mask;
 		if (pending_mask)
 			return pending_mask;
 		_word = (word & ~0xffUL) | alert_mask;
-	} while (cmpxchg(&gi->origin->u64.word[0], word, _word) != word);
+	} while (!try_cmpxchg(&gi->origin->u64.word[0], &word, _word));
 
 	return 0;
 }
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 442d4a227c0e..d8080c27d45b 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1937,11 +1937,11 @@ static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val)
 
 	read_lock(&kvm->arch.sca_lock);
 	sca = kvm->arch.sca;
+	old = READ_ONCE(sca->utility);
 	do {
-		old = READ_ONCE(sca->utility);
 		new = old;
 		new.mtcr = val;
-	} while (cmpxchg(&sca->utility.val, old.val, new.val) != old.val);
+	} while (!try_cmpxchg(&sca->utility.val, &old.val, new.val));
 	read_unlock(&kvm->arch.sca_lock);
 }
 
diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c
index a61518b549f0..9b9e7fdd5380 100644
--- a/arch/s390/kvm/pci.c
+++ b/arch/s390/kvm/pci.c
@@ -208,13 +208,12 @@ static inline int account_mem(unsigned long nr_pages)
 
 	page_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 
+	cur_pages = atomic_long_read(&user->locked_vm);
 	do {
-		cur_pages = atomic_long_read(&user->locked_vm);
 		new_pages = cur_pages + nr_pages;
 		if (new_pages > page_limit)
 			return -ENOMEM;
-	} while (atomic_long_cmpxchg(&user->locked_vm, cur_pages,
-					new_pages) != cur_pages);
+	} while (!atomic_long_try_cmpxchg(&user->locked_vm, &cur_pages, new_pages));
 
 	atomic64_add(nr_pages, &current->mm->pinned_vm);
 
-- 
2.45.2


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH v2 2/3] KVM: s390: Remove one byte cmpxchg() usage
  2024-11-26 10:25 [PATCH v2 0/3] KVM: s390: Couple of small cmpxchg() optimizations Heiko Carstens
  2024-11-26 10:25 ` [PATCH v2 1/3] KVM: s390: Use try_cmpxchg() instead of cmpxchg() loops Heiko Carstens
@ 2024-11-26 10:25 ` Heiko Carstens
  2024-11-26 10:25 ` [PATCH v2 3/3] KVM: s390: Increase size of union sca_utility to four bytes Heiko Carstens
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 10+ messages in thread
From: Heiko Carstens @ 2024-11-26 10:25 UTC (permalink / raw)
  To: Christian Borntraeger, Janosch Frank, Claudio Imbrenda,
	David Hildenbrand
  Cc: kvm, linux-s390, linux-kernel

Within sca_clear_ext_call() cmpxchg() is used to clear one or two bytes
(depending on sca format). The cmpxchg() calls are not supposed to fail; a
failure would be a bug. Given that cmpxchg() usage on one and two byte
areas generates very inefficient code, replace them with block concurrent
WRITE_ONCE() calls, and remove the WARN_ON().

Acked-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/kvm/interrupt.c | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index eff69018cbeb..ea8dce299954 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -118,8 +118,6 @@ static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id)
 
 static void sca_clear_ext_call(struct kvm_vcpu *vcpu)
 {
-	int rc, expect;
-
 	if (!kvm_s390_use_sca_entries())
 		return;
 	kvm_s390_clear_cpuflags(vcpu, CPUSTAT_ECALL_PEND);
@@ -128,23 +126,16 @@ static void sca_clear_ext_call(struct kvm_vcpu *vcpu)
 		struct esca_block *sca = vcpu->kvm->arch.sca;
 		union esca_sigp_ctrl *sigp_ctrl =
 			&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
-		union esca_sigp_ctrl old;
 
-		old = READ_ONCE(*sigp_ctrl);
-		expect = old.value;
-		rc = cmpxchg(&sigp_ctrl->value, old.value, 0);
+		WRITE_ONCE(sigp_ctrl->value, 0);
 	} else {
 		struct bsca_block *sca = vcpu->kvm->arch.sca;
 		union bsca_sigp_ctrl *sigp_ctrl =
 			&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
-		union bsca_sigp_ctrl old;
 
-		old = READ_ONCE(*sigp_ctrl);
-		expect = old.value;
-		rc = cmpxchg(&sigp_ctrl->value, old.value, 0);
+		WRITE_ONCE(sigp_ctrl->value, 0);
 	}
 	read_unlock(&vcpu->kvm->arch.sca_lock);
-	WARN_ON(rc != expect); /* cannot clear? */
 }
 
 int psw_extint_disabled(struct kvm_vcpu *vcpu)
-- 
2.45.2


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH v2 3/3] KVM: s390: Increase size of union sca_utility to four bytes
  2024-11-26 10:25 [PATCH v2 0/3] KVM: s390: Couple of small cmpxchg() optimizations Heiko Carstens
  2024-11-26 10:25 ` [PATCH v2 1/3] KVM: s390: Use try_cmpxchg() instead of cmpxchg() loops Heiko Carstens
  2024-11-26 10:25 ` [PATCH v2 2/3] KVM: s390: Remove one byte cmpxchg() usage Heiko Carstens
@ 2024-11-26 10:25 ` Heiko Carstens
  2024-11-26 11:57   ` Claudio Imbrenda
  2024-11-26 12:09   ` Janosch Frank
  2024-11-26 12:11 ` [PATCH v2 0/3] KVM: s390: Couple of small cmpxchg() optimizations Claudio Imbrenda
  2024-11-26 12:26 ` Janosch Frank
  4 siblings, 2 replies; 10+ messages in thread
From: Heiko Carstens @ 2024-11-26 10:25 UTC (permalink / raw)
  To: Christian Borntraeger, Janosch Frank, Claudio Imbrenda,
	David Hildenbrand
  Cc: kvm, linux-s390, linux-kernel

kvm_s390_update_topology_change_report() modifies a single bit within
sca_utility using cmpxchg(). Given that the size of the sca_utility union
is two bytes, this generates very inefficient code. Change the size to four
bytes, so better code can be generated.

Even though the size of sca_utility no longer reflects the architecture,
this seems to be the easiest and most pragmatic approach to avoid
inefficient code.

Acked-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/include/asm/kvm_host.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 1cd8eaebd3c0..1cb1de232b9e 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -95,10 +95,10 @@ union ipte_control {
 };
 
 union sca_utility {
-	__u16 val;
+	__u32 val;
 	struct {
-		__u16 mtcr : 1;
-		__u16 reserved : 15;
+		__u32 mtcr : 1;
+		__u32	   : 31;
 	};
 };
 
@@ -107,7 +107,7 @@ struct bsca_block {
 	__u64	reserved[5];
 	__u64	mcn;
 	union sca_utility utility;
-	__u8	reserved2[6];
+	__u8	reserved2[4];
 	struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS];
 };
 
@@ -115,7 +115,7 @@ struct esca_block {
 	union ipte_control ipte_control;
 	__u64   reserved1[6];
 	union sca_utility utility;
-	__u8	reserved2[6];
+	__u8	reserved2[4];
 	__u64   mcn[4];
 	__u64   reserved3[20];
 	struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS];
-- 
2.45.2


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 3/3] KVM: s390: Increase size of union sca_utility to four bytes
  2024-11-26 10:25 ` [PATCH v2 3/3] KVM: s390: Increase size of union sca_utility to four bytes Heiko Carstens
@ 2024-11-26 11:57   ` Claudio Imbrenda
  2024-11-26 12:09   ` Janosch Frank
  1 sibling, 0 replies; 10+ messages in thread
From: Claudio Imbrenda @ 2024-11-26 11:57 UTC (permalink / raw)
  To: Heiko Carstens
  Cc: Christian Borntraeger, Janosch Frank, David Hildenbrand, kvm,
	linux-s390, linux-kernel

On Tue, 26 Nov 2024 11:25:15 +0100
Heiko Carstens <hca@linux.ibm.com> wrote:

> kvm_s390_update_topology_change_report() modifies a single bit within
> sca_utility using cmpxchg(). Given that the size of the sca_utility union
> is two bytes this generates very inefficient code. Change the size to four
> bytes, so better code can be generated.
> 
> Even though the size of sca_utility doesn't reflect architecture anymore
> this seems to be the easiest and most pragmatic approach to avoid
> inefficient code.
> 
> Acked-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
> Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
> ---
>  arch/s390/include/asm/kvm_host.h | 10 +++++-----
>  1 file changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
> index 1cd8eaebd3c0..1cb1de232b9e 100644
> --- a/arch/s390/include/asm/kvm_host.h
> +++ b/arch/s390/include/asm/kvm_host.h
> @@ -95,10 +95,10 @@ union ipte_control {
>  };
>  
>  union sca_utility {
> -	__u16 val;
> +	__u32 val;

I know I said the patch was fine, but I have now realised that I would like
a short comment here explaining that 32 bits allow for more efficient
code.

You can add it when picking; no need to send a v3.

>  	struct {
> -		__u16 mtcr : 1;
> -		__u16 reserved : 15;
> +		__u32 mtcr : 1;
> +		__u32	   : 31;
>  	};
>  };
>  
> @@ -107,7 +107,7 @@ struct bsca_block {
>  	__u64	reserved[5];
>  	__u64	mcn;
>  	union sca_utility utility;
> -	__u8	reserved2[6];
> +	__u8	reserved2[4];
>  	struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS];
>  };
>  
> @@ -115,7 +115,7 @@ struct esca_block {
>  	union ipte_control ipte_control;
>  	__u64   reserved1[6];
>  	union sca_utility utility;
> -	__u8	reserved2[6];
> +	__u8	reserved2[4];
>  	__u64   mcn[4];
>  	__u64   reserved3[20];
>  	struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS];


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 3/3] KVM: s390: Increase size of union sca_utility to four bytes
  2024-11-26 10:25 ` [PATCH v2 3/3] KVM: s390: Increase size of union sca_utility to four bytes Heiko Carstens
  2024-11-26 11:57   ` Claudio Imbrenda
@ 2024-11-26 12:09   ` Janosch Frank
  2024-11-26 12:21     ` Claudio Imbrenda
  1 sibling, 1 reply; 10+ messages in thread
From: Janosch Frank @ 2024-11-26 12:09 UTC (permalink / raw)
  To: Heiko Carstens, Christian Borntraeger, Claudio Imbrenda,
	David Hildenbrand
  Cc: kvm, linux-s390, linux-kernel

On 11/26/24 11:25 AM, Heiko Carstens wrote:
[...]
>   union sca_utility {

Would you mind adding a comment?


""Utility is defined as 2 bytes but having it 4 bytes wide generates 
more efficient code. Since the following bytes are reserved this makes 
no functional difference.""

> -	__u16 val;
> +	__u32 val;
>   	struct {
> -		__u16 mtcr : 1;
> -		__u16 reserved : 15;
> +		__u32 mtcr : 1;
> +		__u32	   : 31;
>   	};
>   };
>   
> @@ -107,7 +107,7 @@ struct bsca_block {
>   	__u64	reserved[5];
>   	__u64	mcn;
>   	union sca_utility utility;
> -	__u8	reserved2[6];
> +	__u8	reserved2[4];
>   	struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS];
>   };
>   
> @@ -115,7 +115,7 @@ struct esca_block {
>   	union ipte_control ipte_control;
>   	__u64   reserved1[6];
>   	union sca_utility utility;
> -	__u8	reserved2[6];
> +	__u8	reserved2[4];
>   	__u64   mcn[4];
>   	__u64   reserved3[20];
>   	struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS];


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 0/3] KVM: s390: Couple of small cmpxchg() optimizations
  2024-11-26 10:25 [PATCH v2 0/3] KVM: s390: Couple of small cmpxchg() optimizations Heiko Carstens
                   ` (2 preceding siblings ...)
  2024-11-26 10:25 ` [PATCH v2 3/3] KVM: s390: Increase size of union sca_utility to four bytes Heiko Carstens
@ 2024-11-26 12:11 ` Claudio Imbrenda
  2024-11-26 12:26 ` Janosch Frank
  4 siblings, 0 replies; 10+ messages in thread
From: Claudio Imbrenda @ 2024-11-26 12:11 UTC (permalink / raw)
  To: Heiko Carstens
  Cc: Christian Borntraeger, Janosch Frank, David Hildenbrand, kvm,
	linux-s390, linux-kernel

On Tue, 26 Nov 2024 11:25:12 +0100
Heiko Carstens <hca@linux.ibm.com> wrote:

looks good, and it's a follow-up of your cmpxchg series, so I think it
should go through the s390 kernel tree

(but please try to add the comment in patch 3)

> v2:
> - Replace broken WRITE_ONCE(..., 9) with intended WRITE_ONCE(..., 0).
> 
> v1:
> Use try_cmpxchg() instead of cmpxchg() so compilers with flag output
> operand support (gcc 14 and newer) can generate slightly better code.
> 
> Also get rid of two cmpxchg() usages on one/two byte memory areas,
> which generate inefficient code.
> 
> bloat-o-meter statistics of the kvm module:
> 
> add/remove: 0/0 grow/shrink: 0/11 up/down: 0/-318 (-318)
> Function                                     old     new   delta
> kvm_s390_handle_wait                         886     880      -6
> kvm_s390_gisa_destroy                        226     220      -6
> kvm_s390_gisa_clear                           96      90      -6
> ipte_unlock                                  380     372      -8
> kvm_s390_gisc_unregister                     270     260     -10
> kvm_s390_gisc_register                       290     280     -10
> gisa_vcpu_kicker                             200     190     -10
> account_mem                                  250     232     -18
> ipte_lock                                    416     368     -48
> kvm_s390_update_topology_change_report       174     122     -52
> kvm_s390_clear_local_irqs                    420     276    -144
> Total: Before=316521, After=316203, chg -0.10%
> 
> Heiko Carstens (3):
>   KVM: s390: Use try_cmpxchg() instead of cmpxchg() loops
>   KVM: s390: Remove one byte cmpxchg() usage
>   KVM: s390: Increase size of union sca_utility to four bytes
> 
>  arch/s390/include/asm/kvm_host.h | 10 +++++-----
>  arch/s390/kvm/gaccess.c          | 16 ++++++++--------
>  arch/s390/kvm/interrupt.c        | 25 ++++++++-----------------
>  arch/s390/kvm/kvm-s390.c         |  4 ++--
>  arch/s390/kvm/pci.c              |  5 ++---
>  5 files changed, 25 insertions(+), 35 deletions(-)
> 


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 3/3] KVM: s390: Increase size of union sca_utility to four bytes
  2024-11-26 12:09   ` Janosch Frank
@ 2024-11-26 12:21     ` Claudio Imbrenda
  2024-11-26 13:33       ` Heiko Carstens
  0 siblings, 1 reply; 10+ messages in thread
From: Claudio Imbrenda @ 2024-11-26 12:21 UTC (permalink / raw)
  To: Janosch Frank
  Cc: Heiko Carstens, Christian Borntraeger, David Hildenbrand, kvm,
	linux-s390, linux-kernel

On Tue, 26 Nov 2024 13:09:56 +0100
Janosch Frank <frankja@linux.ibm.com> wrote:

> On 11/26/24 11:25 AM, Heiko Carstens wrote:
> [...]
> >   union sca_utility {  
> 
> Would you mind adding a comment?
> 
> 
> ""Utility is defined as 2 bytes but having it 4 bytes wide generates 
> more efficient code. Since the following bytes are reserved this makes 
> no functional difference.""

looks good, thanks!

> 
> > -	__u16 val;
> > +	__u32 val;
> >   	struct {
> > -		__u16 mtcr : 1;
> > -		__u16 reserved : 15;
> > +		__u32 mtcr : 1;
> > +		__u32	   : 31;
> >   	};
> >   };
> >   
> > @@ -107,7 +107,7 @@ struct bsca_block {
> >   	__u64	reserved[5];
> >   	__u64	mcn;
> >   	union sca_utility utility;
> > -	__u8	reserved2[6];
> > +	__u8	reserved2[4];
> >   	struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS];
> >   };
> >   
> > @@ -115,7 +115,7 @@ struct esca_block {
> >   	union ipte_control ipte_control;
> >   	__u64   reserved1[6];
> >   	union sca_utility utility;
> > -	__u8	reserved2[6];
> > +	__u8	reserved2[4];
> >   	__u64   mcn[4];
> >   	__u64   reserved3[20];
> >   	struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS];  
> 


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 0/3] KVM: s390: Couple of small cmpxchg() optimizations
  2024-11-26 10:25 [PATCH v2 0/3] KVM: s390: Couple of small cmpxchg() optimizations Heiko Carstens
                   ` (3 preceding siblings ...)
  2024-11-26 12:11 ` [PATCH v2 0/3] KVM: s390: Couple of small cmpxchg() optimizations Claudio Imbrenda
@ 2024-11-26 12:26 ` Janosch Frank
  4 siblings, 0 replies; 10+ messages in thread
From: Janosch Frank @ 2024-11-26 12:26 UTC (permalink / raw)
  To: Heiko Carstens, Christian Borntraeger, Claudio Imbrenda,
	David Hildenbrand
  Cc: kvm, linux-s390, linux-kernel

On 11/26/24 11:25 AM, Heiko Carstens wrote:
> v2:
> - Replace broken WRITE_ONCE(..., 9) with intended WRITE_ONCE(..., 0).
> 
> v1:
> Use try_cmpxchg() instead of cmpxchg() so compilers with flag output
> operand support (gcc 14 and newer) can generate slightly better code.
> 
> Also get rid of two cmpxchg() usages on one/two byte memory areas,
> which generate inefficient code.
> 
> bloat-o-meter statistics of the kvm module:
> 

Looks good to me, Ack to the series.


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH v2 3/3] KVM: s390: Increase size of union sca_utility to four bytes
  2024-11-26 12:21     ` Claudio Imbrenda
@ 2024-11-26 13:33       ` Heiko Carstens
  0 siblings, 0 replies; 10+ messages in thread
From: Heiko Carstens @ 2024-11-26 13:33 UTC (permalink / raw)
  To: Claudio Imbrenda
  Cc: Janosch Frank, Christian Borntraeger, David Hildenbrand, kvm,
	linux-s390, linux-kernel

On Tue, Nov 26, 2024 at 01:21:52PM +0100, Claudio Imbrenda wrote:
> On Tue, 26 Nov 2024 13:09:56 +0100
> Janosch Frank <frankja@linux.ibm.com> wrote:
> 
> > On 11/26/24 11:25 AM, Heiko Carstens wrote:
> > [...]
> > >   union sca_utility {  
> > 
> > Would you mind adding a comment?
> > 
> > 
> > ""Utility is defined as 2 bytes but having it 4 bytes wide generates 
> > more efficient code. Since the following bytes are reserved this makes 
> > no functional difference.""
> 
> looks good, thanks!

Thanks a lot! I added the comment and applied the series.

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2024-11-26 13:33 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-11-26 10:25 [PATCH v2 0/3] KVM: s390: Couple of small cmpxchg() optimizations Heiko Carstens
2024-11-26 10:25 ` [PATCH v2 1/3] KVM: s390: Use try_cmpxchg() instead of cmpxchg() loops Heiko Carstens
2024-11-26 10:25 ` [PATCH v2 2/3] KVM: s390: Remove one byte cmpxchg() usage Heiko Carstens
2024-11-26 10:25 ` [PATCH v2 3/3] KVM: s390: Increase size of union sca_utility to four bytes Heiko Carstens
2024-11-26 11:57   ` Claudio Imbrenda
2024-11-26 12:09   ` Janosch Frank
2024-11-26 12:21     ` Claudio Imbrenda
2024-11-26 13:33       ` Heiko Carstens
2024-11-26 12:11 ` [PATCH v2 0/3] KVM: s390: Couple of small cmpxchg() optimizations Claudio Imbrenda
2024-11-26 12:26 ` Janosch Frank

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox