* [PATCH 0/3] KVM: s390: Couple of small cmpxchg() optimizations
@ 2024-11-25 11:50 Heiko Carstens
2024-11-25 11:50 ` [PATCH 1/3] KVM: s390: Use try_cmpxchg() instead of cmpxchg() loops Heiko Carstens
` (2 more replies)
0 siblings, 3 replies; 11+ messages in thread
From: Heiko Carstens @ 2024-11-25 11:50 UTC (permalink / raw)
To: Christian Borntraeger, Janosch Frank, Claudio Imbrenda,
David Hildenbrand
Cc: kvm, linux-s390, linux-kernel
Use try_cmpxchg() instead of cmpxchg() so compilers with flag output
operand support (gcc 14 and newer) can generate slightly better code.
Also get rid of two cmpxchg() usages on one/two byte memory areas
which generate inefficient code.
bloat-o-meter statistics of the kvm module:
add/remove: 0/0 grow/shrink: 0/11 up/down: 0/-318 (-318)
Function old new delta
kvm_s390_handle_wait 886 880 -6
kvm_s390_gisa_destroy 226 220 -6
kvm_s390_gisa_clear 96 90 -6
ipte_unlock 380 372 -8
kvm_s390_gisc_unregister 270 260 -10
kvm_s390_gisc_register 290 280 -10
gisa_vcpu_kicker 200 190 -10
account_mem 250 232 -18
ipte_lock 416 368 -48
kvm_s390_update_topology_change_report 174 122 -52
kvm_s390_clear_local_irqs 420 276 -144
Total: Before=316521, After=316203, chg -0.10%
Heiko Carstens (3):
KVM: s390: Use try_cmpxchg() instead of cmpxchg() loops
KVM: s390: Remove one byte cmpxchg() usage
KVM: s390: Increase size of union sca_utility to four bytes
arch/s390/include/asm/kvm_host.h | 10 +++++-----
arch/s390/kvm/gaccess.c | 16 ++++++++--------
arch/s390/kvm/interrupt.c | 25 ++++++++-----------------
arch/s390/kvm/kvm-s390.c | 4 ++--
arch/s390/kvm/pci.c | 5 ++---
5 files changed, 25 insertions(+), 35 deletions(-)
base-commit: 9f16d5e6f220661f73b36a4be1b21575651d8833
--
2.45.2
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH 1/3] KVM: s390: Use try_cmpxchg() instead of cmpxchg() loops
2024-11-25 11:50 [PATCH 0/3] KVM: s390: Couple of small cmpxchg() optimizations Heiko Carstens
@ 2024-11-25 11:50 ` Heiko Carstens
2024-11-25 12:18 ` Claudio Imbrenda
2024-11-25 11:50 ` [PATCH 2/3] KVM: s390: Remove one byte cmpxchg() usage Heiko Carstens
2024-11-25 11:50 ` [PATCH 3/3] KVM: s390: Increase size of union sca_utility to four bytes Heiko Carstens
2 siblings, 1 reply; 11+ messages in thread
From: Heiko Carstens @ 2024-11-25 11:50 UTC (permalink / raw)
To: Christian Borntraeger, Janosch Frank, Claudio Imbrenda,
David Hildenbrand
Cc: kvm, linux-s390, linux-kernel
Convert all cmpxchg() loops to try_cmpxchg() loops. With gcc 14 and the
usage of flag output operands in try_cmpxchg() this allows the compiler to
generate slightly better code.
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
arch/s390/kvm/gaccess.c | 16 ++++++++--------
arch/s390/kvm/interrupt.c | 12 ++++++------
arch/s390/kvm/kvm-s390.c | 4 ++--
arch/s390/kvm/pci.c | 5 ++---
4 files changed, 18 insertions(+), 19 deletions(-)
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index a688351f4ab5..9816b0060fbe 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -129,8 +129,8 @@ static void ipte_lock_simple(struct kvm *kvm)
retry:
read_lock(&kvm->arch.sca_lock);
ic = kvm_s390_get_ipte_control(kvm);
+ old = READ_ONCE(*ic);
do {
- old = READ_ONCE(*ic);
if (old.k) {
read_unlock(&kvm->arch.sca_lock);
cond_resched();
@@ -138,7 +138,7 @@ static void ipte_lock_simple(struct kvm *kvm)
}
new = old;
new.k = 1;
- } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+ } while (!try_cmpxchg(&ic->val, &old.val, new.val));
read_unlock(&kvm->arch.sca_lock);
out:
mutex_unlock(&kvm->arch.ipte_mutex);
@@ -154,11 +154,11 @@ static void ipte_unlock_simple(struct kvm *kvm)
goto out;
read_lock(&kvm->arch.sca_lock);
ic = kvm_s390_get_ipte_control(kvm);
+ old = READ_ONCE(*ic);
do {
- old = READ_ONCE(*ic);
new = old;
new.k = 0;
- } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+ } while (!try_cmpxchg(&ic->val, &old.val, new.val));
read_unlock(&kvm->arch.sca_lock);
wake_up(&kvm->arch.ipte_wq);
out:
@@ -172,8 +172,8 @@ static void ipte_lock_siif(struct kvm *kvm)
retry:
read_lock(&kvm->arch.sca_lock);
ic = kvm_s390_get_ipte_control(kvm);
+ old = READ_ONCE(*ic);
do {
- old = READ_ONCE(*ic);
if (old.kg) {
read_unlock(&kvm->arch.sca_lock);
cond_resched();
@@ -182,7 +182,7 @@ static void ipte_lock_siif(struct kvm *kvm)
new = old;
new.k = 1;
new.kh++;
- } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+ } while (!try_cmpxchg(&ic->val, &old.val, new.val));
read_unlock(&kvm->arch.sca_lock);
}
@@ -192,13 +192,13 @@ static void ipte_unlock_siif(struct kvm *kvm)
read_lock(&kvm->arch.sca_lock);
ic = kvm_s390_get_ipte_control(kvm);
+ old = READ_ONCE(*ic);
do {
- old = READ_ONCE(*ic);
new = old;
new.kh--;
if (!new.kh)
new.k = 0;
- } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
+ } while (!try_cmpxchg(&ic->val, &old.val, new.val));
read_unlock(&kvm->arch.sca_lock);
if (!new.kh)
wake_up(&kvm->arch.ipte_wq);
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 4f0e7f61edf7..eff69018cbeb 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -247,12 +247,12 @@ static inline int gisa_set_iam(struct kvm_s390_gisa *gisa, u8 iam)
{
u64 word, _word;
+ word = READ_ONCE(gisa->u64.word[0]);
do {
- word = READ_ONCE(gisa->u64.word[0]);
if ((u64)gisa != word >> 32)
return -EBUSY;
_word = (word & ~0xffUL) | iam;
- } while (cmpxchg(&gisa->u64.word[0], word, _word) != word);
+ } while (!try_cmpxchg(&gisa->u64.word[0], &word, _word));
return 0;
}
@@ -270,10 +270,10 @@ static inline void gisa_clear_ipm(struct kvm_s390_gisa *gisa)
{
u64 word, _word;
+ word = READ_ONCE(gisa->u64.word[0]);
do {
- word = READ_ONCE(gisa->u64.word[0]);
_word = word & ~(0xffUL << 24);
- } while (cmpxchg(&gisa->u64.word[0], word, _word) != word);
+ } while (!try_cmpxchg(&gisa->u64.word[0], &word, _word));
}
/**
@@ -291,14 +291,14 @@ static inline u8 gisa_get_ipm_or_restore_iam(struct kvm_s390_gisa_interrupt *gi)
u8 pending_mask, alert_mask;
u64 word, _word;
+ word = READ_ONCE(gi->origin->u64.word[0]);
do {
- word = READ_ONCE(gi->origin->u64.word[0]);
alert_mask = READ_ONCE(gi->alert.mask);
pending_mask = (u8)(word >> 24) & alert_mask;
if (pending_mask)
return pending_mask;
_word = (word & ~0xffUL) | alert_mask;
- } while (cmpxchg(&gi->origin->u64.word[0], word, _word) != word);
+ } while (!try_cmpxchg(&gi->origin->u64.word[0], &word, _word));
return 0;
}
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 442d4a227c0e..d8080c27d45b 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1937,11 +1937,11 @@ static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val)
read_lock(&kvm->arch.sca_lock);
sca = kvm->arch.sca;
+ old = READ_ONCE(sca->utility);
do {
- old = READ_ONCE(sca->utility);
new = old;
new.mtcr = val;
- } while (cmpxchg(&sca->utility.val, old.val, new.val) != old.val);
+ } while (!try_cmpxchg(&sca->utility.val, &old.val, new.val));
read_unlock(&kvm->arch.sca_lock);
}
diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c
index a61518b549f0..9b9e7fdd5380 100644
--- a/arch/s390/kvm/pci.c
+++ b/arch/s390/kvm/pci.c
@@ -208,13 +208,12 @@ static inline int account_mem(unsigned long nr_pages)
page_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+ cur_pages = atomic_long_read(&user->locked_vm);
do {
- cur_pages = atomic_long_read(&user->locked_vm);
new_pages = cur_pages + nr_pages;
if (new_pages > page_limit)
return -ENOMEM;
- } while (atomic_long_cmpxchg(&user->locked_vm, cur_pages,
- new_pages) != cur_pages);
+ } while (!atomic_long_try_cmpxchg(&user->locked_vm, &cur_pages, new_pages));
atomic64_add(nr_pages, &current->mm->pinned_vm);
--
2.45.2
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 2/3] KVM: s390: Remove one byte cmpxchg() usage
2024-11-25 11:50 [PATCH 0/3] KVM: s390: Couple of small cmpxchg() optimizations Heiko Carstens
2024-11-25 11:50 ` [PATCH 1/3] KVM: s390: Use try_cmpxchg() instead of cmpxchg() loops Heiko Carstens
@ 2024-11-25 11:50 ` Heiko Carstens
2024-11-25 12:16 ` Claudio Imbrenda
2024-11-25 11:50 ` [PATCH 3/3] KVM: s390: Increase size of union sca_utility to four bytes Heiko Carstens
2 siblings, 1 reply; 11+ messages in thread
From: Heiko Carstens @ 2024-11-25 11:50 UTC (permalink / raw)
To: Christian Borntraeger, Janosch Frank, Claudio Imbrenda,
David Hildenbrand
Cc: kvm, linux-s390, linux-kernel
Within sca_clear_ext_call() cmpxchg() is used to clear one or two bytes
(depending on sca format). The cmpxchg() calls are not supposed to fail; if
they did, that would be a bug. Given that cmpxchg() usage on one and two byte
areas generates very inefficient code, replace them with block concurrent
WRITE_ONCE() calls, and remove the WARN_ON().
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
arch/s390/kvm/interrupt.c | 13 ++-----------
1 file changed, 2 insertions(+), 11 deletions(-)
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index eff69018cbeb..3fd21037479f 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -118,8 +118,6 @@ static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id)
static void sca_clear_ext_call(struct kvm_vcpu *vcpu)
{
- int rc, expect;
-
if (!kvm_s390_use_sca_entries())
return;
kvm_s390_clear_cpuflags(vcpu, CPUSTAT_ECALL_PEND);
@@ -128,23 +126,16 @@ static void sca_clear_ext_call(struct kvm_vcpu *vcpu)
struct esca_block *sca = vcpu->kvm->arch.sca;
union esca_sigp_ctrl *sigp_ctrl =
&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
- union esca_sigp_ctrl old;
- old = READ_ONCE(*sigp_ctrl);
- expect = old.value;
- rc = cmpxchg(&sigp_ctrl->value, old.value, 0);
+ WRITE_ONCE(sigp_ctrl->value, 9);
} else {
struct bsca_block *sca = vcpu->kvm->arch.sca;
union bsca_sigp_ctrl *sigp_ctrl =
&(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
- union bsca_sigp_ctrl old;
- old = READ_ONCE(*sigp_ctrl);
- expect = old.value;
- rc = cmpxchg(&sigp_ctrl->value, old.value, 0);
+ WRITE_ONCE(sigp_ctrl->value, 0);
}
read_unlock(&vcpu->kvm->arch.sca_lock);
- WARN_ON(rc != expect); /* cannot clear? */
}
int psw_extint_disabled(struct kvm_vcpu *vcpu)
--
2.45.2
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH 3/3] KVM: s390: Increase size of union sca_utility to four bytes
2024-11-25 11:50 [PATCH 0/3] KVM: s390: Couple of small cmpxchg() optimizations Heiko Carstens
2024-11-25 11:50 ` [PATCH 1/3] KVM: s390: Use try_cmpxchg() instead of cmpxchg() loops Heiko Carstens
2024-11-25 11:50 ` [PATCH 2/3] KVM: s390: Remove one byte cmpxchg() usage Heiko Carstens
@ 2024-11-25 11:50 ` Heiko Carstens
2024-11-25 12:20 ` Claudio Imbrenda
2 siblings, 1 reply; 11+ messages in thread
From: Heiko Carstens @ 2024-11-25 11:50 UTC (permalink / raw)
To: Christian Borntraeger, Janosch Frank, Claudio Imbrenda,
David Hildenbrand
Cc: kvm, linux-s390, linux-kernel
kvm_s390_update_topology_change_report() modifies a single bit within
sca_utility using cmpxchg(). Given that the size of the sca_utility union
is two bytes this generates very inefficient code. Change the size to four
bytes, so better code can be generated.
Even though the size of sca_utility doesn't reflect architecture anymore
this seems to be the easiest and most pragmatic approach to avoid
inefficient code.
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
arch/s390/include/asm/kvm_host.h | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 1cd8eaebd3c0..1cb1de232b9e 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -95,10 +95,10 @@ union ipte_control {
};
union sca_utility {
- __u16 val;
+ __u32 val;
struct {
- __u16 mtcr : 1;
- __u16 reserved : 15;
+ __u32 mtcr : 1;
+ __u32 : 31;
};
};
@@ -107,7 +107,7 @@ struct bsca_block {
__u64 reserved[5];
__u64 mcn;
union sca_utility utility;
- __u8 reserved2[6];
+ __u8 reserved2[4];
struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS];
};
@@ -115,7 +115,7 @@ struct esca_block {
union ipte_control ipte_control;
__u64 reserved1[6];
union sca_utility utility;
- __u8 reserved2[6];
+ __u8 reserved2[4];
__u64 mcn[4];
__u64 reserved3[20];
struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS];
--
2.45.2
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH 2/3] KVM: s390: Remove one byte cmpxchg() usage
2024-11-25 11:50 ` [PATCH 2/3] KVM: s390: Remove one byte cmpxchg() usage Heiko Carstens
@ 2024-11-25 12:16 ` Claudio Imbrenda
2024-11-25 13:37 ` Heiko Carstens
0 siblings, 1 reply; 11+ messages in thread
From: Claudio Imbrenda @ 2024-11-25 12:16 UTC (permalink / raw)
To: Heiko Carstens
Cc: Christian Borntraeger, Janosch Frank, David Hildenbrand, kvm,
linux-s390, linux-kernel
On Mon, 25 Nov 2024 12:50:38 +0100
Heiko Carstens <hca@linux.ibm.com> wrote:
> Within sca_clear_ext_call() cmpxchg() is used to clear one or two bytes
> (depending on sca format). The cmpxchg() calls are not supposed to fail; if
> so that would be a bug. Given that cmpxchg() usage on one and two byte
> areas generates very inefficient code, replace them with block concurrent
> WRITE_ONCE() calls, and remove the WARN_ON().
>
> Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
> ---
> arch/s390/kvm/interrupt.c | 13 ++-----------
> 1 file changed, 2 insertions(+), 11 deletions(-)
>
> diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
> index eff69018cbeb..3fd21037479f 100644
> --- a/arch/s390/kvm/interrupt.c
> +++ b/arch/s390/kvm/interrupt.c
> @@ -118,8 +118,6 @@ static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id)
>
> static void sca_clear_ext_call(struct kvm_vcpu *vcpu)
> {
> - int rc, expect;
> -
> if (!kvm_s390_use_sca_entries())
> return;
> kvm_s390_clear_cpuflags(vcpu, CPUSTAT_ECALL_PEND);
> @@ -128,23 +126,16 @@ static void sca_clear_ext_call(struct kvm_vcpu *vcpu)
> struct esca_block *sca = vcpu->kvm->arch.sca;
> union esca_sigp_ctrl *sigp_ctrl =
> &(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
> - union esca_sigp_ctrl old;
>
> - old = READ_ONCE(*sigp_ctrl);
> - expect = old.value;
> - rc = cmpxchg(&sigp_ctrl->value, old.value, 0);
> + WRITE_ONCE(sigp_ctrl->value, 9);
that's supposed to be a 0, right?
> } else {
> struct bsca_block *sca = vcpu->kvm->arch.sca;
> union bsca_sigp_ctrl *sigp_ctrl =
> &(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
> - union bsca_sigp_ctrl old;
>
> - old = READ_ONCE(*sigp_ctrl);
> - expect = old.value;
> - rc = cmpxchg(&sigp_ctrl->value, old.value, 0);
> + WRITE_ONCE(sigp_ctrl->value, 0);
> }
> read_unlock(&vcpu->kvm->arch.sca_lock);
> - WARN_ON(rc != expect); /* cannot clear? */
> }
>
> int psw_extint_disabled(struct kvm_vcpu *vcpu)
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 1/3] KVM: s390: Use try_cmpxchg() instead of cmpxchg() loops
2024-11-25 11:50 ` [PATCH 1/3] KVM: s390: Use try_cmpxchg() instead of cmpxchg() loops Heiko Carstens
@ 2024-11-25 12:18 ` Claudio Imbrenda
0 siblings, 0 replies; 11+ messages in thread
From: Claudio Imbrenda @ 2024-11-25 12:18 UTC (permalink / raw)
To: Heiko Carstens
Cc: Christian Borntraeger, Janosch Frank, David Hildenbrand, kvm,
linux-s390, linux-kernel
On Mon, 25 Nov 2024 12:50:37 +0100
Heiko Carstens <hca@linux.ibm.com> wrote:
> Convert all cmpxchg() loops to try_cmpxchg() loops. With gcc 14 and the
> usage of flag output operands in try_cmpxchg() this allows the compiler to
> generate slightly better code.
>
> Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Looks straightforward
Acked-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
> ---
> arch/s390/kvm/gaccess.c | 16 ++++++++--------
> arch/s390/kvm/interrupt.c | 12 ++++++------
> arch/s390/kvm/kvm-s390.c | 4 ++--
> arch/s390/kvm/pci.c | 5 ++---
> 4 files changed, 18 insertions(+), 19 deletions(-)
>
> diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
> index a688351f4ab5..9816b0060fbe 100644
> --- a/arch/s390/kvm/gaccess.c
> +++ b/arch/s390/kvm/gaccess.c
> @@ -129,8 +129,8 @@ static void ipte_lock_simple(struct kvm *kvm)
> retry:
> read_lock(&kvm->arch.sca_lock);
> ic = kvm_s390_get_ipte_control(kvm);
> + old = READ_ONCE(*ic);
> do {
> - old = READ_ONCE(*ic);
> if (old.k) {
> read_unlock(&kvm->arch.sca_lock);
> cond_resched();
> @@ -138,7 +138,7 @@ static void ipte_lock_simple(struct kvm *kvm)
> }
> new = old;
> new.k = 1;
> - } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
> + } while (!try_cmpxchg(&ic->val, &old.val, new.val));
> read_unlock(&kvm->arch.sca_lock);
> out:
> mutex_unlock(&kvm->arch.ipte_mutex);
> @@ -154,11 +154,11 @@ static void ipte_unlock_simple(struct kvm *kvm)
> goto out;
> read_lock(&kvm->arch.sca_lock);
> ic = kvm_s390_get_ipte_control(kvm);
> + old = READ_ONCE(*ic);
> do {
> - old = READ_ONCE(*ic);
> new = old;
> new.k = 0;
> - } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
> + } while (!try_cmpxchg(&ic->val, &old.val, new.val));
> read_unlock(&kvm->arch.sca_lock);
> wake_up(&kvm->arch.ipte_wq);
> out:
> @@ -172,8 +172,8 @@ static void ipte_lock_siif(struct kvm *kvm)
> retry:
> read_lock(&kvm->arch.sca_lock);
> ic = kvm_s390_get_ipte_control(kvm);
> + old = READ_ONCE(*ic);
> do {
> - old = READ_ONCE(*ic);
> if (old.kg) {
> read_unlock(&kvm->arch.sca_lock);
> cond_resched();
> @@ -182,7 +182,7 @@ static void ipte_lock_siif(struct kvm *kvm)
> new = old;
> new.k = 1;
> new.kh++;
> - } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
> + } while (!try_cmpxchg(&ic->val, &old.val, new.val));
> read_unlock(&kvm->arch.sca_lock);
> }
>
> @@ -192,13 +192,13 @@ static void ipte_unlock_siif(struct kvm *kvm)
>
> read_lock(&kvm->arch.sca_lock);
> ic = kvm_s390_get_ipte_control(kvm);
> + old = READ_ONCE(*ic);
> do {
> - old = READ_ONCE(*ic);
> new = old;
> new.kh--;
> if (!new.kh)
> new.k = 0;
> - } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
> + } while (!try_cmpxchg(&ic->val, &old.val, new.val));
> read_unlock(&kvm->arch.sca_lock);
> if (!new.kh)
> wake_up(&kvm->arch.ipte_wq);
> diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
> index 4f0e7f61edf7..eff69018cbeb 100644
> --- a/arch/s390/kvm/interrupt.c
> +++ b/arch/s390/kvm/interrupt.c
> @@ -247,12 +247,12 @@ static inline int gisa_set_iam(struct kvm_s390_gisa *gisa, u8 iam)
> {
> u64 word, _word;
>
> + word = READ_ONCE(gisa->u64.word[0]);
> do {
> - word = READ_ONCE(gisa->u64.word[0]);
> if ((u64)gisa != word >> 32)
> return -EBUSY;
> _word = (word & ~0xffUL) | iam;
> - } while (cmpxchg(&gisa->u64.word[0], word, _word) != word);
> + } while (!try_cmpxchg(&gisa->u64.word[0], &word, _word));
>
> return 0;
> }
> @@ -270,10 +270,10 @@ static inline void gisa_clear_ipm(struct kvm_s390_gisa *gisa)
> {
> u64 word, _word;
>
> + word = READ_ONCE(gisa->u64.word[0]);
> do {
> - word = READ_ONCE(gisa->u64.word[0]);
> _word = word & ~(0xffUL << 24);
> - } while (cmpxchg(&gisa->u64.word[0], word, _word) != word);
> + } while (!try_cmpxchg(&gisa->u64.word[0], &word, _word));
> }
>
> /**
> @@ -291,14 +291,14 @@ static inline u8 gisa_get_ipm_or_restore_iam(struct kvm_s390_gisa_interrupt *gi)
> u8 pending_mask, alert_mask;
> u64 word, _word;
>
> + word = READ_ONCE(gi->origin->u64.word[0]);
> do {
> - word = READ_ONCE(gi->origin->u64.word[0]);
> alert_mask = READ_ONCE(gi->alert.mask);
> pending_mask = (u8)(word >> 24) & alert_mask;
> if (pending_mask)
> return pending_mask;
> _word = (word & ~0xffUL) | alert_mask;
> - } while (cmpxchg(&gi->origin->u64.word[0], word, _word) != word);
> + } while (!try_cmpxchg(&gi->origin->u64.word[0], &word, _word));
>
> return 0;
> }
> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
> index 442d4a227c0e..d8080c27d45b 100644
> --- a/arch/s390/kvm/kvm-s390.c
> +++ b/arch/s390/kvm/kvm-s390.c
> @@ -1937,11 +1937,11 @@ static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val)
>
> read_lock(&kvm->arch.sca_lock);
> sca = kvm->arch.sca;
> + old = READ_ONCE(sca->utility);
> do {
> - old = READ_ONCE(sca->utility);
> new = old;
> new.mtcr = val;
> - } while (cmpxchg(&sca->utility.val, old.val, new.val) != old.val);
> + } while (!try_cmpxchg(&sca->utility.val, &old.val, new.val));
> read_unlock(&kvm->arch.sca_lock);
> }
>
> diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c
> index a61518b549f0..9b9e7fdd5380 100644
> --- a/arch/s390/kvm/pci.c
> +++ b/arch/s390/kvm/pci.c
> @@ -208,13 +208,12 @@ static inline int account_mem(unsigned long nr_pages)
>
> page_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
>
> + cur_pages = atomic_long_read(&user->locked_vm);
> do {
> - cur_pages = atomic_long_read(&user->locked_vm);
> new_pages = cur_pages + nr_pages;
> if (new_pages > page_limit)
> return -ENOMEM;
> - } while (atomic_long_cmpxchg(&user->locked_vm, cur_pages,
> - new_pages) != cur_pages);
> + } while (!atomic_long_try_cmpxchg(&user->locked_vm, &cur_pages, new_pages));
>
> atomic64_add(nr_pages, &current->mm->pinned_vm);
>
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 3/3] KVM: s390: Increase size of union sca_utility to four bytes
2024-11-25 11:50 ` [PATCH 3/3] KVM: s390: Increase size of union sca_utility to four bytes Heiko Carstens
@ 2024-11-25 12:20 ` Claudio Imbrenda
2024-11-25 13:40 ` Heiko Carstens
0 siblings, 1 reply; 11+ messages in thread
From: Claudio Imbrenda @ 2024-11-25 12:20 UTC (permalink / raw)
To: Heiko Carstens
Cc: Christian Borntraeger, Janosch Frank, David Hildenbrand, kvm,
linux-s390, linux-kernel
On Mon, 25 Nov 2024 12:50:39 +0100
Heiko Carstens <hca@linux.ibm.com> wrote:
> kvm_s390_update_topology_change_report() modifies a single bit within
> sca_utility using cmpxchg(). Given that the size of the sca_utility union
> is two bytes this generates very inefficient code. Change the size to four
> bytes, so better code can be generated.
>
> Even though the size of sca_utility doesn't reflect architecture anymore
> this seems to be the easiest and most pragmatic approach to avoid
> inefficient code.
wouldn't an atomic bit_op be better in that case?
>
> Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
> ---
> arch/s390/include/asm/kvm_host.h | 10 +++++-----
> 1 file changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
> index 1cd8eaebd3c0..1cb1de232b9e 100644
> --- a/arch/s390/include/asm/kvm_host.h
> +++ b/arch/s390/include/asm/kvm_host.h
> @@ -95,10 +95,10 @@ union ipte_control {
> };
>
> union sca_utility {
> - __u16 val;
> + __u32 val;
> struct {
> - __u16 mtcr : 1;
> - __u16 reserved : 15;
> + __u32 mtcr : 1;
> + __u32 : 31;
> };
> };
>
> @@ -107,7 +107,7 @@ struct bsca_block {
> __u64 reserved[5];
> __u64 mcn;
> union sca_utility utility;
> - __u8 reserved2[6];
> + __u8 reserved2[4];
> struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS];
> };
>
> @@ -115,7 +115,7 @@ struct esca_block {
> union ipte_control ipte_control;
> __u64 reserved1[6];
> union sca_utility utility;
> - __u8 reserved2[6];
> + __u8 reserved2[4];
> __u64 mcn[4];
> __u64 reserved3[20];
> struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS];
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 2/3] KVM: s390: Remove one byte cmpxchg() usage
2024-11-25 12:16 ` Claudio Imbrenda
@ 2024-11-25 13:37 ` Heiko Carstens
2024-11-25 16:20 ` Claudio Imbrenda
0 siblings, 1 reply; 11+ messages in thread
From: Heiko Carstens @ 2024-11-25 13:37 UTC (permalink / raw)
To: Claudio Imbrenda
Cc: Christian Borntraeger, Janosch Frank, David Hildenbrand, kvm,
linux-s390, linux-kernel
On Mon, Nov 25, 2024 at 01:16:17PM +0100, Claudio Imbrenda wrote:
> On Mon, 25 Nov 2024 12:50:38 +0100
> Heiko Carstens <hca@linux.ibm.com> wrote:
> > @@ -128,23 +126,16 @@ static void sca_clear_ext_call(struct kvm_vcpu *vcpu)
> > struct esca_block *sca = vcpu->kvm->arch.sca;
> > union esca_sigp_ctrl *sigp_ctrl =
> > &(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
> > - union esca_sigp_ctrl old;
> >
> > - old = READ_ONCE(*sigp_ctrl);
> > - expect = old.value;
> > - rc = cmpxchg(&sigp_ctrl->value, old.value, 0);
> > + WRITE_ONCE(sigp_ctrl->value, 9);
>
> that's supposed to be a 0, right?
Duh... yes, of course. I added the "9" to better find the corresponding
code in assembly, and obviously forgot to replace it with 0 again.
Thanks for pointing this out!
Strangely enough this still worked. Hmm.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 3/3] KVM: s390: Increase size of union sca_utility to four bytes
2024-11-25 12:20 ` Claudio Imbrenda
@ 2024-11-25 13:40 ` Heiko Carstens
2024-11-25 16:17 ` Claudio Imbrenda
0 siblings, 1 reply; 11+ messages in thread
From: Heiko Carstens @ 2024-11-25 13:40 UTC (permalink / raw)
To: Claudio Imbrenda
Cc: Christian Borntraeger, Janosch Frank, David Hildenbrand, kvm,
linux-s390, linux-kernel
On Mon, Nov 25, 2024 at 01:20:42PM +0100, Claudio Imbrenda wrote:
> On Mon, 25 Nov 2024 12:50:39 +0100
> Heiko Carstens <hca@linux.ibm.com> wrote:
>
> > kvm_s390_update_topology_change_report() modifies a single bit within
> > sca_utility using cmpxchg(). Given that the size of the sca_utility union
> > is two bytes this generates very inefficient code. Change the size to four
> > bytes, so better code can be generated.
> >
> > Even though the size of sca_utility doesn't reflect architecture anymore
> > this seems to be the easiest and most pragmatic approach to avoid
> > inefficient code.
>
> wouldn't an atomic bit_op be better in that case?
I had that, but decided against it, since the generated code isn't shorter.
And it would require an unsigned long type within the union, or a cast,
which I also both disliked.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 3/3] KVM: s390: Increase size of union sca_utility to four bytes
2024-11-25 13:40 ` Heiko Carstens
@ 2024-11-25 16:17 ` Claudio Imbrenda
0 siblings, 0 replies; 11+ messages in thread
From: Claudio Imbrenda @ 2024-11-25 16:17 UTC (permalink / raw)
To: Heiko Carstens
Cc: Christian Borntraeger, Janosch Frank, David Hildenbrand, kvm,
linux-s390, linux-kernel
On Mon, 25 Nov 2024 14:40:22 +0100
Heiko Carstens <hca@linux.ibm.com> wrote:
> On Mon, Nov 25, 2024 at 01:20:42PM +0100, Claudio Imbrenda wrote:
> > On Mon, 25 Nov 2024 12:50:39 +0100
> > Heiko Carstens <hca@linux.ibm.com> wrote:
> >
> > > kvm_s390_update_topology_change_report() modifies a single bit within
> > > sca_utility using cmpxchg(). Given that the size of the sca_utility union
> > > is two bytes this generates very inefficient code. Change the size to four
> > > bytes, so better code can be generated.
> > >
> > > Even though the size of sca_utility doesn't reflect architecture anymore
> > > this seems to be the easiest and most pragmatic approach to avoid
> > > inefficient code.
> >
> > wouldn't an atomic bit_op be better in that case?
>
> I had that, but decided against it, since the generated code isn't shorter.
> And it would require an unsigned long type within the union, or a cast,
> which I also both disliked.
fair enough
Acked-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 2/3] KVM: s390: Remove one byte cmpxchg() usage
2024-11-25 13:37 ` Heiko Carstens
@ 2024-11-25 16:20 ` Claudio Imbrenda
0 siblings, 0 replies; 11+ messages in thread
From: Claudio Imbrenda @ 2024-11-25 16:20 UTC (permalink / raw)
To: Heiko Carstens
Cc: Christian Borntraeger, Janosch Frank, David Hildenbrand, kvm,
linux-s390, linux-kernel
On Mon, 25 Nov 2024 14:37:55 +0100
Heiko Carstens <hca@linux.ibm.com> wrote:
> On Mon, Nov 25, 2024 at 01:16:17PM +0100, Claudio Imbrenda wrote:
> > On Mon, 25 Nov 2024 12:50:38 +0100
> > Heiko Carstens <hca@linux.ibm.com> wrote:
> > > @@ -128,23 +126,16 @@ static void sca_clear_ext_call(struct kvm_vcpu *vcpu)
> > > struct esca_block *sca = vcpu->kvm->arch.sca;
> > > union esca_sigp_ctrl *sigp_ctrl =
> > > &(sca->cpu[vcpu->vcpu_id].sigp_ctrl);
> > > - union esca_sigp_ctrl old;
> > >
> > > - old = READ_ONCE(*sigp_ctrl);
> > > - expect = old.value;
> > > - rc = cmpxchg(&sigp_ctrl->value, old.value, 0);
> > > + WRITE_ONCE(sigp_ctrl->value, 9);
> >
> > that's supposed to be a 0, right?
>
> Duh... yes, of course. I added the "9" to better find the corresponding
> code in assembly, and obviously forgot to replace it with 0 again.
> Thanks for pointing this out!
>
> Strangely enough this still worked. Hmm.
with that fixed:
Acked-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2024-11-25 16:21 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-11-25 11:50 [PATCH 0/3] KVM: s390: Couple of small cmpxchg() optimizations Heiko Carstens
2024-11-25 11:50 ` [PATCH 1/3] KVM: s390: Use try_cmpxchg() instead of cmpxchg() loops Heiko Carstens
2024-11-25 12:18 ` Claudio Imbrenda
2024-11-25 11:50 ` [PATCH 2/3] KVM: s390: Remove one byte cmpxchg() usage Heiko Carstens
2024-11-25 12:16 ` Claudio Imbrenda
2024-11-25 13:37 ` Heiko Carstens
2024-11-25 16:20 ` Claudio Imbrenda
2024-11-25 11:50 ` [PATCH 3/3] KVM: s390: Increase size of union sca_utility to four bytes Heiko Carstens
2024-11-25 12:20 ` Claudio Imbrenda
2024-11-25 13:40 ` Heiko Carstens
2024-11-25 16:17 ` Claudio Imbrenda
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox