All of lore.kernel.org
 help / color / mirror / Atom feed
From: Heiko Carstens <hca@linux.ibm.com>
To: Alexander Gordeev <agordeev@linux.ibm.com>,
	Sven Schnelle <svens@linux.ibm.com>,
	Vasily Gorbik <gor@linux.ibm.com>,
	Christian Borntraeger <borntraeger@linux.ibm.com>,
	Juergen Christ <jchrist@linux.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>,
	Yang Shi <yang@os.amperecomputing.com>,
	Shrikanth Hegde <sshegde@linux.ibm.com>,
	linux-kernel@vger.kernel.org, linux-s390@vger.kernel.org
Subject: [PATCH v3 4/9] s390/percpu: Use new percpu code section for arch_this_cpu_add()
Date: Wed, 20 May 2026 11:22:38 +0200	[thread overview]
Message-ID: <20260520092243.264847-5-hca@linux.ibm.com> (raw)
In-Reply-To: <20260520092243.264847-1-hca@linux.ibm.com>

Convert arch_this_cpu_add() to make use of the new percpu code section
infrastructure.

With this the text size of the kernel image is reduced by ~76kb
(defconfig). Also more than 5300 generated preempt_schedule_notrace()
function calls within the kernel image (modules not counted) are removed.

With:

DEFINE_PER_CPU(long, foo);
void bar(long a) { this_cpu_add(foo, a); }

Old arch_this_cpu_add() looks like this:

00000000000000c0 <bar>:
  c0:   c0 04 00 00 00 00       jgnop   c0 <bar>
  c6:   eb 01 03 a8 00 6a       asi     936,1
  cc:   c4 18 00 00 00 00       lgrl    %r1,cc <bar+0xc>
                        ce: R_390_GOTENT        foo+0x2
  d2:   e3 10 03 b8 00 08       ag      %r1,952
  d8:   eb 22 10 00 00 e8       laag    %r2,%r2,0(%r1)
  de:   eb ff 03 a8 00 6e       alsi    936,-1
  e4:   a7 a4 00 05             jhe     ee <bar+0x2e>
  e8:   c0 f4 00 00 00 00       jg      e8 <bar+0x28>
                        ea: R_390_PC32DBL       __s390_indirect_jump_r14+0x2
  ee:   c0 f4 00 00 00 00       jg      ee <bar+0x2e>
                        f0: R_390_PLT32DBL      preempt_schedule_notrace+0x2

New arch_this_cpu_add() looks like this:

00000000000000c0 <bar>:
  c0:   c0 04 00 00 00 00       jgnop   c0 <bar>
  c6:   c4 38 00 00 00 00       lgrl    %r3,c6 <bar+0x6>
                        c8: R_390_GOTENT        foo+0x2
  cc:   b9 04 00 43             lgr     %r4,%r3
  d0:   eb 00 43 c0 00 52       mviy    960(%r0),4
  d6:   e3 40 03 b8 00 08       ag      %r4,952
  dc:   eb 52 40 00 00 e8       laag    %r5,%r2,0(%r4)
  e2:   eb 00 03 c0 00 52       mviy    960,0
  e8:   c0 f4 00 00 00 00       jg      e8 <bar+0x28>
                        ea: R_390_PC32DBL       __s390_indirect_jump_r14+0x2

Note that the conditional function call is removed.

Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
---
 arch/s390/include/asm/percpu.h | 65 ++++++++++++++++++++++------------
 1 file changed, 43 insertions(+), 22 deletions(-)

diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h
index c8fc8b320a86..459603c7305a 100644
--- a/arch/s390/include/asm/percpu.h
+++ b/arch/s390/include/asm/percpu.h
@@ -127,28 +127,49 @@
 
 #else /* MARCH_HAS_Z196_FEATURES */
 
-#define arch_this_cpu_add(pcp, val, op1, op2, szcast)			\
-do {									\
-	typedef typeof(pcp) pcp_op_T__; 				\
-	pcp_op_T__ val__ = (val);					\
-	pcp_op_T__ old__, *ptr__;					\
-	preempt_disable_notrace();					\
-	ptr__ = raw_cpu_ptr(&(pcp)); 				\
-	if (__builtin_constant_p(val__) &&				\
-	    ((szcast)val__ > -129) && ((szcast)val__ < 128)) {		\
-		asm volatile(						\
-			op2 "   %[ptr__],%[val__]"			\
-			: [ptr__] "+Q" (*ptr__) 			\
-			: [val__] "i" ((szcast)val__)			\
-			: "cc");					\
-	} else {							\
-		asm volatile(						\
-			op1 "   %[old__],%[val__],%[ptr__]"		\
-			: [old__] "=d" (old__), [ptr__] "+Q" (*ptr__)	\
-			: [val__] "d" (val__)				\
-			: "cc");					\
-	}								\
-	preempt_enable_notrace();					\
+#define arch_this_cpu_add(pcp, val, op1, op2, szcast)				\
+do {										\
+	unsigned long lc_pcpr, lc_pcpo;						\
+	typedef typeof(pcp) pcp_op_T__;						\
+	pcp_op_T__ val__ = (val);						\
+	pcp_op_T__ old__, *ptr__;						\
+										\
+	lc_pcpr = offsetof(struct lowcore, percpu_register);			\
+	lc_pcpo = offsetof(struct lowcore, percpu_offset);			\
+	ptr__ = PERCPU_PTR(&(pcp));						\
+	if (__builtin_constant_p(val__) &&					\
+	    ((szcast)val__ > -129) && ((szcast)val__ < 128)) {			\
+		asm volatile(							\
+			MVIY_PERCPU("%[disppcpr]", "%[dispaltpcpr]", "%[ptr__]")\
+			AG_ALT("%[disppcpo]", "%[dispaltpcpo]", "%[ptr__]")	\
+			op2 "   0(%[ptr__]),%[val__]\n"				\
+			MVIY_ALT("%[disppcpr]", "%[dispaltpcpr]", "%%r0")	\
+			: [ptr__] "+&a" (ptr__), "+m" (*ptr__),			\
+			  "=m" (((struct lowcore *)0)->percpu_register)		\
+			: [val__] "i" ((szcast)val__),				\
+			  [disppcpr] "i" (lc_pcpr),				\
+			  [disppcpo] "i" (lc_pcpo),				\
+			  [dispaltpcpr] "i" (lc_pcpr + LOWCORE_ALT_ADDRESS),	\
+			  [dispaltpcpo] "i" (lc_pcpo + LOWCORE_ALT_ADDRESS),	\
+			  "m" (((struct lowcore *)0)->percpu_offset)		\
+			: "cc");						\
+	} else {								\
+		asm volatile(							\
+			MVIY_PERCPU("%[disppcpr]", "%[dispaltpcpr]", "%[ptr__]")\
+			AG_ALT("%[disppcpo]", "%[dispaltpcpo]", "%[ptr__]")	\
+			op1 "   %[old__],%[val__],0(%[ptr__])\n"		\
+			MVIY_ALT("%[disppcpr]", "%[dispaltpcpr]", "%%r0")	\
+			: [old__] "=&d" (old__),				\
+			  [ptr__] "+&a" (ptr__),  "+m" (*ptr__),		\
+			  "=m" (((struct lowcore *)0)->percpu_register)		\
+			: [val__] "d" (val__),					\
+			  [disppcpr] "i" (lc_pcpr),				\
+			  [disppcpo] "i" (lc_pcpo),				\
+			  [dispaltpcpr] "i" (lc_pcpr + LOWCORE_ALT_ADDRESS),	\
+			  [dispaltpcpo] "i" (lc_pcpo + LOWCORE_ALT_ADDRESS),	\
+			  "m" (((struct lowcore *)0)->percpu_offset)		\
+			: "cc");						\
+	}									\
 } while (0)
 
 #define this_cpu_add_4(pcp, val) arch_this_cpu_add(pcp, val, "laa", "asi", int)
-- 
2.51.0


  parent reply	other threads:[~2026-05-20  9:22 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-20  9:22 [PATCH v3 0/9] s390: Improve this_cpu operations Heiko Carstens
2026-05-20  9:22 ` [PATCH v3 1/9] s390/alternatives: Add new ALT_TYPE_PERCPU type Heiko Carstens
2026-05-20 12:43   ` David Laight
2026-05-20 13:50     ` Heiko Carstens
2026-05-20 14:16       ` Heiko Carstens
2026-05-20  9:22 ` [PATCH v3 2/9] s390/percpu: Infrastructure for more efficient this_cpu operations Heiko Carstens
2026-05-20  9:22 ` [PATCH v3 3/9] s390/percpu: Add missing do { } while (0) constructs Heiko Carstens
2026-05-20  9:22 ` Heiko Carstens [this message]
2026-05-20  9:22 ` [PATCH v3 5/9] s390/percpu: Use new percpu code section for arch_this_cpu_add_return() Heiko Carstens
2026-05-20  9:22 ` [PATCH v3 6/9] s390/percpu: Use new percpu code section for arch_this_cpu_[and|or]() Heiko Carstens
2026-05-20  9:22 ` [PATCH v3 7/9] s390/percpu: Provide arch_this_cpu_read() implementation Heiko Carstens
2026-05-20  9:22 ` [PATCH v3 8/9] s390/percpu: Provide arch_this_cpu_write() implementation Heiko Carstens
2026-05-20  9:22 ` [PATCH v3 9/9] s390/percpu: Remove one and two byte this_cpu operation implementation Heiko Carstens
2026-05-20 18:42 ` [PATCH v3 0/9] s390: Improve this_cpu operations Yang Shi
2026-05-20 22:34   ` David Laight
2026-05-21  0:23     ` Yang Shi
2026-05-21 10:17       ` David Laight
2026-05-21 16:57         ` Yang Shi
2026-05-21 17:55           ` David Laight
2026-05-21 20:46             ` Yang Shi
2026-05-21 22:13               ` David Laight
2026-05-21 23:41                 ` Yang Shi
2026-05-21 10:23       ` David Laight
2026-05-21 17:48         ` Yang Shi
2026-05-21 10:37       ` Heiko Carstens
2026-05-21 17:47         ` Yang Shi
2026-05-22  9:18           ` Heiko Carstens
2026-05-27 19:09             ` Christoph Lameter (Ampere)
2026-05-27 20:38               ` Yang Shi
2026-05-28  8:36                 ` David Laight
2026-05-27 23:44             ` Yang Shi
2026-05-28  9:03               ` David Laight
2026-05-28 19:19                 ` Yang Shi
2026-05-28 20:34                   ` David Laight
2026-05-28 14:14               ` Heiko Carstens
2026-05-28 17:14                 ` David Laight
2026-05-28 18:39                 ` Yang Shi

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260520092243.264847-5-hca@linux.ibm.com \
    --to=hca@linux.ibm.com \
    --cc=agordeev@linux.ibm.com \
    --cc=borntraeger@linux.ibm.com \
    --cc=gor@linux.ibm.com \
    --cc=jchrist@linux.ibm.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-s390@vger.kernel.org \
    --cc=peterz@infradead.org \
    --cc=sshegde@linux.ibm.com \
    --cc=svens@linux.ibm.com \
    --cc=yang@os.amperecomputing.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.