From: Nicholas Piggin <npiggin@gmail.com>
To: Tejun Heo <tj@kernel.org>, Christoph Lameter <cl@linux.com>
Cc: linux-kernel@vger.kernel.org, linux-arch@vger.kernel.org,
linuxppc-dev@lists.ozlabs.org
Subject: Re: [PATCH] percpu: improve generic percpu modify-return implementation
Date: Wed, 21 Sep 2016 20:57:11 +1000 [thread overview]
Message-ID: <20160921205711.4e804777@roar.ozlabs.ibm.com> (raw)
In-Reply-To: <20160921085137.862-1-npiggin@gmail.com>
On Wed, 21 Sep 2016 18:51:37 +1000
Nicholas Piggin <npiggin@gmail.com> wrote:
> Some architectures require an additional load to find the address of
> percpu pointers. In some implemenatations, the C aliasing rules do not
> allow the result of that load to be kept over the store that modifies
> the percpu variable, which causes additional loads.
Sorry I picked up an old patch here. This one should be better.
From d0cb9052d6f4c31d24f999b7b0cecb34681eee9b Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Wed, 21 Sep 2016 18:23:43 +1000
Subject: [PATCH] percpu: improve generic percpu modify-return implementations
Some architectures require an additional load to find the address of
percpu pointers. In some implemenatations, the C aliasing rules do not
allow the result of that load to be kept over the store that modifies
the percpu variable, which causes additional loads.
Work around this by finding the pointer first, then operating on that.
It's also possible to mark things as restrict and those kind of games,
but that can require larger and arch specific changes.
On powerpc, __this_cpu_inc_return compiles to:
ld 10,48(13)
ldx 9,3,10
addi 9,9,1
stdx 9,3,10
ld 9,48(13)
ldx 3,9,3
With this patch it compiles to:
ld 10,48(13)
ldx 9,3,10
addi 9,9,1
stdx 9,3,10
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
include/asm-generic/percpu.h | 53 +++++++++++++++++++++++++-------------------
1 file changed, 30 insertions(+), 23 deletions(-)
diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index 4d9f233..40e8870 100644
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -65,6 +65,11 @@ extern void setup_per_cpu_areas(void);
#define PER_CPU_DEF_ATTRIBUTES
#endif
+#define raw_cpu_generic_read(pcp) \
+({ \
+ *raw_cpu_ptr(&(pcp)); \
+})
+
#define raw_cpu_generic_to_op(pcp, val, op) \
do { \
*raw_cpu_ptr(&(pcp)) op val; \
@@ -72,34 +77,39 @@ do { \
#define raw_cpu_generic_add_return(pcp, val) \
({ \
- raw_cpu_add(pcp, val); \
- raw_cpu_read(pcp); \
+ typeof(&(pcp)) __p = raw_cpu_ptr(&(pcp)); \
+ \
+ *__p += val; \
+ *__p; \
})
#define raw_cpu_generic_xchg(pcp, nval) \
({ \
+ typeof(&(pcp)) __p = raw_cpu_ptr(&(pcp)); \
typeof(pcp) __ret; \
- __ret = raw_cpu_read(pcp); \
- raw_cpu_write(pcp, nval); \
+ __ret = *__p; \
+ *__p = nval; \
__ret; \
})
#define raw_cpu_generic_cmpxchg(pcp, oval, nval) \
({ \
+ typeof(&(pcp)) __p = raw_cpu_ptr(&(pcp)); \
typeof(pcp) __ret; \
- __ret = raw_cpu_read(pcp); \
+ __ret = *__p; \
if (__ret == (oval)) \
- raw_cpu_write(pcp, nval); \
+ *__p = nval; \
__ret; \
})
#define raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
({ \
+ typeof(&(pcp1)) __p1 = raw_cpu_ptr(&(pcp1)); \
+ typeof(&(pcp2)) __p2 = raw_cpu_ptr(&(pcp2)); \
int __ret = 0; \
- if (raw_cpu_read(pcp1) == (oval1) && \
- raw_cpu_read(pcp2) == (oval2)) { \
- raw_cpu_write(pcp1, nval1); \
- raw_cpu_write(pcp2, nval2); \
+ if (*__p1 == (oval1) && *__p2 == (oval2)) { \
+ *__p1 = nval1; \
+ *__p2 = nval2; \
__ret = 1; \
} \
(__ret); \
@@ -109,7 +119,7 @@ do { \
({ \
typeof(pcp) __ret; \
preempt_disable(); \
- __ret = *this_cpu_ptr(&(pcp)); \
+ __ret = raw_cpu_generic_read(pcp); \
preempt_enable(); \
__ret; \
})
@@ -118,17 +128,17 @@ do { \
do { \
unsigned long __flags; \
raw_local_irq_save(__flags); \
- *raw_cpu_ptr(&(pcp)) op val; \
+ raw_cpu_generic_to_op(pcp, val, op); \
raw_local_irq_restore(__flags); \
} while (0)
+
#define this_cpu_generic_add_return(pcp, val) \
({ \
typeof(pcp) __ret; \
unsigned long __flags; \
raw_local_irq_save(__flags); \
- raw_cpu_add(pcp, val); \
- __ret = raw_cpu_read(pcp); \
+ __ret = raw_cpu_generic_add_return(pcp, val); \
raw_local_irq_restore(__flags); \
__ret; \
})
@@ -138,8 +148,7 @@ do { \
typeof(pcp) __ret; \
unsigned long __flags; \
raw_local_irq_save(__flags); \
- __ret = raw_cpu_read(pcp); \
- raw_cpu_write(pcp, nval); \
+ __ret = raw_cpu_generic_xchg(pcp, nval); \
raw_local_irq_restore(__flags); \
__ret; \
})
@@ -149,9 +158,7 @@ do { \
typeof(pcp) __ret; \
unsigned long __flags; \
raw_local_irq_save(__flags); \
- __ret = raw_cpu_read(pcp); \
- if (__ret == (oval)) \
- raw_cpu_write(pcp, nval); \
+ __ret = raw_cpu_generic_cmpxchg(pcp, oval, nval); \
raw_local_irq_restore(__flags); \
__ret; \
})
@@ -168,16 +175,16 @@ do { \
})
#ifndef raw_cpu_read_1
-#define raw_cpu_read_1(pcp) (*raw_cpu_ptr(&(pcp)))
+#define raw_cpu_read_1(pcp) raw_cpu_generic_read(pcp)
#endif
#ifndef raw_cpu_read_2
-#define raw_cpu_read_2(pcp) (*raw_cpu_ptr(&(pcp)))
+#define raw_cpu_read_2(pcp) raw_cpu_generic_read(pcp)
#endif
#ifndef raw_cpu_read_4
-#define raw_cpu_read_4(pcp) (*raw_cpu_ptr(&(pcp)))
+#define raw_cpu_read_4(pcp) raw_cpu_generic_read(pcp)
#endif
#ifndef raw_cpu_read_8
-#define raw_cpu_read_8(pcp) (*raw_cpu_ptr(&(pcp)))
+#define raw_cpu_read_8(pcp) raw_cpu_generic_read(pcp)
#endif
#ifndef raw_cpu_write_1
--
2.9.3
next prev parent reply other threads:[~2016-09-21 10:57 UTC|newest]
Thread overview: 19+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-09-21 8:51 [PATCH] percpu: improve generic percpu modify-return implementation Nicholas Piggin
2016-09-21 8:51 ` Nicholas Piggin
2016-09-21 10:25 ` kbuild test robot
2016-09-21 10:25 ` kbuild test robot
2016-09-21 10:30 ` kbuild test robot
2016-09-21 10:30 ` kbuild test robot
2016-09-21 10:30 ` kbuild test robot
2016-09-21 10:30 ` kbuild test robot
2016-09-21 10:57 ` Nicholas Piggin [this message]
2016-09-21 14:23 ` Tejun Heo
2016-09-21 14:23 ` Tejun Heo
2016-09-21 20:16 ` Christoph Lameter
2016-09-21 20:16 ` Christoph Lameter
2016-09-22 4:42 ` Nicholas Piggin
2016-09-22 4:35 ` Nicholas Piggin
2016-09-22 4:35 ` Nicholas Piggin
2016-09-22 16:07 ` Tejun Heo
2016-09-22 16:07 ` Tejun Heo
2016-09-23 7:33 ` Nicholas Piggin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20160921205711.4e804777@roar.ozlabs.ibm.com \
--to=npiggin@gmail.com \
--cc=cl@linux.com \
--cc=linux-arch@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linuxppc-dev@lists.ozlabs.org \
--cc=tj@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).