From: Christoph Lameter <cl@linux-foundation.org>
To: Tejun Heo <tj@kernel.org>
Cc: linux-kernel@vger.kernel.org
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Subject: [this_cpu_xx V9 4/7] Generic inc / dec percpu instructions
Date: Mon, 04 Jan 2010 16:34:43 -0600 [thread overview]
Message-ID: <20100104223556.090044300@quilx.com> (raw)
In-Reply-To: 20100104223439.228028923@quilx.com
[-- Attachment #1: x86_inc_dec --]
[-- Type: text/plain, Size: 7073 bytes --]
Optimize the code generated for percpu additions by detecting when the
added value is a constant 1 or -1 and emitting inc/dec instead of add.
Signed-off-by: Christoph Lameter <cl@linux-foundation.org>
---
arch/x86/include/asm/percpu.h | 100 ++++++++++++++++++++++++++++++++++++------
1 file changed, 86 insertions(+), 14 deletions(-)
Index: linux-2.6/arch/x86/include/asm/percpu.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/percpu.h 2010-01-04 15:33:02.000000000 -0600
+++ linux-2.6/arch/x86/include/asm/percpu.h 2010-01-04 16:11:29.000000000 -0600
@@ -104,6 +104,78 @@ do { \
} \
} while (0)
+/*
+ * Add val to the percpu variable var using a single instruction.
+ *
+ * If val is a compile-time constant equal to 1 or -1, inc/dec is
+ * emitted instead of add.  __builtin_constant_p() only evaluates to
+ * 0 or 1, so the value of val must be compared as well; any other
+ * value (constant or not) takes the add path.
+ */
+#define percpu_add_op(var, val) \
+do { \
+	typedef typeof(var) pao_T__; \
+	/* 1 or -1 when val is exactly that constant, 0 otherwise */ \
+	const int pao_ID__ = (__builtin_constant_p(val) && \
+			      ((val) == 1 || (val) == -1)) ? (val) : 0; \
+	/* never executed: only checks that val is assignable to var */ \
+	if (0) { \
+		pao_T__ pao_tmp__; \
+		pao_tmp__ = (val); \
+	} \
+	switch (sizeof(var)) { \
+	case 1: \
+		if (pao_ID__ == 1) \
+			asm("incb "__percpu_arg(0) \
+			    : "+m" (var)); \
+		else if (pao_ID__ == -1) \
+			asm("decb "__percpu_arg(0) \
+			    : "+m" (var)); \
+		else \
+			asm("addb %1,"__percpu_arg(0) \
+			    : "+m" (var) \
+			    : "qi" ((pao_T__)(val))); \
+		break; \
+	case 2: \
+		if (pao_ID__ == 1) \
+			asm("incw "__percpu_arg(0) \
+			    : "+m" (var)); \
+		else if (pao_ID__ == -1) \
+			asm("decw "__percpu_arg(0) \
+			    : "+m" (var)); \
+		else \
+			asm("addw %1,"__percpu_arg(0) \
+			    : "+m" (var) \
+			    : "ri" ((pao_T__)(val))); \
+		break; \
+	case 4: \
+		if (pao_ID__ == 1) \
+			asm("incl "__percpu_arg(0) \
+			    : "+m" (var)); \
+		else if (pao_ID__ == -1) \
+			asm("decl "__percpu_arg(0) \
+			    : "+m" (var)); \
+		else \
+			asm("addl %1,"__percpu_arg(0) \
+			    : "+m" (var) \
+			    : "ri" ((pao_T__)(val))); \
+		break; \
+	case 8: \
+		if (pao_ID__ == 1) \
+			asm("incq "__percpu_arg(0) \
+			    : "+m" (var)); \
+		else if (pao_ID__ == -1) \
+			asm("decq "__percpu_arg(0) \
+			    : "+m" (var)); \
+		else \
+			asm("addq %1,"__percpu_arg(0) \
+			    : "+m" (var) \
+			    : "re" ((pao_T__)(val))); \
+		break; \
+	default: __bad_percpu_size(); \
+	} \
+} while (0)
+
#define percpu_from_op(op, var, constraint) \
({ \
typeof(var) pfo_ret__; \
@@ -147,8 +219,8 @@ do { \
#define percpu_read_stable(var) percpu_from_op("mov", per_cpu__##var, \
"p" (&per_cpu__##var))
#define percpu_write(var, val) percpu_to_op("mov", per_cpu__##var, val)
-#define percpu_add(var, val) percpu_to_op("add", per_cpu__##var, val)
-#define percpu_sub(var, val) percpu_to_op("sub", per_cpu__##var, val)
+#define percpu_add(var, val) percpu_add_op(per_cpu__##var, val)
+#define percpu_sub(var, val) percpu_add_op(per_cpu__##var, -(val))
#define percpu_and(var, val) percpu_to_op("and", per_cpu__##var, val)
#define percpu_or(var, val) percpu_to_op("or", per_cpu__##var, val)
#define percpu_xor(var, val) percpu_to_op("xor", per_cpu__##var, val)
@@ -160,9 +232,9 @@ do { \
#define __this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val)
#define __this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val)
#define __this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val)
-#define __this_cpu_add_1(pcp, val) percpu_to_op("add", (pcp), val)
-#define __this_cpu_add_2(pcp, val) percpu_to_op("add", (pcp), val)
-#define __this_cpu_add_4(pcp, val) percpu_to_op("add", (pcp), val)
+#define __this_cpu_add_1(pcp, val) percpu_add_op((pcp), val)
+#define __this_cpu_add_2(pcp, val) percpu_add_op((pcp), val)
+#define __this_cpu_add_4(pcp, val) percpu_add_op((pcp), val)
#define __this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val)
#define __this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val)
#define __this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val)
@@ -179,9 +251,9 @@ do { \
#define this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val)
#define this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val)
#define this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val)
-#define this_cpu_add_1(pcp, val) percpu_to_op("add", (pcp), val)
-#define this_cpu_add_2(pcp, val) percpu_to_op("add", (pcp), val)
-#define this_cpu_add_4(pcp, val) percpu_to_op("add", (pcp), val)
+#define this_cpu_add_1(pcp, val) percpu_add_op((pcp), val)
+#define this_cpu_add_2(pcp, val) percpu_add_op((pcp), val)
+#define this_cpu_add_4(pcp, val) percpu_add_op((pcp), val)
#define this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val)
#define this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val)
#define this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val)
@@ -192,9 +264,9 @@ do { \
#define this_cpu_xor_2(pcp, val) percpu_to_op("xor", (pcp), val)
#define this_cpu_xor_4(pcp, val) percpu_to_op("xor", (pcp), val)
-#define irqsafe_cpu_add_1(pcp, val) percpu_to_op("add", (pcp), val)
-#define irqsafe_cpu_add_2(pcp, val) percpu_to_op("add", (pcp), val)
-#define irqsafe_cpu_add_4(pcp, val) percpu_to_op("add", (pcp), val)
+#define irqsafe_cpu_add_1(pcp, val) percpu_add_op((pcp), val)
+#define irqsafe_cpu_add_2(pcp, val) percpu_add_op((pcp), val)
+#define irqsafe_cpu_add_4(pcp, val) percpu_add_op((pcp), val)
#define irqsafe_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val)
#define irqsafe_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val)
#define irqsafe_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val)
@@ -212,19 +284,19 @@ do { \
#ifdef CONFIG_X86_64
#define __this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
#define __this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val)
-#define __this_cpu_add_8(pcp, val) percpu_to_op("add", (pcp), val)
+#define __this_cpu_add_8(pcp, val) percpu_add_op((pcp), val)
#define __this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
#define __this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
#define __this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val)
#define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
#define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val)
-#define this_cpu_add_8(pcp, val) percpu_to_op("add", (pcp), val)
+#define this_cpu_add_8(pcp, val) percpu_add_op((pcp), val)
#define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
#define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
#define this_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val)
-#define irqsafe_cpu_add_8(pcp, val) percpu_to_op("add", (pcp), val)
+#define irqsafe_cpu_add_8(pcp, val) percpu_add_op((pcp), val)
#define irqsafe_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val)
#define irqsafe_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val)
#define irqsafe_cpu_xor_8(pcp, val) percpu_to_op("xor", (pcp), val)
--
next prev parent reply other threads:[~2010-01-04 22:37 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-01-04 22:34 [this_cpu_xx V9 0/7] Per cpu atomics in page allocator, modules, cleanup and optimized inc/dec Christoph Lameter
2010-01-04 22:34 ` [this_cpu_xx V9 1/7] Remove cpu_local_xx macros Christoph Lameter
2010-01-04 22:34 ` [this_cpu_xx V9 2/7] Module handling: Use this_cpu_xx to dynamically allocate counters Christoph Lameter
2010-01-04 22:34 ` [this_cpu_xx V9 3/7] Move local.h include to ringbuffer.c and ring_buffer_benchmark.c Christoph Lameter
2010-01-04 22:34 ` Christoph Lameter [this message]
2010-01-05 1:19 ` [this_cpu_xx V9 4/7] Generic inc / dec percpu instructions Tejun Heo
2010-01-05 15:21 ` Christoph Lameter
2010-01-04 22:34 ` [this_cpu_xx V9 5/7] this_cpu_ops: page allocator conversion Christoph Lameter
2010-01-05 6:32 ` Tejun Heo
2010-01-05 15:22 ` Christoph Lameter
2010-01-05 23:44 ` Tejun Heo
2010-01-04 22:34 ` [this_cpu_xx V9 6/7] this_cpu ops: Remove pageset_notifier Christoph Lameter
2010-01-04 22:34 ` [this_cpu_xx V9 7/7] Remove leftover local.h Christoph Lameter
2010-01-05 6:37 ` [this_cpu_xx V9 0/7] Per cpu atomics in page allocator, modules, cleanup and optimized inc/dec Tejun Heo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20100104223556.090044300@quilx.com \
--to=cl@linux-foundation.org \
--cc=linux-kernel@vger.kernel.org \
--cc=tj@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.