All of lore.kernel.org
 help / color / mirror / Atom feed
From: Steven Rostedt <rostedt@goodmis.org>
To: linux-kernel@vger.kernel.org
Cc: Ingo Molnar <mingo@elte.hu>,
	Andrew Morton <akpm@linux-foundation.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Frederic Weisbecker <fweisbec@gmail.com>,
	"H. Peter Anvin" <hpa@zytor.com>,
	Mathieu Desnoyers <mathieu.desnoyers@efficios.com>,
	Jason Baron <jbaron@redhat.com>
Subject: [PATCH 2/2] jump labels/x86: Use either 5 byte or 2 byte jumps
Date: Wed, 18 Jan 2012 14:53:42 -0500	[thread overview]
Message-ID: <20120118195926.797694014@goodmis.org> (raw)
In-Reply-To: 20120118195340.767928915@goodmis.org

[-- Attachment #1: 0002-jump-labels-x86-Use-etiher-5-byte-or-2-byte-jumps.patch --]
[-- Type: text/plain, Size: 5389 bytes --]

From: Steven Rostedt <srostedt@redhat.com>

Have the jump labels add a "jmp" in the assembly instead
of a default nop. This will cause the assembler to put in
either a 2 byte or 5 byte jmp depending on where the target
label is.

Then at compile time, the update_jump_label code will replace
the jmps with either 2 or 5 byte nops.

On boot up, the code can be examined to see if the jump label
uses either a 2 or 5 byte nop and replace it.

Allowing the jump labels to be 2 bytes speeds up the nops,
not only because 2 byte nops are faster than 5 byte nops, but
also because it saves on cache footprint.

   text    data     bss     dec     hex filename
13403667 3666856 2998272 20068795 13239bb ../nobackup/mxtest/vmlinux-old
13398536 3666856 2998272 20063664 13225b0 ../nobackup/mxtest/vmlinux-new

Converting the current v3.2 trace points saved 5,131 bytes.
As more places use jump labels, the savings will grow.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/x86/Kconfig                  |    1 +
 arch/x86/include/asm/jump_label.h |    2 +-
 arch/x86/kernel/jump_label.c      |   86 ++++++++++++++++++++++++++++++++++---
 3 files changed, 81 insertions(+), 8 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index efb4294..b5004c1 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -61,6 +61,7 @@ config X86
 	select HAVE_ARCH_KMEMCHECK
 	select HAVE_USER_RETURN_NOTIFIER
 	select HAVE_ARCH_JUMP_LABEL
+	select HAVE_BUILD_TIME_JUMP_LABEL
 	select HAVE_TEXT_POKE_SMP
 	select HAVE_GENERIC_HARDIRQS
 	select HAVE_SPARSE_IRQ
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index a32b18c..872b3e1 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -14,7 +14,7 @@
 static __always_inline bool arch_static_branch(struct jump_label_key *key)
 {
 	asm goto("1:"
-		JUMP_LABEL_INITIAL_NOP
+		"jmp %l[l_yes]\n"
 		".pushsection __jump_table,  \"aw\" \n\t"
 		_ASM_ALIGN "\n\t"
 		_ASM_PTR "1b, %l[l_yes], %c0 \n\t"
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index ea9d5f2f..d5b84de 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -16,12 +16,27 @@
 
 #ifdef HAVE_JUMP_LABEL
 
+static unsigned char nop_short[] = { P6_NOP2 };
+
+/* These are the nops added at compile time */
+#ifdef CONFIG_X86_32
+static unsigned char default_nop[5] = { 0x3e, 0x8d, 0x74, 0x26, 0x00 };
+#else
+static unsigned char default_nop[5] = { 0x0f, 0x1f, 0x44, 0x00, 0x00 };
+#endif
+
+static int update_nops;
+
 union jump_code_union {
 	char code[JUMP_LABEL_NOP_SIZE];
 	struct {
 		char jump;
 		int offset;
-	} __attribute__((packed));
+	} __packed;
+	struct {
+		char jump_short;
+		char offset_short;
+	} __packed;
 };
 
 static void __jump_label_transform(struct jump_entry *entry,
@@ -29,20 +44,70 @@ static void __jump_label_transform(struct jump_entry *entry,
 				   void *(*poker)(void *, const void *, size_t))
 {
 	union jump_code_union code;
+	unsigned char nop;
+	unsigned char op;
+	unsigned size;
+	void *ip = (void *)entry->code;
+	void *ideal = (void *)ideal_nops[NOP_ATOMIC5];
+
+	/* Use probe_kernel_read()? */
+	op = *(unsigned char *)ip;
+	nop = ideal_nops[NOP_ATOMIC5][0];
 
 	if (type == JUMP_LABEL_ENABLE) {
-		code.jump = 0xe9;
-		code.offset = entry->target -
-				(entry->code + JUMP_LABEL_NOP_SIZE);
-	} else
-		memcpy(&code, ideal_nops[NOP_ATOMIC5], JUMP_LABEL_NOP_SIZE);
+		if (op == 0xe9 || op == 0xeb)
+			/* Already enabled. Warn? */
+			return;
+
+		if (memcmp(ip, nop_short, 2) == 0) {
+			size = 2;
+			code.jump_short = 0xeb;
+			code.offset = entry->target -
+				(entry->code + 2);
+			/* Check for overflow ? */
+		} else if (memcmp(ip, ideal, 5) == 0 ||
+			   memcmp(ip, default_nop, 5)) {
+			size = JUMP_LABEL_NOP_SIZE;
+			code.jump = 0xe9;
+			code.offset = entry->target - (entry->code + size);
+		} else
+			BUG();
 
-	(*poker)((void *)entry->code, &code, JUMP_LABEL_NOP_SIZE);
+	} else {
+		/* Check if already disabled */
+		if (memcmp(ip, nop_short, 2) == 0)
+			return;
+
+		if (memcmp(ip, ideal, 5) == 0)
+			return;
+
+		/* This may need to update from default to ideal */
+		if (update_nops && memcmp(ip, default_nop, 5) == 0) {
+			/* Set to the ideal nop */
+			size = JUMP_LABEL_NOP_SIZE;
+			memcpy(&code, ideal_nops[NOP_ATOMIC5], size);
+
+		} else if (op == 0xe9) {
+			/* Replace a 5 byte jmp */
+			size = JUMP_LABEL_NOP_SIZE;
+			memcpy(&code, ideal_nops[NOP_ATOMIC5], size);
+		} else if (op == 0xeb) {
+			/* Replace a 2 byte jmp */
+			size = 2;
+			memcpy(&code, nop_short, size);
+		} else
+			BUG();
+	}
+
+	(*poker)((void *)entry->code, &code, size);
 }
 
 void arch_jump_label_transform(struct jump_entry *entry,
 			       enum jump_label_type type)
 {
+	/* All nops should be updated to the ideal nop by now */
+	update_nops = 0;
+
 	get_online_cpus();
 	mutex_lock(&text_mutex);
 	__jump_label_transform(entry, type, text_poke_smp);
@@ -53,6 +118,13 @@ void arch_jump_label_transform(struct jump_entry *entry,
 void arch_jump_label_transform_static(struct jump_entry *entry,
 				      enum jump_label_type type)
 {
+	/*
+	 * If the default nop does not equal the ideal nop, then
+	 * update them.
+	 */
+	if (memcmp(default_nop, ideal_nops[NOP_ATOMIC5], 5) != 0)
+		update_nops = 1;
+
 	__jump_label_transform(entry, type, text_poke_early);
 }
 
-- 
1.7.8.3



  parent reply	other threads:[~2012-01-18 19:59 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-01-18 19:53 [PATCH 0/2] [RFC] jump-label/x86: Compress jmps to 2 bytes where possible Steven Rostedt
2012-01-18 19:53 ` [PATCH 1/2] jump labels: Add infrastructure to update jump labels at compile time Steven Rostedt
2012-01-19 14:24   ` Mathieu Desnoyers
2012-01-19 14:52     ` Steven Rostedt
2012-01-18 19:53 ` Steven Rostedt [this message]
2012-01-19 12:22   ` [PATCH 2/2] jump labels/x86: Use either 5 byte or 2 byte jumps Ingo Molnar
2012-01-19 14:41   ` Mathieu Desnoyers
2012-01-19 14:46     ` H. Peter Anvin
2012-01-19 14:58       ` Steven Rostedt
2012-01-19 15:19         ` Steven Rostedt
2012-01-19 14:56     ` Steven Rostedt
2012-01-19 14:58       ` H. Peter Anvin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20120118195926.797694014@goodmis.org \
    --to=rostedt@goodmis.org \
    --cc=akpm@linux-foundation.org \
    --cc=fweisbec@gmail.com \
    --cc=hpa@zytor.com \
    --cc=jbaron@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mathieu.desnoyers@efficios.com \
    --cc=mingo@elte.hu \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.