public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
To: akpm@linux-foundation.org, Ingo Molnar <mingo@elte.hu>,
	linux-kernel@vger.kernel.org
Cc: Andi Kleen <andi@firstfloor.org>,
	Rusty Russell <rusty@rustcorp.com.au>,
	Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>,
	pageexec@freemail.hu, Thomas Gleixner <tglx@linutronix.de>,
	Ingo Molnar <mingo@redhat.com>, "H. Peter Anvin" <hpa@zytor.com>,
	Jeremy Fitzhardinge <jeremy@goop.org>
Subject: [patch 04/17] x86 - Enhance DEBUG_RODATA support - alternatives
Date: Wed, 09 Apr 2008 11:08:33 -0400	[thread overview]
Message-ID: <20080409152047.821224339@polymtl.ca> (raw)
In-Reply-To: 20080409150829.855195878@polymtl.ca

[-- Attachment #1: x86-enhance-debug-rodata-support-alternatives.patch --]
[-- Type: text/plain, Size: 9750 bytes --]

Fix a memcpy that should be a text_poke (in apply_alternatives).

Use kernel_wp_save/kernel_wp_restore in text_poke to support DEBUG_RODATA
correctly, so that the CPU HOTPLUG special case can be removed.

Add text_poke_early, for alternatives and paravirt boot-time and module load
time patching.

Changelog:

- Fix text_set and text_poke alignment check (bitwise AND and OR were mixed up)
- Remove text_set
- Export add_nops, so it can be used by others.
- Document text_poke_early.
- Remove clflush, since it breaks some VIA architectures and is not strictly
  necessary.
- Add kerneldoc to text_poke and text_poke_early.
- Create a second vmap instead of using the WP bit to support Xen and VMI.
- Move local_irq disable within text_poke and text_poke_early to be able to
  be sleepable in these functions.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
CC: Andi Kleen <andi@firstfloor.org>
CC: pageexec@freemail.hu
CC: Thomas Gleixner <tglx@linutronix.de>
CC: Ingo Molnar <mingo@redhat.com>
CC: H. Peter Anvin <hpa@zytor.com>
CC: Jeremy Fitzhardinge <jeremy@goop.org>
---
 arch/x86/kernel/alternative.c |   88 +++++++++++++++++++++++++++++++-----------
 include/asm-x86/alternative.h |   23 ++++++++++
 2 files changed, 87 insertions(+), 24 deletions(-)

Index: linux-2.6-lttng.mm/arch/x86/kernel/alternative.c
===================================================================
--- linux-2.6-lttng.mm.orig/arch/x86/kernel/alternative.c	2008-04-09 11:02:43.000000000 -0400
+++ linux-2.6-lttng.mm/arch/x86/kernel/alternative.c	2008-04-09 11:04:09.000000000 -0400
@@ -5,12 +5,14 @@
 #include <linux/kprobes.h>
 #include <linux/mm.h>
 #include <linux/vmalloc.h>
+#include <linux/io.h>
 #include <asm/alternative.h>
 #include <asm/sections.h>
 #include <asm/pgtable.h>
 #include <asm/mce.h>
 #include <asm/nmi.h>
 #include <asm/vsyscall.h>
+#include <asm/cacheflush.h>
 
 #define MAX_PATCH_LEN (255-1)
 
@@ -173,7 +175,7 @@ static const unsigned char*const * find_
 #endif /* CONFIG_X86_64 */
 
 /* Use this to add nops to a buffer, then text_poke the whole buffer. */
-static void add_nops(void *insns, unsigned int len)
+void add_nops(void *insns, unsigned int len)
 {
 	const unsigned char *const *noptable = find_nop_table();
 
@@ -186,6 +188,7 @@ static void add_nops(void *insns, unsign
 		len -= noplen;
 	}
 }
+EXPORT_SYMBOL_GPL(add_nops);
 
 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
 extern u8 *__smp_locks[], *__smp_locks_end[];
@@ -219,7 +222,7 @@ void apply_alternatives(struct alt_instr
 		memcpy(insnbuf, a->replacement, a->replacementlen);
 		add_nops(insnbuf + a->replacementlen,
 			 a->instrlen - a->replacementlen);
-		text_poke(instr, insnbuf, a->instrlen);
+		text_poke_early(instr, insnbuf, a->instrlen);
 	}
 }
 
@@ -280,7 +283,6 @@ void alternatives_smp_module_add(struct 
 				 void *text,  void *text_end)
 {
 	struct smp_alt_module *smp;
-	unsigned long flags;
 
 	if (noreplace_smp)
 		return;
@@ -306,39 +308,37 @@ void alternatives_smp_module_add(struct 
 		__FUNCTION__, smp->locks, smp->locks_end,
 		smp->text, smp->text_end, smp->name);
 
-	spin_lock_irqsave(&smp_alt, flags);
+	spin_lock(&smp_alt);
 	list_add_tail(&smp->next, &smp_alt_modules);
 	if (boot_cpu_has(X86_FEATURE_UP))
 		alternatives_smp_unlock(smp->locks, smp->locks_end,
 					smp->text, smp->text_end);
-	spin_unlock_irqrestore(&smp_alt, flags);
+	spin_unlock(&smp_alt);
 }
 
 void alternatives_smp_module_del(struct module *mod)
 {
 	struct smp_alt_module *item;
-	unsigned long flags;
 
 	if (smp_alt_once || noreplace_smp)
 		return;
 
-	spin_lock_irqsave(&smp_alt, flags);
+	spin_lock(&smp_alt);
 	list_for_each_entry(item, &smp_alt_modules, next) {
 		if (mod != item->mod)
 			continue;
 		list_del(&item->next);
-		spin_unlock_irqrestore(&smp_alt, flags);
+		spin_unlock(&smp_alt);
 		DPRINTK("%s: %s\n", __FUNCTION__, item->name);
 		kfree(item);
 		return;
 	}
-	spin_unlock_irqrestore(&smp_alt, flags);
+	spin_unlock(&smp_alt);
 }
 
 void alternatives_smp_switch(int smp)
 {
 	struct smp_alt_module *mod;
-	unsigned long flags;
 
 #ifdef CONFIG_LOCKDEP
 	/*
@@ -355,7 +355,7 @@ void alternatives_smp_switch(int smp)
 		return;
 	BUG_ON(!smp && (num_online_cpus() > 1));
 
-	spin_lock_irqsave(&smp_alt, flags);
+	spin_lock(&smp_alt);
 
 	/*
 	 * Avoid unnecessary switches because it forces JIT based VMs to
@@ -379,7 +379,7 @@ void alternatives_smp_switch(int smp)
 						mod->text, mod->text_end);
 	}
 	smp_mode = smp;
-	spin_unlock_irqrestore(&smp_alt, flags);
+	spin_unlock(&smp_alt);
 }
 
 #endif
@@ -407,7 +407,7 @@ void apply_paravirt(struct paravirt_patc
 
 		/* Pad the rest with nops */
 		add_nops(insnbuf + used, p->len - used);
-		text_poke(p->instr, insnbuf, p->len);
+		text_poke_early(p->instr, insnbuf, p->len);
 	}
 }
 extern struct paravirt_patch_site __start_parainstructions[],
@@ -416,8 +416,6 @@ extern struct paravirt_patch_site __star
 
 void __init alternative_instructions(void)
 {
-	unsigned long flags;
-
 	/* The patching is not fully atomic, so try to avoid local interruptions
 	   that might execute the to be patched code.
 	   Other CPUs are not running. */
@@ -426,7 +424,6 @@ void __init alternative_instructions(voi
 	stop_mce();
 #endif
 
-	local_irq_save(flags);
 	apply_alternatives(__alt_instructions, __alt_instructions_end);
 
 	/* switch to patch-once-at-boottime-only mode and free the
@@ -458,7 +455,6 @@ void __init alternative_instructions(voi
 	}
 #endif
  	apply_paravirt(__parainstructions, __parainstructions_end);
-	local_irq_restore(flags);
 
 	if (smp_alt_once)
 		free_init_pages("SMP alternatives",
@@ -471,18 +467,64 @@ void __init alternative_instructions(voi
 #endif
 }
 
-/*
- * Warning:
+/**
+ * text_poke_early - Update instructions on a live kernel at boot time
+ * @addr: address to modify
+ * @opcode: source of the copy
+ * @len: length to copy
+ *
  * When you use this code to patch more than one byte of an instruction
  * you need to make sure that other CPUs cannot execute this code in parallel.
- * Also no thread must be currently preempted in the middle of these instructions.
- * And on the local CPU you need to be protected again NMI or MCE handlers
- * seeing an inconsistent instruction while you patch.
+ * Also no thread must be currently preempted in the middle of these
+ * instructions. And on the local CPU you need to be protected against NMI or MCE
+ * handlers seeing an inconsistent instruction while you patch.
  */
-void __kprobes text_poke(void *addr, unsigned char *opcode, int len)
+void *text_poke_early(void *addr, const void *opcode, size_t len)
 {
+	unsigned long flags;
+	local_irq_save(flags);
 	memcpy(addr, opcode, len);
+	local_irq_restore(flags);
+	sync_core();
+	/* Could also do a CLFLUSH here to speed up CPU recovery; but
+	   that causes hangs on some VIA CPUs. */
+	return addr;
+}
+
+/**
+ * text_poke - Update instructions on a live kernel
+ * @addr: address to modify
+ * @opcode: source of the copy
+ * @len: length to copy
+ *
+ * Only atomic text poke/set should be allowed when not doing early patching.
+ * It means the size must be writable atomically and the address must be aligned
+ * in a way that permits an atomic write. It also makes sure we fit on a single
+ * page.
+ */
+void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
+{
+	unsigned long flags;
+	char *vaddr;
+	int nr_pages = 2;
+
+	BUG_ON(len > sizeof(long));
+	BUG_ON((((long)addr + len - 1) & ~(sizeof(long) - 1))
+		- ((long)addr & ~(sizeof(long) - 1)));
+	{
+		struct page *pages[2] = { virt_to_page(addr),
+			virt_to_page(addr + PAGE_SIZE) };
+		if (!pages[1])
+			nr_pages = 1;
+		vaddr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
+		WARN_ON(!vaddr);
+		local_irq_save(flags);
+		memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len);
+		local_irq_restore(flags);
+		vunmap(vaddr);
+	}
 	sync_core();
 	/* Could also do a CLFLUSH here to speed up CPU recovery; but
 	   that causes hangs on some VIA CPUs. */
+	return addr;
 }
Index: linux-2.6-lttng.mm/include/asm-x86/alternative.h
===================================================================
--- linux-2.6-lttng.mm.orig/include/asm-x86/alternative.h	2008-04-09 11:02:43.000000000 -0400
+++ linux-2.6-lttng.mm/include/asm-x86/alternative.h	2008-04-09 11:03:04.000000000 -0400
@@ -156,6 +156,27 @@ apply_paravirt(struct paravirt_patch_sit
 #define __parainstructions_end	NULL
 #endif
 
-extern void text_poke(void *addr, unsigned char *opcode, int len);
+extern void add_nops(void *insns, unsigned int len);
+
+/*
+ * Clear and restore the kernel write-protection flag on the local CPU.
+ * Allows the kernel to edit read-only pages.
+ * Side-effect: any interrupt handler running between save and restore will have
+ * the ability to write to read-only pages.
+ *
+ * Warning:
+ * Code patching in the UP case is safe if NMIs and MCE handlers are stopped and
+ * no thread can be preempted in the instructions being modified (no iret to an
+ * invalid instruction possible) or if the instructions are changed from a
+ * consistent state to another consistent state atomically.
+ * More care must be taken when modifying code in the SMP case because of
+ * Intel's errata.
+ * On the local CPU you need to be protected against NMI or MCE handlers seeing
+ * an inconsistent instruction while you patch.
+ * The _early version expects the memory to already be RW.
+ */
+
+extern void *text_poke(void *addr, const void *opcode, size_t len);
+extern void *text_poke_early(void *addr, const void *opcode, size_t len);
 
 #endif /* _ASM_X86_ALTERNATIVE_H */

-- 
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F  BA06 3F25 A8FE 3BAE 9A68

  parent reply	other threads:[~2008-04-09 16:25 UTC|newest]

Thread overview: 48+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-04-09 15:08 [patch 00/17] Text Edit Lock and Immediate Values for 2.6.25-rc8-mm1 Mathieu Desnoyers
2008-04-09 15:08 ` [patch 01/17] Kprobes - use a mutex to protect the instruction pages list Mathieu Desnoyers
2008-04-09 20:08   ` Masami Hiramatsu
2008-04-09 15:08 ` [patch 02/17] Kprobes - do not use kprobes mutex in arch code Mathieu Desnoyers
2008-04-09 20:08   ` Masami Hiramatsu
2008-04-09 15:08 ` [patch 03/17] Kprobes - declare kprobe_mutex static Mathieu Desnoyers
2008-04-09 20:08   ` Masami Hiramatsu
2008-04-09 15:08 ` Mathieu Desnoyers [this message]
2008-04-09 15:08 ` [patch 05/17] x86 Fix text_poke for vmalloced pages Mathieu Desnoyers
2008-04-09 15:08 ` [patch 06/17] x86 - Enhance DEBUG_RODATA support for hotplug and kprobes Mathieu Desnoyers
2008-04-09 15:08 ` [patch 07/17] Text Edit Lock - Architecture Independent Code Mathieu Desnoyers
2008-04-09 15:08 ` [patch 08/17] Text Edit Lock - kprobes architecture independent support Mathieu Desnoyers
2008-04-09 15:08 ` [patch 09/17] Add all cpus option to stop machine run Mathieu Desnoyers
2008-04-09 18:10   ` Alexey Dobriyan
2008-04-09 18:24     ` Andi Kleen
2008-04-10  3:34       ` Rusty Russell
2008-04-10  4:26       ` KOSAKI Motohiro
2008-04-09 18:54     ` Mathieu Desnoyers
2008-04-09 15:08 ` [patch 10/17] Immediate Values - Architecture Independent Code Mathieu Desnoyers
2008-04-09 15:08 ` [patch 11/17] Implement immediate update via stop_machine_run Mathieu Desnoyers
2008-04-10  8:04   ` KOSAKI Motohiro
2008-04-10 20:01     ` Mathieu Desnoyers
2008-04-11  4:50       ` KOSAKI Motohiro
2008-04-09 15:08 ` [patch 12/17] Immediate Values - Kconfig menu in EMBEDDED Mathieu Desnoyers
2008-04-10  3:23   ` Rusty Russell
2008-04-10 19:32     ` [patch 12/17] Immediate Values - Kconfig menu in EMBEDDED (updated) Mathieu Desnoyers
2008-04-10 21:54       ` Rusty Russell
2008-04-14 23:52         ` Mathieu Desnoyers
2008-04-09 15:08 ` [patch 13/17] Immediate Values - x86 Optimization Mathieu Desnoyers
2008-04-09 18:01   ` H. Peter Anvin
2008-04-09 19:08     ` Mathieu Desnoyers
2008-04-09 22:33       ` H. Peter Anvin
2008-04-10  0:42         ` Mathieu Desnoyers
2008-04-10  0:47           ` H. Peter Anvin
2008-04-09 20:21     ` [patch 13/17] Immediate Values - x86 Optimization (updated) Mathieu Desnoyers
2008-04-09 22:33       ` H. Peter Anvin
2008-04-09 23:15         ` Mathieu Desnoyers
2008-04-09 15:08 ` [patch 14/17] Add text_poke and sync_core to powerpc Mathieu Desnoyers
2008-04-09 15:08 ` [patch 15/17] Immediate Values - Powerpc Optimization Mathieu Desnoyers
2008-04-09 15:08 ` [patch 16/17] Immediate Values - Documentation Mathieu Desnoyers
2008-04-10  3:33   ` Rusty Russell
2008-04-11  1:16     ` Mathieu Desnoyers
2008-04-11 15:06       ` Rusty Russell
2008-04-15  0:12         ` Mathieu Desnoyers
2008-04-11 13:44     ` [RFC PATCH] Immediate Values Support init Mathieu Desnoyers
2008-04-09 15:08 ` [patch 17/17] Scheduler Profiling - Use Immediate Values Mathieu Desnoyers
2008-04-10  4:23 ` [patch 00/17] Text Edit Lock and Immediate Values for 2.6.25-rc8-mm1 KOSAKI Motohiro
2008-04-10  7:31 ` Takashi Nishiie

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080409152047.821224339@polymtl.ca \
    --to=mathieu.desnoyers@polymtl.ca \
    --cc=akpm@linux-foundation.org \
    --cc=andi@firstfloor.org \
    --cc=hpa@zytor.com \
    --cc=jeremy@goop.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=mingo@redhat.com \
    --cc=pageexec@freemail.hu \
    --cc=rusty@rustcorp.com.au \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox