* [patch 6/8] Immediate Values - Powerpc Optimization
2007-08-12 15:07 [patch 0/8] Immediate Values Mathieu Desnoyers
@ 2007-08-12 15:07 ` Mathieu Desnoyers
0 siblings, 0 replies; 36+ messages in thread
From: Mathieu Desnoyers @ 2007-08-12 15:07 UTC (permalink / raw)
To: akpm, linux-kernel; +Cc: Mathieu Desnoyers, Christoph Hellwig
[-- Attachment #1: immediate-values-powerpc-optimization.patch --]
[-- Type: text/plain, Size: 8389 bytes --]
PowerPC optimization of the immediate values which uses a li instruction,
patched with an immediate value.
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
CC: Christoph Hellwig <hch@infradead.org>
---
arch/powerpc/kernel/Makefile | 1
arch/powerpc/kernel/immediate.c | 103 ++++++++++++++++++++++++++++++++
include/asm-powerpc/immediate.h | 127 ++++++++++++++++++++++++++++++++++++++++
3 files changed, 231 insertions(+)
Index: linux-2.6-lttng/include/asm-powerpc/immediate.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/include/asm-powerpc/immediate.h 2007-08-07 13:33:13.000000000 -0400
@@ -0,0 +1,127 @@
+#ifndef _ASM_POWERPC_IMMEDIATE_H
+#define _ASM_POWERPC_IMMEDIATE_H
+
+/*
+ * Immediate values. PowerPC architecture optimizations.
+ *
+ * (C) Copyright 2006 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#include <asm/asm-compat.h>
+
+struct module;
+
+struct __immediate {
+ long var; /* Identifier variable of the immediate value */
+ long immediate; /*
+ * Pointer to the memory location that holds
+ * the immediate value within the load immediate
+ * instruction.
+ */
+ long size; /* Type size. */
+};
+
+/**
+ * immediate_read - read immediate variable
+ * @var: pointer of type immediate_*_t
+ *
+ * Reads the value of @var.
+ * Optimized version of the immediate.
+ * Do not use in __init and __exit functions. Use _immediate_read() instead.
+ * Makes sure the 2 bytes update will be atomic by aligning the immediate
+ * value. Use a normal memory read for the 4 bytes immediate because there is no
+ * way to atomically update it without using a seqlock read side, which would
+ * cost more in term of total i-cache and d-cache space than a simple memory
+ * read.
+ */
+#define immediate_read(var) \
+ ({ \
+ __typeof__((var)->value) value; \
+ switch (sizeof(value)) { \
+ case 1: \
+ asm ( ".section __immediate, \"a\", @progbits;\n\t" \
+ PPC_LONG "%1, ((0f)+3), 1;\n\t" \
+ ".previous;\n\t" \
+ "0:\n\t" \
+ "li %0,%2;\n\t" \
+ : "=r" (value) \
+ : "i" (&(var)->value), \
+ "i" (0)); \
+ break; \
+ case 2: \
+ asm ( ".section __immediate, \"a\", @progbits;\n\t" \
+ PPC_LONG "%1, ((0f)+2), 2;\n\t" \
+ ".previous;\n\t" \
+ ".align 2\n\t" \
+ "0:\n\t" \
+ "li %0,%2;\n\t" \
+ : "=r" (value) \
+ : "i" (&(var)->value), \
+ "i" (0)); \
+ break; \
+ default: \
+ value = (var)->value; \
+ break; \
+ }; \
+ value; \
+ })
+
+/**
+ * immediate_set - set immediate variable (with locking)
+ * @var: pointer of type immediate_*_t
+ * @i: required value
+ *
+ * Sets the value of @var, taking the module_mutex if required by
+ * the architecture.
+ */
+#define immediate_set(var, i) \
+ (var)->value = (i); \
+ immediate_update(1);
+
+/**
+ * _immediate_set - set immediate variable (without locking)
+ * @var: pointer of type immediate_*_t
+ * @i: required value
+ *
+ * Sets the value of @var. Must be called with module_mutex held.
+ */
+#define _immediate_set(var, i) \
+ (var)->value = (i); \
+ immediate_update(0);
+
+/**
+ * immediate_set_early - set immediate variable at early boot
+ * @var: pointer of type immediate_*_t
+ * @i: required value
+ *
+ * Sets the value of @var. Should be used for early boot updates.
+ */
+#define immediate_set_early(var, i) \
+ (var)->value = (i); \
+ immediate_update_early();
+
+/**
+ * immediate_if - if () statement depending on an immediate value
+ * @var: pointer of type immediate_*_t
+ *
+ * Use as an if () statement depending on an immediate value.
+ * Do not use in __init and __exit functions. Use _immediate_if() instead.
+ * Branch depending on an immediate value. Could eventually be optimized further
+ * by improving gcc to give the ability to patch a jump instruction instead of
+ * the value it depends on.
+ */
+#define immediate_if(var) if (unlikely(immediate_read(var)))
+
+/*
+ * Internal update functions.
+ */
+extern void immediate_update(int lock);
+extern void module_immediate_setup(struct module *mod);
+extern void immediate_update_early(void);
+extern int arch_immediate_update(const struct __immediate *immediate);
+extern void arch_immediate_update_early(const struct __immediate *immediate);
+
+#endif /* _ASM_POWERPC_IMMEDIATE_H */
Index: linux-2.6-lttng/arch/powerpc/kernel/Makefile
===================================================================
--- linux-2.6-lttng.orig/arch/powerpc/kernel/Makefile 2007-08-07 13:32:35.000000000 -0400
+++ linux-2.6-lttng/arch/powerpc/kernel/Makefile 2007-08-07 13:32:53.000000000 -0400
@@ -103,3 +103,4 @@ obj-$(CONFIG_PPC64) += $(obj64-y)
extra-$(CONFIG_PPC_FPU) += fpu.o
extra-$(CONFIG_PPC64) += entry_64.o
+obj-$(CONFIG_IMMEDIATE) += immediate.o
Index: linux-2.6-lttng/arch/powerpc/kernel/immediate.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/arch/powerpc/kernel/immediate.c 2007-08-07 13:33:20.000000000 -0400
@@ -0,0 +1,103 @@
+/*
+ * Powerpc optimized immediate values enabling/disabling.
+ *
+ * Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ */
+
+#include <linux/module.h>
+#include <linux/immediate.h>
+#include <linux/string.h>
+#include <linux/kprobes.h>
+#include <asm/cacheflush.h>
+#include <asm/page.h>
+
+#define LI_OPCODE_LEN 2
+
+/**
+ * arch_immediate_update - update one immediate value
+ * @immediate: pointer of type const struct __immediate to update
+ *
+ * Update one immediate value. Must be called with immediate_mutex held.
+ */
+int arch_immediate_update(const struct __immediate *immediate)
+{
+#ifdef CONFIG_KPROBES
+ kprobe_opcode_t *insn;
+ /*
+ * Fail if a kprobe has been set on this instruction.
+ * (TODO: we could eventually do better and modify all the (possibly
+ * nested) kprobes for this site if kprobes had an API for this.
+ */
+ switch (immediate->size) {
+ case 1: /* The uint8_t points to the 3rd byte of the
+ * instruction */
+ insn = (void*)(immediate->immediate - 1 - LI_OPCODE_LEN);
+ break;
+ case 2: insn = (void*)(immediate->immediate - LI_OPCODE_LEN);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (unlikely(*insn == BREAKPOINT_INSTRUCTION)) {
+ printk(KERN_WARNING "Immediate value in conflict with kprobe. "
+ "Variable at %p, "
+ "instruction at %p, size %lu\n",
+ (void*)immediate->immediate,
+ (void*)immediate->var, immediate->size);
+ return -EBUSY;
+ }
+#endif
+
+ /*
+ * If the variable and the instruction have the same value, there is
+ * nothing to do.
+ */
+ switch (immediate->size) {
+ case 1: if (*(uint8_t*)immediate->immediate
+ == *(uint8_t*)immediate->var)
+ return 0;
+ break;
+ case 2: if (*(uint16_t*)immediate->immediate
+ == *(uint16_t*)immediate->var)
+ return 0;
+ break;
+ default:return -EINVAL;
+ }
+ memcpy((void*)immediate->immediate, (void*)immediate->var,
+ immediate->size);
+ flush_icache_range((unsigned long)immediate->immediate,
+ immediate->size);
+ return 0;
+}
+
+/**
+ * arch_immediate_update_early - update one immediate value at boot time
+ * @immediate: pointer of type const struct __immediate to update
+ *
+ * Update one immediate value at boot time.
+ * We can use flush_icache_range, since the cpu identification has been done in
+ * the early_init stage.
+ */
+void __init arch_immediate_update_early(const struct __immediate *immediate)
+{
+ /*
+ * If the variable and the instruction have the same value, there is
+ * nothing to do.
+ */
+ switch (immediate->size) {
+ case 1: if (*(uint8_t*)immediate->immediate
+ == *(uint8_t*)immediate->var)
+ return;
+ break;
+ case 2: if (*(uint16_t*)immediate->immediate
+ == *(uint16_t*)immediate->var)
+ return;
+ break;
+ default:return;
+ }
+ memcpy((void*)immediate->immediate, (void*)immediate->var,
+ immediate->size);
+ flush_icache_range((unsigned long)immediate->immediate,
+ immediate->size);
+}
--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
^ permalink raw reply [flat|nested] 36+ messages in thread
* [patch 6/8] Immediate Values - Powerpc Optimization
2007-08-20 20:23 [patch 0/8] Immediate Values Mathieu Desnoyers
@ 2007-08-20 20:24 ` Mathieu Desnoyers
0 siblings, 0 replies; 36+ messages in thread
From: Mathieu Desnoyers @ 2007-08-20 20:24 UTC (permalink / raw)
To: akpm, linux-kernel; +Cc: Mathieu Desnoyers, Christoph Hellwig
[-- Attachment #1: immediate-values-powerpc-optimization.patch --]
[-- Type: text/plain, Size: 8389 bytes --]
PowerPC optimization of the immediate values which uses a li instruction,
patched with an immediate value.
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
CC: Christoph Hellwig <hch@infradead.org>
---
arch/powerpc/kernel/Makefile | 1
arch/powerpc/kernel/immediate.c | 103 ++++++++++++++++++++++++++++++++
include/asm-powerpc/immediate.h | 127 ++++++++++++++++++++++++++++++++++++++++
3 files changed, 231 insertions(+)
Index: linux-2.6-lttng/include/asm-powerpc/immediate.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/include/asm-powerpc/immediate.h 2007-08-07 13:33:13.000000000 -0400
@@ -0,0 +1,127 @@
+#ifndef _ASM_POWERPC_IMMEDIATE_H
+#define _ASM_POWERPC_IMMEDIATE_H
+
+/*
+ * Immediate values. PowerPC architecture optimizations.
+ *
+ * (C) Copyright 2006 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#include <asm/asm-compat.h>
+
+struct module;
+
+struct __immediate {
+ long var; /* Identifier variable of the immediate value */
+ long immediate; /*
+ * Pointer to the memory location that holds
+ * the immediate value within the load immediate
+ * instruction.
+ */
+ long size; /* Type size. */
+};
+
+/**
+ * immediate_read - read immediate variable
+ * @var: pointer of type immediate_*_t
+ *
+ * Reads the value of @var.
+ * Optimized version of the immediate.
+ * Do not use in __init and __exit functions. Use _immediate_read() instead.
+ * Makes sure the 2 bytes update will be atomic by aligning the immediate
+ * value. Use a normal memory read for the 4 bytes immediate because there is no
+ * way to atomically update it without using a seqlock read side, which would
+ * cost more in term of total i-cache and d-cache space than a simple memory
+ * read.
+ */
+#define immediate_read(var) \
+ ({ \
+ __typeof__((var)->value) value; \
+ switch (sizeof(value)) { \
+ case 1: \
+ asm ( ".section __immediate, \"a\", @progbits;\n\t" \
+ PPC_LONG "%1, ((0f)+3), 1;\n\t" \
+ ".previous;\n\t" \
+ "0:\n\t" \
+ "li %0,%2;\n\t" \
+ : "=r" (value) \
+ : "i" (&(var)->value), \
+ "i" (0)); \
+ break; \
+ case 2: \
+ asm ( ".section __immediate, \"a\", @progbits;\n\t" \
+ PPC_LONG "%1, ((0f)+2), 2;\n\t" \
+ ".previous;\n\t" \
+ ".align 2\n\t" \
+ "0:\n\t" \
+ "li %0,%2;\n\t" \
+ : "=r" (value) \
+ : "i" (&(var)->value), \
+ "i" (0)); \
+ break; \
+ default: \
+ value = (var)->value; \
+ break; \
+ }; \
+ value; \
+ })
+
+/**
+ * immediate_set - set immediate variable (with locking)
+ * @var: pointer of type immediate_*_t
+ * @i: required value
+ *
+ * Sets the value of @var, taking the module_mutex if required by
+ * the architecture.
+ */
+#define immediate_set(var, i) \
+ (var)->value = (i); \
+ immediate_update(1);
+
+/**
+ * _immediate_set - set immediate variable (without locking)
+ * @var: pointer of type immediate_*_t
+ * @i: required value
+ *
+ * Sets the value of @var. Must be called with module_mutex held.
+ */
+#define _immediate_set(var, i) \
+ (var)->value = (i); \
+ immediate_update(0);
+
+/**
+ * immediate_set_early - set immediate variable at early boot
+ * @var: pointer of type immediate_*_t
+ * @i: required value
+ *
+ * Sets the value of @var. Should be used for early boot updates.
+ */
+#define immediate_set_early(var, i) \
+ (var)->value = (i); \
+ immediate_update_early();
+
+/**
+ * immediate_if - if () statement depending on an immediate value
+ * @var: pointer of type immediate_*_t
+ *
+ * Use as an if () statement depending on an immediate value.
+ * Do not use in __init and __exit functions. Use _immediate_if() instead.
+ * Branch depending on an immediate value. Could eventually be optimized further
+ * by improving gcc to give the ability to patch a jump instruction instead of
+ * the value it depends on.
+ */
+#define immediate_if(var) if (unlikely(immediate_read(var)))
+
+/*
+ * Internal update functions.
+ */
+extern void immediate_update(int lock);
+extern void module_immediate_setup(struct module *mod);
+extern void immediate_update_early(void);
+extern int arch_immediate_update(const struct __immediate *immediate);
+extern void arch_immediate_update_early(const struct __immediate *immediate);
+
+#endif /* _ASM_POWERPC_IMMEDIATE_H */
Index: linux-2.6-lttng/arch/powerpc/kernel/Makefile
===================================================================
--- linux-2.6-lttng.orig/arch/powerpc/kernel/Makefile 2007-08-07 13:32:35.000000000 -0400
+++ linux-2.6-lttng/arch/powerpc/kernel/Makefile 2007-08-07 13:32:53.000000000 -0400
@@ -103,3 +103,4 @@ obj-$(CONFIG_PPC64) += $(obj64-y)
extra-$(CONFIG_PPC_FPU) += fpu.o
extra-$(CONFIG_PPC64) += entry_64.o
+obj-$(CONFIG_IMMEDIATE) += immediate.o
Index: linux-2.6-lttng/arch/powerpc/kernel/immediate.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/arch/powerpc/kernel/immediate.c 2007-08-07 13:33:20.000000000 -0400
@@ -0,0 +1,103 @@
+/*
+ * Powerpc optimized immediate values enabling/disabling.
+ *
+ * Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ */
+
+#include <linux/module.h>
+#include <linux/immediate.h>
+#include <linux/string.h>
+#include <linux/kprobes.h>
+#include <asm/cacheflush.h>
+#include <asm/page.h>
+
+#define LI_OPCODE_LEN 2
+
+/**
+ * arch_immediate_update - update one immediate value
+ * @immediate: pointer of type const struct __immediate to update
+ *
+ * Update one immediate value. Must be called with immediate_mutex held.
+ */
+int arch_immediate_update(const struct __immediate *immediate)
+{
+#ifdef CONFIG_KPROBES
+ kprobe_opcode_t *insn;
+ /*
+ * Fail if a kprobe has been set on this instruction.
+ * (TODO: we could eventually do better and modify all the (possibly
+ * nested) kprobes for this site if kprobes had an API for this.
+ */
+ switch (immediate->size) {
+ case 1: /* The uint8_t points to the 3rd byte of the
+ * instruction */
+ insn = (void*)(immediate->immediate - 1 - LI_OPCODE_LEN);
+ break;
+ case 2: insn = (void*)(immediate->immediate - LI_OPCODE_LEN);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (unlikely(*insn == BREAKPOINT_INSTRUCTION)) {
+ printk(KERN_WARNING "Immediate value in conflict with kprobe. "
+ "Variable at %p, "
+ "instruction at %p, size %lu\n",
+ (void*)immediate->immediate,
+ (void*)immediate->var, immediate->size);
+ return -EBUSY;
+ }
+#endif
+
+ /*
+ * If the variable and the instruction have the same value, there is
+ * nothing to do.
+ */
+ switch (immediate->size) {
+ case 1: if (*(uint8_t*)immediate->immediate
+ == *(uint8_t*)immediate->var)
+ return 0;
+ break;
+ case 2: if (*(uint16_t*)immediate->immediate
+ == *(uint16_t*)immediate->var)
+ return 0;
+ break;
+ default:return -EINVAL;
+ }
+ memcpy((void*)immediate->immediate, (void*)immediate->var,
+ immediate->size);
+ flush_icache_range((unsigned long)immediate->immediate,
+ immediate->size);
+ return 0;
+}
+
+/**
+ * arch_immediate_update_early - update one immediate value at boot time
+ * @immediate: pointer of type const struct __immediate to update
+ *
+ * Update one immediate value at boot time.
+ * We can use flush_icache_range, since the cpu identification has been done in
+ * the early_init stage.
+ */
+void __init arch_immediate_update_early(const struct __immediate *immediate)
+{
+ /*
+ * If the variable and the instruction have the same value, there is
+ * nothing to do.
+ */
+ switch (immediate->size) {
+ case 1: if (*(uint8_t*)immediate->immediate
+ == *(uint8_t*)immediate->var)
+ return;
+ break;
+ case 2: if (*(uint16_t*)immediate->immediate
+ == *(uint16_t*)immediate->var)
+ return;
+ break;
+ default:return;
+ }
+ memcpy((void*)immediate->immediate, (void*)immediate->var,
+ immediate->size);
+ flush_icache_range((unsigned long)immediate->immediate,
+ immediate->size);
+}
--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
^ permalink raw reply [flat|nested] 36+ messages in thread
* [patch 6/8] Immediate Values - Powerpc Optimization
2007-08-27 15:59 [patch 0/8] Immediate Values Mathieu Desnoyers
@ 2007-08-27 15:59 ` Mathieu Desnoyers
0 siblings, 0 replies; 36+ messages in thread
From: Mathieu Desnoyers @ 2007-08-27 15:59 UTC (permalink / raw)
To: akpm, linux-kernel; +Cc: Mathieu Desnoyers, Christoph Hellwig
[-- Attachment #1: immediate-values-powerpc-optimization.patch --]
[-- Type: text/plain, Size: 8389 bytes --]
PowerPC optimization of the immediate values which uses a li instruction,
patched with an immediate value.
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
CC: Christoph Hellwig <hch@infradead.org>
---
arch/powerpc/kernel/Makefile | 1
arch/powerpc/kernel/immediate.c | 103 ++++++++++++++++++++++++++++++++
include/asm-powerpc/immediate.h | 127 ++++++++++++++++++++++++++++++++++++++++
3 files changed, 231 insertions(+)
Index: linux-2.6-lttng/include/asm-powerpc/immediate.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/include/asm-powerpc/immediate.h 2007-08-27 11:49:21.000000000 -0400
@@ -0,0 +1,127 @@
+#ifndef _ASM_POWERPC_IMMEDIATE_H
+#define _ASM_POWERPC_IMMEDIATE_H
+
+/*
+ * Immediate values. PowerPC architecture optimizations.
+ *
+ * (C) Copyright 2006 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#include <asm/asm-compat.h>
+
+struct module;
+
+struct __immediate {
+ long var; /* Identifier variable of the immediate value */
+ long immediate; /*
+ * Pointer to the memory location that holds
+ * the immediate value within the load immediate
+ * instruction.
+ */
+ long size; /* Type size. */
+};
+
+/**
+ * immediate_read - read immediate variable
+ * @var: pointer of type immediate_*_t
+ *
+ * Reads the value of @var.
+ * Optimized version of the immediate.
+ * Do not use in __init and __exit functions. Use _immediate_read() instead.
+ * Makes sure the 2 bytes update will be atomic by aligning the immediate
+ * value. Use a normal memory read for the 4 bytes immediate because there is no
+ * way to atomically update it without using a seqlock read side, which would
+ * cost more in term of total i-cache and d-cache space than a simple memory
+ * read.
+ */
+#define immediate_read(var) \
+ ({ \
+ __typeof__((var)->value) value; \
+ switch (sizeof(value)) { \
+ case 1: \
+ asm ( ".section __immediate, \"a\", @progbits;\n\t" \
+ PPC_LONG "%1, ((0f)+3), 1;\n\t" \
+ ".previous;\n\t" \
+ "0:\n\t" \
+ "li %0,%2;\n\t" \
+ : "=r" (value) \
+ : "i" (&(var)->value), \
+ "i" (0)); \
+ break; \
+ case 2: \
+ asm ( ".section __immediate, \"a\", @progbits;\n\t" \
+ PPC_LONG "%1, ((0f)+2), 2;\n\t" \
+ ".previous;\n\t" \
+ ".align 2\n\t" \
+ "0:\n\t" \
+ "li %0,%2;\n\t" \
+ : "=r" (value) \
+ : "i" (&(var)->value), \
+ "i" (0)); \
+ break; \
+ default: \
+ value = (var)->value; \
+ break; \
+ }; \
+ value; \
+ })
+
+/**
+ * immediate_set - set immediate variable (with locking)
+ * @var: pointer of type immediate_*_t
+ * @i: required value
+ *
+ * Sets the value of @var, taking the module_mutex if required by
+ * the architecture.
+ */
+#define immediate_set(var, i) \
+ (var)->value = (i); \
+ immediate_update(1);
+
+/**
+ * _immediate_set - set immediate variable (without locking)
+ * @var: pointer of type immediate_*_t
+ * @i: required value
+ *
+ * Sets the value of @var. Must be called with module_mutex held.
+ */
+#define _immediate_set(var, i) \
+ (var)->value = (i); \
+ immediate_update(0);
+
+/**
+ * immediate_set_early - set immediate variable at early boot
+ * @var: pointer of type immediate_*_t
+ * @i: required value
+ *
+ * Sets the value of @var. Should be used for early boot updates.
+ */
+#define immediate_set_early(var, i) \
+ (var)->value = (i); \
+ immediate_update_early();
+
+/**
+ * immediate_if - if () statement depending on an immediate value
+ * @var: pointer of type immediate_*_t
+ *
+ * Use as an if () statement depending on an immediate value.
+ * Do not use in __init and __exit functions. Use _immediate_if() instead.
+ * Branch depending on an immediate value. Could eventually be optimized further
+ * by improving gcc to give the ability to patch a jump instruction instead of
+ * the value it depends on.
+ */
+#define immediate_if(var) if (unlikely(immediate_read(var)))
+
+/*
+ * Internal update functions.
+ */
+extern void immediate_update(int lock);
+extern void module_immediate_setup(struct module *mod);
+extern void immediate_update_early(void);
+extern int arch_immediate_update(const struct __immediate *immediate);
+extern void arch_immediate_update_early(const struct __immediate *immediate);
+
+#endif /* _ASM_POWERPC_IMMEDIATE_H */
Index: linux-2.6-lttng/arch/powerpc/kernel/Makefile
===================================================================
--- linux-2.6-lttng.orig/arch/powerpc/kernel/Makefile 2007-08-27 11:16:08.000000000 -0400
+++ linux-2.6-lttng/arch/powerpc/kernel/Makefile 2007-08-27 11:49:21.000000000 -0400
@@ -104,3 +104,4 @@ obj-$(CONFIG_PPC64) += $(obj64-y)
extra-$(CONFIG_PPC_FPU) += fpu.o
extra-$(CONFIG_PPC64) += entry_64.o
+obj-$(CONFIG_IMMEDIATE) += immediate.o
Index: linux-2.6-lttng/arch/powerpc/kernel/immediate.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/arch/powerpc/kernel/immediate.c 2007-08-27 11:49:21.000000000 -0400
@@ -0,0 +1,103 @@
+/*
+ * Powerpc optimized immediate values enabling/disabling.
+ *
+ * Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ */
+
+#include <linux/module.h>
+#include <linux/immediate.h>
+#include <linux/string.h>
+#include <linux/kprobes.h>
+#include <asm/cacheflush.h>
+#include <asm/page.h>
+
+#define LI_OPCODE_LEN 2
+
+/**
+ * arch_immediate_update - update one immediate value
+ * @immediate: pointer of type const struct __immediate to update
+ *
+ * Update one immediate value. Must be called with immediate_mutex held.
+ */
+int arch_immediate_update(const struct __immediate *immediate)
+{
+#ifdef CONFIG_KPROBES
+ kprobe_opcode_t *insn;
+ /*
+ * Fail if a kprobe has been set on this instruction.
+ * (TODO: we could eventually do better and modify all the (possibly
+ * nested) kprobes for this site if kprobes had an API for this.
+ */
+ switch (immediate->size) {
+ case 1: /* The uint8_t points to the 3rd byte of the
+ * instruction */
+ insn = (void*)(immediate->immediate - 1 - LI_OPCODE_LEN);
+ break;
+ case 2: insn = (void*)(immediate->immediate - LI_OPCODE_LEN);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (unlikely(*insn == BREAKPOINT_INSTRUCTION)) {
+ printk(KERN_WARNING "Immediate value in conflict with kprobe. "
+ "Variable at %p, "
+ "instruction at %p, size %lu\n",
+ (void*)immediate->immediate,
+ (void*)immediate->var, immediate->size);
+ return -EBUSY;
+ }
+#endif
+
+ /*
+ * If the variable and the instruction have the same value, there is
+ * nothing to do.
+ */
+ switch (immediate->size) {
+ case 1: if (*(uint8_t*)immediate->immediate
+ == *(uint8_t*)immediate->var)
+ return 0;
+ break;
+ case 2: if (*(uint16_t*)immediate->immediate
+ == *(uint16_t*)immediate->var)
+ return 0;
+ break;
+ default:return -EINVAL;
+ }
+ memcpy((void*)immediate->immediate, (void*)immediate->var,
+ immediate->size);
+ flush_icache_range((unsigned long)immediate->immediate,
+ immediate->size);
+ return 0;
+}
+
+/**
+ * arch_immediate_update_early - update one immediate value at boot time
+ * @immediate: pointer of type const struct __immediate to update
+ *
+ * Update one immediate value at boot time.
+ * We can use flush_icache_range, since the cpu identification has been done in
+ * the early_init stage.
+ */
+void __init arch_immediate_update_early(const struct __immediate *immediate)
+{
+ /*
+ * If the variable and the instruction have the same value, there is
+ * nothing to do.
+ */
+ switch (immediate->size) {
+ case 1: if (*(uint8_t*)immediate->immediate
+ == *(uint8_t*)immediate->var)
+ return;
+ break;
+ case 2: if (*(uint16_t*)immediate->immediate
+ == *(uint16_t*)immediate->var)
+ return;
+ break;
+ default:return;
+ }
+ memcpy((void*)immediate->immediate, (void*)immediate->var,
+ immediate->size);
+ flush_icache_range((unsigned long)immediate->immediate,
+ immediate->size);
+}
--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
^ permalink raw reply [flat|nested] 36+ messages in thread
* [patch 0/8] Immediate Values for 2.6.23-rc4-mm1
@ 2007-09-06 20:02 Mathieu Desnoyers
2007-09-06 20:02 ` [patch 1/8] Immediate Values - Global Modules List and Module Mutex Mathieu Desnoyers
` (7 more replies)
0 siblings, 8 replies; 36+ messages in thread
From: Mathieu Desnoyers @ 2007-09-06 20:02 UTC (permalink / raw)
To: akpm, linux-kernel
Hi Andrew,
Here are the clean/tested/updated immediate values for 2.6.23-rc4-mm1. They
should be all good now.
Mathieu
--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
^ permalink raw reply [flat|nested] 36+ messages in thread
* [patch 1/8] Immediate Values - Global Modules List and Module Mutex
2007-09-06 20:02 [patch 0/8] Immediate Values for 2.6.23-rc4-mm1 Mathieu Desnoyers
@ 2007-09-06 20:02 ` Mathieu Desnoyers
2007-09-08 7:28 ` Alexey Dobriyan
2007-09-06 20:02 ` [patch 2/8] Immediate Values - Architecture Independent Code Mathieu Desnoyers
` (6 subsequent siblings)
7 siblings, 1 reply; 36+ messages in thread
From: Mathieu Desnoyers @ 2007-09-06 20:02 UTC (permalink / raw)
To: akpm, linux-kernel; +Cc: Mathieu Desnoyers
[-- Attachment #1: immediate-values-global-modules-list-and-mutex.patch --]
[-- Type: text/plain, Size: 2184 bytes --]
Remove "static" from module_mutex and the modules list so it can be used by
other builtin objects in the kernel. Otherwise, every code depending on the
module list would have to be put in kernel/module.c. Since the immediate values
depends on the module list but can be considered as logically different, it
makes sense to implement them in their own file.
The alternative to this would be to disable preemption in code path that need
such synchronization, so they can be protected against module unload by
stop_machine(), but not being able to sleep within while needing such
synchronization is limiting.
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
---
include/linux/module.h | 4 ++++
kernel/module.c | 4 ++--
2 files changed, 6 insertions(+), 2 deletions(-)
Index: linux-2.6-lttng/kernel/module.c
===================================================================
--- linux-2.6-lttng.orig/kernel/module.c 2007-08-07 11:03:56.000000000 -0400
+++ linux-2.6-lttng/kernel/module.c 2007-08-07 11:40:22.000000000 -0400
@@ -64,8 +64,8 @@ extern int module_sysfs_initialized;
/* List of modules, protected by module_mutex or preempt_disable
* (add/delete uses stop_machine). */
-static DEFINE_MUTEX(module_mutex);
-static LIST_HEAD(modules);
+DEFINE_MUTEX(module_mutex);
+LIST_HEAD(modules);
static DECLARE_MUTEX(notify_mutex);
static BLOCKING_NOTIFIER_HEAD(module_notify_list);
Index: linux-2.6-lttng/include/linux/module.h
===================================================================
--- linux-2.6-lttng.orig/include/linux/module.h 2007-08-07 11:03:48.000000000 -0400
+++ linux-2.6-lttng/include/linux/module.h 2007-08-07 11:39:55.000000000 -0400
@@ -60,6 +60,10 @@ struct module_kobject
struct kobject *drivers_dir;
};
+/* Protects the list of modules. */
+extern struct mutex module_mutex;
+extern struct list_head modules;
+
/* These are either module local, or the kernel's dummy ones. */
extern int init_module(void);
extern void cleanup_module(void);
--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
^ permalink raw reply [flat|nested] 36+ messages in thread
* [patch 2/8] Immediate Values - Architecture Independent Code
2007-09-06 20:02 [patch 0/8] Immediate Values for 2.6.23-rc4-mm1 Mathieu Desnoyers
2007-09-06 20:02 ` [patch 1/8] Immediate Values - Global Modules List and Module Mutex Mathieu Desnoyers
@ 2007-09-06 20:02 ` Mathieu Desnoyers
2007-09-06 20:02 ` [patch 3/8] Immediate Values - Kconfig menu in EMBEDDED Mathieu Desnoyers
` (5 subsequent siblings)
7 siblings, 0 replies; 36+ messages in thread
From: Mathieu Desnoyers @ 2007-09-06 20:02 UTC (permalink / raw)
To: akpm, linux-kernel; +Cc: Mathieu Desnoyers
[-- Attachment #1: immediate-values-architecture-independent-code.patch --]
[-- Type: text/plain, Size: 15435 bytes --]
Immediate values are used as read mostly variables that are rarely updated. They
use code patching to modify the values inscribed in the instruction stream. It
provides a way to save precious cache lines that would otherwise have to be used
by these variables.
There is a generic _immediate_read() version, which uses standard global
variables, and optimized per architecture immediate_read() implementations,
which use a load immediate to remove a data cache hit. When the immediate values
functionnality is disabled in the kernel, it falls back to global variables.
It adds a new rodata section "__immediate" to place the pointers to the enable
value. Immediate values activation functions sits in kernel/immediate.c.
Immediate values refer to the memory address of a previously declared integer.
This integer holds the information about the state of the immediate values
associated, and must be accessed through the API found in linux/immediate.h.
At module load time, each immediate value is checked to see if it must be
enabled. It would be the case if the variable they refer to is exported from
another module and already enabled.
In the early stages of start_kernel(), the immediate values are updated to
reflect the state of the variable they refer to.
* Why should this be merged *
It improves performances on heavy memory I/O workloads.
An interesting result shows the potential this infrastructure has by
showing the slowdown a simple system call such as getppid() suffers when it is
used under heavy user-space cache trashing:
Random walk L1 and L2 trashing surrounding a getppid() call:
(note: in this test, do_syscal_trace was taken at each system call, see
Documentation/immediate.txt in these patches for details)
- No memory pressure : getppid() takes 1573 cycles
- With memory pressure : getppid() takes 15589 cycles
We therefore have a slowdown of 10 times just to get the kernel variables from
memory. Another test on the same architecture (Intel P4) measured the memory
latency to be 559 cycles. Therefore, each cache line removed from the hot path
would improve the syscall time of 3.5% in these conditions.
Changelog:
- section __immediate is already SHF_ALLOC
- Because of the wonders of ELF, section 0 has sh_addr and sh_size 0. So
the if (immediateindex) is unnecessary here.
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
---
include/asm-generic/vmlinux.lds.h | 7 ++
include/linux/immediate.h | 119 +++++++++++++++++++++++++++++++++++
include/linux/module.h | 6 +
init/main.c | 2
kernel/Makefile | 1
kernel/immediate.c | 128 ++++++++++++++++++++++++++++++++++++++
kernel/module.c | 10 ++
7 files changed, 273 insertions(+)
Index: linux-2.6-lttng/include/linux/immediate.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/include/linux/immediate.h 2007-09-06 15:02:50.000000000 -0400
@@ -0,0 +1,119 @@
+#ifndef _LINUX_IMMEDIATE_H
+#define _LINUX_IMMEDIATE_H
+
+/*
+ * Immediate values, can be updated at runtime and save cache lines.
+ *
+ * (C) Copyright 2007 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#ifdef CONFIG_IMMEDIATE
+#include <asm/immediate.h>
+#else
+/*
+ * Generic immediate values: a simple, standard, memory load.
+ */
+
+struct module;
+
+/**
+ * immediate_read - read immediate variable
+ * @var: pointer of type immediate_*_t
+ *
+ * Reads the value of @var.
+ */
+#define immediate_read(var) _immediate_read(var)
+
+/**
+ * immediate_set - set immediate variable (with locking)
+ * @var: pointer of type immediate_*_t
+ * @i: required value
+ *
+ * Sets the value of @var, taking the module_mutex if required by
+ * the architecture.
+ */
+#define immediate_set(var, i) ((var)->value = (i))
+
+/**
+ * _immediate_set - set immediate variable (without locking)
+ * @var: pointer of type immediate_*_t
+ * @i: required value
+ *
+ * Sets the value of @var. Must be called with module_mutex held.
+ */
+#define _immediate_set(var, i) immediate_set(var, i)
+
+/**
+ * immediate_set_early - set immediate variable at early boot
+ * @var: pointer of type immediate_*_t
+ * @i: required value
+ *
+ * Sets the value of @var. Should be used for early boot updates.
+ */
+#define immediate_set_early(var, i) immediate_set(var, i)
+
+/**
+ * immediate_if - if () statement depending on an immediate value
+ * @var: pointer of type immediate_*_t
+ *
+ * Use as an if () statement depending on an immediate value.
+ */
+#define immediate_if(var) if (immediate_read(var))
+
+/*
+ * Internal update functions.
+ */
+static inline void module_immediate_setup(struct module *mod) { }
+static inline void immediate_update_early(void) { }
+#endif
+
+/**
+ * DEFINE_IMMEDIATE_TYPE - Define an immediate type
+ * @type: type that the immediate should hold
+ * @name: name of the immediate type
+ *
+ * Define new immediate types. Naming scheme is immediate_*_t.
+ * Always access these types with the provided functions.
+ */
+#define DEFINE_IMMEDIATE_TYPE(type, name) \
+ typedef struct { type value; } name
+
+/*
+ * Standard pre-defined immediate types.
+ */
+DEFINE_IMMEDIATE_TYPE(char, immediate_char_t);
+DEFINE_IMMEDIATE_TYPE(short, immediate_short_t);
+DEFINE_IMMEDIATE_TYPE(int, immediate_int_t);
+DEFINE_IMMEDIATE_TYPE(long, immediate_long_t);
+DEFINE_IMMEDIATE_TYPE(void*, immediate_void_ptr_t);
+
+/**
+ * IMMEDIATE_INIT - Static initialization of an immediate variable
+ * @i: required value
+ *
+ * Use this macro to initialize an immediate value to an initial static
+ * value.
+ */
+#define IMMEDIATE_INIT(i) { (i) }
+
+/**
+ * _immediate_read - Read immediate value with standard memory load.
+ * @var: pointer of type immediate_*_t
+ *
+ * Force a data read of the immediate value instead of the immediate value
+ * based mechanism. Useful for __init and __exit section data read.
+ */
+#define _immediate_read(var) (var)->value
+
+/*
+ * _immediate_if - if () statement depending on immediate value (memory load)
+ * @var: pointer of type immediate_*_t
+ *
+ * Force the use of a normal if () statement depending on an immediate value.
+ */
+#define _immediate_if(var) if (_immediate_read(var))
+
+#endif
Index: linux-2.6-lttng/include/asm-generic/vmlinux.lds.h
===================================================================
--- linux-2.6-lttng.orig/include/asm-generic/vmlinux.lds.h 2007-09-06 14:32:10.000000000 -0400
+++ linux-2.6-lttng/include/asm-generic/vmlinux.lds.h 2007-09-06 15:06:36.000000000 -0400
@@ -122,6 +122,13 @@
VMLINUX_SYMBOL(__stop___kcrctab_gpl_future) = .; \
} \
\
+ /* Immediate values: pointers */ \
+ __immediate : AT(ADDR(__immediate) - LOAD_OFFSET) { \
+ VMLINUX_SYMBOL(__start___immediate) = .; \
+ *(__immediate) \
+ VMLINUX_SYMBOL(__stop___immediate) = .; \
+ } \
+ \
/* Kernel symbol table: strings */ \
__ksymtab_strings : AT(ADDR(__ksymtab_strings) - LOAD_OFFSET) { \
*(__ksymtab_strings) \
Index: linux-2.6-lttng/include/linux/module.h
===================================================================
--- linux-2.6-lttng.orig/include/linux/module.h 2007-09-06 15:02:49.000000000 -0400
+++ linux-2.6-lttng/include/linux/module.h 2007-09-06 15:06:36.000000000 -0400
@@ -15,6 +15,7 @@
#include <linux/stringify.h>
#include <linux/kobject.h>
#include <linux/moduleparam.h>
+#include <linux/immediate.h>
#include <asm/local.h>
#include <asm/module.h>
@@ -374,6 +375,11 @@ struct module
/* The command line arguments (may be mangled). People like
keeping pointers to this stuff */
char *args;
+
+#ifdef CONFIG_IMMEDIATE
+ const struct __immediate *immediate;
+ unsigned int num_immediate;
+#endif
};
#ifndef MODULE_ARCH_INIT
#define MODULE_ARCH_INIT {}
Index: linux-2.6-lttng/kernel/module.c
===================================================================
--- linux-2.6-lttng.orig/kernel/module.c 2007-09-06 15:02:49.000000000 -0400
+++ linux-2.6-lttng/kernel/module.c 2007-09-06 15:07:03.000000000 -0400
@@ -33,6 +33,7 @@
#include <linux/cpu.h>
#include <linux/moduleparam.h>
#include <linux/errno.h>
+#include <linux/immediate.h>
#include <linux/err.h>
#include <linux/vermagic.h>
#include <linux/notifier.h>
@@ -1717,6 +1718,7 @@ static struct module *load_module(void _
unsigned int unusedcrcindex;
unsigned int unusedgplindex;
unsigned int unusedgplcrcindex;
+ unsigned int immediateindex;
struct module *mod;
long err = 0;
void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
@@ -1813,6 +1815,7 @@ static struct module *load_module(void _
#ifdef ARCH_UNWIND_SECTION_NAME
unwindex = find_sec(hdr, sechdrs, secstrings, ARCH_UNWIND_SECTION_NAME);
#endif
+ immediateindex = find_sec(hdr, sechdrs, secstrings, "__immediate");
/* Don't keep modinfo section */
sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
@@ -1963,6 +1966,11 @@ static struct module *load_module(void _
mod->gpl_future_syms = (void *)sechdrs[gplfutureindex].sh_addr;
if (gplfuturecrcindex)
mod->gpl_future_crcs = (void *)sechdrs[gplfuturecrcindex].sh_addr;
+#ifdef CONFIG_IMMEDIATE
+ mod->immediate = (void *)sechdrs[immediateindex].sh_addr;
+ mod->num_immediate =
+ sechdrs[immediateindex].sh_size / sizeof(*mod->immediate);
+#endif
mod->unused_syms = (void *)sechdrs[unusedindex].sh_addr;
if (unusedcrcindex)
@@ -2028,6 +2036,8 @@ static struct module *load_module(void _
goto nomodsectinfo;
#endif
+ module_immediate_setup(mod);
+
err = module_finalize(hdr, sechdrs, mod);
if (err < 0)
goto cleanup;
Index: linux-2.6-lttng/kernel/immediate.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/kernel/immediate.c 2007-09-06 15:02:50.000000000 -0400
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2007 Mathieu Desnoyers
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/immediate.h>
+#include <linux/memory.h>
+
+extern const struct __immediate __start___immediate[];
+extern const struct __immediate __stop___immediate[];
+
+/*
+ * modules_mutex nests inside immediate_mutex. immediate_mutex protects builtin
+ * immediates and module immediates.
+ */
+static DEFINE_MUTEX(immediate_mutex);
+
+/*
+ * Sets a range of immediates to a enabled state : set the enable bit.
+ */
+static inline void _immediate_update_range(const struct __immediate *begin,
+ const struct __immediate *end)
+{
+ const struct __immediate *iter;
+ int ret;
+
+ for (iter = begin; iter < end; iter++) {
+ mutex_lock(&immediate_mutex);
+ kernel_text_lock();
+ ret = arch_immediate_update(iter);
+ kernel_text_unlock();
+ if (ret)
+ printk(KERN_WARNING "Invalid immediate value. "
+ "Variable at %p, "
+ "instruction at %p, size %lu\n",
+ (void*)iter->immediate,
+ (void*)iter->var, iter->size);
+ mutex_unlock(&immediate_mutex);
+ }
+}
+
+#ifdef CONFIG_MODULES
+/**
+ * module_immediate_setup - Update immediate values in a module
+ * @mod: pointer to the struct module
+ *
+ * Setup the immediate according to the variable upon which it depends. Called
+ * by load_module with module_mutex held. This mutex protects against concurrent
+ * modifications to modules'immediates. Therefore, since
+ * module_immediate_setup() does not modify builtin immediates, it does not need
+ * to take the immediate_mutex.
+ */
+void module_immediate_setup(struct module *mod)
+{
+ _immediate_update_range(mod->immediate,
+ mod->immediate+mod->num_immediate);
+}
+
+/*
+ * immediate mutex nests inside the modules mutex.
+ */
+static inline void immediate_update_modules(int lock)
+{
+ struct module *mod;
+
+ if (lock)
+ mutex_lock(&module_mutex);
+ list_for_each_entry(mod, &modules, list) {
+ if (mod->taints)
+ continue;
+ _immediate_update_range(mod->immediate,
+ mod->immediate + mod->num_immediate);
+ }
+ if (lock)
+ mutex_unlock(&module_mutex);
+}
+#else
+static inline void immediate_update_modules(int lock) { }
+#endif
+
+/**
+ * immediate_update - update all immediate values in the kernel
+ * @lock: should a module_mutex be taken ?
+ *
+ * Iterate on the kernel core and modules to update the immediate values.
+ */
+void immediate_update(int lock)
+{
+ /* Core kernel immediates */
+ _immediate_update_range(__start___immediate, __stop___immediate);
+ /* immediates in modules. */
+ immediate_update_modules(lock);
+}
+EXPORT_SYMBOL_GPL(immediate_update);
+
+static void __init immediate_update_early_range(const struct __immediate *begin,
+ const struct __immediate *end)
+{
+ const struct __immediate *iter;
+
+ for (iter = begin; iter < end; iter++)
+ arch_immediate_update_early(iter);
+}
+
+/**
+ * immediate_update_early - Update immediate values at boot time
+ *
+ * Update the immediate values to the state of the variables they refer to. It
+ * is done before SMP is active, at the very beginning of start_kernel().
+ */
+void __init immediate_update_early(void)
+{
+ immediate_update_early_range(__start___immediate, __stop___immediate);
+}
Index: linux-2.6-lttng/init/main.c
===================================================================
--- linux-2.6-lttng.orig/init/main.c 2007-09-06 14:32:10.000000000 -0400
+++ linux-2.6-lttng/init/main.c 2007-09-06 15:02:50.000000000 -0400
@@ -56,6 +56,7 @@
#include <linux/pid_namespace.h>
#include <linux/device.h>
#include <linux/kthread.h>
+#include <linux/immediate.h>
#include <asm/io.h>
#include <asm/bugs.h>
@@ -525,6 +526,7 @@ asmlinkage void __init start_kernel(void
unwind_init();
lockdep_init();
container_init_early();
+ immediate_update_early();
local_irq_disable();
early_boot_irqs_off();
Index: linux-2.6-lttng/kernel/Makefile
===================================================================
--- linux-2.6-lttng.orig/kernel/Makefile 2007-09-06 14:32:10.000000000 -0400
+++ linux-2.6-lttng/kernel/Makefile 2007-09-06 15:05:03.000000000 -0400
@@ -61,6 +61,7 @@ obj-$(CONFIG_SYSCTL) += utsname_sysctl.o
obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o
obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o
obj-$(CONFIG_RESOURCE_COUNTERS) += res_counter.o
+obj-$(CONFIG_IMMEDIATE) += immediate.o
ifneq ($(CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER),y)
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
^ permalink raw reply [flat|nested] 36+ messages in thread
* [patch 3/8] Immediate Values - Kconfig menu in EMBEDDED
2007-09-06 20:02 [patch 0/8] Immediate Values for 2.6.23-rc4-mm1 Mathieu Desnoyers
2007-09-06 20:02 ` [patch 1/8] Immediate Values - Global Modules List and Module Mutex Mathieu Desnoyers
2007-09-06 20:02 ` [patch 2/8] Immediate Values - Architecture Independent Code Mathieu Desnoyers
@ 2007-09-06 20:02 ` Mathieu Desnoyers
2007-09-07 6:49 ` Andi Kleen
2007-09-06 20:02 ` [patch 4/8] Immediate Values - Move Kprobes i386 restore_interrupt to kdebug.h Mathieu Desnoyers
` (4 subsequent siblings)
7 siblings, 1 reply; 36+ messages in thread
From: Mathieu Desnoyers @ 2007-09-06 20:02 UTC (permalink / raw)
To: akpm, linux-kernel
Cc: Mathieu Desnoyers, Adrian Bunk, Andi Kleen, Alexey Dobriyan,
Christoph Hellwig
[-- Attachment #1: immediate-values-kconfig-embedded.patch --]
[-- Type: text/plain, Size: 2258 bytes --]
Immediate values provide a way to use dynamic code patching to update variables
sitting within the instruction stream. It saves caches lines normally used by
static read mostly variables. Enable it by default, but let users disable it
through the EMBEDDED menu with the "Disable immediate values" submenu entry.
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
CC: Adrian Bunk <bunk@stusta.de>
CC: Andi Kleen <andi@firstfloor.org>
CC: Alexey Dobriyan <adobriyan@gmail.com>
CC: Christoph Hellwig <hch@infradead.org>
---
init/Kconfig | 21 +++++++++++++++++++++
1 file changed, 21 insertions(+)
Index: linux-2.6-lttng/init/Kconfig
===================================================================
--- linux-2.6-lttng.orig/init/Kconfig 2007-09-04 11:53:59.000000000 -0400
+++ linux-2.6-lttng/init/Kconfig 2007-09-04 12:12:03.000000000 -0400
@@ -407,6 +407,17 @@ config CC_OPTIMIZE_FOR_SIZE
config SYSCTL
bool
+config IMMEDIATE
+ default y if !DISABLE_IMMEDIATE
+ depends on X86_32 || PPC || PPC64
+ bool
+ help
+ Immediate values are used as read mostly variables that are rarely
+ updated. They use code patching to modify the values inscribed in the
+ instruction stream. It provides a way to save precious cache lines
+ that would otherwise have to be used by these variables. Can be
+ disabled through the EMBEDDED menu.
+
menuconfig EMBEDDED
bool "Configure standard kernel features (for small systems)"
help
@@ -645,6 +656,16 @@ config PROC_KPAGEMAP
information on page-level memory usage. Disabling this interface
will reduce the size of the kernel by around 600 bytes.
+config DISABLE_IMMEDIATE
+ default y if EMBEDDED
+ bool "Disable immediate values" if EMBEDDED
+ depends on X86_32 || PPC || PPC64
+ help
+ Disable code patching based immediate values for embedded systems. It
+ consumes slightly more memory and requires to modify the instruction
+ stream each time a variable is updated. Should really be disabled for
+ embedded systems with read-only text.
+
endmenu # General setup
config RT_MUTEXES
--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
^ permalink raw reply [flat|nested] 36+ messages in thread
* [patch 4/8] Immediate Values - Move Kprobes i386 restore_interrupt to kdebug.h
2007-09-06 20:02 [patch 0/8] Immediate Values for 2.6.23-rc4-mm1 Mathieu Desnoyers
` (2 preceding siblings ...)
2007-09-06 20:02 ` [patch 3/8] Immediate Values - Kconfig menu in EMBEDDED Mathieu Desnoyers
@ 2007-09-06 20:02 ` Mathieu Desnoyers
2007-09-07 10:31 ` Ananth N Mavinakayanahalli
2007-09-06 20:02 ` [patch 5/8] Immediate Values - i386 Optimization Mathieu Desnoyers
` (3 subsequent siblings)
7 siblings, 1 reply; 36+ messages in thread
From: Mathieu Desnoyers @ 2007-09-06 20:02 UTC (permalink / raw)
To: akpm, linux-kernel
Cc: Mathieu Desnoyers, Christoph Hellwig, prasanna, ananth,
anil.s.keshavamurthy, davem
[-- Attachment #1: immediate-values-move-kprobes-i386-restore-interrupt-to-kdebug-h.patch --]
[-- Type: text/plain, Size: 2294 bytes --]
Since the breakpoint handler is useful both to kprobes and immediate values, it
makes sense to make the required restore_interrupt() available through
asm-i386/kdebug.h.
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
CC: Christoph Hellwig <hch@infradead.org>
CC: prasanna@in.ibm.com
CC: ananth@in.ibm.com
CC: anil.s.keshavamurthy@intel.com
CC: davem@davemloft.net
---
include/asm-i386/kdebug.h | 12 ++++++++++++
include/asm-i386/kprobes.h | 9 ---------
2 files changed, 12 insertions(+), 9 deletions(-)
Index: linux-2.6-lttng/include/asm-i386/kdebug.h
===================================================================
--- linux-2.6-lttng.orig/include/asm-i386/kdebug.h 2007-09-04 11:52:37.000000000 -0400
+++ linux-2.6-lttng/include/asm-i386/kdebug.h 2007-09-04 12:12:06.000000000 -0400
@@ -6,6 +6,9 @@
* from x86_64 architecture.
*/
+#include <asm/ptrace.h>
+#include <asm/system.h>
+
struct pt_regs;
/* Grossly misnamed. */
@@ -25,4 +28,13 @@ enum die_val {
DIE_PAGE_FAULT_NO_CONTEXT,
};
+/* trap3/1 are intr gates for kprobes. So, restore the status of IF,
+ * if necessary, before executing the original int3/1 (trap) handler.
+ */
+static inline void restore_interrupts(struct pt_regs *regs)
+{
+ if (regs->eflags & IF_MASK)
+ local_irq_enable();
+}
+
#endif
Index: linux-2.6-lttng/include/asm-i386/kprobes.h
===================================================================
--- linux-2.6-lttng.orig/include/asm-i386/kprobes.h 2007-09-04 11:53:26.000000000 -0400
+++ linux-2.6-lttng/include/asm-i386/kprobes.h 2007-09-04 12:12:06.000000000 -0400
@@ -79,15 +79,6 @@ struct kprobe_ctlblk {
struct prev_kprobe prev_kprobe;
};
-/* trap3/1 are intr gates for kprobes. So, restore the status of IF,
- * if necessary, before executing the original int3/1 (trap) handler.
- */
-static inline void restore_interrupts(struct pt_regs *regs)
-{
- if (regs->eflags & IF_MASK)
- local_irq_enable();
-}
-
extern int kprobe_exceptions_notify(struct notifier_block *self,
unsigned long val, void *data);
extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
^ permalink raw reply [flat|nested] 36+ messages in thread
* [patch 5/8] Immediate Values - i386 Optimization
2007-09-06 20:02 [patch 0/8] Immediate Values for 2.6.23-rc4-mm1 Mathieu Desnoyers
` (3 preceding siblings ...)
2007-09-06 20:02 ` [patch 4/8] Immediate Values - Move Kprobes i386 restore_interrupt to kdebug.h Mathieu Desnoyers
@ 2007-09-06 20:02 ` Mathieu Desnoyers
2007-09-06 20:02 ` [patch 6/8] Immediate Values - Powerpc Optimization Mathieu Desnoyers
` (2 subsequent siblings)
7 siblings, 0 replies; 36+ messages in thread
From: Mathieu Desnoyers @ 2007-09-06 20:02 UTC (permalink / raw)
To: akpm, linux-kernel; +Cc: Mathieu Desnoyers, Christoph Hellwig
[-- Attachment #1: immediate-values-i386-optimization.patch --]
[-- Type: text/plain, Size: 17977 bytes --]
i386 optimization of the immediate values which uses a movl with code patching
to set/unset the value used to populate the register used as variable source.
Changelog:
- Use text_poke_early with cr0 WP save/restore to patch the bypass. We are doing
non atomic writes to a code region only touched by us (nobody can execute it
since we are protected by the immediate_mutex).
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Reviewed-by: Andi Kleen <ak@muc.de>
Reviewed-by: "H. Peter Anvin" <hpa@zytor.com>
Reviewed-by: Chuck Ebbert <cebbert@redhat.com>
CC: Christoph Hellwig <hch@infradead.org>
---
arch/i386/kernel/Makefile | 1
arch/i386/kernel/immediate.c | 307 +++++++++++++++++++++++++++++++++++++++++++
arch/i386/kernel/traps.c | 8 -
include/asm-i386/immediate.h | 137 +++++++++++++++++++
4 files changed, 449 insertions(+), 4 deletions(-)
Index: linux-2.6-lttng/include/asm-i386/immediate.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/include/asm-i386/immediate.h 2007-09-04 14:05:59.000000000 -0400
@@ -0,0 +1,137 @@
+#ifndef _ASM_I386_IMMEDIATE_H
+#define _ASM_I386_IMMEDIATE_H
+
+/*
+ * Immediate values. i386 architecture optimizations.
+ *
+ * (C) Copyright 2006 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+struct module;
+
+struct __immediate {
+ long var; /* Pointer to the identifier variable of the
+ * immediate value
+ */
+ long immediate; /*
+ * Pointer to the memory location of the
+ * immediate value within the instruction.
+ */
+ long size; /* Type size. */
+};
+
+/**
+ * immediate_read - read immediate variable
+ * @var: pointer of type immediate_*_t
+ *
+ * Reads the value of @var.
+ * Optimized version of the immediate.
+ * Do not use in __init and __exit functions. Use _immediate_read() instead.
+ * Makes sure the 2 and 4 bytes update will be atomic by aligning the immediate
+ * value. 2 bytes (short) uses a 66H prefix. If size is bigger than 4 bytes,
+ * fall back on a memory read.
+ */
+#define immediate_read(var) \
+ ({ \
+ __typeof__((var)->value) value; \
+ switch (sizeof(value)) { \
+ case 1: \
+ asm ( ".section __immediate, \"a\", @progbits;\n\t" \
+ ".long %1, (0f)+1, 1;\n\t" \
+ ".previous;\n\t" \
+ "0:\n\t" \
+ "mov %2,%0;\n\t" \
+ : "=r" (value) \
+ : "m" ((var)->value), \
+ "i" (0)); \
+ break; \
+ case 2: \
+ asm ( ".section __immediate, \"a\", @progbits;\n\t" \
+ ".long %1, (0f)+2, 2;\n\t" \
+ ".previous;\n\t" \
+ "1:\n\t" \
+ ".align 2;\n\t" \
+ "0:\n\t" \
+ "mov %2,%0;\n\t" \
+ : "=r" (value) \
+ : "m" ((var)->value), \
+ "i" (0)); \
+ break; \
+ case 4: \
+ asm ( ".section __immediate, \"a\", @progbits;\n\t" \
+ ".long %1, (0f)+1, 4;\n\t" \
+ ".previous;\n\t" \
+ "1:\n\t" \
+ ".org (1b)+(3-((1b)%%4)), 0x90;\n\t" \
+ "0:\n\t" \
+ "mov %2,%0;\n\t" \
+ : "=r" (value) \
+ : "m" ((var)->value), \
+ "i" (0)); \
+ break; \
+ default:value = (var)->value; \
+ break; \
+ }; \
+ value; \
+ })
+
+
+/**
+ * immediate_set - set immediate variable (with locking)
+ * @var: pointer of type immediate_*_t
+ * @i: required value
+ *
+ * Sets the value of @var, taking the module_mutex if required by
+ * the architecture.
+ */
+#define immediate_set(var, i) \
+ (var)->value = (i); \
+ immediate_update(1);
+
+/**
+ * _immediate_set - set immediate variable (without locking)
+ * @var: pointer of type immediate_*_t
+ * @i: required value
+ *
+ * Sets the value of @var. Must be called with module_mutex held.
+ */
+#define _immediate_set(var, i) \
+ (var)->value = (i); \
+ immediate_update(0);
+
+/**
+ * immediate_set_early - set immediate variable at early boot
+ * @var: pointer of type immediate_*_t
+ * @i: required value
+ *
+ * Sets the value of @var. Should be used for early boot updates.
+ */
+#define immediate_set_early(var, i) \
+ (var)->value = (i); \
+ immediate_update_early();
+
+/**
+ * immediate_if - if () statement depending on an immediate value
+ * @var: pointer of type immediate_*_t
+ *
+ * Use as an if () statement depending on an immediate value.
+ * Do not use in __init and __exit functions. Use _immediate_if() instead.
+ * Branch depending on an immediate value. Could eventually be optimized further
+ * by improving gcc to give the ability to patch a jump instruction instead of
+ * the value it depends on.
+ */
+#define immediate_if(var) if (unlikely(immediate_read(var)))
+
+/*
+ * Internal update functions.
+ */
+extern void immediate_update(int lock);
+extern void module_immediate_setup(struct module *mod);
+extern void immediate_update_early(void);
+extern int arch_immediate_update(const struct __immediate *immediate);
+extern void arch_immediate_update_early(const struct __immediate *immediate);
+
+#endif /* _ASM_I386_IMMEDIATE_H */
Index: linux-2.6-lttng/arch/i386/kernel/Makefile
===================================================================
--- linux-2.6-lttng.orig/arch/i386/kernel/Makefile 2007-09-04 14:04:48.000000000 -0400
+++ linux-2.6-lttng/arch/i386/kernel/Makefile 2007-09-04 14:05:59.000000000 -0400
@@ -37,6 +37,7 @@ obj-$(CONFIG_MODULES) += module.o
obj-y += sysenter.o vsyscall.o
obj-$(CONFIG_ACPI_SRAT) += srat.o
obj-$(CONFIG_EFI) += efi.o efi_stub.o
+obj-$(CONFIG_IMMEDIATE) += immediate.o
obj-$(CONFIG_DOUBLEFAULT) += doublefault.o
obj-$(CONFIG_KGDB) += kgdb.o kgdb-jmp.o
obj-$(CONFIG_VM86) += vm86.o
Index: linux-2.6-lttng/arch/i386/kernel/immediate.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/arch/i386/kernel/immediate.c 2007-09-04 14:38:12.000000000 -0400
@@ -0,0 +1,307 @@
+/*
+ * Immediate Value - i386 architecture specific code.
+ *
+ * Rationale
+ *
+ * Required because of :
+ * - Erratum 49 fix for Intel PIII.
+ * - Still present on newer processors : Intel Core 2 Duo Processor for Intel
+ * Centrino Duo Processor Technology Specification Update, AH33.
+ * Unsynchronized Cross-Modifying Code Operations Can Cause Unexpected
+ * Instruction Execution Results.
+ *
+ * Permits immediate value modification by XMC with correct serialization.
+ *
+ * Reentrant for NMI and trap handler instrumentation. Permits XMC to a
+ * location that has preemption enabled because it involves no temporary or
+ * reused data structure.
+ *
+ * Quoting Richard J Moore, source of the information motivating this
+ * implementation which differs from the one proposed by Intel which is not
+ * suitable for kernel context (does not support NMI and would require disabling
+ * interrupts on every CPU for a long period) :
+ *
+ * "There is another issue to consider when looking into using probes other
+ * then int3:
+ *
+ * Intel erratum 54 - Unsynchronized Cross-modifying code - refers to the
+ * practice of modifying code on one processor where another has prefetched
+ * the unmodified version of the code. Intel states that unpredictable general
+ * protection faults may result if a synchronizing instruction (iret, int,
+ * int3, cpuid, etc ) is not executed on the second processor before it
+ * executes the pre-fetched out-of-date copy of the instruction.
+ *
+ * When we became aware of this I had a long discussion with Intel's
+ * microarchitecture guys. It turns out that the reason for this erratum
+ * (which incidentally Intel does not intend to fix) is because the trace
+ * cache - the stream of micorops resulting from instruction interpretation -
+ * cannot guaranteed to be valid. Reading between the lines I assume this
+ * issue arises because of optimization done in the trace cache, where it is
+ * no longer possible to identify the original instruction boundaries. If the
+ * CPU discoverers that the trace cache has been invalidated because of
+ * unsynchronized cross-modification then instruction execution will be
+ * aborted with a GPF. Further discussion with Intel revealed that replacing
+ * the first opcode byte with an int3 would not be subject to this erratum.
+ *
+ * So, is cmpxchg reliable? One has to guarantee more than mere atomicity."
+ *
+ * Overall design
+ *
+ * The algorithm proposed by Intel applies not so well in kernel context: it
+ * would imply disabling interrupts and looping on every CPUs while modifying
+ * the code and would not support instrumentation of code called from interrupt
+ * sources that cannot be disabled.
+ *
+ * Therefore, we use a different algorithm to respect Intel's erratum (see the
+ * quoted discussion above). We make sure that no CPU sees an out-of-date copy
+ * of a pre-fetched instruction by 1 - using a breakpoint, which skips the
+ * instruction that is going to be modified, 2 - issuing an IPI to every CPU to
+ * execute a sync_core(), to make sure that even when the breakpoint is removed,
+ * no cpu could possibly still have the out-of-date copy of the instruction,
+ * modify the now unused 2nd byte of the instruction, and then put back the
+ * original 1st byte of the instruction.
+ *
+ * It has exactly the same intent as the algorithm proposed by Intel, but
+ * it has less side-effects, scales better and supports NMI, SMI and MCE.
+ *
+ * Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ */
+
+#include <linux/notifier.h>
+#include <linux/preempt.h>
+#include <linux/smp.h>
+#include <linux/notifier.h>
+#include <linux/module.h>
+#include <linux/immediate.h>
+#include <linux/kdebug.h>
+#include <linux/rcupdate.h>
+#include <linux/kprobes.h>
+
+#include <asm/cacheflush.h>
+
+#define BREAKPOINT_INSTRUCTION 0xcc
+#define BREAKPOINT_INS_LEN 1
+#define NOP_INSTRUCTION 0x90
+#define NR_NOPS 8
+
+static long target_after_int3; /* EIP of the target after the int3 */
+static long bypass_eip; /* EIP of the bypass. */
+static long bypass_after_int3; /* EIP after the end-of-bypass int3 */
+static long after_immediate; /*
+ * EIP where to resume after the
+ * single-stepping.
+ */
+
+/*
+ * Size of the movl instruction (without the immediate value) in bytes.
+ * The 2 bytes load immediate has a 66H prefix, which makes the opcode 2 bytes
+ * wide.
+ */
+static inline size_t _immediate_get_insn_size(long size)
+{
+ switch (size) {
+ case 1: return 1;
+ case 2: return 2;
+ case 4: return 1;
+ default: BUG();
+ };
+}
+
+/*
+ * Internal bypass used during value update. The bypass is skipped by the
+ * function in which it is inserted.
+ * No need to be aligned because we exclude readers from the site during
+ * update.
+ * Layout is:
+ * nop nop nop nop nop nop nop nop int3
+ * The nops are the target replaced by the instruction to single-step.
+ */
+static inline void _immediate_bypass(long *bypassaddr, long *breaknextaddr)
+{
+ asm volatile ( "jmp 2f;\n\t"
+ "0:\n\t"
+ ".space 8, 0x90;\n\t"
+ "1:\n\t"
+ "int3;\n\t"
+ "2:\n\t"
+ "movl $(0b),%0;\n\t"
+ "movl $((1b)+1),%1;\n\t"
+ : "=r" (*bypassaddr),
+ "=r" (*breaknextaddr) : );
+}
+
+static void immediate_synchronize_core(void *info)
+{
+ sync_core(); /* use cpuid to stop speculative execution */
+}
+
+/*
+ * The eip value points right after the breakpoint instruction, in the second
+ * byte of the movl.
+ * Disable preemption in the bypass to make sure no thread will be preempted in
+ * it. We can then use synchronize_sched() to make sure every bypass users have
+ * ended.
+ */
+static int immediate_notifier(struct notifier_block *nb,
+ unsigned long val, void *data)
+{
+ enum die_val die_val = (enum die_val) val;
+ struct die_args *args = data;
+
+ if (!args->regs || user_mode_vm(args->regs))
+ return NOTIFY_DONE;
+
+ if (die_val == DIE_INT3) {
+ if (args->regs->eip == target_after_int3) {
+ preempt_disable();
+ args->regs->eip = bypass_eip;
+ return NOTIFY_STOP;
+ } else if (args->regs->eip == bypass_after_int3) {
+ args->regs->eip = after_immediate;
+ preempt_enable();
+ return NOTIFY_STOP;
+ }
+ }
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block immediate_notify = {
+ .notifier_call = immediate_notifier,
+ .priority = 0x7fffffff, /* we need to be notified first */
+};
+
+
+/**
+ * arch_immediate_update - update one immediate value
+ * @immediate: pointer of type const struct __immediate to update
+ *
+ * Update one immediate value. Must be called with immediate_mutex held.
+ */
+__kprobes int arch_immediate_update(const struct __immediate *immediate)
+{
+ int ret;
+ size_t insn_size = _immediate_get_insn_size(immediate->size);
+ long insn = immediate->immediate - insn_size;
+ unsigned long cr0;
+
+#ifdef CONFIG_KPROBES
+ /*
+ * Fail if a kprobe has been set on this instruction.
+ * (TODO: we could eventually do better and modify all the (possibly
+ * nested) kprobes for this site if kprobes had an API for this.
+ */
+ if (unlikely(*(unsigned char*)insn == BREAKPOINT_INSTRUCTION)) {
+ printk(KERN_WARNING "Immediate value in conflict with kprobe. "
+ "Variable at %p, "
+ "instruction at %p, size %lu\n",
+ (void*)immediate->immediate,
+ (void*)immediate->var, immediate->size);
+ return -EBUSY;
+ }
+#endif
+
+ /*
+ * If the variable and the instruction have the same value, there is
+ * nothing to do.
+ */
+ switch (immediate->size) {
+ case 1: if (*(uint8_t*)immediate->immediate
+ == *(uint8_t*)immediate->var)
+ return 0;
+ break;
+ case 2: if (*(uint16_t*)immediate->immediate
+ == *(uint16_t*)immediate->var)
+ return 0;
+ break;
+ case 4: if (*(uint32_t*)immediate->immediate
+ == *(uint32_t*)immediate->var)
+ return 0;
+ break;
+ default:return -EINVAL;
+ }
+
+ _immediate_bypass(&bypass_eip, &bypass_after_int3);
+
+ after_immediate = immediate->immediate + immediate->size;
+
+ /*
+ * Using the _early variants because nobody is executing the
+ * bypass code while we patch it. It is protected by the
+ * immediate_mutex. Since we modify the instructions non atomically (for
+ * nops), we have to use the _early variant.
+ * We must however deal with the WP flag in cr0 by ourself.
+ */
+ kernel_wp_save(cr0);
+ text_poke_early((void*)bypass_eip, (void*)insn,
+ insn_size + immediate->size);
+ /*
+ * Fill the rest with nops.
+ */
+ text_set_early((void*)(bypass_eip + insn_size + immediate->size),
+ NOP_INSTRUCTION,
+ NR_NOPS - immediate->size - insn_size);
+ kernel_wp_restore(cr0);
+
+ target_after_int3 = insn + BREAKPOINT_INS_LEN;
+ /* register_die_notifier has memory barriers */
+ register_die_notifier(&immediate_notify);
+ /* The breakpoint will single-step the bypass */
+ text_set((void*)insn, BREAKPOINT_INSTRUCTION, 1);
+ wmb();
+ /*
+ * Execute serializing instruction on each CPU.
+ * Acts as a memory barrier.
+ */
+ ret = on_each_cpu(immediate_synchronize_core, NULL, 1, 1);
+ BUG_ON(ret != 0);
+
+ text_poke((void*)(insn + insn_size), (void*)immediate->var,
+ immediate->size);
+ wmb();
+ text_set((void*)insn, *(char*)bypass_eip, 1);
+ /*
+ * Wait for all int3 handlers to end
+ * (interrupts are disabled in int3).
+ * This CPU is clearly not in a int3 handler,
+ * because int3 handler is not preemptible and
+ * there cannot be any more int3 handler called
+ * for this site, because we placed the original
+ * instruction back.
+ * synchronize_sched has memory barriers.
+ */
+ synchronize_sched();
+ unregister_die_notifier(&immediate_notify);
+ /* unregister_die_notifier has memory barriers */
+ return 0;
+}
+
+/**
+ * arch_immediate_update_early - update one immediate value at boot time
+ * @immediate: pointer of type const struct __immediate to update
+ *
+ * Update one immediate value at boot time.
+ */
+void __init arch_immediate_update_early(const struct __immediate *immediate)
+{
+ /*
+ * If the variable and the instruction have the same value, there is
+ * nothing to do.
+ */
+ switch (immediate->size) {
+ case 1: if (*(uint8_t*)immediate->immediate
+ == *(uint8_t*)immediate->var)
+ return;
+ break;
+ case 2: if (*(uint16_t*)immediate->immediate
+ == *(uint16_t*)immediate->var)
+ return;
+ break;
+ case 4: if (*(uint32_t*)immediate->immediate
+ == *(uint32_t*)immediate->var)
+ return;
+ break;
+ default:return;
+ }
+ memcpy((void*)immediate->immediate, (void*)immediate->var,
+ immediate->size);
+}
Index: linux-2.6-lttng/arch/i386/kernel/traps.c
===================================================================
--- linux-2.6-lttng.orig/arch/i386/kernel/traps.c 2007-09-04 14:04:48.000000000 -0400
+++ linux-2.6-lttng/arch/i386/kernel/traps.c 2007-09-04 14:45:22.000000000 -0400
@@ -601,7 +601,7 @@ fastcall void do_##name(struct pt_regs *
}
DO_VM86_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->eip)
-#ifndef CONFIG_KPROBES
+#if !defined(CONFIG_KPROBES) && !defined(CONFIG_IMMEDIATE)
DO_VM86_ERROR( 3, SIGTRAP, "int3", int3)
#endif
DO_VM86_ERROR( 4, SIGSEGV, "overflow", overflow)
@@ -843,14 +843,14 @@ void restart_nmi(void)
acpi_nmi_enable();
}
-#ifdef CONFIG_KPROBES
+#if defined(CONFIG_KPROBES) || defined(CONFIG_IMMEDIATE)
fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code)
{
if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
== NOTIFY_STOP)
return;
- /* This is an interrupt gate, because kprobes wants interrupts
- disabled. Normal trap handlers don't. */
+ /* This is an interrupt gate, because kprobes and immediate values wants
+ * interrupts disabled. Normal trap handlers don't. */
restore_interrupts(regs);
do_trap(3, SIGTRAP, "int3", 1, regs, error_code, NULL);
}
--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
^ permalink raw reply [flat|nested] 36+ messages in thread
* [patch 6/8] Immediate Values - Powerpc Optimization
2007-09-06 20:02 [patch 0/8] Immediate Values for 2.6.23-rc4-mm1 Mathieu Desnoyers
` (4 preceding siblings ...)
2007-09-06 20:02 ` [patch 5/8] Immediate Values - i386 Optimization Mathieu Desnoyers
@ 2007-09-06 20:02 ` Mathieu Desnoyers
2007-09-06 20:02 ` [patch 7/8] Immediate Values Powerpc Optimization Fix Mathieu Desnoyers
2007-09-06 20:02 ` [patch 8/8] Immediate Values - Documentation Mathieu Desnoyers
7 siblings, 0 replies; 36+ messages in thread
From: Mathieu Desnoyers @ 2007-09-06 20:02 UTC (permalink / raw)
To: akpm, linux-kernel; +Cc: Mathieu Desnoyers, Christoph Hellwig
[-- Attachment #1: immediate-values-powerpc-optimization.patch --]
[-- Type: text/plain, Size: 8387 bytes --]
PowerPC optimization of the immediate values which uses a li instruction,
patched with an immediate value.
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
CC: Christoph Hellwig <hch@infradead.org>
---
arch/powerpc/kernel/Makefile | 1
arch/powerpc/kernel/immediate.c | 103 ++++++++++++++++++++++++++++++++
include/asm-powerpc/immediate.h | 127 ++++++++++++++++++++++++++++++++++++++++
3 files changed, 231 insertions(+)
Index: linux-2.6-lttng/include/asm-powerpc/immediate.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/include/asm-powerpc/immediate.h 2007-09-04 12:12:12.000000000 -0400
@@ -0,0 +1,127 @@
+#ifndef _ASM_POWERPC_IMMEDIATE_H
+#define _ASM_POWERPC_IMMEDIATE_H
+
+/*
+ * Immediate values. PowerPC architecture optimizations.
+ *
+ * (C) Copyright 2006 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#include <asm/asm-compat.h>
+
+struct module;
+
+struct __immediate {
+ long var; /* Identifier variable of the immediate value */
+ long immediate; /*
+ * Pointer to the memory location that holds
+ * the immediate value within the load immediate
+ * instruction.
+ */
+ long size; /* Type size. */
+};
+
+/**
+ * immediate_read - read immediate variable
+ * @var: pointer of type immediate_*_t
+ *
+ * Reads the value of @var.
+ * Optimized version of the immediate.
+ * Do not use in __init and __exit functions. Use _immediate_read() instead.
+ * Makes sure the 2 bytes update will be atomic by aligning the immediate
+ * value. Use a normal memory read for the 4 bytes immediate because there is no
+ * way to atomically update it without using a seqlock read side, which would
+ * cost more in term of total i-cache and d-cache space than a simple memory
+ * read.
+ */
+#define immediate_read(var) \
+ ({ \
+ __typeof__((var)->value) value; \
+ switch (sizeof(value)) { \
+ case 1: \
+ asm ( ".section __immediate, \"a\", @progbits;\n\t" \
+ PPC_LONG "%1, ((0f)+3), 1;\n\t" \
+ ".previous;\n\t" \
+ "0:\n\t" \
+ "li %0,%2;\n\t" \
+ : "=r" (value) \
+ : "i" (&(var)->value), \
+ "i" (0)); \
+ break; \
+ case 2: \
+ asm ( ".section __immediate, \"a\", @progbits;\n\t" \
+ PPC_LONG "%1, ((0f)+2), 2;\n\t" \
+ ".previous;\n\t" \
+ ".align 2\n\t" \
+ "0:\n\t" \
+ "li %0,%2;\n\t" \
+ : "=r" (value) \
+ : "i" (&(var)->value), \
+ "i" (0)); \
+ break; \
+ default: \
+ value = (var)->value; \
+ break; \
+ }; \
+ value; \
+ })
+
+/**
+ * immediate_set - set immediate variable (with locking)
+ * @var: pointer of type immediate_*_t
+ * @i: required value
+ *
+ * Sets the value of @var, taking the module_mutex if required by
+ * the architecture.
+ */
+#define immediate_set(var, i) \
+ (var)->value = (i); \
+ immediate_update(1);
+
+/**
+ * _immediate_set - set immediate variable (without locking)
+ * @var: pointer of type immediate_*_t
+ * @i: required value
+ *
+ * Sets the value of @var. Must be called with module_mutex held.
+ */
+#define _immediate_set(var, i) \
+ (var)->value = (i); \
+ immediate_update(0);
+
+/**
+ * immediate_set_early - set immediate variable at early boot
+ * @var: pointer of type immediate_*_t
+ * @i: required value
+ *
+ * Sets the value of @var. Should be used for early boot updates.
+ */
+#define immediate_set_early(var, i) \
+ (var)->value = (i); \
+ immediate_update_early();
+
+/**
+ * immediate_if - if () statement depending on an immediate value
+ * @var: pointer of type immediate_*_t
+ *
+ * Use as an if () statement depending on an immediate value.
+ * Do not use in __init and __exit functions. Use _immediate_if() instead.
+ * Branch depending on an immediate value. Could eventually be optimized further
+ * by improving gcc to give the ability to patch a jump instruction instead of
+ * the value it depends on.
+ */
+#define immediate_if(var) if (unlikely(immediate_read(var)))
+
+/*
+ * Internal update functions.
+ */
+extern void immediate_update(int lock);
+extern void module_immediate_setup(struct module *mod);
+extern void immediate_update_early(void);
+extern int arch_immediate_update(const struct __immediate *immediate);
+extern void arch_immediate_update_early(const struct __immediate *immediate);
+
+#endif /* _ASM_POWERPC_IMMEDIATE_H */
Index: linux-2.6-lttng/arch/powerpc/kernel/Makefile
===================================================================
--- linux-2.6-lttng.orig/arch/powerpc/kernel/Makefile 2007-09-04 11:52:37.000000000 -0400
+++ linux-2.6-lttng/arch/powerpc/kernel/Makefile 2007-09-04 12:12:12.000000000 -0400
@@ -97,3 +97,4 @@ obj-$(CONFIG_PPC64) += $(obj64-y)
extra-$(CONFIG_PPC_FPU) += fpu.o
extra-$(CONFIG_PPC64) += entry_64.o
+obj-$(CONFIG_IMMEDIATE) += immediate.o
Index: linux-2.6-lttng/arch/powerpc/kernel/immediate.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/arch/powerpc/kernel/immediate.c 2007-09-04 12:12:12.000000000 -0400
@@ -0,0 +1,103 @@
+/*
+ * Powerpc optimized immediate values enabling/disabling.
+ *
+ * Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ */
+
+#include <linux/module.h>
+#include <linux/immediate.h>
+#include <linux/string.h>
+#include <linux/kprobes.h>
+#include <asm/cacheflush.h>
+#include <asm/page.h>
+
+#define LI_OPCODE_LEN 2
+
+/**
+ * arch_immediate_update - update one immediate value
+ * @immediate: pointer of type const struct __immediate to update
+ *
+ * Update one immediate value. Must be called with immediate_mutex held.
+ */
+int arch_immediate_update(const struct __immediate *immediate)
+{
+#ifdef CONFIG_KPROBES
+ kprobe_opcode_t *insn;
+ /*
+ * Fail if a kprobe has been set on this instruction.
+ * (TODO: we could eventually do better and modify all the (possibly
+ * nested) kprobes for this site if kprobes had an API for this.
+ */
+ switch (immediate->size) {
+ case 1: /* The uint8_t points to the 3rd byte of the
+ * instruction */
+ insn = (void*)(immediate->immediate - 1 - LI_OPCODE_LEN);
+ break;
+ case 2: insn = (void*)(immediate->immediate - LI_OPCODE_LEN);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (unlikely(*insn == BREAKPOINT_INSTRUCTION)) {
+ printk(KERN_WARNING "Immediate value in conflict with kprobe. "
+ "Variable at %p, "
+ "instruction at %p, size %lu\n",
+ (void*)immediate->immediate,
+ (void*)immediate->var, immediate->size);
+ return -EBUSY;
+ }
+#endif
+
+ /*
+ * If the variable and the instruction have the same value, there is
+ * nothing to do.
+ */
+ switch (immediate->size) {
+ case 1: if (*(uint8_t*)immediate->immediate
+ == *(uint8_t*)immediate->var)
+ return 0;
+ break;
+ case 2: if (*(uint16_t*)immediate->immediate
+ == *(uint16_t*)immediate->var)
+ return 0;
+ break;
+ default:return -EINVAL;
+ }
+ memcpy((void*)immediate->immediate, (void*)immediate->var,
+ immediate->size);
+ flush_icache_range((unsigned long)immediate->immediate,
+ immediate->size);
+ return 0;
+}
+
+/**
+ * arch_immediate_update_early - update one immediate value at boot time
+ * @immediate: pointer of type const struct __immediate to update
+ *
+ * Update one immediate value at boot time.
+ * We can use flush_icache_range, since the cpu identification has been done in
+ * the early_init stage.
+ */
+void __init arch_immediate_update_early(const struct __immediate *immediate)
+{
+ /*
+ * If the variable and the instruction have the same value, there is
+ * nothing to do.
+ */
+ switch (immediate->size) {
+ case 1: if (*(uint8_t*)immediate->immediate
+ == *(uint8_t*)immediate->var)
+ return;
+ break;
+ case 2: if (*(uint16_t*)immediate->immediate
+ == *(uint16_t*)immediate->var)
+ return;
+ break;
+ default:return;
+ }
+ memcpy((void*)immediate->immediate, (void*)immediate->var,
+ immediate->size);
+ flush_icache_range((unsigned long)immediate->immediate,
+ immediate->size);
+}
--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
^ permalink raw reply [flat|nested] 36+ messages in thread
* [patch 7/8] Immediate Values Powerpc Optimization Fix
2007-09-06 20:02 [patch 0/8] Immediate Values for 2.6.23-rc4-mm1 Mathieu Desnoyers
` (5 preceding siblings ...)
2007-09-06 20:02 ` [patch 6/8] Immediate Values - Powerpc Optimization Mathieu Desnoyers
@ 2007-09-06 20:02 ` Mathieu Desnoyers
2007-09-06 20:02 ` [patch 8/8] Immediate Values - Documentation Mathieu Desnoyers
7 siblings, 0 replies; 36+ messages in thread
From: Mathieu Desnoyers @ 2007-09-06 20:02 UTC (permalink / raw)
To: akpm, linux-kernel; +Cc: Mathieu Desnoyers, Christoph Hellwig
[-- Attachment #1: immediate-values-powerpc-optimization-fix.patch --]
[-- Type: text/plain, Size: 1371 bytes --]
Fix a bad call to flush_icache_range(). The second parameter is the end address
of the range, not the length.
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
CC: Christoph Hellwig <hch@infradead.org>
---
arch/powerpc/kernel/immediate.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
Index: linux-2.6-lttng/arch/powerpc/kernel/immediate.c
===================================================================
--- linux-2.6-lttng.orig/arch/powerpc/kernel/immediate.c 2007-08-28 16:36:10.000000000 -0400
+++ linux-2.6-lttng/arch/powerpc/kernel/immediate.c 2007-08-28 16:36:40.000000000 -0400
@@ -67,7 +67,7 @@ int arch_immediate_update(const struct _
memcpy((void*)immediate->immediate, (void*)immediate->var,
immediate->size);
flush_icache_range((unsigned long)immediate->immediate,
- immediate->size);
+ (unsigned long)immediate->immediate + immediate->size);
return 0;
}
@@ -99,5 +99,5 @@ void __init arch_immediate_update_early(
memcpy((void*)immediate->immediate, (void*)immediate->var,
immediate->size);
flush_icache_range((unsigned long)immediate->immediate,
- immediate->size);
+ (unsigned long)immediate->immediate + immediate->size);
}
--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
^ permalink raw reply [flat|nested] 36+ messages in thread
* [patch 8/8] Immediate Values - Documentation
2007-09-06 20:02 [patch 0/8] Immediate Values for 2.6.23-rc4-mm1 Mathieu Desnoyers
` (6 preceding siblings ...)
2007-09-06 20:02 ` [patch 7/8] Immediate Values Powerpc Optimization Fix Mathieu Desnoyers
@ 2007-09-06 20:02 ` Mathieu Desnoyers
2007-09-06 21:20 ` Randy Dunlap
7 siblings, 1 reply; 36+ messages in thread
From: Mathieu Desnoyers @ 2007-09-06 20:02 UTC (permalink / raw)
To: akpm, linux-kernel; +Cc: Mathieu Desnoyers
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: immediate-values-documentation.patch --]
[-- Type: text/plain, Size: 9274 bytes --]
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
---
Documentation/immediate.txt | 232 ++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 232 insertions(+)
Index: linux-2.6-lttng/Documentation/immediate.txt
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/Documentation/immediate.txt 2007-08-20 15:55:26.000000000 -0400
@@ -0,0 +1,232 @@
+ Using the Immediate Values
+
+ Mathieu Desnoyers
+
+
+This document introduces Immediate Values and their use.
+
+* Purpose of immediate values
+
+An immediate value is used to compile into the kernel variables that sits within
+the instruction stream. They are meant to be rarely updated but read often.
+Using immediate values for these variables will save cache lines.
+
+This infrastructure is specialized in supporting dynamic patching of the values
+in the instruction stream when multiple CPUs are running without disturbing the
+normal system behavior.
+
+Compiling code meant to be rarely enabled at runtime can be done using
+immediate_if() as condition surrounding the code.
+
+* Usage
+
+In order to use the macro immediate, you should include linux/immediate.h.
+
+#include <linux/immediate.h>
+
+immediate_char_t this_immediate;
+EXPORT_SYMBOL(this_immediate);
+
+
+Add, in your code :
+
+Use immediate_set(&this_immediate) to set the immediate value.
+
+Use immediate_read(&this_immediate) to read the immediate value.
+
+The immediate mechanism supports inserting multiple instances of the same
+immediate. Immediate values can be put in inline functions, inlined static
+functions, and unrolled loops.
+
+If you have to read the immediate values from a function declared as __init or
+__exit, you should explicitly use _immediate_read(), which will fall back on a
+global variable read. Failing to do so will leave a reference to the __init
+section after it is freed (it would generate a modpost warning).
+
+The prefered idiom to dynamically enable compiled-in code is to use
+immediate_if (&this_immediate), which may eventually use gcc improvements to
+provide a jump instruction patching based condition instead of a immediate value
+feeding a conditional jump. You should use _immediate_if () instead of
+immediate_if () in functions marked __init or __exit.
+
+immediate_set_early() should be used only at early kernel boot time, before SMP
+is activated.
+
+If you need to declare your own immediate types (for instance, a pointer to
+struct task_struct), use:
+
+DEFINE_IMMEDIATE_TYPE(struct task_struct*, immediate_task_struct_ptr_t);
+
+and declare your variable with:
+immediate_task_struct_ptr_t myptr;
+
+You can choose to set an initial static value to the immediate by using, for
+instance:
+
+immediate_task_struct_ptr_t myptr = IMMEDIATE_INIT(10);
+
+
+* Optimization for a given architecture
+
+One can implement optimized immediate values for a given architecture by
+replacing asm-$ARCH/immediate.h.
+
+* Performance improvement
+
+* Memory hit for a data-based branch
+
+Here are the results on a 3GHz Pentium 4:
+
+number of tests : 100
+number of branches per test : 100000
+memory hit cycles per iteration (mean) : 636.611
+L1 cache hit cycles per iteration (mean) : 89.6413
+instruction stream based test, cycles per iteration (mean) : 85.3438
+Just getting the pointer from a modulo on a pseudo-random value, doing
+ noting with it, cycles per iteration (mean) : 77.5044
+
+So:
+Base case: 77.50 cycles
+instruction stream based test: +7.8394 cycles
+L1 cache hit based test: +12.1369 cycles
+Memory load based test: +559.1066 cycles
+
+So let's say we have a ping flood coming at
+(14014 packets transmitted, 14014 received, 0% packet loss, time 1826ms)
+7674 packets per second. If we put 2 markers for irq entry/exit, it
+brings us to 15348 markers sites executed per second.
+
+(15348 exec/s) * (559 cycles/exec) / (3G cycles/s) = 0.0029
+We therefore have a 0.29% slowdown just on this case.
+
+Compared to this, the instruction stream based test will cause a
+slowdown of:
+
+(15348 exec/s) * (7.84 cycles/exec) / (3G cycles/s) = 0.00004
+For a 0.004% slowdown.
+
+If we plan to use this for memory allocation, spinlock, and all sort of
+very high event rate tracing, we can assume it will execute 10 to 100
+times more sites per second, which brings us to 0.4% slowdown with the
+instruction stream based test compared to 29% slowdown with the memory
+load based test on a system with high memory pressure.
+
+
+
+* Markers impact under heavy memory load
+
+Running a kernel with my LTTng instrumentation set, in a test that
+generates memory pressure (from userspace) by trashing L1 and L2 caches
+between calls to getppid() (note: syscall_trace is active and calls
+a marker upon syscall entry and syscall exit; markers are disarmed).
+This test is done in user-space, so there are some delays due to IRQs
+coming and to the scheduler. (UP 2.6.22-rc6-mm1 kernel, task with -20
+nice level)
+
+My first set of results : Linear cache trashing, turned out not to be
+very interesting, because it seems like the linearity of the memset on a
+full array is somehow detected and it does not "really" trash the
+caches.
+
+Now the most interesting result : Random walk L1 and L2 trashing
+surrounding a getppid() call.
+
+- Markers compiled out (but syscall_trace execution forced)
+number of tests : 10000
+No memory pressure
+Reading timestamps takes 108.033 cycles
+getppid : 1681.4 cycles
+With memory pressure
+Reading timestamps takes 102.938 cycles
+getppid : 15691.6 cycles
+
+
+- With the immediate values based markers:
+number of tests : 10000
+No memory pressure
+Reading timestamps takes 108.006 cycles
+getppid : 1681.84 cycles
+With memory pressure
+Reading timestamps takes 100.291 cycles
+getppid : 11793 cycles
+
+
+- With global variables based markers:
+number of tests : 10000
+No memory pressure
+Reading timestamps takes 107.999 cycles
+getppid : 1669.06 cycles
+With memory pressure
+Reading timestamps takes 102.839 cycles
+getppid : 12535 cycles
+
+The result is quite interesting in that the kernel is slower without
+markers than with markers. I explain it by the fact that the data
+accessed is not layed out in the same manner in the cache lines when the
+markers are compiled in or out. It seems that it aligns the function's
+data better to compile-in the markers in this case.
+
+But since the interesting comparison is between the immediate values and
+global variables based markers, and because they share the same memory
+layout, except for the movl being replaced by a movz, we see that the
+global variable based markers (2 markers) adds 742 cycles to each system
+call (syscall entry and exit are traced and memory locations for both
+global variables lie on the same cache line).
+
+
+- Test redone with less iterations, but with error estimates
+
+10 runs of 100 iterations each: Tests done on a 3GHz P4. Here I run getppid with
+syscall trace inactive, comparing memory pressure and w/o memory pressure.
+(sorry, my system is not setup to execute syscall_trace this time, but it will
+make the point anyway).
+
+No memory pressure
+Reading timestamps: 150.92 cycles, std dev. 1.01 cycles
+getppid: 1462.09 cycles, std dev. 18.87 cycles
+
+With memory pressure
+Reading timestamps: 578.22 cycles, std dev. 269.51 cycles
+getppid: 17113.33 cycles, std dev. 1655.92 cycles
+
+
+Now for memory read timing: (10 runs, branches per test: 100000)
+Memory read based branch:
+ 644.09 cycles, std dev. 11.39 cycles
+L1 cache hit based branch:
+ 88.16 cycles, std dev. 1.35 cycles
+
+
+So, now that we have the raw results, let's calculate:
+
+Memory read:
+644.09±11.39 - 88.16±1.35 = 555.93±11.46 cycles
+
+Getppid without memory pressure:
+1462.09±18.87 - 150.92±1.01 = 1311.17±18.90 cycles
+
+Getppid with memory pressure:
+17113.33±1655.92 - 578.22±269.51 = 16535.11±1677.71 cycles
+
+Therefore, if we add 2 markers not based on immediate values to the getppid
+code, which would add 2 memory reads, we would add
+2 * 555.93±12.74 = 1111.86±25.48 cycles
+
+Therefore,
+
+1111.86±25.48 / 16535.11±1677.71 = 0.0672
+ relative error: sqrt(((25.48/1111.86)^2)+((1677.71/16535.11)^2))
+ = 0.1040
+ absolute error: 0.1040 * 0.0672 = 0.0070
+
+Therefore: 0.0672±0.0070 * 100% = 6.72±0.70 %
+
+We can therefore affirm that adding 2 markers to getppid, on a system with high
+memory pressure, would have a performance hit of at least 6.0% on the system
+call time, all within the uncertainty limits of these tests. The same applies to
+other kernel code paths. The smaller those code paths are, the highest the
+impact ratio will be.
+
+Therefore, not only is it interesting to use the immediate values to dynamically
+activate dormant code such as the markers, but I think it should also be
+considered as a replacement for many of the "read mostly" static variables.
--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [patch 8/8] Immediate Values - Documentation
2007-09-06 20:02 ` [patch 8/8] Immediate Values - Documentation Mathieu Desnoyers
@ 2007-09-06 21:20 ` Randy Dunlap
2007-09-07 12:23 ` Mathieu Desnoyers
0 siblings, 1 reply; 36+ messages in thread
From: Randy Dunlap @ 2007-09-06 21:20 UTC (permalink / raw)
To: Mathieu Desnoyers; +Cc: akpm, linux-kernel
On Thu, 06 Sep 2007 16:02:36 -0400 Mathieu Desnoyers wrote:
> Documentation/immediate.txt | 232 ++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 232 insertions(+)
>
> Index: linux-2.6-lttng/Documentation/immediate.txt
> ===================================================================
> --- /dev/null 1970-01-01 00:00:00.000000000 +0000
> +++ linux-2.6-lttng/Documentation/immediate.txt 2007-08-20 15:55:26.000000000 -0400
> @@ -0,0 +1,232 @@
> + Using the Immediate Values
> +
> + Mathieu Desnoyers
> +
> +
> +This document introduces Immediate Values and their use.
> +
> +* Purpose of immediate values
> +
> +An immediate value is used to compile into the kernel variables that sits within
s/sits/sit/
> +the instruction stream. They are meant to be rarely updated but read often.
> +Using immediate values for these variables will save cache lines.
> +
> +This infrastructure is specialized in supporting dynamic patching of the values
> +in the instruction stream when multiple CPUs are running without disturbing the
> +normal system behavior.
> +
> +Compiling code meant to be rarely enabled at runtime can be done using
> +immediate_if() as condition surrounding the code.
> +
> +* Usage
> +
> +In order to use the macro immediate, you should include linux/immediate.h.
"immediate" macros,
> +#include <linux/immediate.h>
> +
> +immediate_char_t this_immediate;
> +EXPORT_SYMBOL(this_immediate);
> +
> +
> +Add, in your code :
And, (?)
> +Use immediate_set(&this_immediate) to set the immediate value.
> +
> +Use immediate_read(&this_immediate) to read the immediate value.
> +
> +The immediate mechanism supports inserting multiple instances of the same
> +immediate. Immediate values can be put in inline functions, inlined static
> +functions, and unrolled loops.
> +
> +If you have to read the immediate values from a function declared as __init or
> +__exit, you should explicitly use _immediate_read(), which will fall back on a
> +global variable read. Failing to do so will leave a reference to the __init
> +section after it is freed (it would generate a modpost warning).
> +
> +The prefered idiom to dynamically enable compiled-in code is to use
preferred
> +immediate_if (&this_immediate), which may eventually use gcc improvements to
> +provide a jump instruction patching based condition instead of a immediate value
of an
> +feeding a conditional jump. You should use _immediate_if () instead of
> +immediate_if () in functions marked __init or __exit.
> +
> +immediate_set_early() should be used only at early kernel boot time, before SMP
> +is activated.
More explanation of immediate_set_early() would be good, such as
What? Why? How?
> +
> +If you need to declare your own immediate types (for instance, a pointer to
> +struct task_struct), use:
> +
> +DEFINE_IMMEDIATE_TYPE(struct task_struct*, immediate_task_struct_ptr_t);
> +
> +and declare your variable with:
> +immediate_task_struct_ptr_t myptr;
> +
> +You can choose to set an initial static value to the immediate by using, for
> +instance:
> +
> +immediate_task_struct_ptr_t myptr = IMMEDIATE_INIT(10);
> +
> +
> +* Optimization for a given architecture
> +
> +One can implement optimized immediate values for a given architecture by
> +replacing asm-$ARCH/immediate.h.
> +
> +* Performance improvement
> +
> +* Memory hit for a data-based branch
> +
> +Here are the results on a 3GHz Pentium 4:
> +
> +number of tests : 100
> +number of branches per test : 100000
> +memory hit cycles per iteration (mean) : 636.611
> +L1 cache hit cycles per iteration (mean) : 89.6413
> +instruction stream based test, cycles per iteration (mean) : 85.3438
> +Just getting the pointer from a modulo on a pseudo-random value, doing
> + noting with it, cycles per iteration (mean) : 77.5044
nothing
> +
> +So:
> +Base case: 77.50 cycles
> +instruction stream based test: +7.8394 cycles
> +L1 cache hit based test: +12.1369 cycles
> +Memory load based test: +559.1066 cycles
> +
> +So let's say we have a ping flood coming at
> +(14014 packets transmitted, 14014 received, 0% packet loss, time 1826ms)
> +7674 packets per second. If we put 2 markers for irq entry/exit, it
> +brings us to 15348 markers sites executed per second.
> +
> +(15348 exec/s) * (559 cycles/exec) / (3G cycles/s) = 0.0029
> +We therefore have a 0.29% slowdown just on this case.
> +
> +Compared to this, the instruction stream based test will cause a
> +slowdown of:
> +
> +(15348 exec/s) * (7.84 cycles/exec) / (3G cycles/s) = 0.00004
> +For a 0.004% slowdown.
> +
> +If we plan to use this for memory allocation, spinlock, and all sort of
> +very high event rate tracing, we can assume it will execute 10 to 100
> +times more sites per second, which brings us to 0.4% slowdown with the
> +instruction stream based test compared to 29% slowdown with the memory
> +load based test on a system with high memory pressure.
> +
> +
> +
> +* Markers impact under heavy memory load
> +
> +Running a kernel with my LTTng instrumentation set, in a test that
> +generates memory pressure (from userspace) by trashing L1 and L2 caches
> +between calls to getppid() (note: syscall_trace is active and calls
> +a marker upon syscall entry and syscall exit; markers are disarmed).
> +This test is done in user-space, so there are some delays due to IRQs
> +coming and to the scheduler. (UP 2.6.22-rc6-mm1 kernel, task with -20
> +nice level)
> +
> +My first set of results : Linear cache trashing, turned out not to be
> +very interesting, because it seems like the linearity of the memset on a
> +full array is somehow detected and it does not "really" trash the
> +caches.
> +
> +Now the most interesting result : Random walk L1 and L2 trashing
> +surrounding a getppid() call.
> +
> +- Markers compiled out (but syscall_trace execution forced)
> +number of tests : 10000
> +No memory pressure
> +Reading timestamps takes 108.033 cycles
> +getppid : 1681.4 cycles
> +With memory pressure
> +Reading timestamps takes 102.938 cycles
> +getppid : 15691.6 cycles
> +
> +
> +- With the immediate values based markers:
> +number of tests : 10000
> +No memory pressure
> +Reading timestamps takes 108.006 cycles
> +getppid : 1681.84 cycles
> +With memory pressure
> +Reading timestamps takes 100.291 cycles
> +getppid : 11793 cycles
> +
> +
> +- With global variables based markers:
> +number of tests : 10000
> +No memory pressure
> +Reading timestamps takes 107.999 cycles
> +getppid : 1669.06 cycles
> +With memory pressure
> +Reading timestamps takes 102.839 cycles
> +getppid : 12535 cycles
> +
> +The result is quite interesting in that the kernel is slower without
> +markers than with markers. I explain it by the fact that the data
> +accessed is not layed out in the same manner in the cache lines when the
laid out
> +markers are compiled in or out. It seems that it aligns the function's
> +data better to compile-in the markers in this case.
> +
> +But since the interesting comparison is between the immediate values and
> +global variables based markers, and because they share the same memory
> +layout, except for the movl being replaced by a movz, we see that the
> +global variable based markers (2 markers) adds 742 cycles to each system
> +call (syscall entry and exit are traced and memory locations for both
> +global variables lie on the same cache line).
> +
> +
> +- Test redone with less iterations, but with error estimates
> +
> +10 runs of 100 iterations each: Tests done on a 3GHz P4. Here I run getppid with
> +syscall trace inactive, comparing memory pressure and w/o memory pressure.
^ +with (?)
also, spell out "without", please.
> +(sorry, my system is not setup to execute syscall_trace this time, but it will
> +make the point anyway).
> +
> +No memory pressure
> +Reading timestamps: 150.92 cycles, std dev. 1.01 cycles
> +getppid: 1462.09 cycles, std dev. 18.87 cycles
> +
> +With memory pressure
> +Reading timestamps: 578.22 cycles, std dev. 269.51 cycles
> +getppid: 17113.33 cycles, std dev. 1655.92 cycles
> +
> +
> +Now for memory read timing: (10 runs, branches per test: 100000)
> +Memory read based branch:
> + 644.09 cycles, std dev. 11.39 cycles
> +L1 cache hit based branch:
> + 88.16 cycles, std dev. 1.35 cycles
> +
> +
> +So, now that we have the raw results, let's calculate:
> +
> +Memory read:
> +644.09±11.39 - 88.16±1.35 = 555.93±11.46 cycles
What character is this that I cannot read (not displayed properly
by my email client maybe)? <something> after 644.09 and before the
+- symbol, repeated just before all of the +- symbols.
> +Getppid without memory pressure:
> +1462.09±18.87 - 150.92±1.01 = 1311.17±18.90 cycles
> +
> +Getppid with memory pressure:
> +17113.33±1655.92 - 578.22±269.51 = 16535.11±1677.71 cycles
> +
> +Therefore, if we add 2 markers not based on immediate values to the getppid
> +code, which would add 2 memory reads, we would add
> +2 * 555.93±12.74 = 1111.86±25.48 cycles
> +
> +Therefore,
> +
> +1111.86±25.48 / 16535.11±1677.71 = 0.0672
> + relative error: sqrt(((25.48/1111.86)^2)+((1677.71/16535.11)^2))
> + = 0.1040
> + absolute error: 0.1040 * 0.0672 = 0.0070
> +
> +Therefore: 0.0672±0.0070 * 100% = 6.72±0.70 %
> +
> +We can therefore affirm that adding 2 markers to getppid, on a system with high
> +memory pressure, would have a performance hit of at least 6.0% on the system
> +call time, all within the uncertainty limits of these tests. The same applies to
> +other kernel code paths. The smaller those code paths are, the highest the
> +impact ratio will be.
> +
> +Therefore, not only is it interesting to use the immediate values to dynamically
> +activate dormant code such as the markers, but I think it should also be
> +considered as a replacement for many of the "read mostly" static variables.
---
~Randy
*** Remember to use Documentation/SubmitChecklist when testing your code ***
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [patch 3/8] Immediate Values - Kconfig menu in EMBEDDED
2007-09-06 20:02 ` [patch 3/8] Immediate Values - Kconfig menu in EMBEDDED Mathieu Desnoyers
@ 2007-09-07 6:49 ` Andi Kleen
2007-09-07 12:46 ` Mathieu Desnoyers
0 siblings, 1 reply; 36+ messages in thread
From: Andi Kleen @ 2007-09-07 6:49 UTC (permalink / raw)
To: Mathieu Desnoyers
Cc: akpm, linux-kernel, Adrian Bunk, Andi Kleen, Alexey Dobriyan,
Christoph Hellwig
> +config IMMEDIATE
> + default y if !DISABLE_IMMEDIATE
It's still unclear to me why DISABLE_IMMEDIATE is needed. It would
be better to make it just the default.
> +config DISABLE_IMMEDIATE
> + default y if EMBEDDED
> + bool "Disable immediate values" if EMBEDDED
> + depends on X86_32 || PPC || PPC64
> + help
> + Disable code patching based immediate values for embedded systems. It
> + consumes slightly more memory and requires to modify the instruction
> + stream each time a variable is updated.
> Should really be disabled for
> + embedded systems with read-only text.
There are no such on x86 at least And for other architectures like
PPC it would be better to have a high level CONFIG_READ_ONLY_TEXT
then that does the necessary changes internally. But I'm not sure it's even
supporting r/o text.
-Andi
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [patch 4/8] Immediate Values - Move Kprobes i386 restore_interrupt to kdebug.h
2007-09-06 20:02 ` [patch 4/8] Immediate Values - Move Kprobes i386 restore_interrupt to kdebug.h Mathieu Desnoyers
@ 2007-09-07 10:31 ` Ananth N Mavinakayanahalli
0 siblings, 0 replies; 36+ messages in thread
From: Ananth N Mavinakayanahalli @ 2007-09-07 10:31 UTC (permalink / raw)
To: Mathieu Desnoyers
Cc: akpm, linux-kernel, Christoph Hellwig, prasanna,
anil.s.keshavamurthy, davem
On Thu, Sep 06, 2007 at 04:02:32PM -0400, Mathieu Desnoyers wrote:
> Since the breakpoint handler is useful both to kprobes and immediate values, it
> makes sense to make the required restore_interrupt() available through
> asm-i386/kdebug.h.
>
> Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Acked-by: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
> CC: Christoph Hellwig <hch@infradead.org>
> CC: prasanna@in.ibm.com
> CC: ananth@in.ibm.com
> CC: anil.s.keshavamurthy@intel.com
> CC: davem@davemloft.net
> ---
> include/asm-i386/kdebug.h | 12 ++++++++++++
> include/asm-i386/kprobes.h | 9 ---------
> 2 files changed, 12 insertions(+), 9 deletions(-)
>
> Index: linux-2.6-lttng/include/asm-i386/kdebug.h
> ===================================================================
> --- linux-2.6-lttng.orig/include/asm-i386/kdebug.h 2007-09-04 11:52:37.000000000 -0400
> +++ linux-2.6-lttng/include/asm-i386/kdebug.h 2007-09-04 12:12:06.000000000 -0400
> @@ -6,6 +6,9 @@
> * from x86_64 architecture.
> */
>
> +#include <asm/ptrace.h>
> +#include <asm/system.h>
> +
> struct pt_regs;
>
> /* Grossly misnamed. */
> @@ -25,4 +28,13 @@ enum die_val {
> DIE_PAGE_FAULT_NO_CONTEXT,
> };
>
> +/* trap3/1 are intr gates for kprobes. So, restore the status of IF,
> + * if necessary, before executing the original int3/1 (trap) handler.
> + */
> +static inline void restore_interrupts(struct pt_regs *regs)
> +{
> + if (regs->eflags & IF_MASK)
> + local_irq_enable();
> +}
> +
> #endif
> Index: linux-2.6-lttng/include/asm-i386/kprobes.h
> ===================================================================
> --- linux-2.6-lttng.orig/include/asm-i386/kprobes.h 2007-09-04 11:53:26.000000000 -0400
> +++ linux-2.6-lttng/include/asm-i386/kprobes.h 2007-09-04 12:12:06.000000000 -0400
> @@ -79,15 +79,6 @@ struct kprobe_ctlblk {
> struct prev_kprobe prev_kprobe;
> };
>
> -/* trap3/1 are intr gates for kprobes. So, restore the status of IF,
> - * if necessary, before executing the original int3/1 (trap) handler.
> - */
> -static inline void restore_interrupts(struct pt_regs *regs)
> -{
> - if (regs->eflags & IF_MASK)
> - local_irq_enable();
> -}
> -
> extern int kprobe_exceptions_notify(struct notifier_block *self,
> unsigned long val, void *data);
> extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
>
> --
> Mathieu Desnoyers
> Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
> OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
>
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [patch 8/8] Immediate Values - Documentation
2007-09-06 21:20 ` Randy Dunlap
@ 2007-09-07 12:23 ` Mathieu Desnoyers
2007-09-07 14:24 ` Randy Dunlap
0 siblings, 1 reply; 36+ messages in thread
From: Mathieu Desnoyers @ 2007-09-07 12:23 UTC (permalink / raw)
To: Randy Dunlap; +Cc: akpm, linux-kernel
* Randy Dunlap (randy.dunlap@oracle.com) wrote:
[fixing the rest of the comments in the patch]
> What character is this that I cannot read (not displayed properly
> by my email client maybe)? <something> after 644.09 and before the
> +- symbol, repeated just before all of the +- symbols.
>
This file seems to be UTF-8 and the mail has been send by quilt. Is it
ok or should I change this to +/- just to make sure ?
>
> > +Getppid without memory pressure:
> > +1462.09±18.87 - 150.92±1.01 = 1311.17±18.90 cycles
> > +
> > +Getppid with memory pressure:
> > +17113.33±1655.92 - 578.22±269.51 = 16535.11±1677.71 cycles
> > +
> > +Therefore, if we add 2 markers not based on immediate values to the getppid
> > +code, which would add 2 memory reads, we would add
> > +2 * 555.93±12.74 = 1111.86±25.48 cycles
> > +
> > +Therefore,
> > +
> > +1111.86±25.48 / 16535.11±1677.71 = 0.0672
> > + relative error: sqrt(((25.48/1111.86)^2)+((1677.71/16535.11)^2))
> > + = 0.1040
> > + absolute error: 0.1040 * 0.0672 = 0.0070
> > +
> > +Therefore: 0.0672±0.0070 * 100% = 6.72±0.70 %
> > +
> > +We can therefore affirm that adding 2 markers to getppid, on a system with high
> > +memory pressure, would have a performance hit of at least 6.0% on the system
> > +call time, all within the uncertainty limits of these tests. The same applies to
> > +other kernel code paths. The smaller those code paths are, the highest the
> > +impact ratio will be.
> > +
> > +Therefore, not only is it interesting to use the immediate values to dynamically
> > +activate dormant code such as the markers, but I think it should also be
> > +considered as a replacement for many of the "read mostly" static variables.
>
>
> ---
> ~Randy
> *** Remember to use Documentation/SubmitChecklist when testing your code ***
>
--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [patch 3/8] Immediate Values - Kconfig menu in EMBEDDED
2007-09-07 6:49 ` Andi Kleen
@ 2007-09-07 12:46 ` Mathieu Desnoyers
2007-09-07 22:39 ` Andi Kleen
0 siblings, 1 reply; 36+ messages in thread
From: Mathieu Desnoyers @ 2007-09-07 12:46 UTC (permalink / raw)
To: Andi Kleen
Cc: akpm, linux-kernel, Adrian Bunk, Alexey Dobriyan,
Christoph Hellwig
* Andi Kleen (andi@firstfloor.org) wrote:
> > +config IMMEDIATE
> > + default y if !DISABLE_IMMEDIATE
>
> It's still unclear to me why DISABLE_IMMEDIATE is needed. It would
> be better to make it just the default.
>
It is actually the default on any non embedded configuration. Do you
think we should make it default to on on embedded configs too ?
> > +config DISABLE_IMMEDIATE
> > + default y if EMBEDDED
> > + bool "Disable immediate values" if EMBEDDED
> > + depends on X86_32 || PPC || PPC64
> > + help
> > + Disable code patching based immediate values for embedded systems. It
> > + consumes slightly more memory and requires to modify the instruction
> > + stream each time a variable is updated.
>
> > Should really be disabled for
> > + embedded systems with read-only text.
>
> There are no such on x86 at least And for other architectures like
> PPC it would be better to have a high level CONFIG_READ_ONLY_TEXT
> then that does the necessary changes internally. But I'm not sure it's even
> supporting r/o text.
>
The idea here is to give embedded system developers incentives to
create an optimized immediate value header for their architecture. I
fear that if it is not trivial to disable when they need to use ROM to
put the kernel code (as kprobes is, meaning, with a single config
option), they will refuse to event think about including an optimized
immediate value header for their architecture.
And yes, having a CONFIG_READ_ONLY_TEXT makes sense, but it implies
menu dependencies with not only immediate values but also kprobes,
paravirt, alternatives, (am I missing others ?)
As long as we find a way for people to disable _all_ code patching in
their kernel, I'm happy with that. But since every existing code
patching mechanism can currently be disabled one by one, it makes sense
to do the same for the immediate values. Having a global
CONFIG_READ_ONLY_TEXT should IMHO come in a separate effort.
Mathieu
> -Andi
--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [patch 8/8] Immediate Values - Documentation
2007-09-07 12:23 ` Mathieu Desnoyers
@ 2007-09-07 14:24 ` Randy Dunlap
0 siblings, 0 replies; 36+ messages in thread
From: Randy Dunlap @ 2007-09-07 14:24 UTC (permalink / raw)
To: Mathieu Desnoyers; +Cc: akpm, linux-kernel
Mathieu Desnoyers wrote:
> * Randy Dunlap (randy.dunlap@oracle.com) wrote:
> [fixing the rest of the comments in the patch]
>> What character is this that I cannot read (not displayed properly
>> by my email client maybe)? <something> after 644.09 and before the
>> +- symbol, repeated just before all of the +- symbols.
>>
>
> This file seems to be UTF-8 and the mail has been send by quilt. Is it
> ok or should I change this to +/- just to make sure ?
I think that most people on lkml say that UTF-8 is OK in text doc files.
>>> +Getppid without memory pressure:
>>> +1462.09±18.87 - 150.92±1.01 = 1311.17±18.90 cycles
>>> +
>>> +Getppid with memory pressure:
>>> +17113.33±1655.92 - 578.22±269.51 = 16535.11±1677.71 cycles
>>> +
>>> +Therefore, if we add 2 markers not based on immediate values to the getppid
>>> +code, which would add 2 memory reads, we would add
>>> +2 * 555.93±12.74 = 1111.86±25.48 cycles
>>> +
>>> +Therefore,
>>> +
>>> +1111.86±25.48 / 16535.11±1677.71 = 0.0672
>>> + relative error: sqrt(((25.48/1111.86)^2)+((1677.71/16535.11)^2))
>>> + = 0.1040
>>> + absolute error: 0.1040 * 0.0672 = 0.0070
>>> +
>>> +Therefore: 0.0672±0.0070 * 100% = 6.72±0.70 %
>>> +
>>> +We can therefore affirm that adding 2 markers to getppid, on a system with high
>>> +memory pressure, would have a performance hit of at least 6.0% on the system
>>> +call time, all within the uncertainty limits of these tests. The same applies to
>>> +other kernel code paths. The smaller those code paths are, the highest the
>>> +impact ratio will be.
>>> +
>>> +Therefore, not only is it interesting to use the immediate values to dynamically
>>> +activate dormant code such as the markers, but I think it should also be
>>> +considered as a replacement for many of the "read mostly" static variables.
--
~Randy
*** Remember to use Documentation/SubmitChecklist when testing your code ***
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [patch 3/8] Immediate Values - Kconfig menu in EMBEDDED
2007-09-07 12:46 ` Mathieu Desnoyers
@ 2007-09-07 22:39 ` Andi Kleen
2007-09-11 20:22 ` Mathieu Desnoyers
0 siblings, 1 reply; 36+ messages in thread
From: Andi Kleen @ 2007-09-07 22:39 UTC (permalink / raw)
To: Mathieu Desnoyers
Cc: Andi Kleen, akpm, linux-kernel, Adrian Bunk, Alexey Dobriyan,
Christoph Hellwig
On Fri, Sep 07, 2007 at 08:46:48AM -0400, Mathieu Desnoyers wrote:
> * Andi Kleen (andi@firstfloor.org) wrote:
> > > +config IMMEDIATE
> > > + default y if !DISABLE_IMMEDIATE
> >
> > It's still unclear to me why DISABLE_IMMEDIATE is needed. It would
> > be better to make it just the default.
> >
>
> It is actually the default on any non embedded configuration. Do you
> think we should make it default to on on embedded configs too ?
I would prefer to not have any config options at all and let
the non converted architectures always use a asm-generic fallback.
> The idea here is to give embedded system developers incentives to
> create an optimized immediate value header for their architecture. I
Sounds like a quite bogus way to do this.
> fear that if it is not trivial to disable when they need to use ROM to
> put the kernel code (as kprobes is, meaning, with a single config
> option), they will refuse to event think about including an optimized
> immediate value header for their architecture.
#ifdef CONFIG_ARCH_SPECIFIC_READONLY
#include <asm-generic/generic-immediate.h>
#else
/* optimized implementation */
#endif
That's trivial.
> And yes, having a CONFIG_READ_ONLY_TEXT makes sense, but it implies
> menu dependencies with not only immediate values but also kprobes,
> paravirt, alternatives, (am I missing others ?)
paravirt and alternatives are x86 only.
I don't think CONFIG_READ_ONLY_TEXT on x86 makes sense.
On other architectures they have to deal with kprobes, but they
presumably do this already. Not really your problem I suspect.
> As long as we find a way for people to disable _all_ code patching in
> their kernel, I'm happy with that. But since every existing code
> patching mechanism can currently be disabled one by one, it makes sense
> to do the same for the immediate values. Having a global
> CONFIG_READ_ONLY_TEXT should IMHO come in a separate effort.
You're clearly deep into overdesign territory here.
-Andi
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [patch 1/8] Immediate Values - Global Modules List and Module Mutex
2007-09-06 20:02 ` [patch 1/8] Immediate Values - Global Modules List and Module Mutex Mathieu Desnoyers
@ 2007-09-08 7:28 ` Alexey Dobriyan
2007-09-10 23:53 ` Rusty Russell
0 siblings, 1 reply; 36+ messages in thread
From: Alexey Dobriyan @ 2007-09-08 7:28 UTC (permalink / raw)
To: rusty; +Cc: akpm, linux-kernel, Mathieu Desnoyers
On Thu, Sep 06, 2007 at 04:02:29PM -0400, Mathieu Desnoyers wrote:
> Remove "static" from module_mutex and the modules list so it can be used by
> other builtin objects in the kernel. Otherwise, every code depending on the
> module list would have to be put in kernel/module.c. Since the immediate values
> depends on the module list but can be considered as logically different, it
> makes sense to implement them in their own file.
>
> The alternative to this would be to disable preemption in code path that need
> such synchronization, so they can be protected against module unload by
> stop_machine(), but not being able to sleep within while needing such
> synchronization is limiting.
> --- linux-2.6-lttng.orig/kernel/module.c
> +++ linux-2.6-lttng/kernel/module.c
> @@ -64,8 +64,8 @@ extern int module_sysfs_initialized;
>
> /* List of modules, protected by module_mutex or preempt_disable
> * (add/delete uses stop_machine). */
> -static DEFINE_MUTEX(module_mutex);
> -static LIST_HEAD(modules);
> +DEFINE_MUTEX(module_mutex);
> +LIST_HEAD(modules);
> static DECLARE_MUTEX(notify_mutex);
>
> static BLOCKING_NOTIFIER_HEAD(module_notify_list);
> --- linux-2.6-lttng.orig/include/linux/module.h
> +++ linux-2.6-lttng/include/linux/module.h
> @@ -60,6 +60,10 @@ struct module_kobject
> struct kobject *drivers_dir;
> };
>
> +/* Protects the list of modules. */
> +extern struct mutex module_mutex;
> +extern struct list_head modules;
Rusty, do you still want to keep module_mutex virgin? If not, I can
backout /proc/*/wchan vs rmmod race fix et al and use muuuch simpler version.
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [patch 1/8] Immediate Values - Global Modules List and Module Mutex
2007-09-08 7:28 ` Alexey Dobriyan
@ 2007-09-10 23:53 ` Rusty Russell
2007-09-11 0:45 ` Mathieu Desnoyers
0 siblings, 1 reply; 36+ messages in thread
From: Rusty Russell @ 2007-09-10 23:53 UTC (permalink / raw)
To: Alexey Dobriyan; +Cc: akpm, linux-kernel, Mathieu Desnoyers
On Sat, 2007-09-08 at 11:28 +0400, Alexey Dobriyan wrote:
> On Thu, Sep 06, 2007 at 04:02:29PM -0400, Mathieu Desnoyers wrote:
> > Remove "static" from module_mutex and the modules list so it can be used by
> > other builtin objects in the kernel. Otherwise, every code depending on the
> > module list would have to be put in kernel/module.c. Since the immediate values
> > depends on the module list but can be considered as logically different, it
> > makes sense to implement them in their own file.
If I understand this code correctly, then changing immediate values
needs some exclusion to avoid patching live code. You leave this to the
user with some very unclear rules.
The result is a real mess that has nothing to do with the module mutex
and list. These patches need a lot more work 8(
1) The immediate types are just kind of silly. See per-cpu for how it
handles this already. DECLARE_IMMEDIATE(type, var) is probably enough.
2) immediate_if() needs an implementation before you introduce it. Your
assumption that it's always unlikely seems non-orthogonal.
3) immediate_set(), _immediate_set() and immediate_set_early()? No
thanks! AFAICT you really want an "init_immediate(var, val)". This
means "you can patch all the references now, they're not executing".
Later on we could possibly have a super-stop-machine version which
ensures noone's preempted and handles the concurrent case. Maybe.
4) With an "init" interface not a "set" interface, you don't need
locking. Simpler.
Hope that helps,
Rusty.
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [patch 1/8] Immediate Values - Global Modules List and Module Mutex
2007-09-10 23:53 ` Rusty Russell
@ 2007-09-11 0:45 ` Mathieu Desnoyers
2007-09-11 5:18 ` Rusty Russell
0 siblings, 1 reply; 36+ messages in thread
From: Mathieu Desnoyers @ 2007-09-11 0:45 UTC (permalink / raw)
To: Rusty Russell; +Cc: Alexey Dobriyan, akpm, linux-kernel, H. Peter Anvin
* Rusty Russell (rusty@rustcorp.com.au) wrote:
> On Sat, 2007-09-08 at 11:28 +0400, Alexey Dobriyan wrote:
> > On Thu, Sep 06, 2007 at 04:02:29PM -0400, Mathieu Desnoyers wrote:
> > > Remove "static" from module_mutex and the modules list so it can be used by
> > > other builtin objects in the kernel. Otherwise, every code depending on the
> > > module list would have to be put in kernel/module.c. Since the immediate values
> > > depends on the module list but can be considered as logically different, it
> > > makes sense to implement them in their own file.
>
> If I understand this code correctly, then changing immediate values
> needs some exclusion to avoid patching live code. You leave this to the
> user with some very unclear rules.
>
I think you really misunderstood some major features of these patches
then. Doing an immediate_set on a variable has the same semantic as
setting a normal static variable. If the write to the equivalent
variable would be performed atomically (e.g. 32 bits or less variable on
a 32 bits architecture), it will also be done atomically on an
immediate value reference.
Code patching of _live_ SMP code is allowed. This is why I went through
all this trouble on i386.
So the locking is the same for an immediate value that it would be for a
static variable. If you are concerned about the fact that we have to
iterate on multiple load immediate sites to enable the variable (which
is done non atomically when there are multiple references), we end up in
a moment where references may be in a different state at a given time.
But this is no different from out of order read/writes of/from a static
variable and how older copies may be seen by another CPU : if you hope
that multiple CPUs will see a coherent copy of a static variable at a
given point in time, you clearly have to use the proper locking that
will order read/writes.
Immediate values are mostly meant to be used in contexts where we can
allow this time window where, during the update, some references will
have one value and others won't. Therefore, doing :
immediate_int_t var;
in function:
BUG_ON(immediate_read(&var) != immediate_read(&var));
might fail if it is executed while an immediate_set is done on var. But
if we would have:
volatile int var;
in function:
BUG_ON(var != var);
and, in another thread:
var++;
We _might_ race and see a different value. What is do different about
it?
When we activate a feature that is protected by a boolean, we currently
have to accept that we will, at some point, be in a state where code
paths will see the enabled value and others will see the disabled one.
(this is the choice we do in tracing, and the choice that has to be done
when using atomic updates without locking).
The only guarantee you get is that once the immediate_set is over, all
further references to the variable in memory will see the new variable.
> The result is a real mess that has nothing to do with the module mutex
> and list. These patches need a lot more work 8(
>
module mutex is only there to allow coherent iteration on the module
list so we can patch each module's code.
> 1) The immediate types are just kind of silly. See per-cpu for how it
> handles this already. DECLARE_IMMEDIATE(type, var) is probably enough.
>
I would be ok with that.
> 2) immediate_if() needs an implementation before you introduce it. Your
> assumption that it's always unlikely seems non-orthogonal.
>
I could remove the unlikely(), no problem with that. Your point about
this is valid. However, I would like to leave the immediate_if() there
because it may become very useful if someday gcc permits to extract the
address of a branch instruction (and to generate assembly that would not
be reached without doing code patching).
Quoting my discussion with H. Peter Anvin:
<quote>
Mathieu Desnoyers wrote:
>
> If we can change the compiler, here is what we could do:
>
> Tell GCC to put NOPs that could be altered by a branch alternative to
> some specified code. We should be able to get the instruction pointers
> (think of inlines) to these nop/branch instructions so we can change
> them dynamically.
>
Changing the compiler should be perfectly feasible, *BUT* I think we
need a transitional solution that works on existing compilers.
> I suspect this would be inherently tricky. If someone is ready to do
> this and tells me "yes, it will be there in 1 month", I am more than
> ready to switch my markers to this and help, but since the core of my
> work is kernel tracing, I don't have the time nor the ressources to
> tackle this problem.
>
> In the event that someone answers "we'll do this in the following 3
> years", I might consider to change the if (immediate(var)) into an
> immediate_if (var) so we can later proceed to the change with simple
> ifdefs without rewriting all the kernel code that would use it.
This is much more of "we'll do that in the following 1-2 years", since
we have to deal with a full gcc development cycle. However, I really
want to see this being implemented in a way that would let us DTRT in
the long run."
</quote>
> 3) immediate_set(), _immediate_set() and immediate_set_early()? No
> thanks! AFAICT you really want an "init_immediate(var, val)". This
> means "you can patch all the references now, they're not executing".
> Later on we could possibly have a super-stop-machine version which
> ensures noone's preempted and handles the concurrent case. Maybe.
>
> 4) With an "init" interface not a "set" interface, you don't need
> locking. Simpler.
>
We don't need to stop execution of references at all. Not needed. (as
explained above). I wouldn't want that anyway, since I want to use this
for tracing which must be as non-intrusive as possible.
No locking, live update, is imho much simpler :)
> Hope that helps,
> Rusty.
>
Thanks for the review,
Mathieu
>
--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [patch 1/8] Immediate Values - Global Modules List and Module Mutex
2007-09-11 0:45 ` Mathieu Desnoyers
@ 2007-09-11 5:18 ` Rusty Russell
2007-09-11 14:27 ` Mathieu Desnoyers
0 siblings, 1 reply; 36+ messages in thread
From: Rusty Russell @ 2007-09-11 5:18 UTC (permalink / raw)
To: Mathieu Desnoyers; +Cc: Alexey Dobriyan, akpm, linux-kernel, H. Peter Anvin
On Mon, 2007-09-10 at 20:45 -0400, Mathieu Desnoyers wrote:
> Code patching of _live_ SMP code is allowed. This is why I went through
> all this trouble on i386.
Oh, I was pretty sure it wasn't. OK.
So now why three versions of immediate_set()? And why are you using my
lock for exclusion? Against what?
Why not just have one immediate_set() which iterates through and fixes
up all the references? It can use an internal lock if you want to avoid
concurrent immediate_set() calls.
I understand the need for a "module_immediate_fixup()" but that can also
use your internal lock.
> > 2) immediate_if() needs an implementation before you introduce it. Your
> > assumption that it's always unlikely seems non-orthogonal.
>
> I could remove the unlikely(), no problem with that. Your point about
> this is valid. However, I would like to leave the immediate_if() there
> because it may become very useful if someday gcc permits to extract the
> address of a branch instruction (and to generate assembly that would not
> be reached without doing code patching).
Why is it easier to patch the sites now than later? Currently it's just
churn. You could go back and find them when this mythical patch gets
merged into this mythical future gcc version. It could well need a
completely different macro style, like "cond_imm(var, code)".
Rusty.
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [patch 1/8] Immediate Values - Global Modules List and Module Mutex
2007-09-11 5:18 ` Rusty Russell
@ 2007-09-11 14:27 ` Mathieu Desnoyers
2007-09-13 5:47 ` Rusty Russell
0 siblings, 1 reply; 36+ messages in thread
From: Mathieu Desnoyers @ 2007-09-11 14:27 UTC (permalink / raw)
To: Rusty Russell; +Cc: Alexey Dobriyan, akpm, linux-kernel, H. Peter Anvin
* Rusty Russell (rusty@rustcorp.com.au) wrote:
> On Mon, 2007-09-10 at 20:45 -0400, Mathieu Desnoyers wrote:
> > Code patching of _live_ SMP code is allowed. This is why I went through
> > all this trouble on i386.
>
> Oh, I was pretty sure it wasn't. OK.
>
> So now why three versions of immediate_set()? And why are you using my
> lock for exclusion? Against what?
>
If we need to patch code at boot time, when interrupts are still
disabled (it happens when we parse the kernel arguments for instance),
we cannot afford to use IPIs to call sync_core() on each cpu, using
breakpoints/notifier chains could be tricky (because we are very early
at boot and alternatives or paravirt may not have been applied yet).
So, for early boot code patching, immediate_set_early() is used. It
presumes that variables are not used when the code is patched, that we
are in UP context and it is really minimalistic.
On the other hand, immediate_set() updates kernel and module variables
while the (potentially smp) system is running. It provides coherent
variable updates even if the code referencing then is executing.
_immediate_set() has been introduced because of the way immediate values
are used by markers: the linux kernel markers already hold the module
mutex when they need to update the immediate values. Taking the mutex
twice makes no sence, so _immediate_set() is used when the caller
already holds the module mutex.
> Why not just have one immediate_set() which iterates through and fixes
> up all the references?
(reasons explained above)
> It can use an internal lock if you want to avoid
> concurrent immediate_set() calls.
>
An internal lock won't protect against modules load/unload race. We have
to iterate on the module list.
> I understand the need for a "module_immediate_fixup()" but that can also
> use your internal lock.
It looks interesting.. can you elaborate a little bit more on this idea?
If we can find a way to encapsulate in module.c everything that needs to
touch the module list, I am all for it.
>
> > > 2) immediate_if() needs an implementation before you introduce it. Your
> > > assumption that it's always unlikely seems non-orthogonal.
> >
> > I could remove the unlikely(), no problem with that. Your point about
> > this is valid. However, I would like to leave the immediate_if() there
> > because it may become very useful if someday gcc permits to extract the
> > address of a branch instruction (and to generate assembly that would not
> > be reached without doing code patching).
>
> Why is it easier to patch the sites now than later? Currently it's just
> churn. You could go back and find them when this mythical patch gets
> merged into this mythical future gcc version. It could well need a
> completely different macro style, like "cond_imm(var, code)".
>
Maybe you're right. My though was that if we have a way to express a
strictly boolean if() statement that can later be optimized further by
gcc using a jump rather than a conditionnal branch and currently emulate
it by using a load immediate/test/branch, we might want to do so right
now so we don't have to do a second code transition from
if (immediate_read(&var)) to immediate_if (&var) later. But you might be
right in that the form could potentially change anyway when the
implementation would come, although I don't see how.
Mathieu
> Rusty.
>
--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [patch 3/8] Immediate Values - Kconfig menu in EMBEDDED
2007-09-07 22:39 ` Andi Kleen
@ 2007-09-11 20:22 ` Mathieu Desnoyers
2007-09-12 12:42 ` Andi Kleen
0 siblings, 1 reply; 36+ messages in thread
From: Mathieu Desnoyers @ 2007-09-11 20:22 UTC (permalink / raw)
To: Andi Kleen
Cc: akpm, linux-kernel, Adrian Bunk, Alexey Dobriyan,
Christoph Hellwig
* Andi Kleen (andi@firstfloor.org) wrote:
> On Fri, Sep 07, 2007 at 08:46:48AM -0400, Mathieu Desnoyers wrote:
> > * Andi Kleen (andi@firstfloor.org) wrote:
> > > > +config IMMEDIATE
> > > > + default y if !DISABLE_IMMEDIATE
> > >
> > > It's still unclear to me why DISABLE_IMMEDIATE is needed. It would
> > > be better to make it just the default.
> > >
> >
> > It is actually the default on any non embedded configuration. Do you
> > think we should make it default to on on embedded configs too ?
>
> I would prefer to not have any config options at all and let
> the non converted architectures always use a asm-generic fallback.
>
> > The idea here is to give embedded system developers incentives to
> > create an optimized immediate value header for their architecture. I
>
> Sounds like a quite bogus way to do this.
>
> > fear that if it is not trivial to disable when they need to use ROM to
> > put the kernel code (as kprobes is, meaning, with a single config
> > option), they will refuse to event think about including an optimized
> > immediate value header for their architecture.
>
> #ifdef CONFIG_ARCH_SPECIFIC_READONLY
> #include <asm-generic/generic-immediate.h>
> #else
> /* optimized implementation */
> #endif
>
> That's trivial.
>
Yeah, but then you would make immediate.o an obj-y (both for
kernel/Makefile and arch/*/kernel/Makefile) and it would be built
even on systems that would be configured not to use immediate values.
Therefore, it would compile-in unused code if we do as you propose,
which I am reluctant to do on embedded systems where code size matters.
Mathieu
> > And yes, having a CONFIG_READ_ONLY_TEXT makes sense, but it implies
> > menu dependencies with not only immediate values but also kprobes,
> > paravirt, alternatives, (am I missing others ?)
>
> paravirt and alternatives are x86 only.
>
> I don't think CONFIG_READ_ONLY_TEXT on x86 makes sense.
>
> On other architectures they have to deal with kprobes, but they
> presumably do this already. Not really your problem I suspect.
>
>
> > As long as we find a way for people to disable _all_ code patching in
> > their kernel, I'm happy with that. But since every existing code
> > patching mechanism can currently be disabled one by one, it makes sense
> > to do the same for the immediate values. Having a global
> > CONFIG_READ_ONLY_TEXT should IMHO come in a separate effort.
>
> You're clearly deep into overdesign territory here.
>
> -Andi
--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [patch 3/8] Immediate Values - Kconfig menu in EMBEDDED
2007-09-11 20:22 ` Mathieu Desnoyers
@ 2007-09-12 12:42 ` Andi Kleen
0 siblings, 0 replies; 36+ messages in thread
From: Andi Kleen @ 2007-09-12 12:42 UTC (permalink / raw)
To: Mathieu Desnoyers
Cc: Andi Kleen, akpm, linux-kernel, Adrian Bunk, Alexey Dobriyan,
Christoph Hellwig
> Yeah, but then you would make immediate.o an obj-y (both for
> kernel/Makefile and arch/*/kernel/Makefile) and it would be built
> even on systems that would be configured not to use immediate values.
You can stick a define into generic-immediate.h and put an ifndef
for that around immediate.c
Or the recently posted function garbage collection patches would
also take care of it.
-Andi
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [patch 1/8] Immediate Values - Global Modules List and Module Mutex
2007-09-11 14:27 ` Mathieu Desnoyers
@ 2007-09-13 5:47 ` Rusty Russell
2007-09-13 21:21 ` Mathieu Desnoyers
0 siblings, 1 reply; 36+ messages in thread
From: Rusty Russell @ 2007-09-13 5:47 UTC (permalink / raw)
To: Mathieu Desnoyers; +Cc: Alexey Dobriyan, akpm, linux-kernel, H. Peter Anvin
On Tue, 2007-09-11 at 10:27 -0400, Mathieu Desnoyers wrote:
> * Rusty Russell (rusty@rustcorp.com.au) wrote:
> > On Mon, 2007-09-10 at 20:45 -0400, Mathieu Desnoyers wrote:
> > > Code patching of _live_ SMP code is allowed. This is why I went through
> > > all this trouble on i386.
> >
> > Oh, I was pretty sure it wasn't. OK.
> >
> > So now why three versions of immediate_set()? And why are you using my
> > lock for exclusion? Against what?
> >
>
> If we need to patch code at boot time, when interrupts are still
> disabled (it happens when we parse the kernel arguments for instance),
> we cannot afford to use IPIs to call sync_core() on each cpu, using
> breakpoints/notifier chains could be tricky (because we are very early
> at boot and alternatives or paravirt may not have been applied yet).
Hi Mathieu,
Sure, but why is that the caller's problem? immediate_set() isn't
fastpath, so why not make it do an "if (early_boot)" internally?
> _immediate_set() has been introduced because of the way immediate values
> are used by markers: the linux kernel markers already hold the module
> mutex when they need to update the immediate values. Taking the mutex
> twice makes no sence, so _immediate_set() is used when the caller
> already holds the module mutex.
> Why not just have one immediate_set() which iterates through and fixes
> > up all the references?
>
> (reasons explained above)
>
> > It can use an internal lock if you want to avoid
> > concurrent immediate_set() calls.
> >
>
> An internal lock won't protect against modules load/unload race. We have
> to iterate on the module list.
Sure, but it seems like that's fairly easy to do within module.c:
/* This updates all the immediates even though only one might have
* changed. But it's so rare it's not worth optimizing. */
void module_update_immediates(void)
{
mutex_lock(&module_mutex);
list_for_each_entry(mod, &modules, list)
update_immediates(mod->immediate, mod->num_immediate);
mutex_unlock(&module_mutex);
}
Then during module load you do:
update_immediates(mod->immediate, mod->num_immediate);
Your immediate_update() just becomes:
update_immediates(__start___immediate,
__stop___immediate - __start___immediate);
module_update_immediates();
update_immediates() can grab the immediate_mutex if you want.
> > Why is it easier to patch the sites now than later? Currently it's just
> > churn. You could go back and find them when this mythical patch gets
> > merged into this mythical future gcc version. It could well need a
> > completely different macro style, like "cond_imm(var, code)".
>
> Maybe you're right. My though was that if we have a way to express a
> strictly boolean if() statement that can later be optimized further by
> gcc using a jump rather than a conditionnal branch and currently emulate
> it by using a load immediate/test/branch, we might want to do so right
> now so we don't have to do a second code transition from
> if (immediate_read(&var)) to immediate_if (&var) later. But you might be
> right in that the form could potentially change anyway when the
> implementation would come, although I don't see how.
I was thinking that we might find useful specific cases before we get
GCC support, which archs can override with tricky asm if they wish.
Cheers,
Rusty.
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [patch 1/8] Immediate Values - Global Modules List and Module Mutex
2007-09-13 5:47 ` Rusty Russell
@ 2007-09-13 21:21 ` Mathieu Desnoyers
2007-09-13 23:15 ` Rusty Russell
0 siblings, 1 reply; 36+ messages in thread
From: Mathieu Desnoyers @ 2007-09-13 21:21 UTC (permalink / raw)
To: Rusty Russell; +Cc: Alexey Dobriyan, akpm, linux-kernel, H. Peter Anvin
* Rusty Russell (rusty@rustcorp.com.au) wrote:
> On Tue, 2007-09-11 at 10:27 -0400, Mathieu Desnoyers wrote:
> > * Rusty Russell (rusty@rustcorp.com.au) wrote:
> > > On Mon, 2007-09-10 at 20:45 -0400, Mathieu Desnoyers wrote:
> > > > Code patching of _live_ SMP code is allowed. This is why I went through
> > > > all this trouble on i386.
> > >
> > > Oh, I was pretty sure it wasn't. OK.
> > >
> > > So now why three versions of immediate_set()? And why are you using my
> > > lock for exclusion? Against what?
> > >
> >
> > If we need to patch code at boot time, when interrupts are still
> > disabled (it happens when we parse the kernel arguments for instance),
> > we cannot afford to use IPIs to call sync_core() on each cpu, using
> > breakpoints/notifier chains could be tricky (because we are very early
> > at boot and alternatives or paravirt may not have been applied yet).
>
> Hi Mathieu,
>
> Sure, but why is that the caller's problem? immediate_set() isn't
> fastpath, so why not make it do an "if (early_boot)" internally?
>
I see two reasons:
1 - early_boot, or anything that looks like this, does not exist
currently (and the following reason might show why).
2 - If we use this, we cannot declare the early code with __init, so it
will have to stay there forever insteaf of being removable once boot is
over.
Therefore, I think it's better to stick to an immediate_set_early
version.
> > _immediate_set() has been introduced because of the way immediate values
> > are used by markers: the linux kernel markers already hold the module
> > mutex when they need to update the immediate values. Taking the mutex
> > twice makes no sence, so _immediate_set() is used when the caller
> > already holds the module mutex.
>
> > Why not just have one immediate_set() which iterates through and fixes
> > > up all the references?
> >
> > (reasons explained above)
> >
> > > It can use an internal lock if you want to avoid
> > > concurrent immediate_set() calls.
> > >
> >
> > An internal lock won't protect against modules load/unload race. We have
> > to iterate on the module list.
>
> Sure, but it seems like that's fairly easy to do within module.c:
>
> /* This updates all the immediates even though only one might have
> * changed. But it's so rare it's not worth optimizing. */
> void module_update_immediates(void)
> {
> mutex_lock(&module_mutex);
> list_for_each_entry(mod, &modules, list)
> update_immediates(mod->immediate, mod->num_immediate);
> mutex_unlock(&module_mutex);
> }
>
> Then during module load you do:
>
> update_immediates(mod->immediate, mod->num_immediate);
>
> Your immediate_update() just becomes:
>
> update_immediates(__start___immediate,
> __stop___immediate - __start___immediate);
> module_update_immediates();
>
> update_immediates() can grab the immediate_mutex if you want.
>
Yup, excellent idea. I just changed the linux kernel markers too.
> > > Why is it easier to patch the sites now than later? Currently it's just
> > > churn. You could go back and find them when this mythical patch gets
> > > merged into this mythical future gcc version. It could well need a
> > > completely different macro style, like "cond_imm(var, code)".
> >
> > Maybe you're right. My though was that if we have a way to express a
> > strictly boolean if() statement that can later be optimized further by
> > gcc using a jump rather than a conditionnal branch and currently emulate
> > it by using a load immediate/test/branch, we might want to do so right
> > now so we don't have to do a second code transition from
> > if (immediate_read(&var)) to immediate_if (&var) later. But you might be
> > right in that the form could potentially change anyway when the
> > implementation would come, although I don't see how.
>
> I was thinking that we might find useful specific cases before we get
> GCC support, which archs can override with tricky asm if they wish.
>
The first useful case is the Linux Kernel Markers, which really needs a
completely boolean if: active or inactive. That would be a good test
case to get gcc support.
Mathieu
> Cheers,
> Rusty.
>
>
--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [patch 1/8] Immediate Values - Global Modules List and Module Mutex
2007-09-13 21:21 ` Mathieu Desnoyers
@ 2007-09-13 23:15 ` Rusty Russell
2007-09-14 15:32 ` Mathieu Desnoyers
0 siblings, 1 reply; 36+ messages in thread
From: Rusty Russell @ 2007-09-13 23:15 UTC (permalink / raw)
To: Mathieu Desnoyers; +Cc: Alexey Dobriyan, akpm, linux-kernel, H. Peter Anvin
On Thu, 2007-09-13 at 17:21 -0400, Mathieu Desnoyers wrote:
> * Rusty Russell (rusty@rustcorp.com.au) wrote:
> > Sure, but why is that the caller's problem? immediate_set() isn't
> > fastpath, so why not make it do an "if (early_boot)" internally?
>
> I see two reasons:
> 1 - early_boot, or anything that looks like this, does not exist
> currently (and the following reason might show why).
> 2 - If we use this, we cannot declare the early code with __init, so it
> will have to stay there forever insteaf of being removable once boot is
> over.
>
> Therefore, I think it's better to stick to an immediate_set_early
> version.
My problem is that I don't know when to use immediate_set_early! You
say "early boot": what does that mean? Is it arch-specific? And why is
it my problem anyway?
Since arch_immediate_update_early() is a memcpy, I don't really buy the
bloat argument (it has some silly-looking optimization, but that should
be removed anyway). I'd really rather see arch_immediate_update()
examine an internal flag and do the memcpy if it's too early for the
full algorithm. That's the only place which really knows, after all.
You may need some external hook to update that internal "too early"
flag, of course.
Alternatively, if you called it "immediate_init" then the semantics
change slightly, but are more obvious (ie. only use this when the value
isn't being accessed yet). But it can't be __init then anyway.
On an unrelated note, did you consider simply IPI-ing and doing the
substitution with all CPUs stopped? If you only updated the immediate
references to this particular var, it should be fast enough not to upset
the RT guys, even.
> > I was thinking that we might find useful specific cases before we get
> > GCC support, which archs can override with tricky asm if they wish.
> >
>
> The first useful case is the Linux Kernel Markers, which really needs a
> completely boolean if: active or inactive. That would be a good test
> case to get gcc support.
Well, you can do that in asm without gcc support. It's a little nasty:
since gcc will know nothing about the function call, it can't have side
effects which are visible in this function, and you'll have to save and
restore *all* regs if you decide to do the function call. But it's
possible (a 5-byte nop gets changed to a call, the call does the pushes
and sets the args regs, calls the function, then pops everything and
rets).
Cheers,
Rusty.
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [patch 1/8] Immediate Values - Global Modules List and Module Mutex
2007-09-13 23:15 ` Rusty Russell
@ 2007-09-14 15:32 ` Mathieu Desnoyers
2007-09-17 22:54 ` Rusty Russell
0 siblings, 1 reply; 36+ messages in thread
From: Mathieu Desnoyers @ 2007-09-14 15:32 UTC (permalink / raw)
To: Rusty Russell; +Cc: Alexey Dobriyan, akpm, linux-kernel, H. Peter Anvin
* Rusty Russell (rusty@rustcorp.com.au) wrote:
> On Thu, 2007-09-13 at 17:21 -0400, Mathieu Desnoyers wrote:
> > * Rusty Russell (rusty@rustcorp.com.au) wrote:
> > > Sure, but why is that the caller's problem? immediate_set() isn't
> > > fastpath, so why not make it do an "if (early_boot)" internally?
> >
> > I see two reasons:
> > 1 - early_boot, or anything that looks like this, does not exist
> > currently (and the following reason might show why).
> > 2 - If we use this, we cannot declare the early code with __init, so it
> > will have to stay there forever insteaf of being removable once boot is
> > over.
> >
> > Therefore, I think it's better to stick to an immediate_set_early
> > version.
>
> My problem is that I don't know when to use immediate_set_early! You
> say "early boot": what does that mean? Is it arch-specific? And why is
> it my problem anyway?
>
> Since arch_immediate_update_early() is a memcpy, I don't really buy the
> bloat argument (it has some silly-looking optimization, but that should
> be removed anyway). I'd really rather see arch_immediate_update()
> examine an internal flag and do the memcpy if it's too early for the
> full algorithm. That's the only place which really knows, after all.
> You may need some external hook to update that internal "too early"
> flag, of course.
>
> Alternatively, if you called it "immediate_init" then the semantics
> change slightly, but are more obvious (ie. only use this when the value
> isn't being accessed yet). But it can't be __init then anyway.
>
I think your idea is good. immediate_init() could be used to update the
immediate values at boot time _and_ at module load time, and we could
use an architecture specific arch_immediate_update_init() to support it.
As for "when" to use this, it should be used at boot time when
interrupts are still disabled, still running in UP. It can also be used
at module load time before any of the module code is executed, as long
as the module code pages are writable (which they always are, for
now..). Therefore, the flag seems inappropriate for module load
arch_immediate_update_init. It cannot be put in __init section neither
though if we use it like this.
> On an unrelated note, did you consider simply IPI-ing and doing the
> substitution with all CPUs stopped? If you only updated the immediate
> references to this particular var, it should be fast enough not to upset
> the RT guys, even.
>
Yes, I thought about this, but since I use immediate values in the
kernel markers, which can be put in exception handlers (including nmi,
mce handler), which cannot be disabled without important side-effects, I
don't think trying to stop the CPUs is a workable solution.
> > > I was thinking that we might find useful specific cases before we get
> > > GCC support, which archs can override with tricky asm if they wish.
> > >
> >
> > The first useful case is the Linux Kernel Markers, which really needs a
> > completely boolean if: active or inactive. That would be a good test
> > case to get gcc support.
>
> Well, you can do that in asm without gcc support. It's a little nasty:
> since gcc will know nothing about the function call, it can't have side
> effects which are visible in this function, and you'll have to save and
> restore *all* regs if you decide to do the function call. But it's
> possible (a 5-byte nop gets changed to a call, the call does the pushes
> and sets the args regs, calls the function, then pops everything and
> rets).
>
GCC support is required if we want to embed inline functions inside
unlikely branches depending on immediate values (no function call
there). It also permits passing local variables as arguments to the
function call (stack setup), which would be tricky, instrumentation site
specific and non portable if done in assembly.
Mathieu
> Cheers,
> Rusty.
>
--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [patch 1/8] Immediate Values - Global Modules List and Module Mutex
2007-09-14 15:32 ` Mathieu Desnoyers
@ 2007-09-17 22:54 ` Rusty Russell
2007-09-18 13:41 ` Mathieu Desnoyers
0 siblings, 1 reply; 36+ messages in thread
From: Rusty Russell @ 2007-09-17 22:54 UTC (permalink / raw)
To: Mathieu Desnoyers; +Cc: Alexey Dobriyan, akpm, linux-kernel, H. Peter Anvin
On Fri, 2007-09-14 at 11:32 -0400, Mathieu Desnoyers wrote:
> * Rusty Russell (rusty@rustcorp.com.au) wrote:
> > Alternatively, if you called it "immediate_init" then the semantics
> > change slightly, but are more obvious (ie. only use this when the value
> > isn't being accessed yet). But it can't be __init then anyway.
> >
>
> I think your idea is good. immediate_init() could be used to update the
> immediate values at boot time _and_ at module load time, and we could
> use an architecture specific arch_immediate_update_init() to support it.
Right.
> As for "when" to use this, it should be used at boot time when
> interrupts are still disabled, still running in UP. It can also be used
> at module load time before any of the module code is executed, as long
> as the module code pages are writable (which they always are, for
> now..). Therefore, the flag seems inappropriate for module load
> arch_immediate_update_init. It cannot be put in __init section neither
> though if we use it like this.
I think from a user's POV it would be nice to have a 1:1 mapping with
normal initialization semantics (ie. it will work as long as you don't
access this value until initialized). And I think this would be the
case. eg:
int foo_func(void)
{
if (immediate_read(&some_immediate))
return 0;
...
}
int some_init(void)
{
immediate_init(some_immediate, 0);
register_foo(foo_func);
...
}
> > On an unrelated note, did you consider simply IPI-ing and doing the
> > substitution with all CPUs stopped? If you only updated the immediate
> > references to this particular var, it should be fast enough not to upset
> > the RT guys, even.
> >
>
> Yes, I thought about this, but since I use immediate values in the
> kernel markers, which can be put in exception handlers (including nmi,
> mce handler), which cannot be disabled without important side-effects, I
> don't think trying to stop the CPUs is a workable solution.
OK, but can you justify the use of immediates within the nmi or mce
handlers? They don't strike me as useful candidates for optimization.
> > Well, you can do that in asm without gcc support. It's a little nasty:
> > since gcc will know nothing about the function call, it can't have side
> > effects which are visible in this function, and you'll have to save and
> > restore *all* regs if you decide to do the function call. But it's
> > possible (a 5-byte nop gets changed to a call, the call does the pushes
> > and sets the args regs, calls the function, then pops everything and
> > rets).
>
> GCC support is required if we want to embed inline functions inside
> unlikely branches depending on immediate values (no function call
> there). It also permits passing local variables as arguments to the
> function call (stack setup), which would be tricky, instrumentation site
> specific and non portable if done in assembly.
Well if this is the slow path, you don't want inline anyway. But it
would be horribly, horribly arch-specific, yes.
Rusty.
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [patch 1/8] Immediate Values - Global Modules List and Module Mutex
2007-09-17 22:54 ` Rusty Russell
@ 2007-09-18 13:41 ` Mathieu Desnoyers
2007-09-20 12:29 ` Rusty Russell
0 siblings, 1 reply; 36+ messages in thread
From: Mathieu Desnoyers @ 2007-09-18 13:41 UTC (permalink / raw)
To: Rusty Russell; +Cc: Alexey Dobriyan, akpm, linux-kernel, H. Peter Anvin
* Rusty Russell (rusty@rustcorp.com.au) wrote:
> On Fri, 2007-09-14 at 11:32 -0400, Mathieu Desnoyers wrote:
> > * Rusty Russell (rusty@rustcorp.com.au) wrote:
> > > Alternatively, if you called it "immediate_init" then the semantics
> > > change slightly, but are more obvious (ie. only use this when the value
> > > isn't being accessed yet). But it can't be __init then anyway.
> > >
> >
> > I think your idea is good. immediate_init() could be used to update the
> > immediate values at boot time _and_ at module load time, and we could
> > use an architecture specific arch_immediate_update_init() to support it.
>
> Right.
>
> > As for "when" to use this, it should be used at boot time when
> > interrupts are still disabled, still running in UP. It can also be used
> > at module load time before any of the module code is executed, as long
> > as the module code pages are writable (which they always are, for
> > now..). Therefore, the flag seems inappropriate for module load
> > arch_immediate_update_init. It cannot be put in __init section neither
> > though if we use it like this.
>
> I think from a user's POV it would be nice to have a 1:1 mapping with
> normal initialization semantics (ie. it will work as long as you don't
> access this value until initialized). And I think this would be the
> case. eg:
>
> int foo_func(void)
> {
> if (immediate_read(&some_immediate))
> return 0;
> ...
> }
>
> int some_init(void)
> {
> immediate_init(some_immediate, 0);
> register_foo(foo_func);
> ...
> }
>
There are other considerations that differs between the boot-time case
and the general "init" case: the write-protection flag must be
cleared-saved/restored when the kernel is running to patch read-only
text, but we don't want to modify cr0 at early boot on i386 because
paravirt is not executed yet (at boot time, pages are not
write-protected yet).
And I am not sure that it buys us anything to create an immediate_init()
when we can do exactly the same job with immediate_set. Yes, it might be
a bit slower, but we are not on a fast path.
>
> > > On an unrelated note, did you consider simply IPI-ing and doing the
> > > substitution with all CPUs stopped? If you only updated the immediate
> > > references to this particular var, it should be fast enough not to upset
> > > the RT guys, even.
> > >
> >
> > Yes, I thought about this, but since I use immediate values in the
> > kernel markers, which can be put in exception handlers (including nmi,
> > mce handler), which cannot be disabled without important side-effects, I
> > don't think trying to stop the CPUs is a workable solution.
>
> OK, but can you justify the use of immediates within the nmi or mce
> handlers? They don't strike me as useful candidates for optimization.
>
Yes, immediate values are used by the Linux Kernel Markers, which
instrument many code paths, including functions called from nmi and mce
contexts (including printk).
> > > Well, you can do that in asm without gcc support. It's a little nasty:
> > > since gcc will know nothing about the function call, it can't have side
> > > effects which are visible in this function, and you'll have to save and
> > > restore *all* regs if you decide to do the function call. But it's
> > > possible (a 5-byte nop gets changed to a call, the call does the pushes
> > > and sets the args regs, calls the function, then pops everything and
> > > rets).
> >
> > GCC support is required if we want to embed inline functions inside
> > unlikely branches depending on immediate values (no function call
> > there). It also permits passing local variables as arguments to the
> > function call (stack setup), which would be tricky, instrumentation site
> > specific and non portable if done in assembly.
>
> Well if this is the slow path, you don't want inline anyway. But it
> would be horribly, horribly arch-specific, yes.
>
Yes, doing arch specific calls without gcc support seems to be unlikely
to give us a neat portable solution.
Mathieu
> Rusty.
>
--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [patch 1/8] Immediate Values - Global Modules List and Module Mutex
2007-09-18 13:41 ` Mathieu Desnoyers
@ 2007-09-20 12:29 ` Rusty Russell
2007-09-21 13:37 ` Mathieu Desnoyers
0 siblings, 1 reply; 36+ messages in thread
From: Rusty Russell @ 2007-09-20 12:29 UTC (permalink / raw)
To: Mathieu Desnoyers; +Cc: Alexey Dobriyan, akpm, linux-kernel, H. Peter Anvin
On Tue, 2007-09-18 at 09:41 -0400, Mathieu Desnoyers wrote:
> * Rusty Russell (rusty@rustcorp.com.au) wrote:
> > On Fri, 2007-09-14 at 11:32 -0400, Mathieu Desnoyers wrote:
> > > * Rusty Russell (rusty@rustcorp.com.au) wrote:
> > > > Alternatively, if you called it "immediate_init" then the semantics
> > > > change slightly, but are more obvious (ie. only use this when the value
> > > > isn't being accessed yet). But it can't be __init then anyway.
> > > >
> > >
> > > I think your idea is good. immediate_init() could be used to update the
> > > immediate values at boot time _and_ at module load time, and we could
> > > use an architecture specific arch_immediate_update_init() to support it.
> >
> > Right.
> >
> > > As for "when" to use this, it should be used at boot time when
> > > interrupts are still disabled, still running in UP. It can also be used
> > > at module load time before any of the module code is executed, as long
> > > as the module code pages are writable (which they always are, for
> > > now..). Therefore, the flag seems inappropriate for module load
> > > arch_immediate_update_init. It cannot be put in __init section neither
> > > though if we use it like this.
> >
> > I think from a user's POV it would be nice to have a 1:1 mapping with
> > normal initialization semantics (ie. it will work as long as you don't
> > access this value until initialized). And I think this would be the
> > case. eg:
> >
> > int foo_func(void)
> > {
> > if (immediate_read(&some_immediate))
> > return 0;
> > ...
> > }
> >
> > int some_init(void)
> > {
> > immediate_init(some_immediate, 0);
> > register_foo(foo_func);
> > ...
> > }
> >
>
> There are other considerations that differs between the boot-time case
> and the general "init" case: the write-protection flag must be
> cleared-saved/restored when the kernel is running to patch read-only
> text, but we don't want to modify cr0 at early boot on i386 because
> paravirt is not executed yet (at boot time, pages are not
> write-protected yet).
>
> And I am not sure that it buys us anything to create an immediate_init()
> when we can do exactly the same job with immediate_set. Yes, it might be
> a bit slower, but we are not on a fast path.
Good points. Well I'd say hiding it all behind a friendly
"immediate_set()" interface is the best option then.
> > OK, but can you justify the use of immediates within the nmi or mce
> > handlers? They don't strike me as useful candidates for optimization.
>
> Yes, immediate values are used by the Linux Kernel Markers, which
> instrument many code paths, including functions called from nmi and mce
> contexts (including printk).
Is this really worth worrying about? Isn't there already a problem with
printk() in nmi?
Cheers,
Rusty.
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [patch 1/8] Immediate Values - Global Modules List and Module Mutex
2007-09-20 12:29 ` Rusty Russell
@ 2007-09-21 13:37 ` Mathieu Desnoyers
2007-09-22 7:15 ` Rusty Russell
0 siblings, 1 reply; 36+ messages in thread
From: Mathieu Desnoyers @ 2007-09-21 13:37 UTC (permalink / raw)
To: Rusty Russell; +Cc: Alexey Dobriyan, akpm, linux-kernel, H. Peter Anvin
* Rusty Russell (rusty@rustcorp.com.au) wrote:
> On Tue, 2007-09-18 at 09:41 -0400, Mathieu Desnoyers wrote:
> > * Rusty Russell (rusty@rustcorp.com.au) wrote:
> > > On Fri, 2007-09-14 at 11:32 -0400, Mathieu Desnoyers wrote:
> > > > * Rusty Russell (rusty@rustcorp.com.au) wrote:
> > > > > Alternatively, if you called it "immediate_init" then the semantics
> > > > > change slightly, but are more obvious (ie. only use this when the value
> > > > > isn't being accessed yet). But it can't be __init then anyway.
> > > > >
> > > >
> > > > I think your idea is good. immediate_init() could be used to update the
> > > > immediate values at boot time _and_ at module load time, and we could
> > > > use an architecture specific arch_immediate_update_init() to support it.
> > >
> > > Right.
> > >
> > > > As for "when" to use this, it should be used at boot time when
> > > > interrupts are still disabled, still running in UP. It can also be used
> > > > at module load time before any of the module code is executed, as long
> > > > as the module code pages are writable (which they always are, for
> > > > now..). Therefore, the flag seems inappropriate for module load
> > > > arch_immediate_update_init. It cannot be put in __init section neither
> > > > though if we use it like this.
> > >
> > > I think from a user's POV it would be nice to have a 1:1 mapping with
> > > normal initialization semantics (ie. it will work as long as you don't
> > > access this value until initialized). And I think this would be the
> > > case. eg:
> > >
> > > int foo_func(void)
> > > {
> > > if (immediate_read(&some_immediate))
> > > return 0;
> > > ...
> > > }
> > >
> > > int some_init(void)
> > > {
> > > immediate_init(some_immediate, 0);
> > > register_foo(foo_func);
> > > ...
> > > }
> > >
> >
> > There are other considerations that differs between the boot-time case
> > and the general "init" case: the write-protection flag must be
> > cleared-saved/restored when the kernel is running to patch read-only
> > text, but we don't want to modify cr0 at early boot on i386 because
> > paravirt is not executed yet (at boot time, pages are not
> > write-protected yet).
> >
> > And I am not sure that it buys us anything to create an immediate_init()
> > when we can do exactly the same job with immediate_set. Yes, it might be
> > a bit slower, but we are not on a fast path.
>
> Good points. Well I'd say hiding it all behind a friendly
> "immediate_set()" interface is the best option then.
>
Then we can't benefit of the __init section to have the code removed
after boot. I don't see the point in doing so.
> > > OK, but can you justify the use of immediates within the nmi or mce
> > > handlers? They don't strike me as useful candidates for optimization.
> >
> > Yes, immediate values are used by the Linux Kernel Markers, which
> > instrument many code paths, including functions called from nmi and mce
> > contexts (including printk).
>
> Is this really worth worrying about? Isn't there already a problem with
> printk() in nmi?
>
printk() is just an example taken from my current instrumentation. Let's
say I plan to instrument kmalloc() (which will happen someday): it's
used in every context, including NMI. And it's not because printk is
broken that its instrumentation can afford to be broken even further,
that logic seems wrong to me. Somebody go fix printk then ;)
Mathieu
> Cheers,
> Rusty.
>
--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [patch 1/8] Immediate Values - Global Modules List and Module Mutex
2007-09-21 13:37 ` Mathieu Desnoyers
@ 2007-09-22 7:15 ` Rusty Russell
0 siblings, 0 replies; 36+ messages in thread
From: Rusty Russell @ 2007-09-22 7:15 UTC (permalink / raw)
To: Mathieu Desnoyers; +Cc: Alexey Dobriyan, akpm, linux-kernel, H. Peter Anvin
On Fri, 2007-09-21 at 09:37 -0400, Mathieu Desnoyers wrote:
> > Good points. Well I'd say hiding it all behind a friendly
> > "immediate_set()" interface is the best option then.
>
> Then we can't benefit of the __init section to have the code removed
> after boot. I don't see the point in doing so.
Because having a magic early version is a bad burden to place on
programmers. It's an admission of failure that we cannot create a
simpler interface.
AFAICT it's a handful of bytes which would be freed.
> > Is this really worth worrying about? Isn't there already a problem with
> > printk() in nmi?
>
> printk() is just an example taken from my current instrumentation. Let's
> say I plan to instrument kmalloc() (which will happen someday): it's
> used in every context, including NMI.
Again, I don't think calling kmalloc in NMI is valid. Unless I'm
missing something, any code which uses locks is susceptible to deadlock
if used from an NMI handler. So you really can't do much.
It's nice that you have the perfect solution. But I'd really rather see
a sufficient solution which is 1/4 of the code and 1/10 the complexity.
Rusty.
^ permalink raw reply [flat|nested] 36+ messages in thread
* [patch 6/8] Immediate Values - Powerpc Optimization
2007-11-13 18:58 [patch 0/8] Immediate Values (now with merged x86 support) Mathieu Desnoyers
@ 2007-11-13 18:58 ` Mathieu Desnoyers
0 siblings, 0 replies; 36+ messages in thread
From: Mathieu Desnoyers @ 2007-11-13 18:58 UTC (permalink / raw)
To: akpm, linux-kernel; +Cc: Mathieu Desnoyers, Christoph Hellwig
[-- Attachment #1: immediate-values-powerpc-optimization.patch --]
[-- Type: text/plain, Size: 7433 bytes --]
PowerPC optimization of the immediate values which uses a li instruction,
patched with an immediate value.
Changelog:
- Put immediate_set and _immediate_set in the architecture independent header.
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
CC: Christoph Hellwig <hch@infradead.org>
---
arch/powerpc/Kconfig | 3 +
arch/powerpc/kernel/Makefile | 1
arch/powerpc/kernel/immediate.c | 103 ++++++++++++++++++++++++++++++++++++++++
include/asm-powerpc/immediate.h | 71 +++++++++++++++++++++++++++
4 files changed, 178 insertions(+)
Index: linux-2.6-lttng/include/asm-powerpc/immediate.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/include/asm-powerpc/immediate.h 2007-11-13 13:53:28.000000000 -0500
@@ -0,0 +1,71 @@
+#ifndef _ASM_POWERPC_IMMEDIATE_H
+#define _ASM_POWERPC_IMMEDIATE_H
+
+/*
+ * Immediate values. PowerPC architecture optimizations.
+ *
+ * (C) Copyright 2006 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#include <asm/asm-compat.h>
+
+struct __immediate {
+ long var; /* Identifier variable of the immediate value */
+ long immediate; /*
+ * Pointer to the memory location that holds
+ * the immediate value within the load immediate
+ * instruction.
+ */
+ long size; /* Type size. */
+} __attribute__ ((aligned(sizeof(long))));
+
+/**
+ * immediate_read - read immediate variable
+ * @name: immediate value name
+ *
+ * Reads the value of @name.
+ * Optimized version of the immediate.
+ * Do not use in __init and __exit functions. Use _immediate_read() instead.
+ * Makes sure the 2 bytes update will be atomic by aligning the immediate
+ * value. Use a normal memory read for the 4 bytes immediate because there is no
+ * way to atomically update it without using a seqlock read side, which would
+ * cost more in term of total i-cache and d-cache space than a simple memory
+ * read.
+ */
+#define immediate_read(name) \
+ ({ \
+ __typeof__(name##__immediate) value; \
+ switch (sizeof(value)) { \
+ case 1: \
+ asm(".section __immediate,\"a\",@progbits\n\t" \
+ PPC_LONG "%c1, ((0f)+3), 1\n\t" \
+ ".previous\n\t" \
+ "0:\n\t" \
+ "li %0,0\n\t" \
+ : "=r" (value) \
+ : "i" (&name##__immediate)); \
+ break; \
+ case 2: \
+ asm(".section __immediate,\"a\",@progbits\n\t" \
+ PPC_LONG "%c1, ((0f)+2), 2\n\t" \
+ ".previous\n\t" \
+ ".align 2\n\t" \
+ "0:\n\t" \
+ "li %0,0\n\t" \
+ : "=r" (value) \
+ : "i" (&name##__immediate)); \
+ break; \
+ default: \
+ value = name##__immediate; \
+ break; \
+ }; \
+ value; \
+ })
+
+extern int arch_immediate_update(const struct __immediate *immediate);
+extern void arch_immediate_update_early(const struct __immediate *immediate);
+
+#endif /* _ASM_POWERPC_IMMEDIATE_H */
Index: linux-2.6-lttng/arch/powerpc/kernel/Makefile
===================================================================
--- linux-2.6-lttng.orig/arch/powerpc/kernel/Makefile 2007-11-13 13:49:11.000000000 -0500
+++ linux-2.6-lttng/arch/powerpc/kernel/Makefile 2007-11-13 13:53:28.000000000 -0500
@@ -91,3 +91,4 @@ obj-$(CONFIG_PPC64) += $(obj64-y)
extra-$(CONFIG_PPC_FPU) += fpu.o
extra-$(CONFIG_PPC64) += entry_64.o
+obj-$(CONFIG_IMMEDIATE) += immediate.o
Index: linux-2.6-lttng/arch/powerpc/kernel/immediate.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/arch/powerpc/kernel/immediate.c 2007-11-13 13:53:28.000000000 -0500
@@ -0,0 +1,103 @@
+/*
+ * Powerpc optimized immediate values enabling/disabling.
+ *
+ * Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ */
+
+#include <linux/module.h>
+#include <linux/immediate.h>
+#include <linux/string.h>
+#include <linux/kprobes.h>
+#include <asm/cacheflush.h>
+#include <asm/page.h>
+
+#define LI_OPCODE_LEN 2
+
+/**
+ * arch_immediate_update - update one immediate value
+ * @immediate: pointer of type const struct __immediate to update
+ *
+ * Update one immediate value. Must be called with immediate_mutex held.
+ */
+int arch_immediate_update(const struct __immediate *immediate)
+{
+#ifdef CONFIG_KPROBES
+ kprobe_opcode_t *insn;
+ /*
+ * Fail if a kprobe has been set on this instruction.
+ * (TODO: we could eventually do better and modify all the (possibly
+ * nested) kprobes for this site if kprobes had an API for this.
+ */
+ switch (immediate->size) {
+ case 1: /* The uint8_t points to the 3rd byte of the
+ * instruction */
+ insn = (void *)(immediate->immediate - 1 - LI_OPCODE_LEN);
+ break;
+ case 2: insn = (void *)(immediate->immediate - LI_OPCODE_LEN);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (unlikely(*insn == BREAKPOINT_INSTRUCTION)) {
+ printk(KERN_WARNING "Immediate value in conflict with kprobe. "
+ "Variable at %p, "
+ "instruction at %p, size %lu\n",
+ (void *)immediate->immediate,
+ (void *)immediate->var, immediate->size);
+ return -EBUSY;
+ }
+#endif
+
+ /*
+ * If the variable and the instruction have the same value, there is
+ * nothing to do.
+ */
+ switch (immediate->size) {
+ case 1: if (*(uint8_t *)immediate->immediate
+ == *(uint8_t *)immediate->var)
+ return 0;
+ break;
+ case 2: if (*(uint16_t *)immediate->immediate
+ == *(uint16_t *)immediate->var)
+ return 0;
+ break;
+ default:return -EINVAL;
+ }
+ memcpy((void *)immediate->immediate, (void *)immediate->var,
+ immediate->size);
+ flush_icache_range((unsigned long)immediate->immediate,
+ (unsigned long)immediate->immediate + immediate->size);
+ return 0;
+}
+
+/**
+ * arch_immediate_update_early - update one immediate value at boot time
+ * @immediate: pointer of type const struct __immediate to update
+ *
+ * Update one immediate value at boot time.
+ * We can use flush_icache_range, since the cpu identification has been done in
+ * the early_init stage.
+ */
+void arch_immediate_update_early(const struct __immediate *immediate)
+{
+ /*
+ * If the variable and the instruction have the same value, there is
+ * nothing to do.
+ */
+ switch (immediate->size) {
+ case 1: if (*(uint8_t *)immediate->immediate
+ == *(uint8_t *)immediate->var)
+ return;
+ break;
+ case 2: if (*(uint16_t *)immediate->immediate
+ == *(uint16_t *)immediate->var)
+ return;
+ break;
+ default:return;
+ }
+ memcpy((void *)immediate->immediate, (void *)immediate->var,
+ immediate->size);
+ flush_icache_range((unsigned long)immediate->immediate,
+ (unsigned long)immediate->immediate + immediate->size);
+}
Index: linux-2.6-lttng/arch/powerpc/Kconfig
===================================================================
--- linux-2.6-lttng.orig/arch/powerpc/Kconfig 2007-11-13 13:53:36.000000000 -0500
+++ linux-2.6-lttng/arch/powerpc/Kconfig 2007-11-13 13:53:58.000000000 -0500
@@ -169,6 +169,9 @@ config ARCH_SUPPORTS_OPROFILE
config ARCH_SUPPORTS_KPROBES
def_bool y
+config ARCH_SUPPORTS_IMMEDIATE
+ def_bool y
+
source "init/Kconfig"
source "arch/powerpc/platforms/Kconfig"
--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
^ permalink raw reply [flat|nested] 36+ messages in thread
end of thread, other threads:[~2007-11-13 19:02 UTC | newest]
Thread overview: 36+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-09-06 20:02 [patch 0/8] Immediate Values for 2.6.23-rc4-mm1 Mathieu Desnoyers
2007-09-06 20:02 ` [patch 1/8] Immediate Values - Global Modules List and Module Mutex Mathieu Desnoyers
2007-09-08 7:28 ` Alexey Dobriyan
2007-09-10 23:53 ` Rusty Russell
2007-09-11 0:45 ` Mathieu Desnoyers
2007-09-11 5:18 ` Rusty Russell
2007-09-11 14:27 ` Mathieu Desnoyers
2007-09-13 5:47 ` Rusty Russell
2007-09-13 21:21 ` Mathieu Desnoyers
2007-09-13 23:15 ` Rusty Russell
2007-09-14 15:32 ` Mathieu Desnoyers
2007-09-17 22:54 ` Rusty Russell
2007-09-18 13:41 ` Mathieu Desnoyers
2007-09-20 12:29 ` Rusty Russell
2007-09-21 13:37 ` Mathieu Desnoyers
2007-09-22 7:15 ` Rusty Russell
2007-09-06 20:02 ` [patch 2/8] Immediate Values - Architecture Independent Code Mathieu Desnoyers
2007-09-06 20:02 ` [patch 3/8] Immediate Values - Kconfig menu in EMBEDDED Mathieu Desnoyers
2007-09-07 6:49 ` Andi Kleen
2007-09-07 12:46 ` Mathieu Desnoyers
2007-09-07 22:39 ` Andi Kleen
2007-09-11 20:22 ` Mathieu Desnoyers
2007-09-12 12:42 ` Andi Kleen
2007-09-06 20:02 ` [patch 4/8] Immediate Values - Move Kprobes i386 restore_interrupt to kdebug.h Mathieu Desnoyers
2007-09-07 10:31 ` Ananth N Mavinakayanahalli
2007-09-06 20:02 ` [patch 5/8] Immediate Values - i386 Optimization Mathieu Desnoyers
2007-09-06 20:02 ` [patch 6/8] Immediate Values - Powerpc Optimization Mathieu Desnoyers
2007-09-06 20:02 ` [patch 7/8] Immediate Values Powerpc Optimization Fix Mathieu Desnoyers
2007-09-06 20:02 ` [patch 8/8] Immediate Values - Documentation Mathieu Desnoyers
2007-09-06 21:20 ` Randy Dunlap
2007-09-07 12:23 ` Mathieu Desnoyers
2007-09-07 14:24 ` Randy Dunlap
-- strict thread matches above, loose matches on Subject: below --
2007-11-13 18:58 [patch 0/8] Immediate Values (now with merged x86 support) Mathieu Desnoyers
2007-11-13 18:58 ` [patch 6/8] Immediate Values - Powerpc Optimization Mathieu Desnoyers
2007-08-27 15:59 [patch 0/8] Immediate Values Mathieu Desnoyers
2007-08-27 15:59 ` [patch 6/8] Immediate Values - Powerpc Optimization Mathieu Desnoyers
2007-08-20 20:23 [patch 0/8] Immediate Values Mathieu Desnoyers
2007-08-20 20:24 ` [patch 6/8] Immediate Values - Powerpc Optimization Mathieu Desnoyers
2007-08-12 15:07 [patch 0/8] Immediate Values Mathieu Desnoyers
2007-08-12 15:07 ` [patch 6/8] Immediate Values - Powerpc Optimization Mathieu Desnoyers
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox