public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [GIT PULL] x86/cpu changes for v2.6.34
@ 2010-02-27 15:09 Ingo Molnar
  2010-02-27 17:10 ` Arjan van de Ven
  2010-02-28 20:09 ` Linus Torvalds
  0 siblings, 2 replies; 12+ messages in thread
From: Ingo Molnar @ 2010-02-27 15:09 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: linux-kernel, H. Peter Anvin, Thomas Gleixner, Borislav Petkov,
	Andrew Morton

Linus,

Please pull the latest x86-cpu-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git x86-cpu-for-linus


out-of-topic modifications in x86-cpu-for-linus:
------------------------------------------------
drivers/char/agp/intel-agp.c       # 48a719c: intel-agp: Switch to wbinvd_on_al

 Thanks,

	Ingo

------------------>
Borislav Petkov (7):
      x86, lib: Add wbinvd smp helpers
      intel-agp: Switch to wbinvd_on_all_cpus
      x86, cacheinfo: Fix disabling of L3 cache indices
      x86, cacheinfo: Add cache index disable sysfs attrs only to L3 caches
      x86, cacheinfo: Calculate L3 indices
      x86, cacheinfo: Remove NUMA dependency, fix for AMD Fam10h rev D1
      x86, cacheinfo: Enable L3 CID only on AMD

Joerg Roedel (1):
      x86, cpu: Print AMD virtualization features in /proc/cpuinfo


 arch/x86/include/asm/cpufeature.h          |    4 +
 arch/x86/include/asm/smp.h                 |    9 +
 arch/x86/kernel/cpu/addon_cpuid_features.c |    4 +
 arch/x86/kernel/cpu/intel_cacheinfo.c      |  250 +++++++++++++++++-----------
 arch/x86/lib/Makefile                      |    2 +-
 arch/x86/lib/cache-smp.c                   |   19 ++
 drivers/char/agp/intel-agp.c               |   15 +--
 7 files changed, 197 insertions(+), 106 deletions(-)
 create mode 100644 arch/x86/lib/cache-smp.c

diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 637e1ec..0cd82d0 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -168,6 +168,10 @@
 #define X86_FEATURE_FLEXPRIORITY (8*32+ 2) /* Intel FlexPriority */
 #define X86_FEATURE_EPT         (8*32+ 3) /* Intel Extended Page Table */
 #define X86_FEATURE_VPID        (8*32+ 4) /* Intel Virtual Processor ID */
+#define X86_FEATURE_NPT		(8*32+5)  /* AMD Nested Page Table support */
+#define X86_FEATURE_LBRV	(8*32+6)  /* AMD LBR Virtualization support */
+#define X86_FEATURE_SVML	(8*32+7)  /* "svm_lock" AMD SVM locking MSR */
+#define X86_FEATURE_NRIPS	(8*32+8)  /* "nrip_save" AMD SVM next_rip save */
 
 #if defined(__KERNEL__) && !defined(__ASSEMBLY__)
 
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 1e79678..4cfc908 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -135,6 +135,8 @@ int native_cpu_disable(void);
 void native_cpu_die(unsigned int cpu);
 void native_play_dead(void);
 void play_dead_common(void);
+void wbinvd_on_cpu(int cpu);
+int wbinvd_on_all_cpus(void);
 
 void native_send_call_func_ipi(const struct cpumask *mask);
 void native_send_call_func_single_ipi(int cpu);
@@ -147,6 +149,13 @@ static inline int num_booting_cpus(void)
 {
 	return cpumask_weight(cpu_callout_mask);
 }
+#else /* !CONFIG_SMP */
+#define wbinvd_on_cpu(cpu)     wbinvd()
+static inline int wbinvd_on_all_cpus(void)
+{
+	wbinvd();
+	return 0;
+}
 #endif /* CONFIG_SMP */
 
 extern unsigned disabled_cpus __cpuinitdata;
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c
index 468489b..97ad79c 100644
--- a/arch/x86/kernel/cpu/addon_cpuid_features.c
+++ b/arch/x86/kernel/cpu/addon_cpuid_features.c
@@ -32,6 +32,10 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
 	static const struct cpuid_bit __cpuinitconst cpuid_bits[] = {
 		{ X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 },
 		{ X86_FEATURE_ARAT, CR_EAX, 2, 0x00000006 },
+		{ X86_FEATURE_NPT,   CR_EDX, 0, 0x8000000a },
+		{ X86_FEATURE_LBRV,  CR_EDX, 1, 0x8000000a },
+		{ X86_FEATURE_SVML,  CR_EDX, 2, 0x8000000a },
+		{ X86_FEATURE_NRIPS, CR_EDX, 3, 0x8000000a },
 		{ 0, 0, 0, 0 }
 	};
 
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index fc6c8ef..d440123 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -18,6 +18,7 @@
 #include <asm/processor.h>
 #include <linux/smp.h>
 #include <asm/k8.h>
+#include <asm/smp.h>
 
 #define LVL_1_INST	1
 #define LVL_1_DATA	2
@@ -150,7 +151,8 @@ struct _cpuid4_info {
 	union _cpuid4_leaf_ebx ebx;
 	union _cpuid4_leaf_ecx ecx;
 	unsigned long size;
-	unsigned long can_disable;
+	bool can_disable;
+	unsigned int l3_indices;
 	DECLARE_BITMAP(shared_cpu_map, NR_CPUS);
 };
 
@@ -160,7 +162,8 @@ struct _cpuid4_info_regs {
 	union _cpuid4_leaf_ebx ebx;
 	union _cpuid4_leaf_ecx ecx;
 	unsigned long size;
-	unsigned long can_disable;
+	bool can_disable;
+	unsigned int l3_indices;
 };
 
 unsigned short			num_cache_leaves;
@@ -290,6 +293,36 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
 		(ebx->split.ways_of_associativity + 1) - 1;
 }
 
+struct _cache_attr {
+	struct attribute attr;
+	ssize_t (*show)(struct _cpuid4_info *, char *);
+	ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count);
+};
+
+#ifdef CONFIG_CPU_SUP_AMD
+static unsigned int __cpuinit amd_calc_l3_indices(void)
+{
+	/*
+	 * We're called over smp_call_function_single() and therefore
+	 * are on the correct cpu.
+	 */
+	int cpu = smp_processor_id();
+	int node = cpu_to_node(cpu);
+	struct pci_dev *dev = node_to_k8_nb_misc(node);
+	unsigned int sc0, sc1, sc2, sc3;
+	u32 val = 0;
+
+	pci_read_config_dword(dev, 0x1C4, &val);
+
+	/* calculate subcache sizes */
+	sc0 = !(val & BIT(0));
+	sc1 = !(val & BIT(4));
+	sc2 = !(val & BIT(8))  + !(val & BIT(9));
+	sc3 = !(val & BIT(12)) + !(val & BIT(13));
+
+	return (max(max(max(sc0, sc1), sc2), sc3) << 10) - 1;
+}
+
 static void __cpuinit
 amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
 {
@@ -299,12 +332,103 @@ amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
 	if (boot_cpu_data.x86 == 0x11)
 		return;
 
-	/* see erratum #382 */
-	if ((boot_cpu_data.x86 == 0x10) && (boot_cpu_data.x86_model < 0x8))
+	/* see errata #382 and #388 */
+	if ((boot_cpu_data.x86 == 0x10) &&
+	    ((boot_cpu_data.x86_model < 0x8) ||
+	     (boot_cpu_data.x86_mask  < 0x1)))
 		return;
 
-	this_leaf->can_disable = 1;
+	this_leaf->can_disable = true;
+	this_leaf->l3_indices  = amd_calc_l3_indices();
+}
+
+static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
+				  unsigned int index)
+{
+	int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
+	int node = amd_get_nb_id(cpu);
+	struct pci_dev *dev = node_to_k8_nb_misc(node);
+	unsigned int reg = 0;
+
+	if (!this_leaf->can_disable)
+		return -EINVAL;
+
+	if (!dev)
+		return -EINVAL;
+
+	pci_read_config_dword(dev, 0x1BC + index * 4, &reg);
+	return sprintf(buf, "0x%08x\n", reg);
+}
+
+#define SHOW_CACHE_DISABLE(index)					\
+static ssize_t								\
+show_cache_disable_##index(struct _cpuid4_info *this_leaf, char *buf)	\
+{									\
+	return show_cache_disable(this_leaf, buf, index);		\
+}
+SHOW_CACHE_DISABLE(0)
+SHOW_CACHE_DISABLE(1)
+
+static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
+	const char *buf, size_t count, unsigned int index)
+{
+	int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
+	int node = amd_get_nb_id(cpu);
+	struct pci_dev *dev = node_to_k8_nb_misc(node);
+	unsigned long val = 0;
+
+#define SUBCACHE_MASK	(3UL << 20)
+#define SUBCACHE_INDEX	0xfff
+
+	if (!this_leaf->can_disable)
+		return -EINVAL;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (!dev)
+		return -EINVAL;
+
+	if (strict_strtoul(buf, 10, &val) < 0)
+		return -EINVAL;
+
+	/* do not allow writes outside of allowed bits */
+	if ((val & ~(SUBCACHE_MASK | SUBCACHE_INDEX)) ||
+	    ((val & SUBCACHE_INDEX) > this_leaf->l3_indices))
+		return -EINVAL;
+
+	val |= BIT(30);
+	pci_write_config_dword(dev, 0x1BC + index * 4, val);
+	/*
+	 * We need to WBINVD on a core on the node containing the L3 cache which
+	 * indices we disable therefore a simple wbinvd() is not sufficient.
+	 */
+	wbinvd_on_cpu(cpu);
+	pci_write_config_dword(dev, 0x1BC + index * 4, val | BIT(31));
+	return count;
+}
+
+#define STORE_CACHE_DISABLE(index)					\
+static ssize_t								\
+store_cache_disable_##index(struct _cpuid4_info *this_leaf,		\
+			    const char *buf, size_t count)		\
+{									\
+	return store_cache_disable(this_leaf, buf, count, index);	\
 }
+STORE_CACHE_DISABLE(0)
+STORE_CACHE_DISABLE(1)
+
+static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
+		show_cache_disable_0, store_cache_disable_0);
+static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
+		show_cache_disable_1, store_cache_disable_1);
+
+#else	/* CONFIG_CPU_SUP_AMD */
+static void __cpuinit
+amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
+{
+};
+#endif /* CONFIG_CPU_SUP_AMD */
 
 static int
 __cpuinit cpuid4_cache_lookup_regs(int index,
@@ -711,82 +835,6 @@ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf)
 #define to_object(k)	container_of(k, struct _index_kobject, kobj)
 #define to_attr(a)	container_of(a, struct _cache_attr, attr)
 
-static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
-				  unsigned int index)
-{
-	int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
-	int node = cpu_to_node(cpu);
-	struct pci_dev *dev = node_to_k8_nb_misc(node);
-	unsigned int reg = 0;
-
-	if (!this_leaf->can_disable)
-		return -EINVAL;
-
-	if (!dev)
-		return -EINVAL;
-
-	pci_read_config_dword(dev, 0x1BC + index * 4, &reg);
-	return sprintf(buf, "%x\n", reg);
-}
-
-#define SHOW_CACHE_DISABLE(index)					\
-static ssize_t								\
-show_cache_disable_##index(struct _cpuid4_info *this_leaf, char *buf)  	\
-{									\
-	return show_cache_disable(this_leaf, buf, index);		\
-}
-SHOW_CACHE_DISABLE(0)
-SHOW_CACHE_DISABLE(1)
-
-static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
-	const char *buf, size_t count, unsigned int index)
-{
-	int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
-	int node = cpu_to_node(cpu);
-	struct pci_dev *dev = node_to_k8_nb_misc(node);
-	unsigned long val = 0;
-	unsigned int scrubber = 0;
-
-	if (!this_leaf->can_disable)
-		return -EINVAL;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return -EPERM;
-
-	if (!dev)
-		return -EINVAL;
-
-	if (strict_strtoul(buf, 10, &val) < 0)
-		return -EINVAL;
-
-	val |= 0xc0000000;
-
-	pci_read_config_dword(dev, 0x58, &scrubber);
-	scrubber &= ~0x1f000000;
-	pci_write_config_dword(dev, 0x58, scrubber);
-
-	pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000);
-	wbinvd();
-	pci_write_config_dword(dev, 0x1BC + index * 4, val);
-	return count;
-}
-
-#define STORE_CACHE_DISABLE(index)					\
-static ssize_t								\
-store_cache_disable_##index(struct _cpuid4_info *this_leaf,	     	\
-			    const char *buf, size_t count)		\
-{									\
-	return store_cache_disable(this_leaf, buf, count, index);	\
-}
-STORE_CACHE_DISABLE(0)
-STORE_CACHE_DISABLE(1)
-
-struct _cache_attr {
-	struct attribute attr;
-	ssize_t (*show)(struct _cpuid4_info *, char *);
-	ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count);
-};
-
 #define define_one_ro(_name) \
 static struct _cache_attr _name = \
 	__ATTR(_name, 0444, show_##_name, NULL)
@@ -801,23 +849,28 @@ define_one_ro(size);
 define_one_ro(shared_cpu_map);
 define_one_ro(shared_cpu_list);
 
-static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
-		show_cache_disable_0, store_cache_disable_0);
-static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
-		show_cache_disable_1, store_cache_disable_1);
+#define DEFAULT_SYSFS_CACHE_ATTRS	\
+	&type.attr,			\
+	&level.attr,			\
+	&coherency_line_size.attr,	\
+	&physical_line_partition.attr,	\
+	&ways_of_associativity.attr,	\
+	&number_of_sets.attr,		\
+	&size.attr,			\
+	&shared_cpu_map.attr,		\
+	&shared_cpu_list.attr
 
 static struct attribute *default_attrs[] = {
-	&type.attr,
-	&level.attr,
-	&coherency_line_size.attr,
-	&physical_line_partition.attr,
-	&ways_of_associativity.attr,
-	&number_of_sets.attr,
-	&size.attr,
-	&shared_cpu_map.attr,
-	&shared_cpu_list.attr,
+	DEFAULT_SYSFS_CACHE_ATTRS,
+	NULL
+};
+
+static struct attribute *default_l3_attrs[] = {
+	DEFAULT_SYSFS_CACHE_ATTRS,
+#ifdef CONFIG_CPU_SUP_AMD
 	&cache_disable_0.attr,
 	&cache_disable_1.attr,
+#endif
 	NULL
 };
 
@@ -908,6 +961,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
 	unsigned int cpu = sys_dev->id;
 	unsigned long i, j;
 	struct _index_kobject *this_object;
+	struct _cpuid4_info   *this_leaf;
 	int retval;
 
 	retval = cpuid4_cache_sysfs_init(cpu);
@@ -926,6 +980,14 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
 		this_object = INDEX_KOBJECT_PTR(cpu, i);
 		this_object->cpu = cpu;
 		this_object->index = i;
+
+		this_leaf = CPUID4_INFO_IDX(cpu, i);
+
+		if (this_leaf->can_disable)
+			ktype_cache.default_attrs = default_l3_attrs;
+		else
+			ktype_cache.default_attrs = default_attrs;
+
 		retval = kobject_init_and_add(&(this_object->kobj),
 					      &ktype_cache,
 					      per_cpu(ici_cache_kobject, cpu),
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index cffd754..d85e0e4 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -14,7 +14,7 @@ $(obj)/inat.o: $(obj)/inat-tables.c
 
 clean-files := inat-tables.c
 
-obj-$(CONFIG_SMP) += msr-smp.o
+obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o
 
 lib-y := delay.o
 lib-y += thunk_$(BITS).o
diff --git a/arch/x86/lib/cache-smp.c b/arch/x86/lib/cache-smp.c
new file mode 100644
index 0000000..a3c6688
--- /dev/null
+++ b/arch/x86/lib/cache-smp.c
@@ -0,0 +1,19 @@
+#include <linux/smp.h>
+#include <linux/module.h>
+
+static void __wbinvd(void *dummy)
+{
+	wbinvd();
+}
+
+void wbinvd_on_cpu(int cpu)
+{
+	smp_call_function_single(cpu, __wbinvd, NULL, 1);
+}
+EXPORT_SYMBOL(wbinvd_on_cpu);
+
+int wbinvd_on_all_cpus(void)
+{
+	return on_each_cpu(__wbinvd, NULL, 1);
+}
+EXPORT_SYMBOL(wbinvd_on_all_cpus);
diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c
index 3999a5f..8a713f1 100644
--- a/drivers/char/agp/intel-agp.c
+++ b/drivers/char/agp/intel-agp.c
@@ -8,6 +8,7 @@
 #include <linux/kernel.h>
 #include <linux/pagemap.h>
 #include <linux/agp_backend.h>
+#include <asm/smp.h>
 #include "agp.h"
 
 /*
@@ -815,12 +816,6 @@ static void intel_i830_setup_flush(void)
 		intel_i830_fini_flush();
 }
 
-static void
-do_wbinvd(void *null)
-{
-	wbinvd();
-}
-
 /* The chipset_flush interface needs to get data that has already been
  * flushed out of the CPU all the way out to main memory, because the GPU
  * doesn't snoop those buffers.
@@ -837,12 +832,10 @@ static void intel_i830_chipset_flush(struct agp_bridge_data *bridge)
 
 	memset(pg, 0, 1024);
 
-	if (cpu_has_clflush) {
+	if (cpu_has_clflush)
 		clflush_cache_range(pg, 1024);
-	} else {
-		if (on_each_cpu(do_wbinvd, NULL, 1) != 0)
-			printk(KERN_ERR "Timed out waiting for cache flush.\n");
-	}
+	else if (wbinvd_on_all_cpus() != 0)
+		printk(KERN_ERR "Timed out waiting for cache flush.\n");
 }
 
 /* The intel i830 automatically initializes the agp aperture during POST.

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [GIT PULL] x86/cpu changes for v2.6.34
  2010-02-27 15:09 [GIT PULL] x86/cpu changes for v2.6.34 Ingo Molnar
@ 2010-02-27 17:10 ` Arjan van de Ven
  2010-02-27 20:03   ` H. Peter Anvin
  2010-02-28 20:09 ` Linus Torvalds
  1 sibling, 1 reply; 12+ messages in thread
From: Arjan van de Ven @ 2010-02-27 17:10 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, linux-kernel, H. Peter Anvin, Thomas Gleixner,
	Borislav Petkov, Andrew Morton

On Sat, 27 Feb 2010 16:09:42 +0100
Ingo Molnar <mingo@elte.hu> wrote:

> +int wbinvd_on_all_cpus(void)
> +{
> +	return on_each_cpu(__wbinvd, NULL, 1);
> +}

does this make sense at all?

doesn't cache coherency on x86 already guarantee this?


-- 
Arjan van de Ven 	Intel Open Source Technology Centre
For development, discussion and tips for power savings, 
visit http://www.lesswatts.org

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [GIT PULL] x86/cpu changes for v2.6.34
  2010-02-27 17:10 ` Arjan van de Ven
@ 2010-02-27 20:03   ` H. Peter Anvin
  0 siblings, 0 replies; 12+ messages in thread
From: H. Peter Anvin @ 2010-02-27 20:03 UTC (permalink / raw)
  To: Arjan van de Ven
  Cc: Ingo Molnar, Linus Torvalds, linux-kernel, Thomas Gleixner,
	Borislav Petkov, Andrew Morton

On 02/27/2010 09:10 AM, Arjan van de Ven wrote:
> On Sat, 27 Feb 2010 16:09:42 +0100
> Ingo Molnar <mingo@elte.hu> wrote:
> 
>> +int wbinvd_on_all_cpus(void)
>> +{
>> +	return on_each_cpu(__wbinvd, NULL, 1);
>> +}
> 
> does this make sense at all?
> 
> doesn't cache coherency on x86 already guarantee this?
> 

No, WBINVD (unlike CLFLUSH) is local to one CPU.

	-hpa

-- 
H. Peter Anvin, Intel Open Source Technology Center
I work for Intel.  I don't speak on their behalf.


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [GIT PULL] x86/cpu changes for v2.6.34
  2010-02-27 15:09 [GIT PULL] x86/cpu changes for v2.6.34 Ingo Molnar
  2010-02-27 17:10 ` Arjan van de Ven
@ 2010-02-28 20:09 ` Linus Torvalds
  2010-02-28 20:45   ` Linus Torvalds
  1 sibling, 1 reply; 12+ messages in thread
From: Linus Torvalds @ 2010-02-28 20:09 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: linux-kernel, H. Peter Anvin, Thomas Gleixner, Borislav Petkov,
	Andrew Morton


I haven't bisected this, but something slowed down in bootup on my machine 
recently.

See the timestamps:

	[    0.000000] Linux version 2.6.33-01832-g30ff056 
	...
	[    0.010066] Enabled Interrupt-remapping
	[    0.010120] Setting APIC routing to physical flat
	[    0.010180] DRHD: handling fault status reg 2
	[    0.010582] ..TIMER: vector=0x30 apic1=0 pin1=2 apic2=-1 pin2=-1
	[    0.049955] CPU0: Genuine Intel(R) CPU             000  @ 3.20GHz stepping 04
	[    0.157195] Booting Node   0, Processors  #1
	[    0.245179] CPU 1 MCA banks CMCI:2 CMCI:3 CMCI:5 SHD:6 SHD:8
	[    0.265332]  #2
	[    0.353185] CPU 2 MCA banks CMCI:2 CMCI:3 CMCI:5 SHD:6 SHD:8
	[    0.373328]  #3
	[    2.193277] CPU 3 MCA banks CMCI:2 CMCI:3 CMCI:5 SHD:6 SHD:8
	[    2.213379]  #4
	[    2.301283] CPU 4 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
	[    2.321391]  #5
	[    2.417287] CPU 5 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
	[    2.437356]  #6
	[    2.525293] CPU 6 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
	[    2.545354]  #7
	[    2.633298] CPU 7 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
	[    2.653423] Brought up 8 CPUs
	[    2.653571] Total of 8 processors activated (51201.44 BogoMIPS).

what happened there for almost 2 seconds in between CPU#3 and CPU#4?

It wasn't very fast before either, but it was way better:

	[    0.050298] CPU0: Genuine Intel(R) CPU             000  @ 3.20GHz stepping 04
	[    0.156725] Booting Node   0, Processors  #1
	[    0.244410] CPU 1 MCA banks CMCI:2 CMCI:3 CMCI:5 SHD:6 SHD:8
	[    0.264458]  #2
	[    0.352078] CPU 2 MCA banks CMCI:2 CMCI:3 CMCI:5 SHD:6 SHD:8
	[    0.372147]  #3
	[    0.459746] CPU 3 MCA banks CMCI:2 CMCI:3 CMCI:5 SHD:6 SHD:8
	[    0.479838]  #4
	[    0.567415] CPU 4 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
	[    0.587446]  #5
	[    0.683057] CPU 5 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
	[    0.703081]  #6
	[    0.790724] CPU 6 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
	[    0.810748]  #7
	[    0.898393] CPU 7 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
	[    0.918412] Brought up 8 CPUs
	[    0.918562] Total of 8 processors activated (51203.34 BogoMIPS).

what is it that takes so long to bring those CPU's up?

		Linus

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [GIT PULL] x86/cpu changes for v2.6.34
  2010-02-28 20:09 ` Linus Torvalds
@ 2010-02-28 20:45   ` Linus Torvalds
  2010-03-01  8:00     ` Ingo Molnar
  0 siblings, 1 reply; 12+ messages in thread
From: Linus Torvalds @ 2010-02-28 20:45 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: linux-kernel, H. Peter Anvin, Thomas Gleixner, Borislav Petkov,
	Andrew Morton



On Sun, 28 Feb 2010, Linus Torvalds wrote:
> 
> I haven't bisected this, but something slowed down in bootup on my machine 
> recently.

Hmm. I take that back. It's not consistent, and it's not recent after all. 

It comes and goes:

	[torvalds@nehalem linux]$ grep "CPU 7 MCA" /var/log/messages-* /var/log/messages | cut -d: -f5-
	 [    0.898396] CPU 7 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
	 [    0.898400] CPU 7 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
	 [    1.596240] CPU 7 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
	 [    0.898394] CPU 7 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
	 [    1.600229] CPU 7 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
	 [    0.898395] CPU 7 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
	 [    0.901211] CPU 7 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
	 [    2.633298] CPU 7 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
	 [    0.898393] CPU 7 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
	 [    0.901210] CPU 7 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
	 [    0.898395] CPU 7 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
	 [    0.898393] CPU 7 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
	 [    0.898393] CPU 7 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
	 [    0.898402] CPU 7 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
	 [    0.901213] CPU 7 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
	 [    0.898392] CPU 7 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
	 [    0.898395] CPU 7 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
	 [    1.601467] CPU 7 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
	 [    0.898401] CPU 7 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
	 [    0.898395] CPU 7 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
	 [    0.898397] CPU 7 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8

note how it's pretty consistently at about the 0.89s mark, but then 
there's a _couple_ of times when it's taken rather longer to boot. But the 
delay is always in that CPU bringup phase, because doing the same grep for 
"CPU 0 MCA" gives consistently low numbers (0.0005s).

			Linus

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [GIT PULL] x86/cpu changes for v2.6.34
  2010-02-28 20:45   ` Linus Torvalds
@ 2010-03-01  8:00     ` Ingo Molnar
  2010-03-01 13:17       ` Frederic Weisbecker
  0 siblings, 1 reply; 12+ messages in thread
From: Ingo Molnar @ 2010-03-01  8:00 UTC (permalink / raw)
  To: Linus Torvalds, Steven Rostedt, Frédéric Weisbecker,
	Thomas Gleixner
  Cc: linux-kernel, H. Peter Anvin, Thomas Gleixner, Borislav Petkov,
	Andrew Morton


* Linus Torvalds <torvalds@linux-foundation.org> wrote:

> On Sun, 28 Feb 2010, Linus Torvalds wrote:
> > 
> > I haven't bisected this, but something slowed down in bootup on my machine 
> > recently.
> 
> Hmm. I take that back. It's not consistent, and it's not recent after all. 
> 
> It comes and goes:
> 
> 	[torvalds@nehalem linux]$ grep "CPU 7 MCA" /var/log/messages-* /var/log/messages | cut -d: -f5-
> 	 [    0.898396] CPU 7 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
> 	 [    0.898400] CPU 7 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
> 	 [    1.596240] CPU 7 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
> 	 [    0.898394] CPU 7 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
> 	 [    1.600229] CPU 7 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
> 	 [    0.898395] CPU 7 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
> 	 [    0.901211] CPU 7 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
> 	 [    2.633298] CPU 7 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
> 	 [    0.898393] CPU 7 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
> 	 [    0.901210] CPU 7 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
> 	 [    0.898395] CPU 7 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
> 	 [    0.898393] CPU 7 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
> 	 [    0.898393] CPU 7 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
> 	 [    0.898402] CPU 7 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
> 	 [    0.901213] CPU 7 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
> 	 [    0.898392] CPU 7 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
> 	 [    0.898395] CPU 7 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
> 	 [    1.601467] CPU 7 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
> 	 [    0.898401] CPU 7 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
> 	 [    0.898395] CPU 7 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
> 	 [    0.898397] CPU 7 MCA banks SHD:2 SHD:3 SHD:5 SHD:6 SHD:8
> 
> note how it's pretty consistently at about the 0.89s mark, but then there's 
> a _couple_ of times when it's taken rather longer to boot. But the delay is 
> always in that CPU bringup phase, because doing the same grep for "CPU 0 
> MCA" gives consistently low numbers (0.0005s).

Weird. It seems to be around multiples of .8: 0.8, 1.6, 2.4, with some extra 
overhead.

Almost as if some calibration routine or some other busy-loop misses the train 
occasionally.

The way i'd go about debugging this is to narrow down the approximate place 
the slowdown happens, then enable CONFIG_FUNCTION_TRACER (and disable 
CONFIG_DYNAMIC_FTRACE=y, to not have to deal with the dynamic patching 
aspects), and do a single-shot tracing session of only that section, on only 
one CPU:

	if (smp_processor_id() == 7)
		ftrace_enabled = 1;

	... bootup sequence ...

	if (smp_processor_id() == 7)
		ftrace_enabled = 0;

And recover the resulting trace from /debug/tracing/trace - it should have the reason
in it plain and simple.

( Unfortunately i'm not 100% sure that setting ftrace_enabled to 1 is enough. 
  I asked for a simple ad-hoc enable/disable function tracing mechanism _ages_ 
  ago - Steve, Frederic, what happened to that? ftrace_start()/stop() does not 
  seem to allow that. )

Or you could sprinkle the code with printk's, and see where the overhead 
concentrates into. (But printks can change timings - etc. So can the function 
tracer as well ...)

	Ingo


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [GIT PULL] x86/cpu changes for v2.6.34
  2010-03-01  8:00     ` Ingo Molnar
@ 2010-03-01 13:17       ` Frederic Weisbecker
  2010-03-01 16:47         ` Linus Torvalds
  2010-03-01 19:29         ` Steven Rostedt
  0 siblings, 2 replies; 12+ messages in thread
From: Frederic Weisbecker @ 2010-03-01 13:17 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, Steven Rostedt, Thomas Gleixner, linux-kernel,
	H. Peter Anvin, Borislav Petkov, Andrew Morton

On Mon, Mar 01, 2010 at 09:00:58AM +0100, Ingo Molnar wrote:
> Weird. It seems to be around multiples of .8: 0.8, 1.6, 2.4, with some extra 
> overhead.
> 
> Almost as if some calibration routine or some other busy-loop misses the train 
> occasionally.
> 
> The way i'd go about debugging this is to narrow down the approximate place 
> the slowdown happens, then enable CONFIG_FUNCTION_TRACER (and disable 
> CONFIG_DYNAMIC_FTRACE=y, to not have to deal with the dynamic patching 
> aspects), and do a single-shot tracing session of only that section, on only 
> one CPU:
> 
> 	if (smp_processor_id() == 7)
> 		ftrace_enabled = 1;
> 
> 	... bootup sequence ...
> 
> 	if (smp_processor_id() == 7)
> 		ftrace_enabled = 0;
> 
> And recover the resulting trace from /debug/tracing/trace - it should have the reason
> in it plain and simple.
> 
> ( Unfortunately i'm not 100% sure that setting ftrace_enabled to 1 is enough. 
>   I asked for a simple ad-hoc enable/disable function tracing mechanism _ages_ 
>   ago - Steve, Frederic, what happened to that? ftrace_start()/stop() does not 
>   seem to allow that. )



I don't remember such request. But that would be useful indeed.
We could simply pair the setting of an early tracer with tracing
disabled and then manually delimit the places to trace with
tracing_on/tracing_off().

Whatever.

For now what you can do is setting the function_graph tracer
on bootup:

	ftrace=function_graph

and call ftrace_graph_stop() in the place you want the trace
to finish (you could use ftrace_graph_filter= to delimit
the function tracing window, but that won't work without
dynamic tracing, neither with __init functions).

So, after the boot you can look at /debug/tracing/per_cpu/cpu7/trace
and the end of the trace should contain what you want.


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [GIT PULL] x86/cpu changes for v2.6.34
  2010-03-01 13:17       ` Frederic Weisbecker
@ 2010-03-01 16:47         ` Linus Torvalds
  2010-03-01 19:42           ` Steven Rostedt
  2010-03-01 19:29         ` Steven Rostedt
  1 sibling, 1 reply; 12+ messages in thread
From: Linus Torvalds @ 2010-03-01 16:47 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: Ingo Molnar, Steven Rostedt, Thomas Gleixner, linux-kernel,
	H. Peter Anvin, Borislav Petkov, Andrew Morton



On Mon, 1 Mar 2010, Frederic Weisbecker wrote:

> On Mon, Mar 01, 2010 at 09:00:58AM +0100, Ingo Molnar wrote:
> > 
> > 	if (smp_processor_id() == 7)
> > 		ftrace_enabled = 1;
> > 
> > 	... bootup sequence ...
> > 
> > 	if (smp_processor_id() == 7)
> > 		ftrace_enabled = 0;

> So, after the boot you can look at /debug/tracing/per_cpu/cpu7/trace
> and the end of the trace should contain what you want.

Both of you seemed to miss the fact that it's not cpu7 that is 
particularly slow. See the original email from me in this thread: the jump 
was at some random point:

        [    0.245179] CPU 1 MCA banks CMCI:2 CMCI:3 CMCI:5 SHD:6 SHD:8
        [    0.265332]  #2
        [    0.353185] CPU 2 MCA banks CMCI:2 CMCI:3 CMCI:5 SHD:6 SHD:8
        [    0.373328]  #3
        [    2.193277] CPU 3 MCA banks CMCI:2 CMCI:3 CMCI:5 SHD:6 SHD:8
        [    2.213379]  #4

and the reason I grepped for "CPU 7" was that it's the _last_ CPU on this 
machine, so what I was grepping for was basically "how long did it take to 
bring up all CPU's".

So that particular really bad case apparently happened for CPU#3, but the 
two other slow cases happened for CPU#4.

Also, it seems to happen only about every fifth boot or so. Suggestions 
for something simple that can trace things like that?

		Linus

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [GIT PULL] x86/cpu changes for v2.6.34
  2010-03-01 13:17       ` Frederic Weisbecker
  2010-03-01 16:47         ` Linus Torvalds
@ 2010-03-01 19:29         ` Steven Rostedt
  1 sibling, 0 replies; 12+ messages in thread
From: Steven Rostedt @ 2010-03-01 19:29 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: Ingo Molnar, Linus Torvalds, Thomas Gleixner, linux-kernel,
	H. Peter Anvin, Borislav Petkov, Andrew Morton

On Mon, 2010-03-01 at 14:17 +0100, Frederic Weisbecker wrote:
> On Mon, Mar 01, 2010 at 09:00:58AM +0100, Ingo Molnar wrote:
> > Weird. It seems to be around multiples of .8: 0.8, 1.6, 2.4, with some extra 
> > overhead.
> > 
> > Almost as if some calibration routine or some other busy-loop misses the train 
> > occasionally.
> > 
> > The way i'd go about debugging this is to narrow down the approximate place 
> > the slowdown happens, then enable CONFIG_FUNCTION_TRACER (and disable 
> > CONFIG_DYNAMIC_FTRACE=y, to not have to deal with the dynamic patching 
> > aspects), and do a single-shot tracing session of only that section, on only 
> > one CPU:
> > 
> > 	if (smp_processor_id() == 7)
> > 		ftrace_enabled = 1;
> > 
> > 	... bootup sequence ...
> > 
> > 	if (smp_processor_id() == 7)
> > 		ftrace_enabled = 0;
> > 
> > And recover the resulting trace from /debug/tracing/trace - it should have the reason
> > in it plain and simple.
> > 
> > ( Unfortunately i'm not 100% sure that setting ftrace_enabled to 1 is enough. 
> >   I asked for a simple ad-hoc enable/disable function tracing mechanism _ages_ 
> >   ago - Steve, Frederic, what happened to that? ftrace_start()/stop() does not 
> >   seem to allow that. )
> 

Setting ftrace_enabled = 0 should stop the function tracer, but may not
stop the function graph tracer.

> 
> 
> I don't remember such request. But that would be useful indeed.
> We could simply pair the setting of an early tracer with tracing
> disabled and then manually delimit the places to trace with
> tracing_on/tracing_off().

It's best to use tracing_off() and tracing_on() for such things.

> 
> Whatever.
> 
> For now what you can do is setting the function_graph tracer
> on bootup:
> 
> 	ftrace=function_graph
> 
> and call ftrace_graph_stop() in the place you want the trace

tracing_off() is the best API for this. Although you still have the
overhead of the tracer. But you can just
 echo nop > /debug/tracing/current_tracer
to remove the overhead after bootup.

-- Steve

> to finish (you could use ftrace_graph_filter= to delimit
> the function tracing window, but that won't work without
> dynamic tracing, neither with __init functions).
> 
> So, after the boot you can look at /debug/tracing/per_cpu/cpu7/trace
> and the end of the trace should contain what you want.
> 



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [GIT PULL] x86/cpu changes for v2.6.34
  2010-03-01 16:47         ` Linus Torvalds
@ 2010-03-01 19:42           ` Steven Rostedt
  2010-03-01 22:23             ` Steven Rostedt
  2010-03-01 22:24             ` H. Peter Anvin
  0 siblings, 2 replies; 12+ messages in thread
From: Steven Rostedt @ 2010-03-01 19:42 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Frederic Weisbecker, Ingo Molnar, Thomas Gleixner, linux-kernel,
	H. Peter Anvin, Borislav Petkov, Andrew Morton

On Mon, 2010-03-01 at 08:47 -0800, Linus Torvalds wrote:

> Both of you seemed to miss the fact that it's not cpu7 that is 
> particularly slow. See the original email from me in this thread: the jump 
> was at some random point:
> 
>         [    0.245179] CPU 1 MCA banks CMCI:2 CMCI:3 CMCI:5 SHD:6 SHD:8
>         [    0.265332]  #2
>         [    0.353185] CPU 2 MCA banks CMCI:2 CMCI:3 CMCI:5 SHD:6 SHD:8
>         [    0.373328]  #3
>         [    2.193277] CPU 3 MCA banks CMCI:2 CMCI:3 CMCI:5 SHD:6 SHD:8
>         [    2.213379]  #4
> 
> and the reason I grepped for "CPU 7" was that it's the _last_ CPU on this 
> machine, so what I was grepping for was basically "how long did it take to 
> bring up all CPU's".
> 
> So that particular really bad case apparently happened for CPU#3, but the 
> two other slow cases happened for CPU#4.
> 
> Also, it seems to happen only about every fifth boot or so. Suggestions 
> for something simple that can trace things like that?

As Frederic has said you can use 'ftrace=function_graph' on the kernel
command line. It will be initialized in early_initcall (which I believe
is before CPUs are set up). Then add a tracing_off() after the trouble
code. You can make the trace buffers bigger with the kernel command
line:

	trace_buf_size=10000000

The above will make the trace buffer 10Meg per CPU. Unlike the
"buffer_size_kb" file, this number is in bytes, even though it will
round to the nearest page. (I probably should make this into kb, and
rename it to trace_buf_size_kb, and deprecate trace_buf_size).

Then you can cat out /debug/tracing/trace, and search for large
latencies in the timestamps.

-- Steve




^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [GIT PULL] x86/cpu changes for v2.6.34
  2010-03-01 19:42           ` Steven Rostedt
@ 2010-03-01 22:23             ` Steven Rostedt
  2010-03-01 22:24             ` H. Peter Anvin
  1 sibling, 0 replies; 12+ messages in thread
From: Steven Rostedt @ 2010-03-01 22:23 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Frederic Weisbecker, Ingo Molnar, Thomas Gleixner, linux-kernel,
	H. Peter Anvin, Borislav Petkov, Andrew Morton

On Mon, 2010-03-01 at 14:42 -0500, Steven Rostedt wrote:

> As Frederic has said you can use 'ftrace=function_graph' on the kernel
> command line. It will be initialized in early_initcall (which I believe
> is before CPUs are set up. Then add a tracing_off() after the trouble
> code. You can make the trace buffers bigger with the kernel command
> line:
> 
> 	trace_buf_size=10000000
> 
> The above will make the trace buffer 10Meg per CPU. Unlike the
> "buffer_size_kb" file, this number is in bytes, even though it will
> round to the nearest page. (I probably should make this into kb, and
> rename it to trace_buf_size_kb, and deprecate trace_buf_size).
> 
> Then you can cat out /debug/tracing/trace, and search for large
> latencies in the timestamps.

I just tried the above and it doesn't work. The ring buffer gets
allocated with the early_initcall(), so trace_printk()'s will work. But
the function and function graph tracers don't get registered until the
device_initcall().

If you are still interested, this patch will allow you to run the
function graph tracer before smp_init(). You still need to add
"ftrace=function_graph" on the kernel command line.

It's a hack, but I tried it out and it worked.

-- Steve



diff --git a/init/main.c b/init/main.c
index 4cb47a1..b334663 100644
--- a/init/main.c
+++ b/init/main.c
@@ -868,8 +868,15 @@ static int __init kernel_init(void * unused)
 	do_pre_smp_initcalls();
 	start_boot_trace();
 
+	{
+		int init_graph_trace(void);
+		init_graph_trace();
+	}
+	trace_printk("start\n");
 	smp_init();
 	sched_init_smp();
+	trace_printk("end\n");
+	tracing_off();
 
 	do_basic_setup();
 
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index aaf580c..f18cad8 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -1214,11 +1214,11 @@ static struct tracer graph_trace __read_mostly = {
 #endif
 };
 
-static __init int init_graph_trace(void)
+__init int init_graph_trace(void)
 {
 	max_bytes_for_cpu = snprintf(NULL, 0, "%d", nr_cpu_ids - 1);
 
 	return register_tracer(&graph_trace);
 }
 
-device_initcall(init_graph_trace);
+//device_initcall(init_graph_trace);



^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [GIT PULL] x86/cpu changes for v2.6.34
  2010-03-01 19:42           ` Steven Rostedt
  2010-03-01 22:23             ` Steven Rostedt
@ 2010-03-01 22:24             ` H. Peter Anvin
  1 sibling, 0 replies; 12+ messages in thread
From: H. Peter Anvin @ 2010-03-01 22:24 UTC (permalink / raw)
  To: rostedt
  Cc: Linus Torvalds, Frederic Weisbecker, Ingo Molnar, Thomas Gleixner,
	linux-kernel, Borislav Petkov, Andrew Morton

On 03/01/2010 11:42 AM, Steven Rostedt wrote:
> 
> As Frederic has said you can use 'ftrace=function_graph' on the kernel
> command line. It will be initialized in early_initcall (which I believe
> is before CPUs are set up. Then add a tracing_off() after the trouble
> code. You can make the trace buffers bigger with the kernel command
> line:
> 
> 	trace_buf_size=10000000
> 
> The above will make the trace buffer 10Meg per CPU. Unlike the
> "buffer_size_kb" file, this number is in bytes, even though it will
> round to the nearest page. (I probably should make this into kb, and
> rename it to trace_buf_size_kb, and deprecate trace_buf_size).
> 

Memory sizes specified on the kernel command line should generally be in
units of bytes, but accepting suffixes.

	-hpa

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2010-03-01 22:26 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-02-27 15:09 [GIT PULL] x86/cpu changes for v2.6.34 Ingo Molnar
2010-02-27 17:10 ` Arjan van de Ven
2010-02-27 20:03   ` H. Peter Anvin
2010-02-28 20:09 ` Linus Torvalds
2010-02-28 20:45   ` Linus Torvalds
2010-03-01  8:00     ` Ingo Molnar
2010-03-01 13:17       ` Frederic Weisbecker
2010-03-01 16:47         ` Linus Torvalds
2010-03-01 19:42           ` Steven Rostedt
2010-03-01 22:23             ` Steven Rostedt
2010-03-01 22:24             ` H. Peter Anvin
2010-03-01 19:29         ` Steven Rostedt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox