* [PATCH 1/7] x86, smp: Convert msr-smp.c to an smp library
2010-01-21 16:41 [PATCH -v2 0/7] x86, cacheinfo, amd: L3 Cache Index Disable fixes Borislav Petkov
@ 2010-01-21 16:41 ` Borislav Petkov
2010-01-21 18:08 ` H. Peter Anvin
2010-01-21 16:41 ` [PATCH 2/7] x86, smp: Adjust native_wbinvd for smp_call_function-helpers Borislav Petkov
` (5 subsequent siblings)
6 siblings, 1 reply; 12+ messages in thread
From: Borislav Petkov @ 2010-01-21 16:41 UTC (permalink / raw)
To: mingo, hpa, tglx; +Cc: andreas.herrmann3, x86, linux-kernel
From: Borislav Petkov <borislav.petkov@amd.com>
Put all smp-related helpers into lib/smp.c to use for the rest of the
kernel.
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
---
arch/x86/lib/Makefile | 2 +-
arch/x86/lib/msr-smp.c | 204 ------------------------------------------------
arch/x86/lib/smp.c | 204 ++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 205 insertions(+), 205 deletions(-)
delete mode 100644 arch/x86/lib/msr-smp.c
create mode 100644 arch/x86/lib/smp.c
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index cffd754..48a5ced 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -14,7 +14,7 @@ $(obj)/inat.o: $(obj)/inat-tables.c
clean-files := inat-tables.c
-obj-$(CONFIG_SMP) += msr-smp.o
+obj-$(CONFIG_SMP) += smp.o
lib-y := delay.o
lib-y += thunk_$(BITS).o
diff --git a/arch/x86/lib/msr-smp.c b/arch/x86/lib/msr-smp.c
deleted file mode 100644
index a6b1b86..0000000
--- a/arch/x86/lib/msr-smp.c
+++ /dev/null
@@ -1,204 +0,0 @@
-#include <linux/module.h>
-#include <linux/preempt.h>
-#include <linux/smp.h>
-#include <asm/msr.h>
-
-static void __rdmsr_on_cpu(void *info)
-{
- struct msr_info *rv = info;
- struct msr *reg;
- int this_cpu = raw_smp_processor_id();
-
- if (rv->msrs)
- reg = per_cpu_ptr(rv->msrs, this_cpu);
- else
- reg = &rv->reg;
-
- rdmsr(rv->msr_no, reg->l, reg->h);
-}
-
-static void __wrmsr_on_cpu(void *info)
-{
- struct msr_info *rv = info;
- struct msr *reg;
- int this_cpu = raw_smp_processor_id();
-
- if (rv->msrs)
- reg = per_cpu_ptr(rv->msrs, this_cpu);
- else
- reg = &rv->reg;
-
- wrmsr(rv->msr_no, reg->l, reg->h);
-}
-
-int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
-{
- int err;
- struct msr_info rv;
-
- memset(&rv, 0, sizeof(rv));
-
- rv.msr_no = msr_no;
- err = smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1);
- *l = rv.reg.l;
- *h = rv.reg.h;
-
- return err;
-}
-EXPORT_SYMBOL(rdmsr_on_cpu);
-
-int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
-{
- int err;
- struct msr_info rv;
-
- memset(&rv, 0, sizeof(rv));
-
- rv.msr_no = msr_no;
- rv.reg.l = l;
- rv.reg.h = h;
- err = smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1);
-
- return err;
-}
-EXPORT_SYMBOL(wrmsr_on_cpu);
-
-static void __rwmsr_on_cpus(const struct cpumask *mask, u32 msr_no,
- struct msr *msrs,
- void (*msr_func) (void *info))
-{
- struct msr_info rv;
- int this_cpu;
-
- memset(&rv, 0, sizeof(rv));
-
- rv.msrs = msrs;
- rv.msr_no = msr_no;
-
- this_cpu = get_cpu();
-
- if (cpumask_test_cpu(this_cpu, mask))
- msr_func(&rv);
-
- smp_call_function_many(mask, msr_func, &rv, 1);
- put_cpu();
-}
-
-/* rdmsr on a bunch of CPUs
- *
- * @mask: which CPUs
- * @msr_no: which MSR
- * @msrs: array of MSR values
- *
- */
-void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs)
-{
- __rwmsr_on_cpus(mask, msr_no, msrs, __rdmsr_on_cpu);
-}
-EXPORT_SYMBOL(rdmsr_on_cpus);
-
-/*
- * wrmsr on a bunch of CPUs
- *
- * @mask: which CPUs
- * @msr_no: which MSR
- * @msrs: array of MSR values
- *
- */
-void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs)
-{
- __rwmsr_on_cpus(mask, msr_no, msrs, __wrmsr_on_cpu);
-}
-EXPORT_SYMBOL(wrmsr_on_cpus);
-
-/* These "safe" variants are slower and should be used when the target MSR
- may not actually exist. */
-static void __rdmsr_safe_on_cpu(void *info)
-{
- struct msr_info *rv = info;
-
- rv->err = rdmsr_safe(rv->msr_no, &rv->reg.l, &rv->reg.h);
-}
-
-static void __wrmsr_safe_on_cpu(void *info)
-{
- struct msr_info *rv = info;
-
- rv->err = wrmsr_safe(rv->msr_no, rv->reg.l, rv->reg.h);
-}
-
-int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
-{
- int err;
- struct msr_info rv;
-
- memset(&rv, 0, sizeof(rv));
-
- rv.msr_no = msr_no;
- err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1);
- *l = rv.reg.l;
- *h = rv.reg.h;
-
- return err ? err : rv.err;
-}
-EXPORT_SYMBOL(rdmsr_safe_on_cpu);
-
-int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
-{
- int err;
- struct msr_info rv;
-
- memset(&rv, 0, sizeof(rv));
-
- rv.msr_no = msr_no;
- rv.reg.l = l;
- rv.reg.h = h;
- err = smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 1);
-
- return err ? err : rv.err;
-}
-EXPORT_SYMBOL(wrmsr_safe_on_cpu);
-
-/*
- * These variants are significantly slower, but allows control over
- * the entire 32-bit GPR set.
- */
-static void __rdmsr_safe_regs_on_cpu(void *info)
-{
- struct msr_regs_info *rv = info;
-
- rv->err = rdmsr_safe_regs(rv->regs);
-}
-
-static void __wrmsr_safe_regs_on_cpu(void *info)
-{
- struct msr_regs_info *rv = info;
-
- rv->err = wrmsr_safe_regs(rv->regs);
-}
-
-int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs)
-{
- int err;
- struct msr_regs_info rv;
-
- rv.regs = regs;
- rv.err = -EIO;
- err = smp_call_function_single(cpu, __rdmsr_safe_regs_on_cpu, &rv, 1);
-
- return err ? err : rv.err;
-}
-EXPORT_SYMBOL(rdmsr_safe_regs_on_cpu);
-
-int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs)
-{
- int err;
- struct msr_regs_info rv;
-
- rv.regs = regs;
- rv.err = -EIO;
- err = smp_call_function_single(cpu, __wrmsr_safe_regs_on_cpu, &rv, 1);
-
- return err ? err : rv.err;
-}
-EXPORT_SYMBOL(wrmsr_safe_regs_on_cpu);
diff --git a/arch/x86/lib/smp.c b/arch/x86/lib/smp.c
new file mode 100644
index 0000000..a6b1b86
--- /dev/null
+++ b/arch/x86/lib/smp.c
@@ -0,0 +1,204 @@
+#include <linux/module.h>
+#include <linux/preempt.h>
+#include <linux/smp.h>
+#include <asm/msr.h>
+
+static void __rdmsr_on_cpu(void *info)
+{
+ struct msr_info *rv = info;
+ struct msr *reg;
+ int this_cpu = raw_smp_processor_id();
+
+ if (rv->msrs)
+ reg = per_cpu_ptr(rv->msrs, this_cpu);
+ else
+ reg = &rv->reg;
+
+ rdmsr(rv->msr_no, reg->l, reg->h);
+}
+
+static void __wrmsr_on_cpu(void *info)
+{
+ struct msr_info *rv = info;
+ struct msr *reg;
+ int this_cpu = raw_smp_processor_id();
+
+ if (rv->msrs)
+ reg = per_cpu_ptr(rv->msrs, this_cpu);
+ else
+ reg = &rv->reg;
+
+ wrmsr(rv->msr_no, reg->l, reg->h);
+}
+
+int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
+{
+ int err;
+ struct msr_info rv;
+
+ memset(&rv, 0, sizeof(rv));
+
+ rv.msr_no = msr_no;
+ err = smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1);
+ *l = rv.reg.l;
+ *h = rv.reg.h;
+
+ return err;
+}
+EXPORT_SYMBOL(rdmsr_on_cpu);
+
+int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
+{
+ int err;
+ struct msr_info rv;
+
+ memset(&rv, 0, sizeof(rv));
+
+ rv.msr_no = msr_no;
+ rv.reg.l = l;
+ rv.reg.h = h;
+ err = smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1);
+
+ return err;
+}
+EXPORT_SYMBOL(wrmsr_on_cpu);
+
+static void __rwmsr_on_cpus(const struct cpumask *mask, u32 msr_no,
+ struct msr *msrs,
+ void (*msr_func) (void *info))
+{
+ struct msr_info rv;
+ int this_cpu;
+
+ memset(&rv, 0, sizeof(rv));
+
+ rv.msrs = msrs;
+ rv.msr_no = msr_no;
+
+ this_cpu = get_cpu();
+
+ if (cpumask_test_cpu(this_cpu, mask))
+ msr_func(&rv);
+
+ smp_call_function_many(mask, msr_func, &rv, 1);
+ put_cpu();
+}
+
+/* rdmsr on a bunch of CPUs
+ *
+ * @mask: which CPUs
+ * @msr_no: which MSR
+ * @msrs: array of MSR values
+ *
+ */
+void rdmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs)
+{
+ __rwmsr_on_cpus(mask, msr_no, msrs, __rdmsr_on_cpu);
+}
+EXPORT_SYMBOL(rdmsr_on_cpus);
+
+/*
+ * wrmsr on a bunch of CPUs
+ *
+ * @mask: which CPUs
+ * @msr_no: which MSR
+ * @msrs: array of MSR values
+ *
+ */
+void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs)
+{
+ __rwmsr_on_cpus(mask, msr_no, msrs, __wrmsr_on_cpu);
+}
+EXPORT_SYMBOL(wrmsr_on_cpus);
+
+/* These "safe" variants are slower and should be used when the target MSR
+ may not actually exist. */
+static void __rdmsr_safe_on_cpu(void *info)
+{
+ struct msr_info *rv = info;
+
+ rv->err = rdmsr_safe(rv->msr_no, &rv->reg.l, &rv->reg.h);
+}
+
+static void __wrmsr_safe_on_cpu(void *info)
+{
+ struct msr_info *rv = info;
+
+ rv->err = wrmsr_safe(rv->msr_no, rv->reg.l, rv->reg.h);
+}
+
+int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
+{
+ int err;
+ struct msr_info rv;
+
+ memset(&rv, 0, sizeof(rv));
+
+ rv.msr_no = msr_no;
+ err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1);
+ *l = rv.reg.l;
+ *h = rv.reg.h;
+
+ return err ? err : rv.err;
+}
+EXPORT_SYMBOL(rdmsr_safe_on_cpu);
+
+int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h)
+{
+ int err;
+ struct msr_info rv;
+
+ memset(&rv, 0, sizeof(rv));
+
+ rv.msr_no = msr_no;
+ rv.reg.l = l;
+ rv.reg.h = h;
+ err = smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 1);
+
+ return err ? err : rv.err;
+}
+EXPORT_SYMBOL(wrmsr_safe_on_cpu);
+
+/*
+ * These variants are significantly slower, but allows control over
+ * the entire 32-bit GPR set.
+ */
+static void __rdmsr_safe_regs_on_cpu(void *info)
+{
+ struct msr_regs_info *rv = info;
+
+ rv->err = rdmsr_safe_regs(rv->regs);
+}
+
+static void __wrmsr_safe_regs_on_cpu(void *info)
+{
+ struct msr_regs_info *rv = info;
+
+ rv->err = wrmsr_safe_regs(rv->regs);
+}
+
+int rdmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs)
+{
+ int err;
+ struct msr_regs_info rv;
+
+ rv.regs = regs;
+ rv.err = -EIO;
+ err = smp_call_function_single(cpu, __rdmsr_safe_regs_on_cpu, &rv, 1);
+
+ return err ? err : rv.err;
+}
+EXPORT_SYMBOL(rdmsr_safe_regs_on_cpu);
+
+int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs)
+{
+ int err;
+ struct msr_regs_info rv;
+
+ rv.regs = regs;
+ rv.err = -EIO;
+ err = smp_call_function_single(cpu, __wrmsr_safe_regs_on_cpu, &rv, 1);
+
+ return err ? err : rv.err;
+}
+EXPORT_SYMBOL(wrmsr_safe_regs_on_cpu);
--
1.6.6
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH 2/7] x86, smp: Adjust native_wbinvd for smp_call_function-helpers
2010-01-21 16:41 [PATCH -v2 0/7] x86, cacheinfo, amd: L3 Cache Index Disable fixes Borislav Petkov
2010-01-21 16:41 ` [PATCH 1/7] x86, smp: Convert msr-smp.c to an smp library Borislav Petkov
@ 2010-01-21 16:41 ` Borislav Petkov
2010-01-21 18:07 ` H. Peter Anvin
2010-01-21 16:41 ` [PATCH 3/7] x86, smp: Add wbinvd_on_all_cpus helper Borislav Petkov
` (4 subsequent siblings)
6 siblings, 1 reply; 12+ messages in thread
From: Borislav Petkov @ 2010-01-21 16:41 UTC (permalink / raw)
To: mingo, hpa, tglx; +Cc: andreas.herrmann3, x86, linux-kernel
From: Borislav Petkov <borislav.petkov@amd.com>
Add a wbinvd_on_cpu() helper and fold the void * arg in the wbinvd macro
definition so that native_wbinvd can be used in the smp_call_function_*
helpers as a pointed-to function.
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
---
arch/x86/include/asm/paravirt.h | 3 ++-
arch/x86/include/asm/paravirt_types.h | 2 +-
arch/x86/include/asm/smp.h | 3 +++
arch/x86/include/asm/system.h | 10 ++++++++--
arch/x86/lib/smp.c | 7 +++++++
5 files changed, 21 insertions(+), 4 deletions(-)
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index dd59a85..ee034a6 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -117,7 +117,8 @@ static inline void halt(void)
static inline void wbinvd(void)
{
- PVOP_VCALL0(pv_cpu_ops.wbinvd);
+ void *uninitialized_var(dummy_ptr);
+ PVOP_VCALL1(pv_cpu_ops.wbinvd, dummy_ptr);
}
#define get_kernel_rpl() (pv_info.kernel_rpl)
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index b1e70d5..79730b8 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -136,7 +136,7 @@ struct pv_cpu_ops {
void (*set_iopl_mask)(unsigned mask);
- void (*wbinvd)(void);
+ void (*wbinvd)(void *);
void (*io_delay)(void);
/* cpuid emulation, mostly so that caps bits can be disabled */
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 1e79678..090146e 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -138,6 +138,7 @@ void play_dead_common(void);
void native_send_call_func_ipi(const struct cpumask *mask);
void native_send_call_func_single_ipi(int cpu);
+void wbinvd_on_cpu(int cpu);
void smp_store_cpu_info(int id);
#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu)
@@ -147,6 +148,8 @@ static inline int num_booting_cpus(void)
{
return cpumask_weight(cpu_callout_mask);
}
+#else /* !CONFIG_SMP */
+#define wbinvd_on_cpu(cpu) wbinvd()
#endif /* CONFIG_SMP */
extern unsigned disabled_cpus __cpuinitdata;
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index ecb544e..8c531ab 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -297,7 +297,7 @@ static inline void native_write_cr8(unsigned long val)
}
#endif
-static inline void native_wbinvd(void)
+static inline void native_wbinvd(void *dummy)
{
asm volatile("wbinvd": : :"memory");
}
@@ -314,7 +314,13 @@ static inline void native_wbinvd(void)
#define read_cr4() (native_read_cr4())
#define read_cr4_safe() (native_read_cr4_safe())
#define write_cr4(x) (native_write_cr4(x))
-#define wbinvd() (native_wbinvd())
+
+#define wbinvd() \
+ ({ \
+ void *uninitialized_var(dumb_ptr); \
+ native_wbinvd(dumb_ptr); \
+ })
+
#ifdef CONFIG_X86_64
#define read_cr8() (native_read_cr8())
#define write_cr8(x) (native_write_cr8(x))
diff --git a/arch/x86/lib/smp.c b/arch/x86/lib/smp.c
index a6b1b86..408c3b7 100644
--- a/arch/x86/lib/smp.c
+++ b/arch/x86/lib/smp.c
@@ -2,6 +2,7 @@
#include <linux/preempt.h>
#include <linux/smp.h>
#include <asm/msr.h>
+#include <asm/system.h>
static void __rdmsr_on_cpu(void *info)
{
@@ -202,3 +203,9 @@ int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 *regs)
return err ? err : rv.err;
}
EXPORT_SYMBOL(wrmsr_safe_regs_on_cpu);
+
+void wbinvd_on_cpu(int cpu)
+{
+ smp_call_function_single(cpu, native_wbinvd, NULL, 1);
+}
+EXPORT_SYMBOL(wbinvd_on_cpu);
--
1.6.6
^ permalink raw reply related [flat|nested] 12+ messages in thread* Re: [PATCH 2/7] x86, smp: Adjust native_wbinvd for smp_call_function-helpers
2010-01-21 16:41 ` [PATCH 2/7] x86, smp: Adjust native_wbinvd for smp_call_function-helpers Borislav Petkov
@ 2010-01-21 18:07 ` H. Peter Anvin
2010-01-21 18:36 ` Borislav Petkov
0 siblings, 1 reply; 12+ messages in thread
From: H. Peter Anvin @ 2010-01-21 18:07 UTC (permalink / raw)
To: Borislav Petkov; +Cc: mingo, tglx, andreas.herrmann3, x86, linux-kernel
On 01/21/2010 08:41 AM, Borislav Petkov wrote:
> From: Borislav Petkov <borislav.petkov@amd.com>
>
> Add a wbinvd_on_cpu() helper and fold the void * arg in the wbinvd macro
> definition so that native_wbinvd can be used in the smp_call_function_*
> helpers as a pointed-to function.
>
> Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
This patch is a lot of extra uglification just to avoid a simple jump
stub -- and even then only with PV enabled -- for an operation which is
then going to perform an IPI and WBINVD. That doesn't exactly seem like
the right tradeoff.
As such, I'm going to request that you just put a simple stub function
in lib/smp.c rather than this whole patchset (well, the smp.h bit
obviously needs to stay.)
-hpa
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 2/7] x86, smp: Adjust native_wbinvd for smp_call_function-helpers
2010-01-21 18:07 ` H. Peter Anvin
@ 2010-01-21 18:36 ` Borislav Petkov
0 siblings, 0 replies; 12+ messages in thread
From: Borislav Petkov @ 2010-01-21 18:36 UTC (permalink / raw)
To: H. Peter Anvin; +Cc: mingo, tglx, andreas.herrmann3, x86, linux-kernel
On Thu, Jan 21, 2010 at 10:07:51AM -0800, H. Peter Anvin wrote:
> On 01/21/2010 08:41 AM, Borislav Petkov wrote:
> > From: Borislav Petkov <borislav.petkov@amd.com>
> >
> > Add a wbinvd_on_cpu() helper and fold the void * arg in the wbinvd macro
> > definition so that native_wbinvd can be used in the smp_call_function_*
> > helpers as a pointed-to function.
> >
> > Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
>
> This patch is a lot of extra uglification just to avoid a simple jump
> stub -- and even then only with PV enabled -- for an operation which is
> then going to perform an IPI and WBINVD. That doesn't exactly seem like
> the right tradeoff.
>
> As such, I'm going to request that you just put a simple stub function
> in lib/smp.c rather than this whole patchset (well, the smp.h bit
> obviously needs to stay.)
...and I was wrapping the wrong way: wbinvd() was going to hide the void
* function argument while a simple stub in lib/smp.c could wrap around
wbinvd(), which is much cleaner. Thanks.
Will redo the patchset tomorrow.
--
Regards/Gruss,
Boris.
-
Advanced Micro Devices, Inc.
Operating Systems Research Center
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH 3/7] x86, smp: Add wbinvd_on_all_cpus helper
2010-01-21 16:41 [PATCH -v2 0/7] x86, cacheinfo, amd: L3 Cache Index Disable fixes Borislav Petkov
2010-01-21 16:41 ` [PATCH 1/7] x86, smp: Convert msr-smp.c to an smp library Borislav Petkov
2010-01-21 16:41 ` [PATCH 2/7] x86, smp: Adjust native_wbinvd for smp_call_function-helpers Borislav Petkov
@ 2010-01-21 16:41 ` Borislav Petkov
2010-01-21 16:41 ` [PATCH 4/7] intel-agp: Switch to wbinvd_on_all_cpus Borislav Petkov
` (3 subsequent siblings)
6 siblings, 0 replies; 12+ messages in thread
From: Borislav Petkov @ 2010-01-21 16:41 UTC (permalink / raw)
To: mingo, hpa, tglx; +Cc: andreas.herrmann3, x86, linux-kernel
From: Borislav Petkov <borislav.petkov@amd.com>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
---
arch/x86/include/asm/smp.h | 2 ++
arch/x86/lib/smp.c | 6 ++++++
2 files changed, 8 insertions(+), 0 deletions(-)
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 090146e..3c8ed3a 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -139,6 +139,7 @@ void play_dead_common(void);
void native_send_call_func_ipi(const struct cpumask *mask);
void native_send_call_func_single_ipi(int cpu);
void wbinvd_on_cpu(int cpu);
+int wbinvd_on_all_cpus(void);
void smp_store_cpu_info(int id);
#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu)
@@ -150,6 +151,7 @@ static inline int num_booting_cpus(void)
}
#else /* !CONFIG_SMP */
#define wbinvd_on_cpu(cpu) wbinvd()
+#define wbinvd_on_all_cpus() wbinvd()
#endif /* CONFIG_SMP */
extern unsigned disabled_cpus __cpuinitdata;
diff --git a/arch/x86/lib/smp.c b/arch/x86/lib/smp.c
index 408c3b7..38f92b0 100644
--- a/arch/x86/lib/smp.c
+++ b/arch/x86/lib/smp.c
@@ -209,3 +209,9 @@ void wbinvd_on_cpu(int cpu)
smp_call_function_single(cpu, native_wbinvd, NULL, 1);
}
EXPORT_SYMBOL(wbinvd_on_cpu);
+
+int wbinvd_on_all_cpus(void)
+{
+ return on_each_cpu(native_wbinvd, NULL, 1);
+}
+EXPORT_SYMBOL(wbinvd_on_all_cpus);
--
1.6.6
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH 4/7] intel-agp: Switch to wbinvd_on_all_cpus
2010-01-21 16:41 [PATCH -v2 0/7] x86, cacheinfo, amd: L3 Cache Index Disable fixes Borislav Petkov
` (2 preceding siblings ...)
2010-01-21 16:41 ` [PATCH 3/7] x86, smp: Add wbinvd_on_all_cpus helper Borislav Petkov
@ 2010-01-21 16:41 ` Borislav Petkov
2010-01-21 16:41 ` [PATCH 5/7] x86, cacheinfo: Fix disabling of L3 cache indices Borislav Petkov
` (2 subsequent siblings)
6 siblings, 0 replies; 12+ messages in thread
From: Borislav Petkov @ 2010-01-21 16:41 UTC (permalink / raw)
To: mingo, hpa, tglx; +Cc: andreas.herrmann3, x86, linux-kernel
From: Borislav Petkov <borislav.petkov@amd.com>
Cc: David Airlie <airlied@linux.ie>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
---
drivers/char/agp/intel-agp.c | 8 +-------
1 files changed, 1 insertions(+), 7 deletions(-)
diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c
index 3999a5f..bc453da 100644
--- a/drivers/char/agp/intel-agp.c
+++ b/drivers/char/agp/intel-agp.c
@@ -815,12 +815,6 @@ static void intel_i830_setup_flush(void)
intel_i830_fini_flush();
}
-static void
-do_wbinvd(void *null)
-{
- wbinvd();
-}
-
/* The chipset_flush interface needs to get data that has already been
* flushed out of the CPU all the way out to main memory, because the GPU
* doesn't snoop those buffers.
@@ -840,7 +834,7 @@ static void intel_i830_chipset_flush(struct agp_bridge_data *bridge)
if (cpu_has_clflush) {
clflush_cache_range(pg, 1024);
} else {
- if (on_each_cpu(do_wbinvd, NULL, 1) != 0)
+ if (wbinvd_on_all_cpus() != 0)
printk(KERN_ERR "Timed out waiting for cache flush.\n");
}
}
--
1.6.6
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH 5/7] x86, cacheinfo: Fix disabling of L3 cache indices
2010-01-21 16:41 [PATCH -v2 0/7] x86, cacheinfo, amd: L3 Cache Index Disable fixes Borislav Petkov
` (3 preceding siblings ...)
2010-01-21 16:41 ` [PATCH 4/7] intel-agp: Switch to wbinvd_on_all_cpus Borislav Petkov
@ 2010-01-21 16:41 ` Borislav Petkov
2010-01-21 16:41 ` [PATCH 6/7] x86, cacheinfo: Add cache index disable sysfs attrs only to L3 caches Borislav Petkov
2010-01-21 16:41 ` [PATCH 7/7] x86, cacheinfo: Calculate L3 indexes Borislav Petkov
6 siblings, 0 replies; 12+ messages in thread
From: Borislav Petkov @ 2010-01-21 16:41 UTC (permalink / raw)
To: mingo, hpa, tglx; +Cc: andreas.herrmann3, x86, linux-kernel
From: Borislav Petkov <borislav.petkov@amd.com>
* Correct the masks used for writing the cache index disable indices.
* Do not turn off L3 scrubber - it is not necessary.
* Make sure wbinvd is executed on the same node where the L3 is.
* Check for out-of-bounds values written to the registers.
* Make show_cache_disable hex values unambiguous
* Check for Erratum #388
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
---
arch/x86/kernel/cpu/intel_cacheinfo.c | 30 +++++++++++++++++-------------
1 files changed, 17 insertions(+), 13 deletions(-)
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index fc6c8ef..cef4fdd 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -18,6 +18,7 @@
#include <asm/processor.h>
#include <linux/smp.h>
#include <asm/k8.h>
+#include <asm/smp.h>
#define LVL_1_INST 1
#define LVL_1_DATA 2
@@ -299,8 +300,10 @@ amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
if (boot_cpu_data.x86 == 0x11)
return;
- /* see erratum #382 */
- if ((boot_cpu_data.x86 == 0x10) && (boot_cpu_data.x86_model < 0x8))
+ /* see errata #382 and #388 */
+ if ((boot_cpu_data.x86 == 0x10) &&
+ ((boot_cpu_data.x86_model < 0x9) ||
+ (boot_cpu_data.x86_mask < 0x1)))
return;
this_leaf->can_disable = 1;
@@ -726,12 +729,12 @@ static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
return -EINVAL;
pci_read_config_dword(dev, 0x1BC + index * 4, ®);
- return sprintf(buf, "%x\n", reg);
+ return sprintf(buf, "0x%08x\n", reg);
}
#define SHOW_CACHE_DISABLE(index) \
static ssize_t \
-show_cache_disable_##index(struct _cpuid4_info *this_leaf, char *buf) \
+show_cache_disable_##index(struct _cpuid4_info *this_leaf, char *buf) \
{ \
return show_cache_disable(this_leaf, buf, index); \
}
@@ -745,7 +748,9 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
int node = cpu_to_node(cpu);
struct pci_dev *dev = node_to_k8_nb_misc(node);
unsigned long val = 0;
- unsigned int scrubber = 0;
+
+#define SUBCACHE_MASK (3UL << 20)
+#define SUBCACHE_INDEX 0xfff
if (!this_leaf->can_disable)
return -EINVAL;
@@ -759,21 +764,20 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
if (strict_strtoul(buf, 10, &val) < 0)
return -EINVAL;
- val |= 0xc0000000;
-
- pci_read_config_dword(dev, 0x58, &scrubber);
- scrubber &= ~0x1f000000;
- pci_write_config_dword(dev, 0x58, scrubber);
+ /* do not allow writes outside of allowed bits */
+ if (val & ~(SUBCACHE_MASK | SUBCACHE_INDEX))
+ return -EINVAL;
- pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000);
- wbinvd();
+ val |= BIT(30);
pci_write_config_dword(dev, 0x1BC + index * 4, val);
+ wbinvd_on_cpu(cpu);
+ pci_write_config_dword(dev, 0x1BC + index * 4, val | BIT(31));
return count;
}
#define STORE_CACHE_DISABLE(index) \
static ssize_t \
-store_cache_disable_##index(struct _cpuid4_info *this_leaf, \
+store_cache_disable_##index(struct _cpuid4_info *this_leaf, \
const char *buf, size_t count) \
{ \
return store_cache_disable(this_leaf, buf, count, index); \
--
1.6.6
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH 6/7] x86, cacheinfo: Add cache index disable sysfs attrs only to L3 caches
2010-01-21 16:41 [PATCH -v2 0/7] x86, cacheinfo, amd: L3 Cache Index Disable fixes Borislav Petkov
` (4 preceding siblings ...)
2010-01-21 16:41 ` [PATCH 5/7] x86, cacheinfo: Fix disabling of L3 cache indices Borislav Petkov
@ 2010-01-21 16:41 ` Borislav Petkov
2010-01-21 16:41 ` [PATCH 7/7] x86, cacheinfo: Calculate L3 indexes Borislav Petkov
6 siblings, 0 replies; 12+ messages in thread
From: Borislav Petkov @ 2010-01-21 16:41 UTC (permalink / raw)
To: mingo, hpa, tglx; +Cc: andreas.herrmann3, x86, linux-kernel
From: Borislav Petkov <borislav.petkov@amd.com>
The cache_disable_[01] attribute in
/sys/devices/system/cpu/cpu?/cache/index[0-3]/
is enabled on all cache levels although only L3 supports it. Add it only
to the cache level that actually supports it.
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
---
arch/x86/kernel/cpu/intel_cacheinfo.c | 35 ++++++++++++++++++++++++--------
1 files changed, 26 insertions(+), 9 deletions(-)
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index cef4fdd..0ce0e8d 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -810,16 +810,24 @@ static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
show_cache_disable_1, store_cache_disable_1);
+#define DEFAULT_SYSFS_CACHE_ATTRS \
+ &type.attr, \
+ &level.attr, \
+ &coherency_line_size.attr, \
+ &physical_line_partition.attr, \
+ &ways_of_associativity.attr, \
+ &number_of_sets.attr, \
+ &size.attr, \
+ &shared_cpu_map.attr, \
+ &shared_cpu_list.attr
+
static struct attribute *default_attrs[] = {
- &type.attr,
- &level.attr,
- &coherency_line_size.attr,
- &physical_line_partition.attr,
- &ways_of_associativity.attr,
- &number_of_sets.attr,
- &size.attr,
- &shared_cpu_map.attr,
- &shared_cpu_list.attr,
+ DEFAULT_SYSFS_CACHE_ATTRS,
+ NULL
+};
+
+static struct attribute *default_l3_attrs[] = {
+ DEFAULT_SYSFS_CACHE_ATTRS,
&cache_disable_0.attr,
&cache_disable_1.attr,
NULL
@@ -912,6 +920,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
unsigned int cpu = sys_dev->id;
unsigned long i, j;
struct _index_kobject *this_object;
+ struct _cpuid4_info *this_leaf;
int retval;
retval = cpuid4_cache_sysfs_init(cpu);
@@ -930,6 +939,14 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
this_object = INDEX_KOBJECT_PTR(cpu, i);
this_object->cpu = cpu;
this_object->index = i;
+
+ this_leaf = CPUID4_INFO_IDX(cpu, i);
+
+ if (this_leaf->can_disable)
+ ktype_cache.default_attrs = default_l3_attrs;
+ else
+ ktype_cache.default_attrs = default_attrs;
+
retval = kobject_init_and_add(&(this_object->kobj),
&ktype_cache,
per_cpu(ici_cache_kobject, cpu),
--
1.6.6
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH 7/7] x86, cacheinfo: Calculate L3 indexes
2010-01-21 16:41 [PATCH -v2 0/7] x86, cacheinfo, amd: L3 Cache Index Disable fixes Borislav Petkov
` (5 preceding siblings ...)
2010-01-21 16:41 ` [PATCH 6/7] x86, cacheinfo: Add cache index disable sysfs attrs only to L3 caches Borislav Petkov
@ 2010-01-21 16:41 ` Borislav Petkov
2010-01-21 17:57 ` [PATCH] x86, cacheinfo: Calculate L3 indices Borislav Petkov
6 siblings, 1 reply; 12+ messages in thread
From: Borislav Petkov @ 2010-01-21 16:41 UTC (permalink / raw)
To: mingo, hpa, tglx; +Cc: andreas.herrmann3, x86, linux-kernel
From: Borislav Petkov <borislav.petkov@amd.com>
We need to know the valid L3 indexes interval when disabling them over
/sysfs. Do that when the core is brought online and add boundary checks
to the sysfs .store attribute.
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
---
arch/x86/kernel/cpu/intel_cacheinfo.c | 35 +++++++++++++++++++++++++++++---
1 files changed, 31 insertions(+), 4 deletions(-)
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 0ce0e8d..d231c38 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -151,7 +151,8 @@ struct _cpuid4_info {
union _cpuid4_leaf_ebx ebx;
union _cpuid4_leaf_ecx ecx;
unsigned long size;
- unsigned long can_disable;
+ bool can_disable;
+ unsigned int l3_indices;
DECLARE_BITMAP(shared_cpu_map, NR_CPUS);
};
@@ -161,7 +162,8 @@ struct _cpuid4_info_regs {
union _cpuid4_leaf_ebx ebx;
union _cpuid4_leaf_ecx ecx;
unsigned long size;
- unsigned long can_disable;
+ bool can_disable;
+ unsigned int l3_indices;
};
unsigned short num_cache_leaves;
@@ -291,6 +293,29 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
(ebx->split.ways_of_associativity + 1) - 1;
}
+static unsigned int __cpuinit amd_calc_l3_indices(void)
+{
+ /*
+ * We're called over smp_call_function_single() and therefore
+ * are on the correct cpu.
+ */
+ int cpu = smp_processor_id();
+ int node = cpu_to_node(cpu);
+ struct pci_dev *dev = node_to_k8_nb_misc(node);
+ unsigned int sc0, sc1, sc2, sc3;
+ u32 val;
+
+ pci_read_config_dword(dev, 0x1C4, &val);
+
+ /* calculate subcache sizes */
+ sc0 = !(val & BIT(0));
+ sc1 = !(val & BIT(4));
+ sc2 = !(val & BIT(8)) + !(val & BIT(9));
+ sc3 = !(val & BIT(12)) + !(val & BIT(13));
+
+ return (max(max(max(sc0, sc1), sc2), sc3) << 10) - 1;
+}
+
static void __cpuinit
amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
{
@@ -306,7 +331,8 @@ amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
(boot_cpu_data.x86_mask < 0x1)))
return;
- this_leaf->can_disable = 1;
+ this_leaf->can_disable = true;
+ this_leaf->l3_indices = amd_calc_l3_indices();
}
static int
@@ -765,7 +791,8 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
return -EINVAL;
/* do not allow writes outside of allowed bits */
- if (val & ~(SUBCACHE_MASK | SUBCACHE_INDEX))
+ if ((val & ~(SUBCACHE_MASK | SUBCACHE_INDEX)) ||
+ ((val & SUBCACHE_INDEX) > this_leaf->l3_indices))
return -EINVAL;
val |= BIT(30);
--
1.6.6
^ permalink raw reply related [flat|nested] 12+ messages in thread* [PATCH] x86, cacheinfo: Calculate L3 indices
2010-01-21 16:41 ` [PATCH 7/7] x86, cacheinfo: Calculate L3 indexes Borislav Petkov
@ 2010-01-21 17:57 ` Borislav Petkov
0 siblings, 0 replies; 12+ messages in thread
From: Borislav Petkov @ 2010-01-21 17:57 UTC (permalink / raw)
To: mingo, hpa, tglx; +Cc: andreas.herrmann3, x86, linux-kernel
From: Borislav Petkov <borislav.petkov@amd.com>
We need to know the valid L3 indices interval when disabling them over
/sysfs. Do that when the core is brought online and add boundary checks
to the sysfs .store attribute.
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
---
Fix indexes -> indices in the commit message.
arch/x86/kernel/cpu/intel_cacheinfo.c | 35 +++++++++++++++++++++++++++++---
1 files changed, 31 insertions(+), 4 deletions(-)
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 0ce0e8d..d231c38 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -151,7 +151,8 @@ struct _cpuid4_info {
union _cpuid4_leaf_ebx ebx;
union _cpuid4_leaf_ecx ecx;
unsigned long size;
- unsigned long can_disable;
+ bool can_disable;
+ unsigned int l3_indices;
DECLARE_BITMAP(shared_cpu_map, NR_CPUS);
};
@@ -161,7 +162,8 @@ struct _cpuid4_info_regs {
union _cpuid4_leaf_ebx ebx;
union _cpuid4_leaf_ecx ecx;
unsigned long size;
- unsigned long can_disable;
+ bool can_disable;
+ unsigned int l3_indices;
};
unsigned short num_cache_leaves;
@@ -291,6 +293,29 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
(ebx->split.ways_of_associativity + 1) - 1;
}
+static unsigned int __cpuinit amd_calc_l3_indices(void)
+{
+ /*
+ * We're called over smp_call_function_single() and therefore
+ * are on the correct cpu.
+ */
+ int cpu = smp_processor_id();
+ int node = cpu_to_node(cpu);
+ struct pci_dev *dev = node_to_k8_nb_misc(node);
+ unsigned int sc0, sc1, sc2, sc3;
+ u32 val;
+
+ pci_read_config_dword(dev, 0x1C4, &val);
+
+ /* calculate subcache sizes */
+ sc0 = !(val & BIT(0));
+ sc1 = !(val & BIT(4));
+ sc2 = !(val & BIT(8)) + !(val & BIT(9));
+ sc3 = !(val & BIT(12)) + !(val & BIT(13));
+
+ return (max(max(max(sc0, sc1), sc2), sc3) << 10) - 1;
+}
+
static void __cpuinit
amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
{
@@ -306,7 +331,8 @@ amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
(boot_cpu_data.x86_mask < 0x1)))
return;
- this_leaf->can_disable = 1;
+ this_leaf->can_disable = true;
+ this_leaf->l3_indices = amd_calc_l3_indices();
}
static int
@@ -765,7 +791,8 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
return -EINVAL;
/* do not allow writes outside of allowed bits */
- if (val & ~(SUBCACHE_MASK | SUBCACHE_INDEX))
+ if ((val & ~(SUBCACHE_MASK | SUBCACHE_INDEX)) ||
+ ((val & SUBCACHE_INDEX) > this_leaf->l3_indices))
return -EINVAL;
val |= BIT(30);
--
1.6.6
--
Regards/Gruss,
Boris.
-
Advanced Micro Devices, Inc.
Operating Systems Research Center
^ permalink raw reply related [flat|nested] 12+ messages in thread