* [PATCH 2/3] powerpc/perf: add 2 additional performance monitor counters for e6500 core
From: Lijun Pan @ 2013-05-29 22:12 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Lijun.Pan
In-Reply-To: <1369865562-29525-1-git-send-email-Lijun.Pan@freescale.com>
There are 6 counters in e6500 core instead of 4 in e500 core.
Signed-off-by: Lijun Pan <Lijun.Pan@freescale.com>
---
arch/powerpc/include/asm/reg_fsl_emb.h | 12 ++++++++++++
arch/powerpc/kernel/cputable.c | 2 +-
arch/powerpc/oprofile/op_model_fsl_emb.c | 30 ++++++++++++++++++++++++++++++
arch/powerpc/perf/core-fsl-emb.c | 24 ++++++++++++++++++++++++
4 files changed, 67 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/include/asm/reg_fsl_emb.h b/arch/powerpc/include/asm/reg_fsl_emb.h
index 1cf8ab0..c51d52e 100644
--- a/arch/powerpc/include/asm/reg_fsl_emb.h
+++ b/arch/powerpc/include/asm/reg_fsl_emb.h
@@ -19,10 +19,14 @@
#define PMRN_PMC1 0x011 /* Performance Monitor Counter 1 */
#define PMRN_PMC2 0x012 /* Performance Monitor Counter 2 */
#define PMRN_PMC3 0x013 /* Performance Monitor Counter 3 */
+#define PMRN_PMC4 0x014 /* Performance Monitor Counter 4 */
+#define PMRN_PMC5 0x015 /* Performance Monitor Counter 5 */
#define PMRN_PMLCA0 0x090 /* PM Local Control A0 */
#define PMRN_PMLCA1 0x091 /* PM Local Control A1 */
#define PMRN_PMLCA2 0x092 /* PM Local Control A2 */
#define PMRN_PMLCA3 0x093 /* PM Local Control A3 */
+#define PMRN_PMLCA4 0x094 /* PM Local Control A4 */
+#define PMRN_PMLCA5 0x095 /* PM Local Control A5 */
#define PMLCA_FC 0x80000000 /* Freeze Counter */
#define PMLCA_FCS 0x40000000 /* Freeze in Supervisor */
@@ -38,6 +42,8 @@
#define PMRN_PMLCB1 0x111 /* PM Local Control B1 */
#define PMRN_PMLCB2 0x112 /* PM Local Control B2 */
#define PMRN_PMLCB3 0x113 /* PM Local Control B3 */
+#define PMRN_PMLCB4 0x114 /* PM Local Control B4 */
+#define PMRN_PMLCB5 0x115 /* PM Local Control B5 */
#define PMLCB_THRESHMUL_MASK 0x0700 /* Threshold Multiple Field */
#define PMLCB_THRESHMUL_SHIFT 8
@@ -57,14 +63,20 @@
#define PMRN_UPMC1 0x001 /* User Performance Monitor Counter 1 */
#define PMRN_UPMC2 0x002 /* User Performance Monitor Counter 2 */
#define PMRN_UPMC3 0x003 /* User Performance Monitor Counter 3 */
+#define PMRN_UPMC4 0x004 /* User Performance Monitor Counter 4 */
+#define PMRN_UPMC5 0x005 /* User Performance Monitor Counter 5 */
#define PMRN_UPMLCA0 0x080 /* User PM Local Control A0 */
#define PMRN_UPMLCA1 0x081 /* User PM Local Control A1 */
#define PMRN_UPMLCA2 0x082 /* User PM Local Control A2 */
#define PMRN_UPMLCA3 0x083 /* User PM Local Control A3 */
+#define PMRN_UPMLCA4 0x084 /* User PM Local Control A4 */
+#define PMRN_UPMLCA5 0x085 /* User PM Local Control A5 */
#define PMRN_UPMLCB0 0x100 /* User PM Local Control B0 */
#define PMRN_UPMLCB1 0x101 /* User PM Local Control B1 */
#define PMRN_UPMLCB2 0x102 /* User PM Local Control B2 */
#define PMRN_UPMLCB3 0x103 /* User PM Local Control B3 */
+#define PMRN_UPMLCB4 0x104 /* User PM Local Control B4 */
+#define PMRN_UPMLCB5 0x105 /* User PM Local Control B5 */
#define PMRN_UPMGC0 0x180 /* User PM Global Control 0 */
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index cc39139..2d4eee2 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -2073,7 +2073,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
MMU_FTR_USE_TLBILX,
.icache_bsize = 64,
.dcache_bsize = 64,
- .num_pmcs = 4,
+ .num_pmcs = 6,
.oprofile_cpu_type = "ppc/e6500",
.oprofile_type = PPC_OPROFILE_FSL_EMB,
.cpu_setup = __setup_cpu_e6500,
diff --git a/arch/powerpc/oprofile/op_model_fsl_emb.c b/arch/powerpc/oprofile/op_model_fsl_emb.c
index ccc1daa..2a82d3e 100644
--- a/arch/powerpc/oprofile/op_model_fsl_emb.c
+++ b/arch/powerpc/oprofile/op_model_fsl_emb.c
@@ -46,6 +46,12 @@ static inline u32 get_pmlca(int ctr)
case 3:
pmlca = mfpmr(PMRN_PMLCA3);
break;
+ case 4:
+ pmlca = mfpmr(PMRN_PMLCA4);
+ break;
+ case 5:
+ pmlca = mfpmr(PMRN_PMLCA5);
+ break;
default:
panic("Bad ctr number\n");
}
@@ -68,6 +74,12 @@ static inline void set_pmlca(int ctr, u32 pmlca)
case 3:
mtpmr(PMRN_PMLCA3, pmlca);
break;
+ case 4:
+ mtpmr(PMRN_PMLCA4, pmlca);
+ break;
+ case 5:
+ mtpmr(PMRN_PMLCA5, pmlca);
+ break;
default:
panic("Bad ctr number\n");
}
@@ -84,6 +96,10 @@ static inline unsigned int ctr_read(unsigned int i)
return mfpmr(PMRN_PMC2);
case 3:
return mfpmr(PMRN_PMC3);
+ case 4:
+ return mfpmr(PMRN_PMC4);
+ case 5:
+ return mfpmr(PMRN_PMC5);
default:
return 0;
}
@@ -104,6 +120,12 @@ static inline void ctr_write(unsigned int i, unsigned int val)
case 3:
mtpmr(PMRN_PMC3, val);
break;
+ case 4:
+ mtpmr(PMRN_PMC4, val);
+ break;
+ case 5:
+ mtpmr(PMRN_PMC5, val);
+ break;
default:
break;
}
@@ -133,6 +155,14 @@ static void init_pmc_stop(int ctr)
mtpmr(PMRN_PMLCA3, pmlca);
mtpmr(PMRN_PMLCB3, pmlcb);
break;
+ case 4:
+ mtpmr(PMRN_PMLCA4, pmlca);
+ mtpmr(PMRN_PMLCB4, pmlcb);
+ break;
+ case 5:
+ mtpmr(PMRN_PMLCA5, pmlca);
+ mtpmr(PMRN_PMLCB5, pmlcb);
+ break;
default:
panic("Bad ctr number!\n");
}
diff --git a/arch/powerpc/perf/core-fsl-emb.c b/arch/powerpc/perf/core-fsl-emb.c
index 106c533..a81e666 100644
--- a/arch/powerpc/perf/core-fsl-emb.c
+++ b/arch/powerpc/perf/core-fsl-emb.c
@@ -70,6 +70,12 @@ static unsigned long read_pmc(int idx)
case 3:
val = mfpmr(PMRN_PMC3);
break;
+ case 4:
+ val = mfpmr(PMRN_PMC4);
+ break;
+ case 5:
+ val = mfpmr(PMRN_PMC5);
+ break;
default:
printk(KERN_ERR "oops trying to read PMC%d\n", idx);
val = 0;
@@ -95,6 +101,12 @@ static void write_pmc(int idx, unsigned long val)
case 3:
mtpmr(PMRN_PMC3, val);
break;
+ case 4:
+ mtpmr(PMRN_PMC4, val);
+ break;
+ case 5:
+ mtpmr(PMRN_PMC5, val);
+ break;
default:
printk(KERN_ERR "oops trying to write PMC%d\n", idx);
}
@@ -120,6 +132,12 @@ static void write_pmlca(int idx, unsigned long val)
case 3:
mtpmr(PMRN_PMLCA3, val);
break;
+ case 4:
+ mtpmr(PMRN_PMLCA4, val);
+ break;
+ case 5:
+ mtpmr(PMRN_PMLCA5, val);
+ break;
default:
printk(KERN_ERR "oops trying to write PMLCA%d\n", idx);
}
@@ -145,6 +163,12 @@ static void write_pmlcb(int idx, unsigned long val)
case 3:
mtpmr(PMRN_PMLCB3, val);
break;
+ case 4:
+ mtpmr(PMRN_PMLCB4, val);
+ break;
+ case 5:
+ mtpmr(PMRN_PMLCB5, val);
+ break;
default:
printk(KERN_ERR "oops trying to write PMLCB%d\n", idx);
}
--
1.7.9.7
^ permalink raw reply related
* [PATCH 3/3] powerpc/perf: Add e6500 PMU driver
From: Lijun Pan @ 2013-05-29 22:12 UTC (permalink / raw)
To: linuxppc-dev; +Cc: Lijun.Pan, Poonam Aggrwal, Priyanka Jain
In-Reply-To: <1369865562-29525-1-git-send-email-Lijun.Pan@freescale.com>
e6500 core performance monitors have the following features:
- 6 performance monitor counters
- 512 events supported
- no threshold events
e6500 PMU has more specific events (Data L1 cache misses, Instruction L1
cache misses, etc ) than e500 PMU (which only had Data L1 cache reloads,
etc). Where available, the more specific events have been used which will
produce slightly different results than e500 PMU equivalents.
Based on work done by Priyanka Jain
Signed-off-by: Lijun Pan <Lijun.Pan@freescale.com>
Signed-off-by: Priyanka Jain <Priyanka.Jain@freescale.com>
Signed-off-by: Poonam Aggrwal <Poonam.Aggrwal@freescale.com>
---
arch/powerpc/include/asm/reg_fsl_emb.h | 4 +-
arch/powerpc/perf/Makefile | 2 +-
arch/powerpc/perf/e6500-pmu.c | 120 ++++++++++++++++++++++++++++++++
3 files changed, 124 insertions(+), 2 deletions(-)
create mode 100644 arch/powerpc/perf/e6500-pmu.c
diff --git a/arch/powerpc/include/asm/reg_fsl_emb.h b/arch/powerpc/include/asm/reg_fsl_emb.h
index c51d52e..0e3ddf5 100644
--- a/arch/powerpc/include/asm/reg_fsl_emb.h
+++ b/arch/powerpc/include/asm/reg_fsl_emb.h
@@ -34,8 +34,10 @@
#define PMLCA_FCM1 0x10000000 /* Freeze when PMM==1 */
#define PMLCA_FCM0 0x08000000 /* Freeze when PMM==0 */
#define PMLCA_CE 0x04000000 /* Condition Enable */
+#define PMLCA_FGCS1 0x00000002 /* Freeze in guest state */
+#define PMLCA_FGCS0 0x00000001 /* Freeze in hypervisor state */
-#define PMLCA_EVENT_MASK 0x00ff0000 /* Event field */
+#define PMLCA_EVENT_MASK 0x01ff0000 /* Event field */
#define PMLCA_EVENT_SHIFT 16
#define PMRN_PMLCB0 0x110 /* PM Local Control B0 */
diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
index af3fac2..06dd8d5 100644
--- a/arch/powerpc/perf/Makefile
+++ b/arch/powerpc/perf/Makefile
@@ -8,7 +8,7 @@ obj64-$(CONFIG_PPC_PERF_CTRS) += power4-pmu.o ppc970-pmu.o power5-pmu.o \
obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o
obj-$(CONFIG_FSL_EMB_PERF_EVENT) += core-fsl-emb.o
-obj-$(CONFIG_FSL_EMB_PERF_EVENT_E500) += e500-pmu.o
+obj-$(CONFIG_FSL_EMB_PERF_EVENT_E500) += e500-pmu.o e6500-pmu.o
obj-$(CONFIG_PPC64) += $(obj64-y)
obj-$(CONFIG_PPC32) += $(obj32-y)
diff --git a/arch/powerpc/perf/e6500-pmu.c b/arch/powerpc/perf/e6500-pmu.c
new file mode 100644
index 0000000..e38ed1f
--- /dev/null
+++ b/arch/powerpc/perf/e6500-pmu.c
@@ -0,0 +1,120 @@
+/*
+ * Performance counter support for e6500 family processors.
+ *
+ * Based on e500-pmu.c
+ * Copyright 2013 Freescale Semiconductor, Inc.
+ * Copyright 2008-2009 Paul Mackerras, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/string.h>
+#include <linux/perf_event.h>
+#include <asm/reg.h>
+#include <asm/cputable.h>
+
+/*
+ * Map of generic hardware event types to hardware events
+ * Zero if unsupported
+ */
+static int e6500_generic_events[] = {
+ [PERF_COUNT_HW_CPU_CYCLES] = 1,
+ [PERF_COUNT_HW_INSTRUCTIONS] = 2,
+ [PERF_COUNT_HW_CACHE_MISSES] = 221,
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 12,
+ [PERF_COUNT_HW_BRANCH_MISSES] = 15,
+};
+
+#define C(x) PERF_COUNT_HW_CACHE_##x
+
+/*
+ * Table of generalized cache-related events.
+ * 0 means not supported, -1 means nonsensical, other values
+ * are event codes.
+ */
+static int e6500_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+ [C(L1D)] = {
+ /*RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { 27, 222 },
+ [C(OP_WRITE)] = { 28, 223 },
+ [C(OP_PREFETCH)] = { 29, 0 },
+ },
+ [C(L1I)] = {
+ /*RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { 2, 254 },
+ [C(OP_WRITE)] = { -1, -1 },
+ [C(OP_PREFETCH)] = { 37, 0 },
+ },
+ /*
+ * Assuming LL means L2, it's not a good match for this model.
+ * It does not have separate read/write events (but it does have
+ * separate instruction/data events).
+ */
+ [C(LL)] = {
+ /*RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { 0, 0 },
+ [C(OP_WRITE)] = { 0, 0 },
+ [C(OP_PREFETCH)] = { 0, 0 },
+ },
+ /*
+ * There are data/instruction MMU misses, but that's a miss on
+ * the chip's internal level-one TLB which is probably not
+ * what the user wants. Instead, unified level-two TLB misses
+ * are reported here.
+ */
+ [C(DTLB)] = {
+ /*RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { 26, 66 },
+ [C(OP_WRITE)] = { -1, -1 },
+ [C(OP_PREFETCH)] = { -1, -1 },
+ },
+ [C(BPU)] = {
+ /*RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { 12, 15 },
+ [C(OP_WRITE)] = { -1, -1 },
+ [C(OP_PREFETCH)] = { -1, -1 },
+ },
+ [C(NODE)] = {
+ /* RESULT_ACCESS RESULT_MISS */
+ [C(OP_READ)] = { -1, -1 },
+ [C(OP_WRITE)] = { -1, -1 },
+ [C(OP_PREFETCH)] = { -1, -1 },
+ },
+};
+
+static int num_events = 512;
+
+/* Upper half of event id is PMLCb, for threshold events */
+static u64 e6500_xlate_event(u64 event_id)
+{
+ u32 event_low = (u32)event_id;
+ if (event_low >= num_events ||
+ (event_id & (FSL_EMB_EVENT_THRESHMUL | FSL_EMB_EVENT_THRESH)))
+ return 0;
+
+ return FSL_EMB_EVENT_VALID;
+}
+
+static struct fsl_emb_pmu e6500_pmu = {
+ .name = "e6500 family",
+ .n_counter = 6,
+ .n_restricted = 0,
+ .xlate_event = e6500_xlate_event,
+ .n_generic = ARRAY_SIZE(e6500_generic_events),
+ .generic_events = e6500_generic_events,
+ .cache_events = &e6500_cache_events,
+};
+
+static int init_e6500_pmu(void)
+{
+ if (!cur_cpu_spec->oprofile_cpu_type ||
+ strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/e6500"))
+ return -ENODEV;
+
+ return register_fsl_emb_pmu(&e6500_pmu);
+}
+
+early_initcall(init_e6500_pmu);
--
1.7.9.7
^ permalink raw reply related
* Re: [PATCH 3/4] KVM: PPC: Add support for IOMMU in-kernel handling
From: Alexey Kardashevskiy @ 2013-05-29 23:10 UTC (permalink / raw)
To: Scott Wood
Cc: kvm, Alexander Graf, kvm-ppc, linux-kernel, Paul Mackerras,
linuxppc-dev, David Gibson
In-Reply-To: <1369857949.18630.42@snotra>
On 05/30/2013 06:05 AM, Scott Wood wrote:
> On 05/28/2013 07:12:32 PM, Alexey Kardashevskiy wrote:
>> On 05/29/2013 09:35 AM, Scott Wood wrote:
>> > On 05/28/2013 06:30:40 PM, Alexey Kardashevskiy wrote:
>> >> >> >>> @@ -939,6 +940,9 @@ struct kvm_s390_ucas_mapping {
>> >> >> >>> #define KVM_GET_DEVICE_ATTR _IOW(KVMIO, 0xe2, struct
>> >> >> >>> kvm_device_attr)
>> >> >> >>> #define KVM_HAS_DEVICE_ATTR _IOW(KVMIO, 0xe3, struct
>> >> >> >>> kvm_device_attr)
>> >> >> >>>
>> >> >> >>> +/* ioctl for SPAPR TCE IOMMU */
>> >> >> >>> +#define KVM_CREATE_SPAPR_TCE_IOMMU _IOW(KVMIO, 0xe4, struct
>> >> >> >>> kvm_create_spapr_tce_iommu)
>> >> >> >>
>> >> >> >> Shouldn't this go under the vm ioctl section?
>> >> >>
>> >> >>
>> >> >> The KVM_CREATE_SPAPR_TCE_IOMMU ioctl (the version for emulated
>> >> devices) is
>> >> >> in this section so I decided to keep them together. Wrong?
>> >> >
>> >> > You decided to keep KVM_CREATE_SPAPR_TCE_IOMMU together with
>> >> > KVM_CREATE_SPAPR_TCE_IOMMU?
>> >>
>> >> Yes.
>> >
>> > Sigh. That's the same thing repeated. There's only one IOCTL.
>> Nothing is
>> > being "kept together".
>>
>> Sorry, I meant this ioctl - KVM_CREATE_SPAPR_TCE.
>
> But you didn't put it in the same section as KVM_CREATE_SPAPR_TCE. 0xe0
> begins a different section.
It is not really obvious that there are sections as no comment defines
those :) But yes, makes sense to move it up a bit and change the code to 0xad.
--
Alexey
^ permalink raw reply
* Re: [PATCH 3/4] KVM: PPC: Add support for IOMMU in-kernel handling
From: Scott Wood @ 2013-05-29 23:14 UTC (permalink / raw)
To: Alexey Kardashevskiy
Cc: kvm, Alexander Graf, kvm-ppc, linux-kernel, Paul Mackerras,
linuxppc-dev, David Gibson
In-Reply-To: <51A68AE9.6070709@ozlabs.ru>
On 05/29/2013 06:10:33 PM, Alexey Kardashevskiy wrote:
> On 05/30/2013 06:05 AM, Scott Wood wrote:
> > On 05/28/2013 07:12:32 PM, Alexey Kardashevskiy wrote:
> >> On 05/29/2013 09:35 AM, Scott Wood wrote:
> >> > On 05/28/2013 06:30:40 PM, Alexey Kardashevskiy wrote:
> >> >> >> >>> @@ -939,6 +940,9 @@ struct kvm_s390_ucas_mapping {
> >> >> >> >>> #define KVM_GET_DEVICE_ATTR _IOW(KVMIO, 0xe2, =20
> struct
> >> >> >> >>> kvm_device_attr)
> >> >> >> >>> #define KVM_HAS_DEVICE_ATTR _IOW(KVMIO, 0xe3, =20
> struct
> >> >> >> >>> kvm_device_attr)
> >> >> >> >>>
> >> >> >> >>> +/* ioctl for SPAPR TCE IOMMU */
> >> >> >> >>> +#define KVM_CREATE_SPAPR_TCE_IOMMU _IOW(KVMIO, 0xe4, =20
> struct
> >> >> >> >>> kvm_create_spapr_tce_iommu)
> >> >> >> >>
> >> >> >> >> Shouldn't this go under the vm ioctl section?
> >> >> >>
> >> >> >>
> >> >> >> The KVM_CREATE_SPAPR_TCE_IOMMU ioctl (the version for =20
> emulated
> >> >> devices) is
> >> >> >> in this section so I decided to keep them together. Wrong?
> >> >> >
> >> >> > You decided to keep KVM_CREATE_SPAPR_TCE_IOMMU together with
> >> >> > KVM_CREATE_SPAPR_TCE_IOMMU?
> >> >>
> >> >> Yes.
> >> >
> >> > Sigh. That's the same thing repeated. There's only one IOCTL.
> >> Nothing is
> >> > being "kept together".
> >>
> >> Sorry, I meant this ioctl - KVM_CREATE_SPAPR_TCE.
> >
> > But you didn't put it in the same section as KVM_CREATE_SPAPR_TCE. =20
> 0xe0
> > begins a different section.
>=20
> It is not really obvious that there are sections as no comment defines
> those :)
There is a comment /* ioctls for fds returned by KVM_CREATE_DEVICE */
Putting KVM_CREATE_DEVICE in there was mainly to avoid dealing with the
ioctl number conflict mess in the vm-ioctl section, but at least that
one is related to the device control API. :-)
> But yes, makes sense to move it up a bit and change the code to 0xad.
0xad is KVM_KVMCLOCK_CTRL
-Scott
^ permalink raw reply
* Re: [PATCH 3/4] KVM: PPC: Add support for IOMMU in-kernel handling
From: Alexey Kardashevskiy @ 2013-05-29 23:29 UTC (permalink / raw)
To: Scott Wood
Cc: kvm, Alexander Graf, kvm-ppc, linux-kernel, Paul Mackerras,
linuxppc-dev, David Gibson
In-Reply-To: <1369869272.18630.47@snotra>
On 05/30/2013 09:14 AM, Scott Wood wrote:
> On 05/29/2013 06:10:33 PM, Alexey Kardashevskiy wrote:
>> On 05/30/2013 06:05 AM, Scott Wood wrote:
>> > On 05/28/2013 07:12:32 PM, Alexey Kardashevskiy wrote:
>> >> On 05/29/2013 09:35 AM, Scott Wood wrote:
>> >> > On 05/28/2013 06:30:40 PM, Alexey Kardashevskiy wrote:
>> >> >> >> >>> @@ -939,6 +940,9 @@ struct kvm_s390_ucas_mapping {
>> >> >> >> >>> #define KVM_GET_DEVICE_ATTR _IOW(KVMIO, 0xe2, struct
>> >> >> >> >>> kvm_device_attr)
>> >> >> >> >>> #define KVM_HAS_DEVICE_ATTR _IOW(KVMIO, 0xe3, struct
>> >> >> >> >>> kvm_device_attr)
>> >> >> >> >>>
>> >> >> >> >>> +/* ioctl for SPAPR TCE IOMMU */
>> >> >> >> >>> +#define KVM_CREATE_SPAPR_TCE_IOMMU _IOW(KVMIO, 0xe4, struct
>> >> >> >> >>> kvm_create_spapr_tce_iommu)
>> >> >> >> >>
>> >> >> >> >> Shouldn't this go under the vm ioctl section?
>> >> >> >>
>> >> >> >>
>> >> >> >> The KVM_CREATE_SPAPR_TCE_IOMMU ioctl (the version for emulated
>> >> >> devices) is
>> >> >> >> in this section so I decided to keep them together. Wrong?
>> >> >> >
>> >> >> > You decided to keep KVM_CREATE_SPAPR_TCE_IOMMU together with
>> >> >> > KVM_CREATE_SPAPR_TCE_IOMMU?
>> >> >>
>> >> >> Yes.
>> >> >
>> >> > Sigh. That's the same thing repeated. There's only one IOCTL.
>> >> Nothing is
>> >> > being "kept together".
>> >>
>> >> Sorry, I meant this ioctl - KVM_CREATE_SPAPR_TCE.
>> >
>> > But you didn't put it in the same section as KVM_CREATE_SPAPR_TCE. 0xe0
>> > begins a different section.
>>
>> It is not really obvious that there are sections as no comment defines
>> those :)
>
> There is a comment /* ioctls for fds returned by KVM_CREATE_DEVICE */
>
> Putting KVM_CREATE_DEVICE in there was mainly to avoid dealing with the
> ioctl number conflict mess in the vm-ioctl section, but at least that one
> is related to the device control API. :-)
>
>> But yes, makes sense to move it up a bit and change the code to 0xad.
>
> 0xad is KVM_KVMCLOCK_CTRL
That's it. I am _completely_ confused now. No system whatsoever :(
What rule should I use in order to choose the number for my new ioctl? :)
--
Alexey
^ permalink raw reply
* Re: [PATCH 3/4] KVM: PPC: Add support for IOMMU in-kernel handling
From: Scott Wood @ 2013-05-29 23:32 UTC (permalink / raw)
To: Alexey Kardashevskiy
Cc: kvm, Alexander Graf, kvm-ppc, linux-kernel, Paul Mackerras,
linuxppc-dev, David Gibson
In-Reply-To: <51A68F49.6020908@ozlabs.ru>
On 05/29/2013 06:29:13 PM, Alexey Kardashevskiy wrote:
> On 05/30/2013 09:14 AM, Scott Wood wrote:
> > On 05/29/2013 06:10:33 PM, Alexey Kardashevskiy wrote:
> >> On 05/30/2013 06:05 AM, Scott Wood wrote:
> >> > But you didn't put it in the same section as =20
> KVM_CREATE_SPAPR_TCE. 0xe0
> >> > begins a different section.
> >>
> >> It is not really obvious that there are sections as no comment =20
> defines
> >> those :)
> >
> > There is a comment /* ioctls for fds returned by KVM_CREATE_DEVICE =20
> */
> >
> > Putting KVM_CREATE_DEVICE in there was mainly to avoid dealing with =20
> the
> > ioctl number conflict mess in the vm-ioctl section, but at least =20
> that one
> > is related to the device control API. :-)
> >
> >> But yes, makes sense to move it up a bit and change the code to =20
> 0xad.
> >
> > 0xad is KVM_KVMCLOCK_CTRL
>=20
> That's it. I am _completely_ confused now. No system whatsoever :(
> What rule should I use in order to choose the number for my new =20
> ioctl? :)
Yeah, it's a mess. 0xaf seems to be free. :-)
-Scott
^ permalink raw reply
* Re: [PATCH] KVM: PPC: Book3S: Add support for H_IPOLL and H_XIRR_X in XICS emulation
From: Scott Wood @ 2013-05-29 23:38 UTC (permalink / raw)
To: Benjamin Herrenschmidt
Cc: kvm, Gleb Natapov, Marcelo Tosatti, Alexander Graf, kvm-ppc,
linuxppc-dev, Paul Mackerras
In-Reply-To: <1369788078.3928.28.camel@pasglop>
On 05/28/2013 07:41:18 PM, Benjamin Herrenschmidt wrote:
> On Tue, 2013-05-28 at 12:41 -0500, Scott Wood wrote:
>=20
> > I believe Alex is staying far away from e-mail on his vacation. =20
> He's
> > asked me to fill in for him while he's gone.
> >
> > The patch itself seems reasonable (though I don't know much about =20
> XICS,
> > and do have one question...), but I'll leave it up to =20
> Gleb/Marcelo/Ben
> > if it should go in for 3.10 and via which tree. I understand the
> > desire to not have an incomplete ABI in a released version, but =20
> Linus
> > is already grumbling about how much went into rc3, and you say the
> > hcalls aren't currently used... Are they likely to be used in any
> > timeframe in which we'd reasonably care about 3.10?
>=20
> Yes. I'd like to have them in. Their implementation is actually fairly
> trivial and they cannot be emulated by qemu if the rest of the XICS is
> in the kernel, so it's a problem.
OK. Does it make more sense for you to take it as Paul suggested, or
for Gleb or Marcelo to pick it up directly?
> > > + /* These requests don't have real-mode implementations at
> > > present */
> > > + switch (req) {
> > > + case H_XIRR_X:
> > > + res = kvmppc_h_xirr(vcpu);
> > > + kvmppc_set_gpr(vcpu, 4, res);
> > > + kvmppc_set_gpr(vcpu, 5, get_tb());
> > > + return rc;
> > > + case H_IPOLL:
> > > + rc = kvmppc_h_ipoll(vcpu, kvmppc_get_gpr(vcpu, 4));
> > > + return rc;
> > > + }
> > > +
> > > /* Check for real mode returning too hard */
> > > if (xics->real_mode)
> > > return kvmppc_xics_rm_complete(vcpu, req);
> >
> > Could you explain what's going on here relative to
> > kvmppc_xics_rm_complete()? What does "returning too hard" mean, and
> > why must rm_action not be checked for these hcalls?
>=20
> This is related to how we handle some hcalls in real mode as a fast
> path. The real-mode stuff cannot handle cases that require for =20
> example a
> re-emit of the interrupt, a reject, etc... so in some cases, it =20
> returns
> H_TOO_HARD which causes KVM to exit and try to handle the hcall again =20
> in
> kernel virtual mode.
>=20
> When doing so as the result of a XICS hcall, it sets a bit mask of
> "tasks" to handle in virtual mode (because it will have already
> partially done the operation, it cannot just re-play the whole hcall).
>=20
> So when real-mode is supported we must not just call the normal =20
> virtual
> mode version of the hcalls, we instead go to kvmppc_xics_rm_complete()
> to handle those "tasks".
>=20
> However, for those 2 "missing" hcalls, we have no real mode
> implementation at all (we didn't bother, we will do that later if
> needed, it's purely a performance issue). So we need to fully handle
> them in virtual mode, and we know there will be no "tasks" to handle =20
> in
> rm_complete.
Then rm_action should always be 0 for these hcalls, right? So there's
no correctness reason to keep the hcalls in separate switch
statements. You shave off a few cycles checking rm_action, at the cost
of needing to change kvmppc_xics_hcall() if a real-mode version of
these hcalls is ever done.
-Scott
^ permalink raw reply
* Re: [PATCH] KVM: PPC: Book3S: Add support for H_IPOLL and H_XIRR_X in XICS emulation
From: Benjamin Herrenschmidt @ 2013-05-29 23:57 UTC (permalink / raw)
To: Scott Wood
Cc: kvm, Gleb Natapov, Marcelo Tosatti, Alexander Graf, kvm-ppc,
linuxppc-dev, Paul Mackerras
In-Reply-To: <1369870703.18630.49@snotra>
On Wed, 2013-05-29 at 18:38 -0500, Scott Wood wrote:
> > Yes. I'd like to have them in. Their implementation is actually fairly
> > trivial and they cannot be emulated by qemu if the rest of the XICS is
> > in the kernel, so it's a problem.
>
> OK. Does it make more sense for you to take it as Paul suggested, or
> for Gleb or Marcelo to pick it up directly?
I'll take it.
> Then rm_action should always be 0 for these hcalls, right? So there's
> no correctness reason to keep the hcalls in separate switch
> statements. You shave off a few cycles checking rm_action, at the cost
> of needing to change kvmppc_xics_hcall() if a real-mode version of
> these hcalls is ever done.
No, because rm_action will also be 0 if the hcall was fully done in real
mode (which can happen, that's our fast path), in which case we do *NOT*
want it to be re-done in virtual mode.
That's why we always return whether rm_action is 0 or not when real-mode
is enabled.
Cheers,
Ben.
^ permalink raw reply
* Re: [PATCH] KVM: PPC: Book3S: Add support for H_IPOLL and H_XIRR_X in XICS emulation
From: Scott Wood @ 2013-05-30 0:07 UTC (permalink / raw)
To: Benjamin Herrenschmidt
Cc: kvm, Gleb Natapov, Marcelo Tosatti, Alexander Graf, kvm-ppc,
linuxppc-dev, Paul Mackerras
In-Reply-To: <1369871852.3928.79.camel@pasglop>
On 05/29/2013 06:57:32 PM, Benjamin Herrenschmidt wrote:
> On Wed, 2013-05-29 at 18:38 -0500, Scott Wood wrote:
>=20
> > > Yes. I'd like to have them in. Their implementation is actually =20
> fairly
> > > trivial and they cannot be emulated by qemu if the rest of the =20
> XICS is
> > > in the kernel, so it's a problem.
> >
> > OK. Does it make more sense for you to take it as Paul suggested, =20
> or
> > for Gleb or Marcelo to pick it up directly?
>=20
> I'll take it.
Acked-by: Scott Wood <scottwood@freescale.com>
> > Then rm_action should always be 0 for these hcalls, right? So =20
> there's
> > no correctness reason to keep the hcalls in separate switch
> > statements. You shave off a few cycles checking rm_action, at the =20
> cost
> > of needing to change kvmppc_xics_hcall() if a real-mode version of
> > these hcalls is ever done.
>=20
> No, because rm_action will also be 0 if the hcall was fully done in =20
> real
> mode (which can happen, that's our fast path), in which case we do =20
> *NOT*
> want to to be re-done in virtual mode.
>=20
> That's why we always return whether rm_action is 0 or not when =20
> real-mode
> is enabled.
Oh, I misread the code and thought the decision to return was based on
the return value of kvmppc_xics_rm_complete. Sorry about that. :-(
-Scott
^ permalink raw reply
* Re: [PATCH] powerpc/mpc85xx: match with the pci bus address used by u-boot for all p1_p2_rdb_pc boards
From: Kevin Hao @ 2013-05-30 3:25 UTC (permalink / raw)
To: Scott Wood; +Cc: linuxppc
In-Reply-To: <1369781156.18630.24@snotra>
[-- Attachment #1: Type: text/plain, Size: 2526 bytes --]
On Tue, May 28, 2013 at 05:45:56PM -0500, Scott Wood wrote:
> On 05/16/2013 01:29:45 AM, Kevin Hao wrote:
> >All these boards use the same configuration file p1_p2_rdb_pc.h in
> >u-boot. So they have the same pci bus address set by the u-boot.
> >But in some of these boards the bus address set in dtb don't match
> >the one used by u-boot. And this will trigger a kernel bug in 32bit
> >kernel and cause the pci device malfunction. For example, on a
> >p2020rdb-pc board the u-boot use the 0xa0000000 as both bus address
> >and cpu address for one pci controller and then assign bus address
> >such as 0xa0004000 to some pci device. But in the kernel, the dtb
> >set the bus address to 0xe0000000 and the cpu address to 0xa0000000.
> >The kernel assumes mistakenly the assigned bus address 0xa0004000
> >in pci device is correct and keep it unchanged. This will definitely
> >cause the pci device malfunction. I have made two patches to fix
> >this in the pci subsystem.
> >http://patchwork.ozlabs.org/patch/243702/
> >http://patchwork.ozlabs.org/patch/243703/
> >
> >But I still think it makes sense to set these bus address to match
> >with the u-boot. This issue can't be reproduced on 36bit kernel.
> >But I also tweak the 36bit dtb for the above reason.
>
> IIRC the reason for using 0xe0000000 on all PCIe roots is to
> maximize the memory that is DMA-addressable without involving
> swiotlb.
OK, this sounds reasonable. I can drop the changes for the 36bit dts. But for
the 32bit dts, it does cause the kernel to hang on my p2020rdb-pca board when
the SiI3132 driver probes the on-board PCIe-to-SATA controller. I think this
issue applies to all of these boards whenever a PCI device is plugged in. So
we should fix them ASAP.
>
> Maybe U-Boot should be fixed?
Maybe. I have created a patch for the kernel to detect this kind of mismatch
between kernel and bootloader and then try to reassign the bus address
automatically.
https://git.kernel.org/cgit/linux/kernel/git/helgaas/pci.git/commit/?h=next&id=cf4d1cf5ac5e7d2b886af6ed906ea0dcdc5b6855
So with that PCI patch the kernel should just work even without this dts patch
or a fix for u-boot. But the PCI patch is only queued for 3.11. So I would like
to tweak the 32bit dts to accommodate u-boot for now, so that we can make sure
these boards are at least bootable with 3.10 or previous kernels. Then we
can revert this dts patch to get more DMA address space once the PCI patches
are merged into mainline.
Thanks,
Kevin
>
> -Scott
[-- Attachment #2: Type: application/pgp-signature, Size: 490 bytes --]
^ permalink raw reply
* can't access PCIe card under sbc8548
From: wolfking @ 2013-05-30 3:42 UTC (permalink / raw)
To: linuxppc-dev
hi, all
I'm doing some development on Wind River's sbc8548 board. The kernel I use
is 3.6.10 and the u-boot version is 2012-10. I changed the board's
configuration: the board now boots from the 64MB SODIMM flash (not the
default 8MB on-board flash memory), and the PCI clock rate is changed to
33MHz.
The trouble I am in is this: the PCI card (an rtl8139 NIC) can be accessed
fine, while the PCIe card doesn't work, that is, the kernel can't access
its internal registers. The kernel correctly probes the PCIe card. Its BAR0
is an I/O-mapped register; I use ioport_map to map BAR0 into the kernel's
address space, then use ioread8/iowrite8 to access its internal registers,
but that doesn't work.
I analysed the ioport_map function and found that it just adds the input
parameter to a fixed _IO_BASE value. Below is the function:
void __iomem *ioport_map(unsigned long port, unsigned int len)
{
return (void __iomem *) (port + _IO_BASE);
}
The _IO_BASE value on the sbc8548 is 0xfd7fd000, and the value of the
ioport_map parameter "port" is 0xfefff000. Obviously the addition
overflows, so the follow-up operations can't succeed. The value of "port"
comes from the function pci_resource_start. So I guess the kernel allocates
a bad address to this PCIe card. How can I fix this?
I also plugged this PCIe card into a Freescale mpc8641-hpcn board and tried
the same driver. I noticed that ioport_map also overflows there, but it
works fine. So I'm confused. :<
--
View this message in context: http://linuxppc.10917.n7.nabble.com/can-t-access-PCIe-card-under-sbc8548-tp71775.html
Sent from the linuxppc-dev mailing list archive at Nabble.com.
^ permalink raw reply
* [PATCH] powerpc/pseries: Kill all prefetch streams on context switch
From: Michael Neuling @ 2013-05-30 5:34 UTC (permalink / raw)
To: benh; +Cc: Linux PPC dev, anton, miltonm
On context switch, we should have no prefetch streams leak from one
userspace process to another. This frees up prefetch resources for the
next process.
Based on patch from Milton Miller.
Signed-off-by: Michael Neuling <mikey@neuling.org>
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index cea8496..2f1b6c5 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -523,6 +523,17 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,946)
#define PPC440EP_ERR42
#endif
+/* The following stops all load and store data streams associated with stream
+ * ID (ie. streams created explicitly). The embedded and server mnemonics for
+ * dcbt are different so we use machine "power4" here explicitly.
+ */
+#define DCBT_STOP_ALL_STREAM_IDS(scratch) \
+.machine push ; \
+.machine "power4" ; \
+ lis scratch,0x60000000@h; \
+ dcbt r0,scratch,0b01010; \
+.machine pop
+
/*
* toreal/fromreal/tophys/tovirt macros. 32-bit BookE makes them
* keep the address intact to be compatible with code shared with
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 0e9095e..246b11c 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -501,6 +501,13 @@ BEGIN_FTR_SECTION
ldarx r6,0,r1
END_FTR_SECTION_IFSET(CPU_FTR_STCX_CHECKS_ADDRESS)
+#ifdef CONFIG_PPC_BOOK3S
+/* Cancel all explicit user streams as they will have no use after context
+ * switch and will stop the HW from creating streams itself
+ */
+ DCBT_STOP_ALL_STREAM_IDS(r6)
+#endif
+
addi r6,r4,-THREAD /* Convert THREAD to 'current' */
std r6,PACACURRENT(r13) /* Set new 'current' */
^ permalink raw reply related
* [PATCH] powerpc/pseries: Improve stream generation comments in copypage/user
From: Michael Neuling @ 2013-05-30 5:34 UTC (permalink / raw)
To: benh; +Cc: Linux PPC dev, anton
No code changes, just documenting what's happening a little better.
Signed-off-by: Michael Neuling <mikey@neuling.org>
diff --git a/arch/powerpc/lib/copypage_power7.S b/arch/powerpc/lib/copypage_power7.S
index 0ef75bf..395c594 100644
--- a/arch/powerpc/lib/copypage_power7.S
+++ b/arch/powerpc/lib/copypage_power7.S
@@ -28,13 +28,14 @@ _GLOBAL(copypage_power7)
* aligned we don't need to clear the bottom 7 bits of either
* address.
*/
- ori r9,r3,1 /* stream=1 */
+ ori r9,r3,1 /* stream=1 => to */
#ifdef CONFIG_PPC_64K_PAGES
- lis r7,0x0E01 /* depth=7, units=512 */
+ lis r7,0x0E01 /* depth=7
+ * units/cachelines=512 */
#else
lis r7,0x0E00 /* depth=7 */
- ori r7,r7,0x1000 /* units=32 */
+ ori r7,r7,0x1000 /* units/cachelines=32 */
#endif
ori r10,r7,1 /* stream=1 */
@@ -43,12 +44,14 @@ _GLOBAL(copypage_power7)
.machine push
.machine "power4"
- dcbt r0,r4,0b01000
- dcbt r0,r7,0b01010
- dcbtst r0,r9,0b01000
- dcbtst r0,r10,0b01010
+ /* setup read stream 0 */
+ dcbt r0,r4,0b01000 /* addr from */
+ dcbt r0,r7,0b01010 /* length and depth from */
+ /* setup write stream 1 */
+ dcbtst r0,r9,0b01000 /* addr to */
+ dcbtst r0,r10,0b01010 /* length and depth to */
eieio
- dcbt r0,r8,0b01010 /* GO */
+ dcbt r0,r8,0b01010 /* all streams GO */
.machine pop
#ifdef CONFIG_ALTIVEC
diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S
index 0d24ff1..d1f1179 100644
--- a/arch/powerpc/lib/copyuser_power7.S
+++ b/arch/powerpc/lib/copyuser_power7.S
@@ -318,12 +318,14 @@ err1; stb r0,0(r3)
.machine push
.machine "power4"
- dcbt r0,r6,0b01000
- dcbt r0,r7,0b01010
- dcbtst r0,r9,0b01000
- dcbtst r0,r10,0b01010
+ /* setup read stream 0 */
+ dcbt r0,r6,0b01000 /* addr from */
+ dcbt r0,r7,0b01010 /* length and depth from */
+ /* setup write stream 1 */
+ dcbtst r0,r9,0b01000 /* addr to */
+ dcbtst r0,r10,0b01010 /* length and depth to */
eieio
- dcbt r0,r8,0b01010 /* GO */
+ dcbt r0,r8,0b01010 /* all streams GO */
.machine pop
beq cr1,.Lunwind_stack_nonvmx_copy
^ permalink raw reply related
* 3.10-rc ppc64 corrupts usermem when swapping
From: Hugh Dickins @ 2013-05-30 5:47 UTC (permalink / raw)
To: Aneesh Kumar K.V; +Cc: Paul Mackerras, linuxppc-dev, David Gibson
Running my favourite swapping load (repeated make -j20 kernel builds
in tmpfs in parallel with repeated make -j20 kernel builds in ext4 on
loop on tmpfs file, all limited by mem=700M and swap 1.5G) on 3.10-rc
on PowerMac G5, the test dies with corrupted usermem after a few hours.
Variously, segmentation fault or Binutils assertion fail or gcc Internal
error in either or both builds: usually signs of swapping or TLB flushing
gone wrong. Sometimes the tmpfs build breaks first, sometimes the ext4 on
loop on tmpfs, so at least it looks unrelated to loop. No problem on x86.
This is 64-bit kernel but 4k pages and old SuSE 11.1 32-bit userspace.
I've just finished a manual bisection on arch/powerpc/mm (which might
have been a wrong guess, but has paid off): the first bad commit is
7e74c3921ad9610c0b49f28b8fc69f7480505841
"powerpc: Fix hpte_decode to use the correct decoding for page sizes".
I don't know if it's actually swapping to swap that's triggering the
problem, or a more general page reclaim or TLB flush problem. I hit
it originally when trying to test Mel Gorman's pagevec series on top
of 3.10-rc; and though I then reproduced it without that series, it
did seem to take much longer: so I have been applying Mel's series to
speed up each step of the bisection. But if I went back again, might
find it was just chance that I hit it sooner with Mel's series than
without. So, you're probably safe to ignore that detail, but I
mention it just in case it turns out to have some relevance.
Something else peculiar that I've been doing in these runs, may or may
not be relevant: I've been running swapon and swapoff repeatedly in the
background, so that we're doing swapoff even while busy building.
I probably can't go into much more detail on the test (it's hard
to get the balance right, to be swapping rather than OOMing or just
running without reclaim), but can test any patches you'd like me to
try (though it may take 24 hours for me to report back usefully).
Thanks,
Hugh
^ permalink raw reply
* Re: can't access PCIe card under sbc8548
From: tiejun.chen @ 2013-05-30 5:56 UTC (permalink / raw)
To: wolfking; +Cc: linuxppc-dev
In-Reply-To: <1369885321567-71775.post@n7.nabble.com>
On 05/30/2013 11:42 AM, wolfking wrote:
> hi, all
> I'm doing some developing on the windriver's sbc8548 board. The kernel I
> use
> is 3.6.10 and the u-boot version is 2012-10. I changed the board's
> configuration:
> the board now boot from the 64MB SODIMM Flash (not the default 8MB on-board
> Flash
> memory), and the PCI clock rate is changed to 33MHZ.
> Now the trouble I am in is that: the PCI card (a NIC card rtl8139) can be
> accessed OK, while the PCIe card can't work, that is, the kernel can't
> access
> its internal register. The kernel can correctly probe the PCIe card. its
> BAR0
> is a I/O mapped register, I use ioport_map to map the BAR0 to kernel's
> address
> space, then use ioread8/iowrite8 to access its internal register, it doesn't
> work.
> I analyse the ioport_map function and find it just add the input parameter
> to
> a fixed _IO_BASE value, below is the function:
> void __iomem *ioport_map(unsigned long port, unsigned int len)
> {
> return (void __iomem *) (port + _IO_BASE);
> }
> the _IO_BASE value under sbc8548 is 0xfd7fd000, the value of ioport_map
> paramenter
> "port" is 0xfefff000. Obviously the add overflows, so the follow-up
> operations
> can't succeed. The value of "port" is got from the function
> pci_resource_start.
In the PPC case, I/O is memory-mapped, so you should use ioremap() instead of
ioport_map().
Tiejun
^ permalink raw reply
* [PATCH] powerpc/smp: use '==' instead of '<' for system_state
From: liguang @ 2013-05-30 6:47 UTC (permalink / raw)
To: Arnd Bergmann, Benjamin Herrenschmidt, Paul Mackerras
Cc: cbe-oss-dev, Greg Kroah-Hartman, linux-kernel, Rob Herring,
Nathan Fontenot, linuxppc-dev, liguang
'system_state < SYSTEM_RUNNING' has the same effect
as 'system_state == SYSTEM_BOOTING', but the latter
is clearer.
Signed-off-by: liguang <lig.fnst@cn.fujitsu.com>
---
arch/powerpc/platforms/cell/smp.c | 2 +-
arch/powerpc/platforms/powernv/smp.c | 2 +-
arch/powerpc/platforms/pseries/smp.c | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/arch/powerpc/platforms/cell/smp.c b/arch/powerpc/platforms/cell/smp.c
index d35dbbc..f75f6fc 100644
--- a/arch/powerpc/platforms/cell/smp.c
+++ b/arch/powerpc/platforms/cell/smp.c
@@ -142,7 +142,7 @@ static int smp_cell_cpu_bootable(unsigned int nr)
* during boot if the user requests it. Odd-numbered
* cpus are assumed to be secondary threads.
*/
- if (system_state < SYSTEM_RUNNING &&
+ if (system_state == SYSTEM_BOOTING &&
cpu_has_feature(CPU_FTR_SMT) &&
!smt_enabled_at_boot && cpu_thread_in_core(nr) != 0)
return 0;
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index 88c9459..77784ae 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -51,7 +51,7 @@ static int pnv_smp_cpu_bootable(unsigned int nr)
/* Special case - we inhibit secondary thread startup
* during boot if the user requests it.
*/
- if (system_state < SYSTEM_RUNNING && cpu_has_feature(CPU_FTR_SMT)) {
+ if (system_state == SYSTEM_BOOTING && cpu_has_feature(CPU_FTR_SMT)) {
if (!smt_enabled_at_boot && cpu_thread_in_core(nr) != 0)
return 0;
if (smt_enabled_at_boot
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index 12bc8c3..306643c 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -192,7 +192,7 @@ static int smp_pSeries_cpu_bootable(unsigned int nr)
/* Special case - we inhibit secondary thread startup
* during boot if the user requests it.
*/
- if (system_state < SYSTEM_RUNNING && cpu_has_feature(CPU_FTR_SMT)) {
+ if (system_state == SYSTEM_BOOTING && cpu_has_feature(CPU_FTR_SMT)) {
if (!smt_enabled_at_boot && cpu_thread_in_core(nr) != 0)
return 0;
if (smt_enabled_at_boot
--
1.7.2.5
^ permalink raw reply related
* Re: 3.10-rc ppc64 corrupts usermem when swapping
From: Benjamin Herrenschmidt @ 2013-05-30 7:00 UTC (permalink / raw)
To: Hugh Dickins
Cc: linuxppc-dev, Anton Blanchard, Paul Mackerras, Aneesh Kumar K.V,
David Gibson
In-Reply-To: <alpine.LNX.2.00.1305292148550.9560@eggly.anvils>
On Wed, 2013-05-29 at 22:47 -0700, Hugh Dickins wrote:
> Running my favourite swapping load (repeated make -j20 kernel builds
> in tmpfs in parallel with repeated make -j20 kernel builds in ext4 on
> loop on tmpfs file, all limited by mem=700M and swap 1.5G) on 3.10-rc
> on PowerMac G5, the test dies with corrupted usermem after a few hours.
>
> Variously, segmentation fault or Binutils assertion fail or gcc Internal
> error in either or both builds: usually signs of swapping or TLB flushing
> gone wrong. Sometimes the tmpfs build breaks first, sometimes the ext4 on
> loop on tmpfs, so at least it looks unrelated to loop. No problem on x86.
>
> This is 64-bit kernel but 4k pages and old SuSE 11.1 32-bit userspace.
>
> I've just finished a manual bisection on arch/powerpc/mm (which might
> have been a wrong guess, but has paid off): the first bad commit is
> 7e74c3921ad9610c0b49f28b8fc69f7480505841
> "powerpc: Fix hpte_decode to use the correct decoding for page sizes".
Ok, I have other reasons to think it is wrong. I debugged a case last week
where after kexec we still had stale TLB entries, due to the TLB cleanup
not working.
Thanks for doing that bisection ! I'll investigate ASAP (though it will
probably have to wait for tomorrow unless Paul beats me to it)
> I don't know if it's actually swapping to swap that's triggering the
> problem, or a more general page reclaim or TLB flush problem. I hit
> it originally when trying to test Mel Gorman's pagevec series on top
> of 3.10-rc; and though I then reproduced it without that series, it
> did seem to take much longer: so I have been applying Mel's series to
> speed up each step of the bisection. But if I went back again, might
> find it was just chance that I hit it sooner with Mel's series than
> without. So, you're probably safe to ignore that detail, but I
> mention it just in case it turns out to have some relevance.
>
> Something else peculiar that I've been doing in these runs, may or may
> not be relevant: I've been running swapon and swapoff repeatedly in the
> background, so that we're doing swapoff even while busy building.
>
> I probably can't go into much more detail on the test (it's hard
> to get the balance right, to be swapping rather than OOMing or just
> running without reclaim), but can test any patches you'd like me to
> try (though it may take 24 hours for me to report back usefully).
I think it's just failing to invalidate the TLB properly. At least one
bug I can spot just looking at it:
static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
int psize, int ssize, int local)
.../...
native_lock_hpte(hptep);
hpte_v = hptep->v;
actual_psize = hpte_actual_psize(hptep, psize);
if (actual_psize < 0) {
native_unlock_hpte(hptep);
local_irq_restore(flags);
return;
}
That's wrong. We must still perform the TLB invalidation even if the
hash PTE is empty.
In fact, Aneesh, this is a problem with MPSS for your THP work, I just
thought about it.
The reason is that if a hash bucket gets full, we "evict" a more/less
random entry from it. When we do that we don't invalidate the TLB
(hpte_remove) because we assume the old translation is still technically
"valid".
However that means that an hpte_invalidate *must* invalidate the TLB
later on even if it's not hitting the right entry in the hash.
However, I can see why that cannot work with THP/MPSS since you have no
way to know the page size from the PTE anymore....
So my question is, apart from hpte_decode used by kexec, which I will
fix by just blowing the whole TLB when not running phyp, why do you need
the "actual" size in invalidate and updatepp ? You really can't rely on
the size passed by the upper layers ?
Cheers,
Ben.
^ permalink raw reply
* [PATCH] powerpc/pseries: use 'true' instead of '1' for orderly_poweroff
From: liguang @ 2013-05-30 7:07 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras
Cc: linuxppc-dev, linux-kernel, liguang
orderly_poweroff is expecting a bool parameter, so
use 'ture' instead '1'
Signed-off-by: liguang <lig.fnst@cn.fujitsu.com>
---
arch/powerpc/platforms/pseries/ras.c | 8 ++++----
1 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index c4dfccd..79b9502 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -83,7 +83,7 @@ static void handle_system_shutdown(char event_modifier)
switch (event_modifier) {
case EPOW_SHUTDOWN_NORMAL:
pr_emerg("Firmware initiated power off");
- orderly_poweroff(1);
+ orderly_poweroff(true);
break;
case EPOW_SHUTDOWN_ON_UPS:
@@ -95,13 +95,13 @@ static void handle_system_shutdown(char event_modifier)
pr_emerg("Loss of system critical functions reported by "
"firmware");
pr_emerg("Check RTAS error log for details");
- orderly_poweroff(1);
+ orderly_poweroff(true);
break;
case EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH:
pr_emerg("Ambient temperature too high reported by firmware");
pr_emerg("Check RTAS error log for details");
- orderly_poweroff(1);
+ orderly_poweroff(true);
break;
default:
@@ -162,7 +162,7 @@ void rtas_parse_epow_errlog(struct rtas_error_log *log)
case EPOW_SYSTEM_HALT:
pr_emerg("Firmware initiated power off");
- orderly_poweroff(1);
+ orderly_poweroff(ture);
break;
case EPOW_MAIN_ENCLOSURE:
--
1.7.2.5
^ permalink raw reply related
* Re: [PATCH] powerpc/pseries: use 'true' instead of '1' for orderly_poweroff
From: Joe Perches @ 2013-05-30 7:14 UTC (permalink / raw)
To: liguang; +Cc: Paul Mackerras, linuxppc-dev, linux-kernel
In-Reply-To: <1369897651-23770-1-git-send-email-lig.fnst@cn.fujitsu.com>
On Thu, 2013-05-30 at 15:07 +0800, liguang wrote:
> orderly_poweroff is expecting a bool parameter, so
> use 'ture' instead '1'
[]
> diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
[]
> @@ -162,7 +162,7 @@ void rtas_parse_epow_errlog(struct rtas_error_log *log)
>
> case EPOW_SYSTEM_HALT:
> pr_emerg("Firmware initiated power off");
> - orderly_poweroff(1);
> + orderly_poweroff(ture);
> break;
Compile your patches _before_ submitting them please.
"true" not "ture" here and in the commit message.
^ permalink raw reply
* Re: can't access PCIe card under sbc8548
From: wolfking @ 2013-05-30 7:19 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <51A6EA1D.7080100@windriver.com>
hi, tiejun.chen:
Thanks for replying.
I tried to use ioremap too, but it doesn't work. The ioport_map method
succeeded
under mpc8641d which is also a ppc.
In fact, I also tried using pci_iomap by imitating rtl8139's codes. The
rtl8139's BAR0
is also a I/O mapped register and its code succeeded in accessing its
registers by
using pci_iomap under sbc8548. But when I used pci_iomap for my PCIe card,
its
output is just the same with the method of using ioport_map. Any suggestion?
--
View this message in context: http://linuxppc.10917.n7.nabble.com/can-t-access-PCIe-card-under-sbc8548-tp71775p71782.html
Sent from the linuxppc-dev mailing list archive at Nabble.com.
^ permalink raw reply
* Re: [PATCH] powerpc/pseries: use 'true' instead of '1' for orderly_poweroff
From: li guang @ 2013-05-30 7:20 UTC (permalink / raw)
To: Joe Perches; +Cc: Paul Mackerras, linuxppc-dev, linux-kernel
In-Reply-To: <1369898064.22004.134.camel@joe-AO722>
=E5=9C=A8 2013-05-30=E5=9B=9B=E7=9A=84 00:14 -0700=EF=BC=8CJoe Perches=E5=
=86=99=E9=81=93=EF=BC=9A
> On Thu, 2013-05-30 at 15:07 +0800, liguang wrote:
> > orderly_poweroff is expecting a bool parameter, so
> > use 'ture' instead '1'
> []
> > diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platfo=
rms/pseries/ras.c
> []
> > @@ -162,7 +162,7 @@ void rtas_parse_epow_errlog(struct rtas_error_log *=
log)
> > =20
> > case EPOW_SYSTEM_HALT:
> > pr_emerg("Firmware initiated power off");
> > - orderly_poweroff(1);
> > + orderly_poweroff(ture);
> > break;
>=20
> Compile your patches _before_ submitting them please.
>=20
> "true" not "ture" here and in the commit message.
>=20
right, thanks!
^ permalink raw reply
* [PATCHv2] powerpc/pseries: use 'true' instead of '1' for orderly_poweroff
From: liguang @ 2013-05-30 7:20 UTC (permalink / raw)
To: Benjamin Herrenschmidt, Paul Mackerras
Cc: linuxppc-dev, linux-kernel, liguang
orderly_poweroff is expecting a bool parameter, so
use 'true' instead of '1'
Signed-off-by: liguang <lig.fnst@cn.fujitsu.com>
---
arch/powerpc/platforms/pseries/ras.c | 8 ++++----
1 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index c4dfccd..79b9502 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -83,7 +83,7 @@ static void handle_system_shutdown(char event_modifier)
switch (event_modifier) {
case EPOW_SHUTDOWN_NORMAL:
pr_emerg("Firmware initiated power off");
- orderly_poweroff(1);
+ orderly_poweroff(true);
break;
case EPOW_SHUTDOWN_ON_UPS:
@@ -95,13 +95,13 @@ static void handle_system_shutdown(char event_modifier)
pr_emerg("Loss of system critical functions reported by "
"firmware");
pr_emerg("Check RTAS error log for details");
- orderly_poweroff(1);
+ orderly_poweroff(true);
break;
case EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH:
pr_emerg("Ambient temperature too high reported by firmware");
pr_emerg("Check RTAS error log for details");
- orderly_poweroff(1);
+ orderly_poweroff(true);
break;
default:
@@ -162,7 +162,7 @@ void rtas_parse_epow_errlog(struct rtas_error_log *log)
case EPOW_SYSTEM_HALT:
pr_emerg("Firmware initiated power off");
- orderly_poweroff(1);
+ orderly_poweroff(true);
break;
case EPOW_MAIN_ENCLOSURE:
--
1.7.2.5
^ permalink raw reply related
* Re: can't access PCIe card under sbc8548
From: wolfking @ 2013-05-30 7:32 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <51A6EA1D.7080100@windriver.com>
(continued)
I traced the 8139too.c when it uses pci_iomap, the pci_iomap called the
ioport_map. The difference between 8139 and my PCIe card lies in the
"port" value :
void __iomem *ioport_map(unsigned long port, unsigned int len)
{
return (void __iomem *) (port + _IO_BASE);
}
In 8139too.c, the "port" value is 0x1000; for my PCIe card, the "port" value
is 0xfefff000. That value comes from pci_resource_start. So you see, the
8139 case doesn't overflow, but my PCIe card overflows when _IO_BASE's value is
0xfd7fd000.
--
View this message in context: http://linuxppc.10917.n7.nabble.com/can-t-access-PCIe-card-under-sbc8548-tp71775p71783.html
Sent from the linuxppc-dev mailing list archive at Nabble.com.
^ permalink raw reply
* RE: SATA hang on 8315E triggered by heavy flash write?
From: Xie Shaohui-B21989 @ 2013-05-30 7:32 UTC (permalink / raw)
To: Anthony Foiani; +Cc: Wood Scott-B07421, linuxppc-dev@lists.ozlabs.org
In-Reply-To: <g7gik2bga.fsf@dworkin.scrye.com>
Hi, Anthony Foiani,
I found a MPC8315ERDB rev1.0 board and did some tests.
First there is no limit speed issue on the board, so it seems it may only happen on the MPC8315DS board.
Second, the SATA can work well with NOR write operation on the ERDB board.
So the two issues happened to you should be board issues.
Best Regards,
Shaohui Xie
> -----Original Message-----
> From: Anthony Foiani [mailto:tkil@scrye.com]
> Sent: Tuesday, May 28, 2013 8:30 AM
> To: Xie Shaohui-B21989
> Cc: Wood Scott-B07421; linuxppc-dev@lists.ozlabs.org
> Subject: Re: SATA hang on 8315E triggered by heavy flash write?
>=20
> Shaohio --
>=20
> Once again, thanks for the reply.
>=20
> Xie Shaohui-B21989 <B21989@freescale.com> writes:
>=20
> > it seems [recovery or lack of recovery is] not due to speed limiting
> > code, 1.5Gbps is still used to recover link.
>=20
> Right, I noticed this in my later email on this topic.
>=20
> > for the speed limit issue, I checked 3.4.rc7 kernel, there seems a
> > place can be used to limit the speed for 8315:
> >
> > if (!of_device_is_compatible(ofdev->dev.of_node, "fsl,mpc8315-sata")) {
> > temp = ioread32(csr_base + TRANSCFG);
> > temp = temp & 0xffffffe0;
> > iowrite32(temp | TRANSCFG_RX_WATER_MARK, csr_base + TRANSCFG);
> > } else {
> > /* the speed limitation code for 8315 may can be put here.
> > * just move the original code which wrapped by "#ifdef CONFIG_MPC8315_DS" here.
> > * please let me know if you will give a try. */
> > }
>
> It's not clear that all uses of the MPC8315 SATA controller have this
> problem. It obviously occurred on the 8315DS, but apparently that board
> never made it to production; it might or might not happen on the 8315ERDB;
> and it clearly happens on my vendor's board.
>
> Given this lack of knowledge, Scott Wood was very hesitant to implement
> any far-reaching changes, because we could not pin down exactly what had
> to be tested.
>=20
> My original patch:
>=20
> http://article.gmane.org/gmane.linux.ports.ppc.embedded/58710
>=20
> did the speed limiting directly in the sata_fsl code; Jeff Garzik
> mentioned that there was some existing infrastructure for this, and I
> inferred that he would prefer that I use it.
>=20
> So that's what my final patch does, and it's keyed off a single OF value.
>=20
> There's a small chance that I'll have the opportunity to move the project
> to 3.9 (especially if that release gets declared long-term-stable). Even
> then, I unfortunately won't have the bandwidth to pursue getting any of
> these approaches into mainline.
> The best I can do is get them onto the lists so that others might be able
> to benefit later.
>=20
> Either way, thanks again. I'll try to put together a package for my
> vendor to test with; once I have demonstrated that there is a problem
> with their hardware, they have been gracious about accepting that result
> and pursuing it with Freescale if necessary.
>=20
> Thanks again for your help!
>=20
> Best regards,
> Anthony Foiani
^ permalink raw reply
* [PATCH 1/2] powerpc/pseries: Simplify denorm code
From: Michael Neuling @ 2013-05-30 7:33 UTC (permalink / raw)
To: Benjamin Herrenschmidt; +Cc: Michael Neuling, linuxppc-dev, Anton Blanchard
The following simplifies the denorm code by using macros to generate the long
stream of almost identical instructions.
This patch results in no changes to the output binary, but removes a lot of
lines of code.
Signed-off-by: Michael Neuling <mikey@neuling.org>
---
arch/powerpc/kernel/exceptions-64s.S | 80 +++++++---------------------------
1 file changed, 16 insertions(+), 64 deletions(-)
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index e6eba1b..3c9296b 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -454,38 +454,14 @@ BEGIN_FTR_SECTION
xori r10,r10,(MSR_FE0|MSR_FE1)
mtmsrd r10
sync
- fmr 0,0
- fmr 1,1
- fmr 2,2
- fmr 3,3
- fmr 4,4
- fmr 5,5
- fmr 6,6
- fmr 7,7
- fmr 8,8
- fmr 9,9
- fmr 10,10
- fmr 11,11
- fmr 12,12
- fmr 13,13
- fmr 14,14
- fmr 15,15
- fmr 16,16
- fmr 17,17
- fmr 18,18
- fmr 19,19
- fmr 20,20
- fmr 21,21
- fmr 22,22
- fmr 23,23
- fmr 24,24
- fmr 25,25
- fmr 26,26
- fmr 27,27
- fmr 28,28
- fmr 29,29
- fmr 30,30
- fmr 31,31
+
+#define FMR2(n) fmr (n), (n) ; fmr n+1, n+1
+#define FMR4(n) FMR2(n) ; FMR2(n+2)
+#define FMR8(n) FMR4(n) ; FMR4(n+4)
+#define FMR16(n) FMR8(n) ; FMR8(n+8)
+#define FMR32(n) FMR16(n) ; FMR16(n+16)
+ FMR32(0)
+
FTR_SECTION_ELSE
/*
* To denormalise we need to move a copy of the register to itself.
@@ -495,38 +471,14 @@ FTR_SECTION_ELSE
oris r10,r10,MSR_VSX@h
mtmsrd r10
sync
- XVCPSGNDP(0,0,0)
- XVCPSGNDP(1,1,1)
- XVCPSGNDP(2,2,2)
- XVCPSGNDP(3,3,3)
- XVCPSGNDP(4,4,4)
- XVCPSGNDP(5,5,5)
- XVCPSGNDP(6,6,6)
- XVCPSGNDP(7,7,7)
- XVCPSGNDP(8,8,8)
- XVCPSGNDP(9,9,9)
- XVCPSGNDP(10,10,10)
- XVCPSGNDP(11,11,11)
- XVCPSGNDP(12,12,12)
- XVCPSGNDP(13,13,13)
- XVCPSGNDP(14,14,14)
- XVCPSGNDP(15,15,15)
- XVCPSGNDP(16,16,16)
- XVCPSGNDP(17,17,17)
- XVCPSGNDP(18,18,18)
- XVCPSGNDP(19,19,19)
- XVCPSGNDP(20,20,20)
- XVCPSGNDP(21,21,21)
- XVCPSGNDP(22,22,22)
- XVCPSGNDP(23,23,23)
- XVCPSGNDP(24,24,24)
- XVCPSGNDP(25,25,25)
- XVCPSGNDP(26,26,26)
- XVCPSGNDP(27,27,27)
- XVCPSGNDP(28,28,28)
- XVCPSGNDP(29,29,29)
- XVCPSGNDP(30,30,30)
- XVCPSGNDP(31,31,31)
+
+#define XVCPSGNDP2(n) XVCPSGNDP(n,n,n) ; XVCPSGNDP(n+1,n+1,n+1)
+#define XVCPSGNDP4(n) XVCPSGNDP2(n) ; XVCPSGNDP2(n+2)
+#define XVCPSGNDP8(n) XVCPSGNDP4(n) ; XVCPSGNDP4(n+4)
+#define XVCPSGNDP16(n) XVCPSGNDP8(n) ; XVCPSGNDP8(n+8)
+#define XVCPSGNDP32(n) XVCPSGNDP16(n) ; XVCPSGNDP16(n+16)
+ XVCPSGNDP32(0)
+
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_206)
mtspr SPRN_HSRR0,r11
mtcrf 0x80,r9
--
1.7.10.4
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox