* Re: [PATCH] powerpc: add a missing label in resume_kernel
From: tiejun.chen @ 2013-04-10 8:43 UTC (permalink / raw)
To: Kevin Hao; +Cc: linuxppc
In-Reply-To: <1365582684-11136-1-git-send-email-haokexin@gmail.com>
On 04/10/2013 04:31 PM, Kevin Hao wrote:
> A label 0 was missed in the patch a9c4e541 (powerpc/kprobe: Complete
> kprobe and migrate exception frame). This will cause the kernel to
> branch to an undetermined address if there really is a conflict when
> updating the thread flags.
>
> Signed-off-by: Kevin Hao <haokexin@gmail.com>
Acked-by: Tiejun Chen <tiejun.chen@windriver.com>
> Cc: stable@vger.kernel.org
> ---
> arch/powerpc/kernel/entry_64.S | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
> index 256c5bf..ab079ed 100644
> --- a/arch/powerpc/kernel/entry_64.S
> +++ b/arch/powerpc/kernel/entry_64.S
> @@ -657,7 +657,7 @@ resume_kernel:
> /* Clear _TIF_EMULATE_STACK_STORE flag */
> lis r11,_TIF_EMULATE_STACK_STORE@h
> addi r5,r9,TI_FLAGS
> - ldarx r4,0,r5
> +0: ldarx r4,0,r5
> andc r4,r4,r11
> stdcx. r4,0,r5
> bne- 0b
>
^ permalink raw reply
* [PATCH 4/4] powerpc/perf: Add support for SIER
From: Michael Ellerman @ 2013-04-10 8:32 UTC (permalink / raw)
To: linuxppc-dev; +Cc: sukadev, Paul Mackerras
In-Reply-To: <1365582765-6939-1-git-send-email-michael@ellerman.id.au>
From: Michael Ellerman <michaele@au1.ibm.com>
On power8 we have a new SIER (Sampled Instruction Event Register), which
captures information about instructions when we have random sampling
enabled.
Add support for loading the SIER into pt_regs, overloading regs->dar.
Also set the new NO_SIPR flag in regs->result if we don't have SIPR.
Update regs_sihv/sipr() to look for SIPR/SIHV in SIER.
Signed-off-by: Michael Ellerman <michaele@au1.ibm.com>
---
arch/powerpc/include/asm/perf_event_server.h | 1 +
arch/powerpc/perf/core-book3s.c | 19 +++++++++++++++++++
2 files changed, 20 insertions(+)
diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
index e287aef..a1a1ad8 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -53,6 +53,7 @@ struct power_pmu {
#define PPMU_NO_CONT_SAMPLING 0x00000008 /* no continuous sampling */
#define PPMU_SIAR_VALID 0x00000010 /* Processor has SIAR Valid bit */
#define PPMU_HAS_SSLOT 0x00000020 /* Has sampled slot in MMCRA */
+#define PPMU_HAS_SIER 0x00000040 /* Has SIER */
/*
* Values for flags to get_alternatives()
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 4255b12..a4bbd4d 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -116,6 +116,9 @@ static bool regs_sihv(struct pt_regs *regs)
{
unsigned long sihv = MMCRA_SIHV;
+ if (ppmu->flags & PPMU_HAS_SIER)
+ return !!(regs->dar & SIER_SIHV);
+
if (ppmu->flags & PPMU_ALT_SIPR)
sihv = POWER6_MMCRA_SIHV;
@@ -126,6 +129,9 @@ static bool regs_sipr(struct pt_regs *regs)
{
unsigned long sipr = MMCRA_SIPR;
+ if (ppmu->flags & PPMU_HAS_SIER)
+ return !!(regs->dar & SIER_SIPR);
+
if (ppmu->flags & PPMU_ALT_SIPR)
sipr = POWER6_MMCRA_SIPR;
@@ -184,6 +190,7 @@ static inline u32 perf_get_misc_flags(struct pt_regs *regs)
/*
* Overload regs->dsisr to store MMCRA so we only need to read it once
* on each interrupt.
+ * Overload regs->dar to store SIER if we have it.
* Overload regs->result to specify whether we should use the MSR (result
* is zero) or the SIAR (result is non zero).
*/
@@ -200,6 +207,18 @@ static inline void perf_read_regs(struct pt_regs *regs)
regs->result |= 2;
/*
+ * On power8 if we're in random sampling mode, the SIER is updated.
+ * If we're in continuous sampling mode, we don't have SIPR.
+ */
+ if (ppmu->flags & PPMU_HAS_SIER) {
+ if (marked)
+ regs->dar = mfspr(SPRN_SIER);
+ else
+ regs->result |= 2;
+ }
+
+
+ /*
* If this isn't a PMU exception (eg a software event) the SIAR is
* not valid. Use pt_regs.
*
--
1.7.10.4
^ permalink raw reply related
* [PATCH 3/4] powerpc/perf: Add regs_no_sipr()
From: Michael Ellerman @ 2013-04-10 8:32 UTC (permalink / raw)
To: linuxppc-dev; +Cc: sukadev, Paul Mackerras
In-Reply-To: <1365582765-6939-1-git-send-email-michael@ellerman.id.au>
From: Michael Ellerman <michaele@au1.ibm.com>
On power8 the presence or absence of SIPR depends on settings at runtime,
so convert to using a dynamic flag for NO_SIPR. Existing backends that
set NO_SIPR simply set the dynamic flag unconditionally.
Signed-off-by: Michael Ellerman <michaele@au1.ibm.com>
---
arch/powerpc/perf/core-book3s.c | 15 ++++++++++++---
1 file changed, 12 insertions(+), 3 deletions(-)
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 770f359..4255b12 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -137,6 +137,11 @@ static bool regs_use_siar(struct pt_regs *regs)
return !!(regs->result & 1);
}
+static bool regs_no_sipr(struct pt_regs *regs)
+{
+ return !!(regs->result & 2);
+}
+
static inline u32 perf_flags_from_msr(struct pt_regs *regs)
{
if (regs->msr & MSR_PR)
@@ -159,7 +164,7 @@ static inline u32 perf_get_misc_flags(struct pt_regs *regs)
* SIAR which should give slightly more reliable
* results
*/
- if (ppmu->flags & PPMU_NO_SIPR) {
+ if (regs_no_sipr(regs)) {
unsigned long siar = mfspr(SPRN_SIAR);
if (siar >= PAGE_OFFSET)
return PERF_RECORD_MISC_KERNEL;
@@ -189,6 +194,10 @@ static inline void perf_read_regs(struct pt_regs *regs)
int use_siar;
regs->dsisr = mmcra;
+ regs->result = 0;
+
+ if (ppmu->flags & PPMU_NO_SIPR)
+ regs->result |= 2;
/*
* If this isn't a PMU exception (eg a software event) the SIAR is
@@ -213,12 +222,12 @@ static inline void perf_read_regs(struct pt_regs *regs)
use_siar = 1;
else if ((ppmu->flags & PPMU_NO_CONT_SAMPLING))
use_siar = 0;
- else if (!(ppmu->flags & PPMU_NO_SIPR) && regs_sipr(regs))
+ else if (!regs_no_sipr(regs) && regs_sipr(regs))
use_siar = 0;
else
use_siar = 1;
- regs->result = use_siar;
+ regs->result |= use_siar;
}
/*
--
1.7.10.4
^ permalink raw reply related
* [PATCH 2/4] powerpc/perf: Add an accessor for regs->result
From: Michael Ellerman @ 2013-04-10 8:32 UTC (permalink / raw)
To: linuxppc-dev; +Cc: sukadev, Paul Mackerras
In-Reply-To: <1365582765-6939-1-git-send-email-michael@ellerman.id.au>
From: Michael Ellerman <michaele@au1.ibm.com>
Add an accessor for regs->result so we can use it to store more flags in
future.
Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
---
arch/powerpc/perf/core-book3s.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index cb1618d..770f359 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -132,6 +132,11 @@ static bool regs_sipr(struct pt_regs *regs)
return !!(regs->dsisr & sipr);
}
+static bool regs_use_siar(struct pt_regs *regs)
+{
+ return !!(regs->result & 1);
+}
+
static inline u32 perf_flags_from_msr(struct pt_regs *regs)
{
if (regs->msr & MSR_PR)
@@ -143,7 +148,7 @@ static inline u32 perf_flags_from_msr(struct pt_regs *regs)
static inline u32 perf_get_misc_flags(struct pt_regs *regs)
{
- unsigned long use_siar = regs->result;
+ bool use_siar = regs_use_siar(regs);
if (!use_siar)
return perf_flags_from_msr(regs);
@@ -1413,7 +1418,7 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
*/
unsigned long perf_instruction_pointer(struct pt_regs *regs)
{
- unsigned long use_siar = regs->result;
+ bool use_siar = regs_use_siar(regs);
if (use_siar && siar_valid(regs))
return mfspr(SPRN_SIAR) + perf_ip_adjust(regs);
--
1.7.10.4
^ permalink raw reply related
* [PATCH 1/4] powerpc/perf: Convert mmcra_sipr/sihv() to regs_sipr/sihv()
From: Michael Ellerman @ 2013-04-10 8:32 UTC (permalink / raw)
To: linuxppc-dev; +Cc: sukadev, Paul Mackerras
From: Michael Ellerman <michaele@au1.ibm.com>
On power8 the SIPR and SIHV are not in MMCRA, so convert the routines
to take regs and change the names accordingly.
Signed-off-by: Michael Ellerman <michaele@au1.ibm.com>
---
arch/powerpc/perf/core-book3s.c | 20 +++++++++++---------
1 file changed, 11 insertions(+), 9 deletions(-)
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index fcfafa0..cb1618d 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -112,24 +112,24 @@ static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp)
*addrp = mfspr(SPRN_SDAR);
}
-static bool mmcra_sihv(unsigned long mmcra)
+static bool regs_sihv(struct pt_regs *regs)
{
unsigned long sihv = MMCRA_SIHV;
if (ppmu->flags & PPMU_ALT_SIPR)
sihv = POWER6_MMCRA_SIHV;
- return !!(mmcra & sihv);
+ return !!(regs->dsisr & sihv);
}
-static bool mmcra_sipr(unsigned long mmcra)
+static bool regs_sipr(struct pt_regs *regs)
{
unsigned long sipr = MMCRA_SIPR;
if (ppmu->flags & PPMU_ALT_SIPR)
sipr = POWER6_MMCRA_SIPR;
- return !!(mmcra & sipr);
+ return !!(regs->dsisr & sipr);
}
static inline u32 perf_flags_from_msr(struct pt_regs *regs)
@@ -143,7 +143,6 @@ static inline u32 perf_flags_from_msr(struct pt_regs *regs)
static inline u32 perf_get_misc_flags(struct pt_regs *regs)
{
- unsigned long mmcra = regs->dsisr;
unsigned long use_siar = regs->result;
if (!use_siar)
@@ -163,10 +162,12 @@ static inline u32 perf_get_misc_flags(struct pt_regs *regs)
}
/* PR has priority over HV, so order below is important */
- if (mmcra_sipr(mmcra))
+ if (regs_sipr(regs))
return PERF_RECORD_MISC_USER;
- if (mmcra_sihv(mmcra) && (freeze_events_kernel != MMCR0_FCHV))
+
+ if (regs_sihv(regs) && (freeze_events_kernel != MMCR0_FCHV))
return PERF_RECORD_MISC_HYPERVISOR;
+
return PERF_RECORD_MISC_KERNEL;
}
@@ -182,6 +183,8 @@ static inline void perf_read_regs(struct pt_regs *regs)
int marked = mmcra & MMCRA_SAMPLE_ENABLE;
int use_siar;
+ regs->dsisr = mmcra;
+
/*
* If this isn't a PMU exception (eg a software event) the SIAR is
* not valid. Use pt_regs.
@@ -205,12 +208,11 @@ static inline void perf_read_regs(struct pt_regs *regs)
use_siar = 1;
else if ((ppmu->flags & PPMU_NO_CONT_SAMPLING))
use_siar = 0;
- else if (!(ppmu->flags & PPMU_NO_SIPR) && mmcra_sipr(mmcra))
+ else if (!(ppmu->flags & PPMU_NO_SIPR) && regs_sipr(regs))
use_siar = 0;
else
use_siar = 1;
- regs->dsisr = mmcra;
regs->result = use_siar;
}
--
1.7.10.4
^ permalink raw reply related
* [PATCH] powerpc: add a missing label in resume_kernel
From: Kevin Hao @ 2013-04-10 8:31 UTC (permalink / raw)
To: Tiejun Chen, Benjamin Herrenschmidt; +Cc: linuxppc
A label 0 was missed in the patch a9c4e541 (powerpc/kprobe: Complete
kprobe and migrate exception frame). This will cause the kernel to
branch to an undetermined address if there really is a conflict when
updating the thread flags.
Signed-off-by: Kevin Hao <haokexin@gmail.com>
Cc: stable@vger.kernel.org
---
arch/powerpc/kernel/entry_64.S | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 256c5bf..ab079ed 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -657,7 +657,7 @@ resume_kernel:
/* Clear _TIF_EMULATE_STACK_STORE flag */
lis r11,_TIF_EMULATE_STACK_STORE@h
addi r5,r9,TI_FLAGS
- ldarx r4,0,r5
+0: ldarx r4,0,r5
andc r4,r4,r11
stdcx. r4,0,r5
bne- 0b
--
1.8.1.4
^ permalink raw reply related
* Re: [PATCH v2 2/11] Add PRRN Event Handler
From: Michael Ellerman @ 2013-04-10 8:30 UTC (permalink / raw)
To: Nathan Fontenot; +Cc: linuxppc-dev
In-Reply-To: <51509CF0.10200@linux.vnet.ibm.com>
On Mon, Mar 25, 2013 at 01:52:32PM -0500, Nathan Fontenot wrote:
> From: Jesse Larrew <jlarrew@linux.vnet.ibm.com>
>
> A PRRN event is signaled via the RTAS event-scan mechanism, which
> returns a Hot Plug Event message "fixed part" indicating "Platform
> Resource Reassignment". In response to the Hot Plug Event message,
> we must call ibm,update-nodes to determine which resources were
> reassigned and then ibm,update-properties to obtain the new affinity
> information about those resources.
..
> Index: powerpc/arch/powerpc/kernel/rtasd.c
> ===================================================================
> --- powerpc.orig/arch/powerpc/kernel/rtasd.c 2013-03-20 08:24:14.000000000 -0500
> +++ powerpc/arch/powerpc/kernel/rtasd.c 2013-03-20 08:52:08.000000000 -0500
> @@ -87,6 +87,8 @@
> return "Resource Deallocation Event";
> case RTAS_TYPE_DUMP:
> return "Dump Notification Event";
> + case RTAS_TYPE_PRRN:
> + return "Platform Resource Reassignment Event";
> }
>
> return rtas_type[0];
> @@ -265,7 +267,38 @@
> spin_unlock_irqrestore(&rtasd_log_lock, s);
> return;
> }
> +}
> +
> +static s32 update_scope;
> +
> +static void prrn_work_fn(struct work_struct *work)
> +{
> + /*
> + * For PRRN, we must pass the negative of the scope value in
> + * the RTAS event.
> + */
> + pseries_devicetree_update(-update_scope);
> +}
> +static DECLARE_WORK(prrn_work, prrn_work_fn);
This breaks the 32-bit build (ppc6xx_defconfig):
arch/powerpc/kernel/rtasd.c:280: undefined reference to `pseries_devicetree_update'
cheers
^ permalink raw reply
* Re: [PATCH -V5 08/25] powerpc: Decode the pte-lp-encoding bits correctly.
From: Aneesh Kumar K.V @ 2013-04-10 8:11 UTC (permalink / raw)
To: David Gibson; +Cc: paulus, linuxppc-dev, linux-mm
In-Reply-To: <20130410071915.GI8165@truffula.fritz.box>
David Gibson <dwg@au1.ibm.com> writes:
> On Thu, Apr 04, 2013 at 11:27:46AM +0530, Aneesh Kumar K.V wrote:
>> From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
>>
>> We look at both the segment base page size and actual page size and store
>> the pte-lp-encodings in an array per base page size.
>>
>> We also update all relevant functions to take actual page size argument
>> so that we can use the correct PTE LP encoding in HPTE. This should also
>> get the basic Multiple Page Size per Segment (MPSS) support. This is needed
>> to enable THP on ppc64.
>>
....
>> +static inline int hpte_actual_psize(struct hash_pte *hptep, int psize)
>> +{
>> + int i, shift;
>> + unsigned int mask;
>> + /* Look at the 8 bit LP value */
>> + unsigned int lp = (hptep->r >> LP_SHIFT) & ((1 << LP_BITS) - 1);
>> +
>> + if (!(hptep->v & HPTE_V_VALID))
>> + return -1;
>
> Folding the validity check into the size check seems confusing to me.
We do end up calling hpte_actual_psize with an invalid hpte, so that
check is needed. I could move it to the caller, but then I would have
to replicate it at all the call sites.
>
>> + /* First check if it is large page */
>> + if (!(hptep->v & HPTE_V_LARGE))
>> + return MMU_PAGE_4K;
>> +
>> + /* start from 1 ignoring MMU_PAGE_4K */
>> + for (i = 1; i < MMU_PAGE_COUNT; i++) {
>> + /* valid entries have a shift value */
>> + if (!mmu_psize_defs[i].shift)
>> + continue;
>
> Isn't this check redundant with the one below?
Yes. I guess we can safely assume that if penc is valid then we do
support that specific large page.
I will drop this and keep the penc check. That is the more correct check.
>
>> + /* invalid penc */
>> + if (mmu_psize_defs[psize].penc[i] == -1)
>> + continue;
>> + /*
>> + * encoding bits per actual page size
>> + * PTE LP actual page size
>> + * rrrr rrrz >=8KB
>> + * rrrr rrzz >=16KB
>> + * rrrr rzzz >=32KB
>> + * rrrr zzzz >=64KB
>> + * .......
>> + */
>> + shift = mmu_psize_defs[i].shift - LP_SHIFT;
>> + if (shift > LP_BITS)
>> + shift = LP_BITS;
>> + mask = (1 << shift) - 1;
>> + if ((lp & mask) == mmu_psize_defs[psize].penc[i])
>> + return i;
>> + }
>
> Shouldn't we have a BUG() or something here. If we get here we've
> somehow created a PTE with LP bits we can't interpret, yes?
>
I don't know. Is BUG() the right thing to do?
>> + return -1;
>> +}
>> +
>> static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
>> unsigned long vpn, int psize, int ssize,
>> int local)
>> @@ -251,6 +294,7 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
>> struct hash_pte *hptep = htab_address + slot;
>> unsigned long hpte_v, want_v;
>> int ret = 0;
>> + int actual_psize;
>>
>> want_v = hpte_encode_avpn(vpn, psize, ssize);
>>
>> @@ -260,9 +304,13 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
>> native_lock_hpte(hptep);
>>
>> hpte_v = hptep->v;
>> -
>> + actual_psize = hpte_actual_psize(hptep, psize);
>> + if (actual_psize < 0) {
>> + native_unlock_hpte(hptep);
>> + return -1;
>> + }
>
> Wouldn't it make more sense to only do the psize lookup once you've
> found a matching hpte?
But we need to do the psize lookup even if V_COMPARE fails, because we
want to do the tlbie in both cases.
>
>> /* Even if we miss, we need to invalidate the TLB */
>> - if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) {
>> + if (!HPTE_V_COMPARE(hpte_v, want_v)) {
>> DBG_LOW(" -> miss\n");
>> ret = -1;
>> } else {
>> @@ -274,7 +322,7 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
>> native_unlock_hpte(hptep);
>>
>> /* Ensure it is out of the tlb too. */
>> - tlbie(vpn, psize, ssize, local);
>> + tlbie(vpn, psize, actual_psize, ssize, local);
>>
>> return ret;
>> }
>> @@ -315,6 +363,7 @@ static long native_hpte_find(unsigned long vpn, int psize, int ssize)
>> static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
>> int psize, int ssize)
>> {
>> + int actual_psize;
>> unsigned long vpn;
>> unsigned long vsid;
>> long slot;
>> @@ -327,13 +376,16 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
>> if (slot == -1)
>> panic("could not find page to bolt\n");
>> hptep = htab_address + slot;
>> + actual_psize = hpte_actual_psize(hptep, psize);
>> + if (actual_psize < 0)
>> + return;
>>
>> /* Update the HPTE */
>> hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
>> (newpp & (HPTE_R_PP | HPTE_R_N));
>>
>> /* Ensure it is out of the tlb too. */
>> - tlbie(vpn, psize, ssize, 0);
>> + tlbie(vpn, psize, actual_psize, ssize, 0);
>> }
>>
>> static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
>> @@ -343,6 +395,7 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
>> unsigned long hpte_v;
>> unsigned long want_v;
>> unsigned long flags;
>> + int actual_psize;
>>
>> local_irq_save(flags);
>>
>> @@ -352,35 +405,38 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
>> native_lock_hpte(hptep);
>> hpte_v = hptep->v;
>>
>> + actual_psize = hpte_actual_psize(hptep, psize);
>> + if (actual_psize < 0) {
>> + native_unlock_hpte(hptep);
>> + local_irq_restore(flags);
>> + return;
>> + }
>> /* Even if we miss, we need to invalidate the TLB */
>> - if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
>> + if (!HPTE_V_COMPARE(hpte_v, want_v))
>> native_unlock_hpte(hptep);
>> else
>> /* Invalidate the hpte. NOTE: this also unlocks it */
>> hptep->v = 0;
>>
>> /* Invalidate the TLB */
>> - tlbie(vpn, psize, ssize, local);
>> + tlbie(vpn, psize, actual_psize, ssize, local);
>>
>> local_irq_restore(flags);
>> }
>>
>> -#define LP_SHIFT 12
>> -#define LP_BITS 8
>> -#define LP_MASK(i) ((0xFF >> (i)) << LP_SHIFT)
>> -
>> static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
>> - int *psize, int *ssize, unsigned long *vpn)
>> + int *psize, int *apsize, int *ssize, unsigned long *vpn)
>> {
>> unsigned long avpn, pteg, vpi;
>> unsigned long hpte_r = hpte->r;
>> unsigned long hpte_v = hpte->v;
>> unsigned long vsid, seg_off;
>> - int i, size, shift, penc;
>> + int i, size, a_size, shift, penc;
>>
>> - if (!(hpte_v & HPTE_V_LARGE))
>> - size = MMU_PAGE_4K;
>> - else {
>> + if (!(hpte_v & HPTE_V_LARGE)) {
>> + size = MMU_PAGE_4K;
>> + a_size = MMU_PAGE_4K;
>> + } else {
>> for (i = 0; i < LP_BITS; i++) {
>> if ((hpte_r & LP_MASK(i+1)) == LP_MASK(i+1))
>> break;
>> @@ -388,19 +444,26 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
>> penc = LP_MASK(i+1) >> LP_SHIFT;
>> for (size = 0; size < MMU_PAGE_COUNT; size++) {
>
>>
>> - /* 4K pages are not represented by LP */
>> - if (size == MMU_PAGE_4K)
>> - continue;
>> -
>> /* valid entries have a shift value */
>> if (!mmu_psize_defs[size].shift)
>> continue;
>> + for (a_size = 0; a_size < MMU_PAGE_COUNT; a_size++) {
>
> Can't you reuse hpte_actual_psize() here instead of recoding the
> lookup?
I thought about that, but re-coding avoided some repeated checks. But
then, if I follow your review comments about avoiding the hpte valid
check etc., maybe I can reuse hpte_actual_psize. Will try this.
>
>> - if (penc == mmu_psize_defs[size].penc)
>> - break;
>> + /* 4K pages are not represented by LP */
>> + if (a_size == MMU_PAGE_4K)
>> + continue;
>> +
>> + /* valid entries have a shift value */
>> + if (!mmu_psize_defs[a_size].shift)
>> + continue;
>> +
>> + if (penc == mmu_psize_defs[size].penc[a_size])
>> + goto out;
>> + }
>> }
>> }
>>
>> +out:
-aneesh
^ permalink raw reply
* Re: [PATCH -V5 06/25] powerpc: Reduce PTE table memory wastage
From: Aneesh Kumar K.V @ 2013-04-10 7:54 UTC (permalink / raw)
To: Michael Ellerman; +Cc: paulus, linuxppc-dev, linux-mm
In-Reply-To: <20130410071453.GB24786@concordia>
Michael Ellerman <michael@ellerman.id.au> writes:
> On Thu, Apr 04, 2013 at 11:27:44AM +0530, Aneesh Kumar K.V wrote:
>> From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
>>
>> We allocate one page for the last level of linux page table. With THP and
>> large page size of 16MB, that would mean we are wasting large part
>> of that page. To map 16MB area, we only need a PTE space of 2K with 64K
>> page size. This patch reduce the space wastage by sharing the page
>> allocated for the last level of linux page table with multiple pmd
>> entries. We call these smaller chunks PTE page fragments and allocated
>> page, PTE page.
>
> This is not compiling for me:
>
> arch/powerpc/mm/mmu_context_hash64.c:118:3: error: implicit declaration of function 'reset_page_mapcount'
>
Can you share the .config? I have the git tree at
git://github.com/kvaneesh/linux.git ppc64-thp-7
-aneesh
^ permalink raw reply
* Re: [PATCH -V5 06/25] powerpc: Reduce PTE table memory wastage
From: Aneesh Kumar K.V @ 2013-04-10 7:53 UTC (permalink / raw)
To: David Gibson; +Cc: paulus, linuxppc-dev, linux-mm
In-Reply-To: <20130410070403.GH8165@truffula.fritz.box>
David Gibson <dwg@au1.ibm.com> writes:
> On Wed, Apr 10, 2013 at 11:59:29AM +0530, Aneesh Kumar K.V wrote:
>> David Gibson <dwg@au1.ibm.com> writes:
>> > On Thu, Apr 04, 2013 at 11:27:44AM +0530, Aneesh Kumar K.V wrote:
> [snip]
>> >> @@ -97,13 +100,45 @@ void __destroy_context(int context_id)
>> >> }
>> >> EXPORT_SYMBOL_GPL(__destroy_context);
>> >>
>> >> +#ifdef CONFIG_PPC_64K_PAGES
>> >> +static void destroy_pagetable_page(struct mm_struct *mm)
>> >> +{
>> >> + int count;
>> >> + struct page *page;
>> >> +
>> >> + page = mm->context.pgtable_page;
>> >> + if (!page)
>> >> + return;
>> >> +
>> >> + /* drop all the pending references */
>> >> + count = atomic_read(&page->_mapcount) + 1;
>> >> + /* We allow PTE_FRAG_NR(16) fragments from a PTE page */
>> >> + count = atomic_sub_return(16 - count, &page->_count);
>> >
>> > You should really move PTE_FRAG_NR to a header so you can actually use
>> > it here rather than hard coding 16.
>> >
>> > It took me a fair while to convince myself that there is no race here
>> > with something altering mapcount and count between the atomic_read()
>> > and the atomic_sub_return(). It could do with a comment to explain
>> > why that is safe.
>> >
>> > Re-using the mapcount field for your index also seems odd, and it took
>> > me a while to convince myself that that's safe too. Wouldn't it be
>> > simpler to store a pointer to the next sub-page in the mm_context
>> > instead? You can get from that to the struct page easily enough with a
>> > shift and pfn_to_page().
>>
>> I found using _mapcount simpler in this case. I was looking at it not
>> as an index, but rather how many fragments are mapped/used already.
>
> Except that it's actually (#fragments - 1). Using subpage pointer
> makes the fragments calculation (very slightly) harder, but the
> calculation of the table address easier. More importantly it avoids
> adding effectively an extra variable - which is then shoehorned into a
> structure not really designed to hold it.
Even with a subpage pointer we would need mm->context.pgtable_page or
something similar. We don't add any other extra variable, right? Let me
try what you are suggesting here and see if that makes it simpler.
>> Using
>> subpage pointer in mm->context.xyz means, we have to calculate the
>> number of fragments used/mapped via the pointer. We need the fragment
>> count so that we can drop page reference count correctly here.
>>
>>
>> >
>> >> + if (!count) {
>> >> + pgtable_page_dtor(page);
>> >> + reset_page_mapcount(page);
>> >> + free_hot_cold_page(page, 0);
>> >
>> > It would be nice to use put_page() somehow instead of duplicating its
>> > logic, though I realise the sparc code you've based this on does the
>> > same thing.
>>
>> That is not exactly put_page. We can avoid lots of check in this
>> specific case.
>
> [snip]
>> >> +static pte_t *__alloc_for_cache(struct mm_struct *mm, int kernel)
>> >> +{
>> >> + pte_t *ret = NULL;
>> >> + struct page *page = alloc_page(GFP_KERNEL | __GFP_NOTRACK |
>> >> + __GFP_REPEAT | __GFP_ZERO);
>> >> + if (!page)
>> >> + return NULL;
>> >> +
>> >> + spin_lock(&mm->page_table_lock);
>> >> + /*
>> >> + * If we find pgtable_page set, we return
>> >> + * the allocated page with single fragement
>> >> + * count.
>> >> + */
>> >> + if (likely(!mm->context.pgtable_page)) {
>> >> + atomic_set(&page->_count, PTE_FRAG_NR);
>> >> + atomic_set(&page->_mapcount, 0);
>> >> + mm->context.pgtable_page = page;
>> >> + }
>> >
>> > .. and in the unlikely case where there *is* a pgtable_page already
>> > set, what then? Seems like you should BUG_ON, or at least return NULL
>> > - as it is you will return the first sub-page of that page again,
>> > which is very likely in use.
>>
>>
>> As explained in the comment above, we return with the allocated page
>> with fragment count set to 1. So we end up having only one fragment. The
>> other option I had was to free the allocated page and do a
>> get_from_cache under the page_table_lock. But since we already allocated
>> the page, why not use that ?. It also keep the code similar to
>> sparc.
>
> My point is that I can't see any circumstance under which we should
> ever hit this case. Which means if we do something is badly messed up
> and we should BUG() (or at least WARN()).
A multi threaded test would easily hit that. stream is the test I used.
-aneesh
^ permalink raw reply
* [RESEND PATCH 4/4] Use vmap_area_list to get vmalloc_start for ppc64.
From: Atsushi Kumagai @ 2013-04-10 7:20 UTC (permalink / raw)
To: kexec; +Cc: linuxppc-dev
In-Reply-To: <20130410150524.804cd23b99a697f71146be67@mxc.nes.nec.co.jp>
From: Atsushi Kumagai <kumagai-atsushi@mxc.nes.nec.co.jp>
Date: Fri, 15 Mar 2013 19:34:30 +0900
Subject: [PATCH 4/4] Use vmap_area_list to get vmalloc_start for ppc64.
Try to get vmalloc_start value from vmap_area_list first for
newer ppc64 kernels.
Signed-off-by: Atsushi Kumagai <kumagai-atsushi@mxc.nes.nec.co.jp>
---
arch/ppc64.c | 44 +++++++++++++++++++++++++++++++-------------
1 file changed, 31 insertions(+), 13 deletions(-)
diff --git a/arch/ppc64.c b/arch/ppc64.c
index afbaf55..c229ede 100644
--- a/arch/ppc64.c
+++ b/arch/ppc64.c
@@ -66,22 +66,40 @@ get_machdep_info_ppc64(void)
DEBUG_MSG("kernel_start : %lx\n", info->kernel_start);
/*
- * For the compatibility, makedumpfile should run without the symbol
- * vmlist and the offset of vm_struct.addr if they are not necessary.
+ * Get vmalloc_start value from either vmap_area_list or vmlist.
*/
- if ((SYMBOL(vmlist) == NOT_FOUND_SYMBOL)
- || (OFFSET(vm_struct.addr) == NOT_FOUND_STRUCTURE)) {
+ if ((SYMBOL(vmap_area_list) != NOT_FOUND_SYMBOL)
+ && (OFFSET(vmap_area.va_start) != NOT_FOUND_STRUCTURE)
+ && (OFFSET(vmap_area.list) != NOT_FOUND_STRUCTURE)) {
+ if (!readmem(VADDR, SYMBOL(vmap_area_list) + OFFSET(list_head.next),
+ &vmap_area_list, sizeof(vmap_area_list))) {
+ ERRMSG("Can't get vmap_area_list.\n");
+ return FALSE;
+ }
+ if (!readmem(VADDR, vmap_area_list - OFFSET(vmap_area.list) +
+ OFFSET(vmap_area.va_start), &vmalloc_start,
+ sizeof(vmalloc_start))) {
+ ERRMSG("Can't get vmalloc_start.\n");
+ return FALSE;
+ }
+ } else if ((SYMBOL(vmlist) != NOT_FOUND_SYMBOL)
+ && (OFFSET(vm_struct.addr) != NOT_FOUND_STRUCTURE)) {
+ if (!readmem(VADDR, SYMBOL(vmlist), &vmlist, sizeof(vmlist))) {
+ ERRMSG("Can't get vmlist.\n");
+ return FALSE;
+ }
+ if (!readmem(VADDR, vmlist + OFFSET(vm_struct.addr), &vmalloc_start,
+ sizeof(vmalloc_start))) {
+ ERRMSG("Can't get vmalloc_start.\n");
+ return FALSE;
+ }
+ } else {
+ /*
+ * For the compatibility, makedumpfile should run without the symbol
+ * vmlist and the offset of vm_struct.addr if they are not necessary.
+ */
return TRUE;
}
- if (!readmem(VADDR, SYMBOL(vmlist), &vmlist, sizeof(vmlist))) {
- ERRMSG("Can't get vmlist.\n");
- return FALSE;
- }
- if (!readmem(VADDR, vmlist + OFFSET(vm_struct.addr), &vmalloc_start,
- sizeof(vmalloc_start))) {
- ERRMSG("Can't get vmalloc_start.\n");
- return FALSE;
- }
info->vmalloc_start = vmalloc_start;
DEBUG_MSG("vmalloc_start: %lx\n", vmalloc_start);
--
1.8.0.2
^ permalink raw reply related
* [PATCH 8/8] Read common partition via pstore
From: Aruna Balakrishnaiah @ 2013-04-10 7:24 UTC (permalink / raw)
To: linuxppc-dev, paulus, linux-kernel, benh; +Cc: jkenisto, mahesh, anton
In-Reply-To: <20130410071835.20150.56489.stgit@aruna-ThinkPad-T420>
This patch exploits pstore infrastructure to read the details
from NVRAM's common partition.
Signed-off-by: Aruna Balakrishnaiah <aruna@linux.vnet.ibm.com>
Reviewed-by: Jim Keniston <jkenisto@us.ibm.com>
---
arch/powerpc/platforms/pseries/nvram.c | 17 ++++++++++++++++-
fs/pstore/inode.c | 3 +++
include/linux/pstore.h | 1 +
3 files changed, 20 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c
index b65a670..542dc7e 100644
--- a/arch/powerpc/platforms/pseries/nvram.c
+++ b/arch/powerpc/platforms/pseries/nvram.c
@@ -84,6 +84,12 @@ static struct nvram_os_partition of_config_partition = {
.index = -1,
.os_partition = false
};
+
+static struct nvram_os_partition common_partition = {
+ .name = "common",
+ .index = -1,
+ .os_partition = false
+};
#endif
struct oops_log_info {
@@ -157,6 +163,7 @@ static enum pstore_type_id nvram_type_ids[] = {
PSTORE_TYPE_DMESG,
PSTORE_TYPE_RTAS,
PSTORE_TYPE_OF,
+ PSTORE_TYPE_COMMON,
-1
};
static int read_type;
@@ -770,7 +777,7 @@ static int nvram_pstore_write(enum pstore_type_id type,
}
/*
- * Reads the oops/panic report, rtas-log and of-config partition.
+ * Reads the oops/panic report, rtas-log, of-config and common partition.
* Returns the length of the data we read from each partition.
* Returns 0 if we've been called before.
*/
@@ -806,6 +813,14 @@ static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type,
time->tv_sec = 0;
time->tv_nsec = 0;
break;
+ case PSTORE_TYPE_COMMON:
+ sig = NVRAM_SIG_SYS;
+ part = &common_partition;
+ *type = PSTORE_TYPE_COMMON;
+ *id = PSTORE_TYPE_COMMON;
+ time->tv_sec = 0;
+ time->tv_nsec = 0;
+ break;
default:
return 0;
}
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index c3d1846..11cae64 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -330,6 +330,9 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count,
case PSTORE_TYPE_OF:
sprintf(name, "of-%s-%lld", psname, id);
break;
+ case PSTORE_TYPE_COMMON:
+ sprintf(name, "common-%s-%lld", psname, id);
+ break;
case PSTORE_TYPE_UNKNOWN:
sprintf(name, "unknown-%s-%lld", psname, id);
break;
diff --git a/include/linux/pstore.h b/include/linux/pstore.h
index a23d7d2..08224c2 100644
--- a/include/linux/pstore.h
+++ b/include/linux/pstore.h
@@ -38,6 +38,7 @@ enum pstore_type_id {
/* PPC64 partition types */
PSTORE_TYPE_RTAS = 10,
PSTORE_TYPE_OF = 11,
+ PSTORE_TYPE_COMMON = 12,
PSTORE_TYPE_UNKNOWN = 255
};
^ permalink raw reply related
* [PATCH 7/8] Read of-config partition via pstore
From: Aruna Balakrishnaiah @ 2013-04-10 7:24 UTC (permalink / raw)
To: linuxppc-dev, paulus, linux-kernel, benh; +Cc: jkenisto, mahesh, anton
In-Reply-To: <20130410071835.20150.56489.stgit@aruna-ThinkPad-T420>
This patch exploits pstore infrastructure to read the details
from NVRAM's of-config partition.
Signed-off-by: Aruna Balakrishnaiah <aruna@linux.vnet.ibm.com>
Reviewed-by: Jim Keniston <jkenisto@us.ibm.com>
---
arch/powerpc/platforms/pseries/nvram.c | 58 ++++++++++++++++++++++++++------
fs/pstore/inode.c | 3 ++
include/linux/pstore.h | 1 +
3 files changed, 52 insertions(+), 10 deletions(-)
diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c
index 6a3a7cd..b65a670 100644
--- a/arch/powerpc/platforms/pseries/nvram.c
+++ b/arch/powerpc/platforms/pseries/nvram.c
@@ -78,6 +78,14 @@ static const char *pseries_nvram_os_partitions[] = {
NULL
};
+#ifdef CONFIG_PSTORE
+static struct nvram_os_partition of_config_partition = {
+ .name = "of-config",
+ .index = -1,
+ .os_partition = false
+};
+#endif
+
struct oops_log_info {
u16 version;
u16 report_length;
@@ -148,6 +156,7 @@ static size_t oops_data_sz;
static enum pstore_type_id nvram_type_ids[] = {
PSTORE_TYPE_DMESG,
PSTORE_TYPE_RTAS,
+ PSTORE_TYPE_OF,
-1
};
static int read_type;
@@ -350,11 +359,15 @@ int nvram_read_partition(struct nvram_os_partition *part, char *buff,
tmp_index = part->index;
- rc = ppc_md.nvram_read((char *)&info, sizeof(struct err_log_info), &tmp_index);
- if (rc <= 0) {
- printk(KERN_ERR "nvram_read_partition: "
- "Failed nvram_read (%d)\n", rc);
- return rc;
+ if (part->os_partition) {
+ rc = ppc_md.nvram_read((char *)&info,
+ sizeof(struct err_log_info),
+ &tmp_index);
+ if (rc <= 0) {
+ printk(KERN_ERR "nvram_read_partition: "
+ "Failed nvram_read (%d)\n", rc);
+ return rc;
+ }
}
rc = ppc_md.nvram_read(buff, length, &tmp_index);
@@ -364,8 +377,10 @@ int nvram_read_partition(struct nvram_os_partition *part, char *buff,
return rc;
}
- *error_log_cnt = info.seq_num;
- *err_type = info.error_type;
+ if (part->os_partition) {
+ *error_log_cnt = info.seq_num;
+ *err_type = info.error_type;
+ }
return 0;
}
@@ -755,7 +770,7 @@ static int nvram_pstore_write(enum pstore_type_id type,
}
/*
- * Reads the oops/panic report and ibm,rtas-log partition.
+ * Reads the oops/panic report, rtas-log and of-config partition.
* Returns the length of the data we read from each partition.
* Returns 0 if we've been called before.
*/
@@ -764,9 +779,11 @@ static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type,
struct pstore_info *psi)
{
struct oops_log_info *oops_hdr;
- unsigned int err_type, id_no;
+ unsigned int err_type, id_no, size = 0;
struct nvram_os_partition *part = NULL;
char *buff = NULL;
+ int sig = 0;
+ loff_t p;
read_type++;
@@ -781,10 +798,29 @@ static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type,
time->tv_sec = last_rtas_event;
time->tv_nsec = 0;
break;
+ case PSTORE_TYPE_OF:
+ sig = NVRAM_SIG_OF;
+ part = &of_config_partition;
+ *type = PSTORE_TYPE_OF;
+ *id = PSTORE_TYPE_OF;
+ time->tv_sec = 0;
+ time->tv_nsec = 0;
+ break;
default:
return 0;
}
+ if (!part->os_partition) {
+ p = nvram_find_partition(part->name, sig, &size);
+ if (p <= 0) {
+ pr_err("nvram: Failed to find partition %s, "
+ "err %d\n", part->name, (int)p);
+ return 0;
+ }
+ part->index = p;
+ part->size = size;
+ }
+
buff = kmalloc(part->size, GFP_KERNEL);
if (!buff)
@@ -796,7 +832,9 @@ static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type,
}
*count = 0;
- *id = id_no;
+
+ if (part->os_partition)
+ *id = id_no;
if (nvram_type_ids[read_type] == PSTORE_TYPE_DMESG) {
oops_hdr = (struct oops_log_info *)buff;
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index 59b1454..c3d1846 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -327,6 +327,9 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count,
case PSTORE_TYPE_RTAS:
sprintf(name, "rtas-%s-%lld", psname, id);
break;
+ case PSTORE_TYPE_OF:
+ sprintf(name, "of-%s-%lld", psname, id);
+ break;
case PSTORE_TYPE_UNKNOWN:
sprintf(name, "unknown-%s-%lld", psname, id);
break;
diff --git a/include/linux/pstore.h b/include/linux/pstore.h
index 4eb94c9..a23d7d2 100644
--- a/include/linux/pstore.h
+++ b/include/linux/pstore.h
@@ -37,6 +37,7 @@ enum pstore_type_id {
PSTORE_TYPE_FTRACE = 3,
/* PPC64 partition types */
PSTORE_TYPE_RTAS = 10,
+ PSTORE_TYPE_OF = 11,
PSTORE_TYPE_UNKNOWN = 255
};
^ permalink raw reply related
* [PATCH 6/8] Distinguish between a os-partition and non-os partition
From: Aruna Balakrishnaiah @ 2013-04-10 7:23 UTC (permalink / raw)
To: linuxppc-dev, paulus, linux-kernel, benh; +Cc: jkenisto, mahesh, anton
In-Reply-To: <20130410071835.20150.56489.stgit@aruna-ThinkPad-T420>
Introduce os_partition member in nvram_os_partition structure
to identify if the partition is an os partition or not. This
will be useful to handle non-os partitions of-config and
common in subsequent patches.
Signed-off-by: Aruna Balakrishnaiah <aruna@linux.vnet.ibm.com>
Reviewed-by: Jim Keniston <jkenisto@us.ibm.com>
---
arch/powerpc/platforms/pseries/nvram.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c
index d420b1d..6a3a7cd 100644
--- a/arch/powerpc/platforms/pseries/nvram.c
+++ b/arch/powerpc/platforms/pseries/nvram.c
@@ -53,20 +53,23 @@ struct nvram_os_partition {
int min_size; /* minimum acceptable size (0 means req_size) */
long size; /* size of data portion (excluding err_log_info) */
long index; /* offset of data portion of partition */
+ bool os_partition; /* partition initialized by OS, not FW */
};
static struct nvram_os_partition rtas_log_partition = {
.name = "ibm,rtas-log",
.req_size = 2079,
.min_size = 1055,
- .index = -1
+ .index = -1,
+ .os_partition = true
};
static struct nvram_os_partition oops_log_partition = {
.name = "lnx,oops-log",
.req_size = 4000,
.min_size = 2000,
- .index = -1
+ .index = -1,
+ .os_partition = true
};
static const char *pseries_nvram_os_partitions[] = {
^ permalink raw reply related
* [PATCH 5/8] Read rtas partition via pstore
From: Aruna Balakrishnaiah @ 2013-04-10 7:23 UTC (permalink / raw)
To: linuxppc-dev, paulus, linux-kernel, benh; +Cc: jkenisto, mahesh, anton
In-Reply-To: <20130410071835.20150.56489.stgit@aruna-ThinkPad-T420>
This patch exploits pstore infrastructure to read the details
from NVRAM's rtas partition.
Signed-off-by: Aruna Balakrishnaiah <aruna@linux.vnet.ibm.com>
Reviewed-by: Jim Keniston <jkenisto@us.ibm.com>
---
arch/powerpc/platforms/pseries/nvram.c | 33 +++++++++++++++++++++++++-------
fs/pstore/inode.c | 3 +++
include/linux/pstore.h | 2 ++
3 files changed, 31 insertions(+), 7 deletions(-)
diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c
index 82d32a2..d420b1d 100644
--- a/arch/powerpc/platforms/pseries/nvram.c
+++ b/arch/powerpc/platforms/pseries/nvram.c
@@ -144,9 +144,11 @@ static size_t oops_data_sz;
#ifdef CONFIG_PSTORE
static enum pstore_type_id nvram_type_ids[] = {
PSTORE_TYPE_DMESG,
+ PSTORE_TYPE_RTAS,
-1
};
static int read_type;
+static unsigned long last_rtas_event;
#endif
/* Compression parameters */
#define COMPR_LEVEL 6
@@ -315,8 +317,13 @@ int nvram_write_error_log(char * buff, int length,
{
int rc = nvram_write_os_partition(&rtas_log_partition, buff, length,
err_type, error_log_cnt);
- if (!rc)
+ if (!rc) {
last_unread_rtas_event = get_seconds();
+#ifdef CONFIG_PSTORE
+ last_rtas_event = get_seconds();
+#endif
+ }
+
return rc;
}
@@ -745,7 +752,7 @@ static int nvram_pstore_write(enum pstore_type_id type,
}
/*
- * Reads the oops/panic report.
+ * Reads the oops/panic report and ibm,rtas-log partition.
* Returns the length of the data we read from each partition.
* Returns 0 if we've been called before.
*/
@@ -765,6 +772,12 @@ static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type,
part = &oops_log_partition;
*type = PSTORE_TYPE_DMESG;
break;
+ case PSTORE_TYPE_RTAS:
+ part = &rtas_log_partition;
+ *type = PSTORE_TYPE_RTAS;
+ time->tv_sec = last_rtas_event;
+ time->tv_nsec = 0;
+ break;
default:
return 0;
}
@@ -781,11 +794,17 @@ static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type,
*count = 0;
*id = id_no;
- oops_hdr = (struct oops_log_info *)buff;
- *buf = buff + sizeof(*oops_hdr);
- time->tv_sec = oops_hdr->timestamp;
- time->tv_nsec = 0;
- return oops_hdr->report_length;
+
+ if (nvram_type_ids[read_type] == PSTORE_TYPE_DMESG) {
+ oops_hdr = (struct oops_log_info *)buff;
+ *buf = buff + sizeof(*oops_hdr);
+ time->tv_sec = oops_hdr->timestamp;
+ time->tv_nsec = 0;
+ return oops_hdr->report_length;
+ }
+
+ *buf = buff;
+ return part->size;
}
#else
static int nvram_pstore_open(struct pstore_info *psi)
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index e4bcb2c..59b1454 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -324,6 +324,9 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count,
case PSTORE_TYPE_MCE:
sprintf(name, "mce-%s-%lld", psname, id);
break;
+ case PSTORE_TYPE_RTAS:
+ sprintf(name, "rtas-%s-%lld", psname, id);
+ break;
case PSTORE_TYPE_UNKNOWN:
sprintf(name, "unknown-%s-%lld", psname, id);
break;
diff --git a/include/linux/pstore.h b/include/linux/pstore.h
index 75d0176..4eb94c9 100644
--- a/include/linux/pstore.h
+++ b/include/linux/pstore.h
@@ -35,6 +35,8 @@ enum pstore_type_id {
PSTORE_TYPE_MCE = 1,
PSTORE_TYPE_CONSOLE = 2,
PSTORE_TYPE_FTRACE = 3,
+ /* PPC64 partition types */
+ PSTORE_TYPE_RTAS = 10,
PSTORE_TYPE_UNKNOWN = 255
};
^ permalink raw reply related
* [PATCH 4/8] Read/Write oops nvram partition via pstore
From: Aruna Balakrishnaiah @ 2013-04-10 7:23 UTC (permalink / raw)
To: linuxppc-dev, paulus, linux-kernel, benh; +Cc: jkenisto, mahesh, anton
In-Reply-To: <20130410071835.20150.56489.stgit@aruna-ThinkPad-T420>
This patch exploits pstore infrastructure in power systems.
IBM's system p machines provide persistent storage for LPARs
through NVRAM. NVRAM's lnx,oops-log partition is used to log
oops messages. If pstore registration fails, the code falls
back to the kmsg_dump mechanism.
This patch will read/write the oops messages from/to this
partition via pstore.
Signed-off-by: Jim Keniston <jkenisto@us.ibm.com>
Signed-off-by: Aruna Balakrishnaiah <aruna@linux.vnet.ibm.com>
---
arch/powerpc/platforms/pseries/nvram.c | 145 ++++++++++++++++++++++++++++++++
1 file changed, 145 insertions(+)
diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c
index 6701b71..82d32a2 100644
--- a/arch/powerpc/platforms/pseries/nvram.c
+++ b/arch/powerpc/platforms/pseries/nvram.c
@@ -18,6 +18,7 @@
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/kmsg_dump.h>
+#include <linux/pstore.h>
#include <linux/ctype.h>
#include <linux/zlib.h>
#include <asm/uaccess.h>
@@ -87,6 +88,25 @@ static struct kmsg_dumper nvram_kmsg_dumper = {
.dump = oops_to_nvram
};
+static int nvram_pstore_open(struct pstore_info *psi);
+
+static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type,
+ int *count, struct timespec *time, char **buf,
+ struct pstore_info *psi);
+
+static int nvram_pstore_write(enum pstore_type_id type,
+ enum kmsg_dump_reason reason, u64 *id,
+ unsigned int part, int count, size_t size,
+ struct pstore_info *psi);
+
+static struct pstore_info nvram_pstore_info = {
+ .owner = THIS_MODULE,
+ .name = "nvram",
+ .open = nvram_pstore_open,
+ .read = nvram_pstore_read,
+ .write = nvram_pstore_write,
+};
+
/* See clobbering_unread_rtas_event() */
#define NVRAM_RTAS_READ_TIMEOUT 5 /* seconds */
static unsigned long last_unread_rtas_event; /* timestamp */
@@ -121,6 +141,13 @@ static char *big_oops_buf, *oops_buf;
static char *oops_data;
static size_t oops_data_sz;
+#ifdef CONFIG_PSTORE
+static enum pstore_type_id nvram_type_ids[] = {
+ PSTORE_TYPE_DMESG,
+ -1
+};
+static int read_type;
+#endif
/* Compression parameters */
#define COMPR_LEVEL 6
#define WINDOW_BITS 12
@@ -455,6 +482,23 @@ static void __init nvram_init_oops_partition(int rtas_partition_exists)
oops_data = oops_buf + sizeof(struct oops_log_info);
oops_data_sz = oops_log_partition.size - sizeof(struct oops_log_info);
+ nvram_pstore_info.buf = oops_data;
+ nvram_pstore_info.bufsize = oops_data_sz;
+
+ rc = pstore_register(&nvram_pstore_info);
+
+ if (rc != 0) {
+ pr_err("nvram: pstore_register() failed, defaults to "
+ "kmsg_dump; returned %d\n", rc);
+ goto kmsg_dump;
+ } else {
+ /*TODO: Support compression when pstore is configured */
+ pr_info("nvram: Compression of oops text supported only when "
+ "pstore is not configured");
+ return;
+ }
+
+kmsg_dump:
/*
* Figure compression (preceded by elimination of each line's <n>
* severity prefix) will reduce the oops/panic report to at most
@@ -663,3 +707,104 @@ static void oops_to_nvram(struct kmsg_dumper *dumper,
spin_unlock_irqrestore(&lock, flags);
}
+
+#ifdef CONFIG_PSTORE
+static int nvram_pstore_open(struct pstore_info *psi)
+{
+ read_type = -1;
+ return 0;
+}
+
+/*
+ * Called by pstore_dump() when an oops or panic report is logged to the printk
+ * buffer. @size bytes have been written to oops_buf, starting after the
+ * oops_log_info header.
+ */
+static int nvram_pstore_write(enum pstore_type_id type,
+ enum kmsg_dump_reason reason,
+ u64 *id, unsigned int part, int count,
+ size_t size, struct pstore_info *psi)
+{
+ struct oops_log_info *oops_hdr = (struct oops_log_info *) oops_buf;
+
+ /* part 1 has the recent messages from printk buffer */
+ if (part > 1 || clobbering_unread_rtas_event())
+ return -1;
+
+ BUG_ON(type != PSTORE_TYPE_DMESG);
+ BUG_ON(sizeof(*oops_hdr) + size > oops_log_partition.size);
+ oops_hdr->version = OOPS_HDR_VERSION;
+ oops_hdr->report_length = (u16) size;
+ oops_hdr->timestamp = get_seconds();
+ (void) nvram_write_os_partition(&oops_log_partition, oops_buf,
+ (int) (sizeof(*oops_hdr) + size), ERR_TYPE_KERNEL_PANIC,
+ count);
+ *id = part;
+
+ return 0;
+}
+
+/*
+ * Reads the oops/panic report.
+ * Returns the length of the data we read from each partition.
+ * Returns 0 if we've been called before.
+ */
+static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type,
+ int *count, struct timespec *time, char **buf,
+ struct pstore_info *psi)
+{
+ struct oops_log_info *oops_hdr;
+ unsigned int err_type, id_no;
+ struct nvram_os_partition *part = NULL;
+ char *buff = NULL;
+
+ read_type++;
+
+ switch (nvram_type_ids[read_type]) {
+ case PSTORE_TYPE_DMESG:
+ part = &oops_log_partition;
+ *type = PSTORE_TYPE_DMESG;
+ break;
+ default:
+ return 0;
+ }
+
+ buff = kmalloc(part->size, GFP_KERNEL);
+
+ if (!buff)
+ return -ENOMEM;
+
+ if (nvram_read_partition(part, buff, part->size, &err_type, &id_no)) {
+ kfree(buff);
+ return 0;
+ }
+
+ *count = 0;
+ *id = id_no;
+ oops_hdr = (struct oops_log_info *)buff;
+ *buf = buff + sizeof(*oops_hdr);
+ time->tv_sec = oops_hdr->timestamp;
+ time->tv_nsec = 0;
+ return oops_hdr->report_length;
+}
+#else
+static int nvram_pstore_open(struct pstore_info *psi)
+{
+ return 0;
+}
+
+static int nvram_pstore_write(enum pstore_type_id type,
+ enum kmsg_dump_reason reason, u64 *id,
+ unsigned int part, int count, size_t size,
+ struct pstore_info *psi)
+{
+ return 0;
+}
+
+static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type,
+ int *count, struct timespec *time, char **buf,
+ struct pstore_info *psi)
+{
+ return 0;
+}
+#endif
^ permalink raw reply related
* [PATCH 3/8] Introduce generic read function to read nvram-partitions
From: Aruna Balakrishnaiah @ 2013-04-10 7:21 UTC (permalink / raw)
To: linuxppc-dev, paulus, linux-kernel, benh; +Cc: jkenisto, mahesh, anton
In-Reply-To: <20130410071835.20150.56489.stgit@aruna-ThinkPad-T420>
Introduce a generic read function, nvram_read_partition, that can read from
any nvram partition rather than only the rtas partition.
nvram_read_error_log, which rtasd uses to read the rtas partition, is
retained as a thin wrapper around nvram_read_partition.
Signed-off-by: Aruna Balakrishnaiah <aruna@linux.vnet.ibm.com>
Reviewed-by: Jim Keniston <jkenisto@us.ibm.com>
---
arch/powerpc/platforms/pseries/nvram.c | 34 +++++++++++++++++++++++---------
1 file changed, 24 insertions(+), 10 deletions(-)
diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c
index 742735a..6701b71 100644
--- a/arch/powerpc/platforms/pseries/nvram.c
+++ b/arch/powerpc/platforms/pseries/nvram.c
@@ -293,34 +293,37 @@ int nvram_write_error_log(char * buff, int length,
return rc;
}
-/* nvram_read_error_log
+/* nvram_read_partition
*
- * Reads nvram for error log for at most 'length'
+ * Reads nvram partition for at most 'length'
*/
-int nvram_read_error_log(char * buff, int length,
- unsigned int * err_type, unsigned int * error_log_cnt)
+int nvram_read_partition(struct nvram_os_partition *part, char *buff,
+ int length, unsigned int *err_type,
+ unsigned int *error_log_cnt)
{
int rc;
loff_t tmp_index;
struct err_log_info info;
- if (rtas_log_partition.index == -1)
+ if (part->index == -1)
return -1;
- if (length > rtas_log_partition.size)
- length = rtas_log_partition.size;
+ if (length > part->size)
+ length = part->size;
- tmp_index = rtas_log_partition.index;
+ tmp_index = part->index;
rc = ppc_md.nvram_read((char *)&info, sizeof(struct err_log_info), &tmp_index);
if (rc <= 0) {
- printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc);
+ printk(KERN_ERR "nvram_read_partition: "
+ "Failed nvram_read (%d)\n", rc);
return rc;
}
rc = ppc_md.nvram_read(buff, length, &tmp_index);
if (rc <= 0) {
- printk(KERN_ERR "nvram_read_error_log: Failed nvram_read (%d)\n", rc);
+ printk(KERN_ERR "nvram_read_partition: "
+ "Failed nvram_read (%d)\n", rc);
return rc;
}
@@ -330,6 +333,17 @@ int nvram_read_error_log(char * buff, int length,
return 0;
}
+/* nvram_read_error_log
+ *
+ * Reads nvram for error log for at most 'length'
+ */
+int nvram_read_error_log(char *buff, int length,
+ unsigned int *err_type, unsigned int *error_log_cnt)
+{
+ return nvram_read_partition(&rtas_log_partition, buff, length,
+ err_type, error_log_cnt);
+}
+
/* This doesn't actually zero anything, but it sets the event_logged
* word to tell that this event is safely in syslog.
*/
^ permalink raw reply related
* [PATCH 2/8] Add version and timestamp to oops header
From: Aruna Balakrishnaiah @ 2013-04-10 7:21 UTC (permalink / raw)
To: linuxppc-dev, paulus, linux-kernel, benh; +Cc: jkenisto, mahesh, anton
In-Reply-To: <20130410071835.20150.56489.stgit@aruna-ThinkPad-T420>
Introduce version and timestamp information in the oops header.
oops_log_info (oops header) holds version (to distinguish between old
and new format oops header), length of the oops text
(compressed or uncompressed) and timestamp.
Signed-off-by: Aruna Balakrishnaiah <aruna@linux.vnet.ibm.com>
Reviewed-by: Jim Keniston <jkenisto@us.ibm.com>
---
arch/powerpc/platforms/pseries/nvram.c | 57 +++++++++++++++++++++-----------
1 file changed, 38 insertions(+), 19 deletions(-)
diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c
index e54a8b7..742735a 100644
--- a/arch/powerpc/platforms/pseries/nvram.c
+++ b/arch/powerpc/platforms/pseries/nvram.c
@@ -29,6 +29,13 @@
/* Max bytes to read/write in one go */
#define NVRW_CNT 0x20
+/*
+ * Set oops header version to distinguish between old and new format header.
+ * lnx,oops-log partition max size is 4000, header version > 4000 will
+ * help in identifying new header.
+ */
+#define OOPS_HDR_VERSION 5000
+
static unsigned int nvram_size;
static int nvram_fetch, nvram_store;
static char nvram_buf[NVRW_CNT]; /* assume this is in the first 4GB */
@@ -67,6 +74,12 @@ static const char *pseries_nvram_os_partitions[] = {
NULL
};
+struct oops_log_info {
+ u16 version;
+ u16 report_length;
+ u64 timestamp;
+} __attribute__((packed));
+
static void oops_to_nvram(struct kmsg_dumper *dumper,
enum kmsg_dump_reason reason);
@@ -83,28 +96,28 @@ static unsigned long last_unread_rtas_event; /* timestamp */
* big_oops_buf[] holds the uncompressed text we're capturing.
*
- * oops_buf[] holds the compressed text, preceded by a prefix.
- * The prefix is just a u16 holding the length of the compressed* text.
- * (*Or uncompressed, if compression fails.) oops_buf[] gets written
- * to NVRAM.
+ * oops_buf[] holds the compressed text, preceded by a oops header.
+ * oops header has u16 holding the version of oops header (to differentiate
+ * between old and new format header) followed by u16 holding the length of
+ * the compressed* text (*Or uncompressed, if compression fails.) and u64
+ * holding the timestamp. oops_buf[] gets written to NVRAM.
*
- * oops_len points to the prefix. oops_data points to the compressed text.
+ * oops_log_info points to the header. oops_data points to the compressed text.
*
* +- oops_buf
- * | +- oops_data
- * v v
- * +------------+-----------------------------------------------+
- * | length | text |
- * | (2 bytes) | (oops_data_sz bytes) |
- * +------------+-----------------------------------------------+
+ * | +- oops_data
+ * v v
+ * +-----------+-----------+-----------+------------------------+
+ * | version | length | timestamp | text |
+ * | (2 bytes) | (2 bytes) | (8 bytes) | (oops_data_sz bytes) |
+ * +-----------+-----------+-----------+------------------------+
* ^
- * +- oops_len
+ * +- oops_log_info
*
* We preallocate these buffers during init to avoid kmalloc during oops/panic.
*/
static size_t big_oops_buf_sz;
static char *big_oops_buf, *oops_buf;
-static u16 *oops_len;
static char *oops_data;
static size_t oops_data_sz;
@@ -425,9 +438,8 @@ static void __init nvram_init_oops_partition(int rtas_partition_exists)
oops_log_partition.name);
return;
}
- oops_len = (u16*) oops_buf;
- oops_data = oops_buf + sizeof(u16);
- oops_data_sz = oops_log_partition.size - sizeof(u16);
+ oops_data = oops_buf + sizeof(struct oops_log_info);
+ oops_data_sz = oops_log_partition.size - sizeof(struct oops_log_info);
/*
* Figure compression (preceded by elimination of each line's <n>
@@ -555,6 +567,7 @@ error:
/* Compress the text from big_oops_buf into oops_buf. */
static int zip_oops(size_t text_len)
{
+ struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf;
int zipped_len = nvram_compress(big_oops_buf, oops_data, text_len,
oops_data_sz);
if (zipped_len < 0) {
@@ -562,7 +575,9 @@ static int zip_oops(size_t text_len)
pr_err("nvram: logging uncompressed oops/panic report\n");
return -1;
}
- *oops_len = (u16) zipped_len;
+ oops_hdr->version = OOPS_HDR_VERSION;
+ oops_hdr->report_length = (u16) zipped_len;
+ oops_hdr->timestamp = get_seconds();
return 0;
}
@@ -576,6 +591,7 @@ static int zip_oops(size_t text_len)
static void oops_to_nvram(struct kmsg_dumper *dumper,
enum kmsg_dump_reason reason)
{
+ struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf;
static unsigned int oops_count = 0;
static bool panicking = false;
static DEFINE_SPINLOCK(lock);
@@ -622,11 +638,14 @@ static void oops_to_nvram(struct kmsg_dumper *dumper,
kmsg_dump_get_buffer(dumper, false,
oops_data, oops_data_sz, &text_len);
err_type = ERR_TYPE_KERNEL_PANIC;
- *oops_len = (u16) text_len;
+ oops_hdr->version = OOPS_HDR_VERSION;
+ oops_hdr->report_length = (u16) text_len;
+ oops_hdr->timestamp = get_seconds();
}
(void) nvram_write_os_partition(&oops_log_partition, oops_buf,
- (int) (sizeof(*oops_len) + *oops_len), err_type, ++oops_count);
+ (int) (sizeof(*oops_hdr) + oops_hdr->report_length), err_type,
+ ++oops_count);
spin_unlock_irqrestore(&lock, flags);
}
^ permalink raw reply related
* Re: [PATCH -V5 19/25] powerpc/THP: Differentiate THP PMD entries from HUGETLB PMD entries
From: Michael Ellerman @ 2013-04-10 7:21 UTC (permalink / raw)
To: Aneesh Kumar K.V; +Cc: paulus, linuxppc-dev, linux-mm
In-Reply-To: <1365055083-31956-20-git-send-email-aneesh.kumar@linux.vnet.ibm.com>
On Thu, Apr 04, 2013 at 11:27:57AM +0530, Aneesh Kumar K.V wrote:
> From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
>
> HUGETLB clear the top bit of PMD entries and use that to indicate
> a HUGETLB page directory. Since we store pfns in PMDs for THP,
> we would have the top bit cleared by default. Add the top bit mask
> for THP PMD entries and clear that when we are looking for pmd_pfn.
>
> @@ -44,6 +44,14 @@ struct mm_struct;
> #define PMD_HUGE_RPN_SHIFT PTE_RPN_SHIFT
> #define HUGE_PAGE_SIZE (ASM_CONST(1) << 24)
> #define HUGE_PAGE_MASK (~(HUGE_PAGE_SIZE - 1))
> +/*
> + * HugeTLB looks at the top bit of the Linux page table entries to
> + * decide whether it is a huge page directory or not. Mark HUGE
> + * PMD to differentiate
> + */
> +#define PMD_HUGE_NOT_HUGETLB (ASM_CONST(1) << 63)
> +#define PMD_ISHUGE (_PMD_ISHUGE | PMD_HUGE_NOT_HUGETLB)
> +#define PMD_HUGE_PROTBITS (0xfff | PMD_HUGE_NOT_HUGETLB)
>
> #ifndef __ASSEMBLY__
> extern void hpte_need_hugepage_flush(struct mm_struct *mm, unsigned long addr,
> @@ -84,7 +93,8 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
> /*
> * Only called for hugepage pmd
> */
> - return pmd_val(pmd) >> PMD_HUGE_RPN_SHIFT;
> + unsigned long val = pmd_val(pmd) & ~PMD_HUGE_PROTBITS;
> + return val >> PMD_HUGE_RPN_SHIFT;
> }
This is breaking the 32-bit build for me (pmac32_defconfig):
arch/powerpc/include/asm/pgtable.h:123:2: error: left shift count >= width of type [-Werror]
cheers
^ permalink raw reply
* [PATCH 1/8] Remove syslog prefix in uncompressed oops text
From: Aruna Balakrishnaiah @ 2013-04-10 7:21 UTC (permalink / raw)
To: linuxppc-dev, paulus, linux-kernel, benh; +Cc: jkenisto, mahesh, anton
In-Reply-To: <20130410071835.20150.56489.stgit@aruna-ThinkPad-T420>
Removal of syslog prefix in the uncompressed oops text will
help in capturing more oops data.
Signed-off-by: Aruna Balakrishnaiah <aruna@linux.vnet.ibm.com>
Reviewed-by: Jim Keniston <jkenisto@us.ibm.com>
---
arch/powerpc/platforms/pseries/nvram.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c
index 8733a86..e54a8b7 100644
--- a/arch/powerpc/platforms/pseries/nvram.c
+++ b/arch/powerpc/platforms/pseries/nvram.c
@@ -619,7 +619,7 @@ static void oops_to_nvram(struct kmsg_dumper *dumper,
}
if (rc != 0) {
kmsg_dump_rewind(dumper);
- kmsg_dump_get_buffer(dumper, true,
+ kmsg_dump_get_buffer(dumper, false,
oops_data, oops_data_sz, &text_len);
err_type = ERR_TYPE_KERNEL_PANIC;
*oops_len = (u16) text_len;
^ permalink raw reply related
* [PATCH 0/8] Nvram-to-pstore
From: Aruna Balakrishnaiah @ 2013-04-10 7:20 UTC (permalink / raw)
To: linuxppc-dev, paulus, linux-kernel, benh; +Cc: jkenisto, mahesh, anton
Currently the kernel provides the contents of p-series NVRAM only as a
simple stream of bytes via /dev/nvram, which must be interpreted in user
space by the nvram command in the powerpc-utils package. This patch set
exploits the pstore subsystem to expose each partition in NVRAM as a
separate file in /dev/pstore. For instance, Oops messages will be stored in a
file named [dmesg-nvram-2].
---
Aruna Balakrishnaiah (8):
Remove syslog prefix in uncompressed oops text
Add version and timestamp to oops header
Introduce generic read function to read nvram-partitions
Read/Write oops nvram partition via pstore
Read rtas partition via pstore
Distinguish between a os-partition and non-os partition
Read of-config partition via pstore
Read common partition via pstore
arch/powerpc/platforms/pseries/nvram.c | 329 ++++++++++++++++++++++++++++----
fs/pstore/inode.c | 9 +
include/linux/pstore.h | 4
3 files changed, 304 insertions(+), 38 deletions(-)
--
^ permalink raw reply
* Re: [PATCH -V5 08/25] powerpc: Decode the pte-lp-encoding bits correctly.
From: David Gibson @ 2013-04-10 7:19 UTC (permalink / raw)
To: Aneesh Kumar K.V; +Cc: paulus, linuxppc-dev, linux-mm
In-Reply-To: <1365055083-31956-9-git-send-email-aneesh.kumar@linux.vnet.ibm.com>
[-- Attachment #1: Type: text/plain, Size: 31290 bytes --]
On Thu, Apr 04, 2013 at 11:27:46AM +0530, Aneesh Kumar K.V wrote:
> From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
>
> We look at both the segment base page size and actual page size and store
> the pte-lp-encodings in an array per base page size.
>
> We also update all relevant functions to take actual page size argument
> so that we can use the correct PTE LP encoding in HPTE. This should also
> get the basic Multiple Page Size per Segment (MPSS) support. This is needed
> to enable THP on ppc64.
>
> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
> ---
> arch/powerpc/include/asm/machdep.h | 3 +-
> arch/powerpc/include/asm/mmu-hash64.h | 33 ++++----
> arch/powerpc/kvm/book3s_hv.c | 2 +-
> arch/powerpc/mm/hash_low_64.S | 18 ++--
> arch/powerpc/mm/hash_native_64.c | 138 ++++++++++++++++++++++---------
> arch/powerpc/mm/hash_utils_64.c | 121 +++++++++++++++++----------
> arch/powerpc/mm/hugetlbpage-hash64.c | 4 +-
> arch/powerpc/platforms/cell/beat_htab.c | 16 ++--
> arch/powerpc/platforms/ps3/htab.c | 6 +-
> arch/powerpc/platforms/pseries/lpar.c | 6 +-
> 10 files changed, 230 insertions(+), 117 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
> index 19d9d96..6cee6e0 100644
> --- a/arch/powerpc/include/asm/machdep.h
> +++ b/arch/powerpc/include/asm/machdep.h
> @@ -50,7 +50,8 @@ struct machdep_calls {
> unsigned long prpn,
> unsigned long rflags,
> unsigned long vflags,
> - int psize, int ssize);
> + int psize, int apsize,
> + int ssize);
> long (*hpte_remove)(unsigned long hpte_group);
> void (*hpte_removebolted)(unsigned long ea,
> int psize, int ssize);
> diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h
> index 300ac3c..e42f4a3 100644
> --- a/arch/powerpc/include/asm/mmu-hash64.h
> +++ b/arch/powerpc/include/asm/mmu-hash64.h
> @@ -154,7 +154,7 @@ extern unsigned long htab_hash_mask;
> struct mmu_psize_def
> {
> unsigned int shift; /* number of bits */
> - unsigned int penc; /* HPTE encoding */
> + int penc[MMU_PAGE_COUNT]; /* HPTE encoding */
> unsigned int tlbiel; /* tlbiel supported for that page size */
> unsigned long avpnm; /* bits to mask out in AVPN in the HPTE */
> unsigned long sllp; /* SLB L||LP (exact mask to use in slbmte) */
> @@ -181,6 +181,13 @@ struct mmu_psize_def
> */
> #define VPN_SHIFT 12
>
> +/*
> + * HPTE Large Page (LP) details
> + */
> +#define LP_SHIFT 12
> +#define LP_BITS 8
> +#define LP_MASK(i) ((0xFF >> (i)) << LP_SHIFT)
> +
> #ifndef __ASSEMBLY__
>
> static inline int segment_shift(int ssize)
> @@ -237,14 +244,14 @@ static inline unsigned long hpte_encode_avpn(unsigned long vpn, int psize,
>
> /*
> * This function sets the AVPN and L fields of the HPTE appropriately
> - * for the page size
> + * using the base page size and actual page size.
> */
> -static inline unsigned long hpte_encode_v(unsigned long vpn,
> - int psize, int ssize)
> +static inline unsigned long hpte_encode_v(unsigned long vpn, int base_psize,
> + int actual_psize, int ssize)
> {
> unsigned long v;
> - v = hpte_encode_avpn(vpn, psize, ssize);
> - if (psize != MMU_PAGE_4K)
> + v = hpte_encode_avpn(vpn, base_psize, ssize);
> + if (actual_psize != MMU_PAGE_4K)
> v |= HPTE_V_LARGE;
> return v;
> }
> @@ -254,19 +261,17 @@ static inline unsigned long hpte_encode_v(unsigned long vpn,
> * for the page size. We assume the pa is already "clean" that is properly
> * aligned for the requested page size
> */
> -static inline unsigned long hpte_encode_r(unsigned long pa, int psize)
> +static inline unsigned long hpte_encode_r(unsigned long pa, int base_psize,
> + int actual_psize)
> {
> - unsigned long r;
> -
> /* A 4K page needs no special encoding */
> - if (psize == MMU_PAGE_4K)
> + if (actual_psize == MMU_PAGE_4K)
> return pa & HPTE_R_RPN;
> else {
> - unsigned int penc = mmu_psize_defs[psize].penc;
> - unsigned int shift = mmu_psize_defs[psize].shift;
> - return (pa & ~((1ul << shift) - 1)) | (penc << 12);
> + unsigned int penc = mmu_psize_defs[base_psize].penc[actual_psize];
> + unsigned int shift = mmu_psize_defs[actual_psize].shift;
> + return (pa & ~((1ul << shift) - 1)) | (penc << LP_SHIFT);
> }
> - return r;
> }
>
> /*
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 71d0c90..48f6d99 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -1515,7 +1515,7 @@ static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps,
> (*sps)->page_shift = def->shift;
> (*sps)->slb_enc = def->sllp;
> (*sps)->enc[0].page_shift = def->shift;
> - (*sps)->enc[0].pte_enc = def->penc;
> + (*sps)->enc[0].pte_enc = def->penc[linux_psize];
> (*sps)++;
> }
>
> diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S
> index abdd5e2..0e980ac 100644
> --- a/arch/powerpc/mm/hash_low_64.S
> +++ b/arch/powerpc/mm/hash_low_64.S
> @@ -196,7 +196,8 @@ htab_insert_pte:
> mr r4,r29 /* Retrieve vpn */
> li r7,0 /* !bolted, !secondary */
> li r8,MMU_PAGE_4K /* page size */
> - ld r9,STK_PARAM(R9)(r1) /* segment size */
> + li r9,MMU_PAGE_4K /* actual page size */
> + ld r10,STK_PARAM(R9)(r1) /* segment size */
> _GLOBAL(htab_call_hpte_insert1)
> bl . /* Patched by htab_finish_init() */
> cmpdi 0,r3,0
> @@ -219,7 +220,8 @@ _GLOBAL(htab_call_hpte_insert1)
> mr r4,r29 /* Retrieve vpn */
> li r7,HPTE_V_SECONDARY /* !bolted, secondary */
> li r8,MMU_PAGE_4K /* page size */
> - ld r9,STK_PARAM(R9)(r1) /* segment size */
> + li r9,MMU_PAGE_4K /* actual page size */
> + ld r10,STK_PARAM(R9)(r1) /* segment size */
> _GLOBAL(htab_call_hpte_insert2)
> bl . /* Patched by htab_finish_init() */
> cmpdi 0,r3,0
> @@ -515,7 +517,8 @@ htab_special_pfn:
> mr r4,r29 /* Retrieve vpn */
> li r7,0 /* !bolted, !secondary */
> li r8,MMU_PAGE_4K /* page size */
> - ld r9,STK_PARAM(R9)(r1) /* segment size */
> + li r9,MMU_PAGE_4K /* actual page size */
> + ld r10,STK_PARAM(R9)(r1) /* segment size */
> _GLOBAL(htab_call_hpte_insert1)
> bl . /* patched by htab_finish_init() */
> cmpdi 0,r3,0
> @@ -542,7 +545,8 @@ _GLOBAL(htab_call_hpte_insert1)
> mr r4,r29 /* Retrieve vpn */
> li r7,HPTE_V_SECONDARY /* !bolted, secondary */
> li r8,MMU_PAGE_4K /* page size */
> - ld r9,STK_PARAM(R9)(r1) /* segment size */
> + li r9,MMU_PAGE_4K /* actual page size */
> + ld r10,STK_PARAM(R9)(r1) /* segment size */
> _GLOBAL(htab_call_hpte_insert2)
> bl . /* patched by htab_finish_init() */
> cmpdi 0,r3,0
> @@ -840,7 +844,8 @@ ht64_insert_pte:
> mr r4,r29 /* Retrieve vpn */
> li r7,0 /* !bolted, !secondary */
> li r8,MMU_PAGE_64K
> - ld r9,STK_PARAM(R9)(r1) /* segment size */
> + li r9,MMU_PAGE_64K /* actual page size */
> + ld r10,STK_PARAM(R9)(r1) /* segment size */
> _GLOBAL(ht64_call_hpte_insert1)
> bl . /* patched by htab_finish_init() */
> cmpdi 0,r3,0
> @@ -863,7 +868,8 @@ _GLOBAL(ht64_call_hpte_insert1)
> mr r4,r29 /* Retrieve vpn */
> li r7,HPTE_V_SECONDARY /* !bolted, secondary */
> li r8,MMU_PAGE_64K
> - ld r9,STK_PARAM(R9)(r1) /* segment size */
> + li r9,MMU_PAGE_64K /* actual page size */
> + ld r10,STK_PARAM(R9)(r1) /* segment size */
> _GLOBAL(ht64_call_hpte_insert2)
> bl . /* patched by htab_finish_init() */
> cmpdi 0,r3,0
> diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
> index 9d8983a..aa0499b 100644
> --- a/arch/powerpc/mm/hash_native_64.c
> +++ b/arch/powerpc/mm/hash_native_64.c
> @@ -39,7 +39,7 @@
>
> DEFINE_RAW_SPINLOCK(native_tlbie_lock);
>
> -static inline void __tlbie(unsigned long vpn, int psize, int ssize)
> +static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize)
> {
> unsigned long va;
> unsigned int penc;
> @@ -68,7 +68,7 @@ static inline void __tlbie(unsigned long vpn, int psize, int ssize)
> break;
> default:
> /* We need 14 to 14 + i bits of va */
> - penc = mmu_psize_defs[psize].penc;
> + penc = mmu_psize_defs[psize].penc[apsize];
> va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
> va |= penc << 12;
> va |= ssize << 8;
> @@ -80,7 +80,7 @@ static inline void __tlbie(unsigned long vpn, int psize, int ssize)
> }
> }
>
> -static inline void __tlbiel(unsigned long vpn, int psize, int ssize)
> +static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize)
> {
> unsigned long va;
> unsigned int penc;
> @@ -102,7 +102,7 @@ static inline void __tlbiel(unsigned long vpn, int psize, int ssize)
> break;
> default:
> /* We need 14 to 14 + i bits of va */
> - penc = mmu_psize_defs[psize].penc;
> + penc = mmu_psize_defs[psize].penc[apsize];
> va &= ~((1ul << mmu_psize_defs[psize].shift) - 1);
> va |= penc << 12;
> va |= ssize << 8;
> @@ -114,7 +114,8 @@ static inline void __tlbiel(unsigned long vpn, int psize, int ssize)
>
> }
>
> -static inline void tlbie(unsigned long vpn, int psize, int ssize, int local)
> +static inline void tlbie(unsigned long vpn, int psize, int apsize,
> + int ssize, int local)
> {
> unsigned int use_local = local && mmu_has_feature(MMU_FTR_TLBIEL);
> int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
> @@ -125,10 +126,10 @@ static inline void tlbie(unsigned long vpn, int psize, int ssize, int local)
> raw_spin_lock(&native_tlbie_lock);
> asm volatile("ptesync": : :"memory");
> if (use_local) {
> - __tlbiel(vpn, psize, ssize);
> + __tlbiel(vpn, psize, apsize, ssize);
> asm volatile("ptesync": : :"memory");
> } else {
> - __tlbie(vpn, psize, ssize);
> + __tlbie(vpn, psize, apsize, ssize);
> asm volatile("eieio; tlbsync; ptesync": : :"memory");
> }
> if (lock_tlbie && !use_local)
> @@ -156,7 +157,7 @@ static inline void native_unlock_hpte(struct hash_pte *hptep)
>
> static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
> unsigned long pa, unsigned long rflags,
> - unsigned long vflags, int psize, int ssize)
> + unsigned long vflags, int psize, int apsize, int ssize)
> {
> struct hash_pte *hptep = htab_address + hpte_group;
> unsigned long hpte_v, hpte_r;
> @@ -183,8 +184,8 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
> if (i == HPTES_PER_GROUP)
> return -1;
>
> - hpte_v = hpte_encode_v(vpn, psize, ssize) | vflags | HPTE_V_VALID;
> - hpte_r = hpte_encode_r(pa, psize) | rflags;
> + hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
> + hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;
>
> if (!(vflags & HPTE_V_BOLTED)) {
> DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n",
> @@ -244,6 +245,48 @@ static long native_hpte_remove(unsigned long hpte_group)
> return i;
> }
>
> +static inline int hpte_actual_psize(struct hash_pte *hptep, int psize)
> +{
> + int i, shift;
> + unsigned int mask;
> + /* Look at the 8 bit LP value */
> + unsigned int lp = (hptep->r >> LP_SHIFT) & ((1 << LP_BITS) - 1);
> +
> + if (!(hptep->v & HPTE_V_VALID))
> + return -1;
Folding the validity check into the size check seems confusing to me.
> + /* First check if it is large page */
> + if (!(hptep->v & HPTE_V_LARGE))
> + return MMU_PAGE_4K;
> +
> + /* start from 1 ignoring MMU_PAGE_4K */
> + for (i = 1; i < MMU_PAGE_COUNT; i++) {
> + /* valid entries have a shift value */
> + if (!mmu_psize_defs[i].shift)
> + continue;
Isn't this check redundant with the one below?
> + /* invalid penc */
> + if (mmu_psize_defs[psize].penc[i] == -1)
> + continue;
> + /*
> + * encoding bits per actual page size
> + * PTE LP actual page size
> + * rrrr rrrz >=8KB
> + * rrrr rrzz >=16KB
> + * rrrr rzzz >=32KB
> + * rrrr zzzz >=64KB
> + * .......
> + */
> + shift = mmu_psize_defs[i].shift - LP_SHIFT;
> + if (shift > LP_BITS)
> + shift = LP_BITS;
> + mask = (1 << shift) - 1;
> + if ((lp & mask) == mmu_psize_defs[psize].penc[i])
> + return i;
> + }
Shouldn't we have a BUG() or something here? If we get here we've
somehow created a PTE with LP bits we can't interpret, yes?
> + return -1;
> +}
> +
> static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
> unsigned long vpn, int psize, int ssize,
> int local)
> @@ -251,6 +294,7 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
> struct hash_pte *hptep = htab_address + slot;
> unsigned long hpte_v, want_v;
> int ret = 0;
> + int actual_psize;
>
> want_v = hpte_encode_avpn(vpn, psize, ssize);
>
> @@ -260,9 +304,13 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
> native_lock_hpte(hptep);
>
> hpte_v = hptep->v;
> -
> + actual_psize = hpte_actual_psize(hptep, psize);
> + if (actual_psize < 0) {
> + native_unlock_hpte(hptep);
> + return -1;
> + }
Wouldn't it make more sense to only do the psize lookup once you've
found a matching hpte?
> /* Even if we miss, we need to invalidate the TLB */
> - if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) {
> + if (!HPTE_V_COMPARE(hpte_v, want_v)) {
> DBG_LOW(" -> miss\n");
> ret = -1;
> } else {
> @@ -274,7 +322,7 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
> native_unlock_hpte(hptep);
>
> /* Ensure it is out of the tlb too. */
> - tlbie(vpn, psize, ssize, local);
> + tlbie(vpn, psize, actual_psize, ssize, local);
>
> return ret;
> }
> @@ -315,6 +363,7 @@ static long native_hpte_find(unsigned long vpn, int psize, int ssize)
> static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
> int psize, int ssize)
> {
> + int actual_psize;
> unsigned long vpn;
> unsigned long vsid;
> long slot;
> @@ -327,13 +376,16 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea,
> if (slot == -1)
> panic("could not find page to bolt\n");
> hptep = htab_address + slot;
> + actual_psize = hpte_actual_psize(hptep, psize);
> + if (actual_psize < 0)
> + return;
>
> /* Update the HPTE */
> hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
> (newpp & (HPTE_R_PP | HPTE_R_N));
>
> /* Ensure it is out of the tlb too. */
> - tlbie(vpn, psize, ssize, 0);
> + tlbie(vpn, psize, actual_psize, ssize, 0);
> }
>
> static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
> @@ -343,6 +395,7 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
> unsigned long hpte_v;
> unsigned long want_v;
> unsigned long flags;
> + int actual_psize;
>
> local_irq_save(flags);
>
> @@ -352,35 +405,38 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
> native_lock_hpte(hptep);
> hpte_v = hptep->v;
>
> + actual_psize = hpte_actual_psize(hptep, psize);
> + if (actual_psize < 0) {
> + native_unlock_hpte(hptep);
> + local_irq_restore(flags);
> + return;
> + }
> /* Even if we miss, we need to invalidate the TLB */
> - if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
> + if (!HPTE_V_COMPARE(hpte_v, want_v))
> native_unlock_hpte(hptep);
> else
> /* Invalidate the hpte. NOTE: this also unlocks it */
> hptep->v = 0;
>
> /* Invalidate the TLB */
> - tlbie(vpn, psize, ssize, local);
> + tlbie(vpn, psize, actual_psize, ssize, local);
>
> local_irq_restore(flags);
> }
>
> -#define LP_SHIFT 12
> -#define LP_BITS 8
> -#define LP_MASK(i) ((0xFF >> (i)) << LP_SHIFT)
> -
> static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
> - int *psize, int *ssize, unsigned long *vpn)
> + int *psize, int *apsize, int *ssize, unsigned long *vpn)
> {
> unsigned long avpn, pteg, vpi;
> unsigned long hpte_r = hpte->r;
> unsigned long hpte_v = hpte->v;
> unsigned long vsid, seg_off;
> - int i, size, shift, penc;
> + int i, size, a_size, shift, penc;
>
> - if (!(hpte_v & HPTE_V_LARGE))
> - size = MMU_PAGE_4K;
> - else {
> + if (!(hpte_v & HPTE_V_LARGE)) {
> + size = MMU_PAGE_4K;
> + a_size = MMU_PAGE_4K;
> + } else {
> for (i = 0; i < LP_BITS; i++) {
> if ((hpte_r & LP_MASK(i+1)) == LP_MASK(i+1))
> break;
> @@ -388,19 +444,26 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
> penc = LP_MASK(i+1) >> LP_SHIFT;
> for (size = 0; size < MMU_PAGE_COUNT; size++) {
>
> - /* 4K pages are not represented by LP */
> - if (size == MMU_PAGE_4K)
> - continue;
> -
> /* valid entries have a shift value */
> if (!mmu_psize_defs[size].shift)
> continue;
> + for (a_size = 0; a_size < MMU_PAGE_COUNT; a_size++) {
Can't you reuse hpte_actual_psize() here instead of recoding the lookup?
> - if (penc == mmu_psize_defs[size].penc)
> - break;
> + /* 4K pages are not represented by LP */
> + if (a_size == MMU_PAGE_4K)
> + continue;
> +
> + /* valid entries have a shift value */
> + if (!mmu_psize_defs[a_size].shift)
> + continue;
> +
> + if (penc == mmu_psize_defs[size].penc[a_size])
> + goto out;
> + }
> }
> }
>
> +out:
> /* This works for all page sizes, and for 256M and 1T segments */
> *ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
> shift = mmu_psize_defs[size].shift;
> @@ -433,7 +496,8 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
> default:
> *vpn = size = 0;
> }
> - *psize = size;
> + *psize = size;
> + *apsize = a_size;
> }
>
> /*
> @@ -451,7 +515,7 @@ static void native_hpte_clear(void)
> struct hash_pte *hptep = htab_address;
> unsigned long hpte_v;
> unsigned long pteg_count;
> - int psize, ssize;
> + int psize, apsize, ssize;
>
> pteg_count = htab_hash_mask + 1;
>
> @@ -477,9 +541,9 @@ static void native_hpte_clear(void)
> * already hold the native_tlbie_lock.
> */
> if (hpte_v & HPTE_V_VALID) {
> - hpte_decode(hptep, slot, &psize, &ssize, &vpn);
> + hpte_decode(hptep, slot, &psize, &apsize, &ssize, &vpn);
> hptep->v = 0;
> - __tlbie(vpn, psize, ssize);
> + __tlbie(vpn, psize, apsize, ssize);
> }
> }
>
> @@ -540,7 +604,7 @@ static void native_flush_hash_range(unsigned long number, int local)
>
> pte_iterate_hashed_subpages(pte, psize,
> vpn, index, shift) {
> - __tlbiel(vpn, psize, ssize);
> + __tlbiel(vpn, psize, psize, ssize);
> } pte_iterate_hashed_end();
> }
> asm volatile("ptesync":::"memory");
> @@ -557,7 +621,7 @@ static void native_flush_hash_range(unsigned long number, int local)
>
> pte_iterate_hashed_subpages(pte, psize,
> vpn, index, shift) {
> - __tlbie(vpn, psize, ssize);
> + __tlbie(vpn, psize, psize, ssize);
> } pte_iterate_hashed_end();
> }
> asm volatile("eieio; tlbsync; ptesync":::"memory");
> diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
> index bfeab83..a5a5067 100644
> --- a/arch/powerpc/mm/hash_utils_64.c
> +++ b/arch/powerpc/mm/hash_utils_64.c
> @@ -125,7 +125,7 @@ static struct mmu_psize_def mmu_psize_defaults_old[] = {
> [MMU_PAGE_4K] = {
> .shift = 12,
> .sllp = 0,
> - .penc = 0,
> + .penc = {[MMU_PAGE_4K] = 0, [1 ... MMU_PAGE_COUNT - 1] = -1},
> .avpnm = 0,
> .tlbiel = 0,
> },
> @@ -139,14 +139,15 @@ static struct mmu_psize_def mmu_psize_defaults_gp[] = {
> [MMU_PAGE_4K] = {
> .shift = 12,
> .sllp = 0,
> - .penc = 0,
> + .penc = {[MMU_PAGE_4K] = 0, [1 ... MMU_PAGE_COUNT - 1] = -1},
> .avpnm = 0,
> .tlbiel = 1,
> },
> [MMU_PAGE_16M] = {
> .shift = 24,
> .sllp = SLB_VSID_L,
> - .penc = 0,
> + .penc = {[0 ... MMU_PAGE_16M - 1] = -1, [MMU_PAGE_16M] = 0,
> + [MMU_PAGE_16M + 1 ... MMU_PAGE_COUNT - 1] = -1 },
> .avpnm = 0x1UL,
> .tlbiel = 0,
> },
> @@ -208,7 +209,7 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
>
> BUG_ON(!ppc_md.hpte_insert);
> ret = ppc_md.hpte_insert(hpteg, vpn, paddr, tprot,
> - HPTE_V_BOLTED, psize, ssize);
> + HPTE_V_BOLTED, psize, psize, ssize);
>
> if (ret < 0)
> break;
> @@ -275,6 +276,30 @@ static void __init htab_init_seg_sizes(void)
> of_scan_flat_dt(htab_dt_scan_seg_sizes, NULL);
> }
>
> +static int __init get_idx_from_shift(unsigned int shift)
> +{
> + int idx = -1;
> +
> + switch (shift) {
> + case 0xc:
> + idx = MMU_PAGE_4K;
> + break;
> + case 0x10:
> + idx = MMU_PAGE_64K;
> + break;
> + case 0x14:
> + idx = MMU_PAGE_1M;
> + break;
> + case 0x18:
> + idx = MMU_PAGE_16M;
> + break;
> + case 0x22:
> + idx = MMU_PAGE_16G;
> + break;
> + }
> + return idx;
> +}
> +
> static int __init htab_dt_scan_page_sizes(unsigned long node,
> const char *uname, int depth,
> void *data)
> @@ -294,60 +319,61 @@ static int __init htab_dt_scan_page_sizes(unsigned long node,
> size /= 4;
> cur_cpu_spec->mmu_features &= ~(MMU_FTR_16M_PAGE);
> while(size > 0) {
> - unsigned int shift = prop[0];
> + unsigned int base_shift = prop[0];
> unsigned int slbenc = prop[1];
> unsigned int lpnum = prop[2];
> - unsigned int lpenc = 0;
> struct mmu_psize_def *def;
> - int idx = -1;
> + int idx, base_idx;
>
> size -= 3; prop += 3;
> - while(size > 0 && lpnum) {
> - if (prop[0] == shift)
> - lpenc = prop[1];
> - prop += 2; size -= 2;
> - lpnum--;
> + base_idx = get_idx_from_shift(base_shift);
> + if (base_idx < 0) {
> + /*
> + * skip the pte encoding also
> + */
> + prop += lpnum * 2; size -= lpnum * 2;
> + continue;
> }
> - switch(shift) {
> - case 0xc:
> - idx = MMU_PAGE_4K;
> - break;
> - case 0x10:
> - idx = MMU_PAGE_64K;
> - break;
> - case 0x14:
> - idx = MMU_PAGE_1M;
> - break;
> - case 0x18:
> - idx = MMU_PAGE_16M;
> + def = &mmu_psize_defs[base_idx];
> + if (base_idx == MMU_PAGE_16M)
> cur_cpu_spec->mmu_features |= MMU_FTR_16M_PAGE;
> - break;
> - case 0x22:
> - idx = MMU_PAGE_16G;
> - break;
> - }
> - if (idx < 0)
> - continue;
> - def = &mmu_psize_defs[idx];
> - def->shift = shift;
> - if (shift <= 23)
> +
> + def->shift = base_shift;
> + if (base_shift <= 23)
> def->avpnm = 0;
> else
> - def->avpnm = (1 << (shift - 23)) - 1;
> + def->avpnm = (1 << (base_shift - 23)) - 1;
> def->sllp = slbenc;
> - def->penc = lpenc;
> - /* We don't know for sure what's up with tlbiel, so
> + /*
> + * We don't know for sure what's up with tlbiel, so
> * for now we only set it for 4K and 64K pages
> */
> - if (idx == MMU_PAGE_4K || idx == MMU_PAGE_64K)
> + if (base_idx == MMU_PAGE_4K || base_idx == MMU_PAGE_64K)
> def->tlbiel = 1;
> else
> def->tlbiel = 0;
>
> - DBG(" %d: shift=%02x, sllp=%04lx, avpnm=%08lx, "
> - "tlbiel=%d, penc=%d\n",
> - idx, shift, def->sllp, def->avpnm, def->tlbiel,
> - def->penc);
> + while (size > 0 && lpnum) {
> + unsigned int shift = prop[0];
> + int penc = prop[1];
> +
> + prop += 2; size -= 2;
> + lpnum--;
> +
> + idx = get_idx_from_shift(shift);
> + if (idx < 0)
> + continue;
> +
> + if (penc == -1)
> + pr_err("Invalid penc for base_shift=%d "
> + "shift=%d\n", base_shift, shift);
> +
> + def->penc[idx] = penc;
> + DBG(" %d: shift=%02x, sllp=%04lx, "
> + "avpnm=%08lx, tlbiel=%d, penc=%d\n",
> + idx, shift, def->sllp, def->avpnm,
> + def->tlbiel, def->penc[idx]);
> + }
> }
> return 1;
> }
> @@ -396,10 +422,21 @@ static int __init htab_dt_scan_hugepage_blocks(unsigned long node,
> }
> #endif /* CONFIG_HUGETLB_PAGE */
>
> +static void mmu_psize_set_default_penc(void)
> +{
> + int bpsize, apsize;
> + for (bpsize = 0; bpsize < MMU_PAGE_COUNT; bpsize++)
> + for (apsize = 0; apsize < MMU_PAGE_COUNT; apsize++)
> + mmu_psize_defs[bpsize].penc[apsize] = -1;
> +}
> +
> static void __init htab_init_page_sizes(void)
> {
> int rc;
>
> + /* se the invalid penc to -1 */
> + mmu_psize_set_default_penc();
> +
> /* Default to 4K pages only */
> memcpy(mmu_psize_defs, mmu_psize_defaults_old,
> sizeof(mmu_psize_defaults_old));
> diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
> index cecad34..e0d52ee 100644
> --- a/arch/powerpc/mm/hugetlbpage-hash64.c
> +++ b/arch/powerpc/mm/hugetlbpage-hash64.c
> @@ -103,7 +103,7 @@ repeat:
>
> /* Insert into the hash table, primary slot */
> slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0,
> - mmu_psize, ssize);
> + mmu_psize, mmu_psize, ssize);
>
> /* Primary is full, try the secondary */
> if (unlikely(slot == -1)) {
> @@ -111,7 +111,7 @@ repeat:
> HPTES_PER_GROUP) & ~0x7UL;
> slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags,
> HPTE_V_SECONDARY,
> - mmu_psize, ssize);
> + mmu_psize, mmu_psize, ssize);
> if (slot == -1) {
> if (mftb() & 0x1)
> hpte_group = ((hash & htab_hash_mask) *
> diff --git a/arch/powerpc/platforms/cell/beat_htab.c b/arch/powerpc/platforms/cell/beat_htab.c
> index 472f9a7..246e1d8 100644
> --- a/arch/powerpc/platforms/cell/beat_htab.c
> +++ b/arch/powerpc/platforms/cell/beat_htab.c
> @@ -90,7 +90,7 @@ static inline unsigned int beat_read_mask(unsigned hpte_group)
> static long beat_lpar_hpte_insert(unsigned long hpte_group,
> unsigned long vpn, unsigned long pa,
> unsigned long rflags, unsigned long vflags,
> - int psize, int ssize)
> + int psize, int apsize, int ssize)
> {
> unsigned long lpar_rc;
> u64 hpte_v, hpte_r, slot;
> @@ -103,9 +103,9 @@ static long beat_lpar_hpte_insert(unsigned long hpte_group,
> "rflags=%lx, vflags=%lx, psize=%d)\n",
> hpte_group, va, pa, rflags, vflags, psize);
>
> - hpte_v = hpte_encode_v(vpn, psize, MMU_SEGSIZE_256M) |
> + hpte_v = hpte_encode_v(vpn, psize, apsize, MMU_SEGSIZE_256M) |
> vflags | HPTE_V_VALID;
> - hpte_r = hpte_encode_r(pa, psize) | rflags;
> + hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;
>
> if (!(vflags & HPTE_V_BOLTED))
> DBG_LOW(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r);
> @@ -314,7 +314,7 @@ void __init hpte_init_beat(void)
> static long beat_lpar_hpte_insert_v3(unsigned long hpte_group,
> unsigned long vpn, unsigned long pa,
> unsigned long rflags, unsigned long vflags,
> - int psize, int ssize)
> + int psize, int apsize, int ssize)
> {
> unsigned long lpar_rc;
> u64 hpte_v, hpte_r, slot;
> @@ -327,9 +327,9 @@ static long beat_lpar_hpte_insert_v3(unsigned long hpte_group,
> "rflags=%lx, vflags=%lx, psize=%d)\n",
> hpte_group, vpn, pa, rflags, vflags, psize);
>
> - hpte_v = hpte_encode_v(vpn, psize, MMU_SEGSIZE_256M) |
> + hpte_v = hpte_encode_v(vpn, psize, apsize, MMU_SEGSIZE_256M) |
> vflags | HPTE_V_VALID;
> - hpte_r = hpte_encode_r(pa, psize) | rflags;
> + hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;
>
> if (!(vflags & HPTE_V_BOLTED))
> DBG_LOW(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r);
> @@ -373,7 +373,7 @@ static long beat_lpar_hpte_updatepp_v3(unsigned long slot,
> unsigned long pss;
>
> want_v = hpte_encode_avpn(vpn, psize, MMU_SEGSIZE_256M);
> - pss = (psize == MMU_PAGE_4K) ? -1UL : mmu_psize_defs[psize].penc;
> + pss = (psize == MMU_PAGE_4K) ? -1UL : mmu_psize_defs[psize].penc[psize];
>
> DBG_LOW(" update: "
> "avpnv=%016lx, slot=%016lx, psize: %d, newpp %016lx ... ",
> @@ -403,7 +403,7 @@ static void beat_lpar_hpte_invalidate_v3(unsigned long slot, unsigned long vpn,
> DBG_LOW(" inval : slot=%lx, vpn=%016lx, psize: %d, local: %d\n",
> slot, vpn, psize, local);
> want_v = hpte_encode_avpn(vpn, psize, MMU_SEGSIZE_256M);
> - pss = (psize == MMU_PAGE_4K) ? -1UL : mmu_psize_defs[psize].penc;
> + pss = (psize == MMU_PAGE_4K) ? -1UL : mmu_psize_defs[psize].penc[psize];
>
> lpar_rc = beat_invalidate_htab_entry3(0, slot, want_v, pss);
>
> diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c
> index 07a4bba..44f06d2 100644
> --- a/arch/powerpc/platforms/ps3/htab.c
> +++ b/arch/powerpc/platforms/ps3/htab.c
> @@ -45,7 +45,7 @@ static DEFINE_SPINLOCK(ps3_htab_lock);
>
> static long ps3_hpte_insert(unsigned long hpte_group, unsigned long vpn,
> unsigned long pa, unsigned long rflags, unsigned long vflags,
> - int psize, int ssize)
> + int psize, int apsize, int ssize)
> {
> int result;
> u64 hpte_v, hpte_r;
> @@ -61,8 +61,8 @@ static long ps3_hpte_insert(unsigned long hpte_group, unsigned long vpn,
> */
> vflags &= ~HPTE_V_SECONDARY;
>
> - hpte_v = hpte_encode_v(vpn, psize, ssize) | vflags | HPTE_V_VALID;
> - hpte_r = hpte_encode_r(ps3_mm_phys_to_lpar(pa), psize) | rflags;
> + hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
> + hpte_r = hpte_encode_r(ps3_mm_phys_to_lpar(pa), psize, apsize) | rflags;
>
> spin_lock_irqsave(&ps3_htab_lock, flags);
>
> diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
> index a77c35b..3daced3 100644
> --- a/arch/powerpc/platforms/pseries/lpar.c
> +++ b/arch/powerpc/platforms/pseries/lpar.c
> @@ -109,7 +109,7 @@ void vpa_init(int cpu)
> static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
> unsigned long vpn, unsigned long pa,
> unsigned long rflags, unsigned long vflags,
> - int psize, int ssize)
> + int psize, int apsize, int ssize)
> {
> unsigned long lpar_rc;
> unsigned long flags;
> @@ -121,8 +121,8 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
> "pa=%016lx, rflags=%lx, vflags=%lx, psize=%d)\n",
> hpte_group, vpn, pa, rflags, vflags, psize);
>
> - hpte_v = hpte_encode_v(vpn, psize, ssize) | vflags | HPTE_V_VALID;
> - hpte_r = hpte_encode_r(pa, psize) | rflags;
> + hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
> + hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;
>
> if (!(vflags & HPTE_V_BOLTED))
> pr_devel(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r);
--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson
[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 198 bytes --]
^ permalink raw reply
* [RESEND PATCH 3/4] Use vmap_area_list to get vmalloc_start for ppc32.
From: Atsushi Kumagai @ 2013-04-10 7:10 UTC (permalink / raw)
To: kexec; +Cc: linuxppc-dev
In-Reply-To: <20130410150524.804cd23b99a697f71146be67@mxc.nes.nec.co.jp>
From: Atsushi Kumagai <kumagai-atsushi@mxc.nes.nec.co.jp>
Date: Fri, 15 Mar 2013 19:30:23 +0900
Subject: [PATCH 3/4] Use vmap_area_list to get vmalloc_start for ppc32.
Try to get vmalloc_start value from vmap_area_list first for
newer ppc32 kernels.
Signed-off-by: Atsushi Kumagai <kumagai-atsushi@mxc.nes.nec.co.jp>
---
arch/ppc.c | 44 +++++++++++++++++++++++++++++++-------------
1 file changed, 31 insertions(+), 13 deletions(-)
diff --git a/arch/ppc.c b/arch/ppc.c
index 2bc8d5f..a9b4812 100644
--- a/arch/ppc.c
+++ b/arch/ppc.c
@@ -44,22 +44,40 @@ get_machdep_info_ppc(void)
DEBUG_MSG("kernel_start : %lx\n", info->kernel_start);
/*
- * For the compatibility, makedumpfile should run without the symbol
- * vmlist and the offset of vm_struct.addr if they are not necessary.
+ * Get vmalloc_start value from either vmap_area_list or vmlist.
*/
- if ((SYMBOL(vmlist) == NOT_FOUND_SYMBOL)
- || (OFFSET(vm_struct.addr) == NOT_FOUND_STRUCTURE)) {
+ if ((SYMBOL(vmap_area_list) != NOT_FOUND_SYMBOL)
+ && (OFFSET(vmap_area.va_start) != NOT_FOUND_STRUCTURE)
+ && (OFFSET(vmap_area.list) != NOT_FOUND_STRUCTURE)) {
+ if (!readmem(VADDR, SYMBOL(vmap_area_list) + OFFSET(list_head.next),
+ &vmap_area_list, sizeof(vmap_area_list))) {
+ ERRMSG("Can't get vmap_area_list.\n");
+ return FALSE;
+ }
+ if (!readmem(VADDR, vmap_area_list - OFFSET(vmap_area.list) +
+ OFFSET(vmap_area.va_start), &vmalloc_start,
+ sizeof(vmalloc_start))) {
+ ERRMSG("Can't get vmalloc_start.\n");
+ return FALSE;
+ }
+ } else if ((SYMBOL(vmlist) != NOT_FOUND_SYMBOL)
+ && (OFFSET(vm_struct.addr) != NOT_FOUND_STRUCTURE)) {
+ if (!readmem(VADDR, SYMBOL(vmlist), &vmlist, sizeof(vmlist))) {
+ ERRMSG("Can't get vmlist.\n");
+ return FALSE;
+ }
+ if (!readmem(VADDR, vmlist + OFFSET(vm_struct.addr), &vmalloc_start,
+ sizeof(vmalloc_start))) {
+ ERRMSG("Can't get vmalloc_start.\n");
+ return FALSE;
+ }
+ } else {
+ /*
+ * For the compatibility, makedumpfile should run without the symbols
+ * used to get the vmalloc_start value if they are not necessary.
+ */
return TRUE;
}
- if (!readmem(VADDR, SYMBOL(vmlist), &vmlist, sizeof(vmlist))) {
- ERRMSG("Can't get vmlist.\n");
- return FALSE;
- }
- if (!readmem(VADDR, vmlist + OFFSET(vm_struct.addr), &vmalloc_start,
- sizeof(vmalloc_start))) {
- ERRMSG("Can't get vmalloc_start.\n");
- return FALSE;
- }
info->vmalloc_start = vmalloc_start;
DEBUG_MSG("vmalloc_start: %lx\n", vmalloc_start);
--
1.8.0.2
^ permalink raw reply related
* [RESEND PATCH 2/4] Use vmap_area_list to get vmalloc_start for i386.
From: Atsushi Kumagai @ 2013-04-10 7:10 UTC (permalink / raw)
To: kexec; +Cc: linuxppc-dev
In-Reply-To: <20130410150524.804cd23b99a697f71146be67@mxc.nes.nec.co.jp>
From: Atsushi Kumagai <kumagai-atsushi@mxc.nes.nec.co.jp>
Date: Thu, 14 Mar 2013 19:10:49 +0900
Subject: [PATCH 2/4] Use vmap_area_list to get vmalloc_start for i386.
Try to get vmalloc_start value from vmap_area_list first for
newer i386 kernels.
Signed-off-by: Atsushi Kumagai <kumagai-atsushi@mxc.nes.nec.co.jp>
---
arch/x86.c | 46 ++++++++++++++++++++++++++++++++--------------
1 file changed, 32 insertions(+), 14 deletions(-)
diff --git a/arch/x86.c b/arch/x86.c
index ef29e3c..e397905 100644
--- a/arch/x86.c
+++ b/arch/x86.c
@@ -70,7 +70,7 @@ remap_init(void)
int
get_machdep_info_x86(void)
{
- unsigned long vmlist, vmalloc_start;
+ unsigned long vmlist, vmap_area_list, vmalloc_start;
/* PAE */
if ((vt.mem_flags & MEMORY_X86_PAE)
@@ -100,22 +100,40 @@ get_machdep_info_x86(void)
return FALSE;
/*
- * For the compatibility, makedumpfile should run without the symbol
- * vmlist and the offset of vm_struct.addr if they are not necessary.
+ * Get vmalloc_start value from either vmap_area_list or vmlist.
*/
- if ((SYMBOL(vmlist) == NOT_FOUND_SYMBOL)
- || (OFFSET(vm_struct.addr) == NOT_FOUND_STRUCTURE)) {
+ if ((SYMBOL(vmap_area_list) != NOT_FOUND_SYMBOL)
+ && (OFFSET(vmap_area.va_start) != NOT_FOUND_STRUCTURE)
+ && (OFFSET(vmap_area.list) != NOT_FOUND_STRUCTURE)) {
+ if (!readmem(VADDR, SYMBOL(vmap_area_list) + OFFSET(list_head.next),
+ &vmap_area_list, sizeof(vmap_area_list))) {
+ ERRMSG("Can't get vmap_area_list.\n");
+ return FALSE;
+ }
+ if (!readmem(VADDR, vmap_area_list - OFFSET(vmap_area.list) +
+ OFFSET(vmap_area.va_start), &vmalloc_start,
+ sizeof(vmalloc_start))) {
+ ERRMSG("Can't get vmalloc_start.\n");
+ return FALSE;
+ }
+ } else if ((SYMBOL(vmlist) != NOT_FOUND_SYMBOL)
+ && (OFFSET(vm_struct.addr) != NOT_FOUND_STRUCTURE)) {
+ if (!readmem(VADDR, SYMBOL(vmlist), &vmlist, sizeof(vmlist))) {
+ ERRMSG("Can't get vmlist.\n");
+ return FALSE;
+ }
+ if (!readmem(VADDR, vmlist + OFFSET(vm_struct.addr), &vmalloc_start,
+ sizeof(vmalloc_start))) {
+ ERRMSG("Can't get vmalloc_start.\n");
+ return FALSE;
+ }
+ } else {
+ /*
+ * For the compatibility, makedumpfile should run without the symbols
+ * used to get the vmalloc_start value if they are not necessary.
+ */
return TRUE;
}
- if (!readmem(VADDR, SYMBOL(vmlist), &vmlist, sizeof(vmlist))) {
- ERRMSG("Can't get vmlist.\n");
- return FALSE;
- }
- if (!readmem(VADDR, vmlist + OFFSET(vm_struct.addr), &vmalloc_start,
- sizeof(vmalloc_start))) {
- ERRMSG("Can't get vmalloc_start.\n");
- return FALSE;
- }
info->vmalloc_start = vmalloc_start;
DEBUG_MSG("vmalloc_start: %lx\n", vmalloc_start);
--
1.8.0.2
^ permalink raw reply related
* [RESEND PATCH 1/4] Introduce new symbols to look at the first vmap_area.
From: Atsushi Kumagai @ 2013-04-10 7:10 UTC (permalink / raw)
To: kexec; +Cc: linuxppc-dev
In-Reply-To: <20130410150524.804cd23b99a697f71146be67@mxc.nes.nec.co.jp>
From: Atsushi Kumagai <kumagai-atsushi@mxc.nes.nec.co.jp>
Date: Thu, 14 Mar 2013 11:08:57 +0900
Subject: [PATCH 1/4] Introduce new symbols to look at the first vmap_area.
The vmlist symbol, which we have used to get the vmalloc_start value,
will be removed from the kernel soon, so we have to look at the first
vmap_area in the sorted vmap_area_list instead of vmlist.
The address that contains the vmalloc_start value is computed as
follows:
vmap_area_list.next - OFFSET(vmap_area.list) + OFFSET(vmap_area.va_start)
This patch introduces these symbols.
Signed-off-by: Atsushi Kumagai <kumagai-atsushi@mxc.nes.nec.co.jp>
---
makedumpfile.c | 9 +++++++++
makedumpfile.h | 5 +++++
2 files changed, 14 insertions(+)
diff --git a/makedumpfile.c b/makedumpfile.c
index 6de0581..9cf907c 100644
--- a/makedumpfile.c
+++ b/makedumpfile.c
@@ -854,6 +854,7 @@ get_symbol_info(void)
SYMBOL_INIT(swapper_pg_dir, "swapper_pg_dir");
SYMBOL_INIT(init_level4_pgt, "init_level4_pgt");
SYMBOL_INIT(vmlist, "vmlist");
+ SYMBOL_INIT(vmap_area_list, "vmap_area_list");
SYMBOL_INIT(phys_base, "phys_base");
SYMBOL_INIT(node_online_map, "node_online_map");
SYMBOL_INIT(node_states, "node_states");
@@ -989,6 +990,8 @@ get_structure_info(void)
OFFSET_INIT(node_memblk_s.nid, "node_memblk_s", "nid");
OFFSET_INIT(vm_struct.addr, "vm_struct", "addr");
+ OFFSET_INIT(vmap_area.va_start, "vmap_area", "va_start");
+ OFFSET_INIT(vmap_area.list, "vmap_area", "list");
/*
* Get offset of the module members.
@@ -1368,6 +1371,7 @@ write_vmcoreinfo_data(void)
WRITE_SYMBOL("swapper_pg_dir", swapper_pg_dir);
WRITE_SYMBOL("init_level4_pgt", init_level4_pgt);
WRITE_SYMBOL("vmlist", vmlist);
+ WRITE_SYMBOL("vmap_area_list", vmap_area_list);
WRITE_SYMBOL("phys_base", phys_base);
WRITE_SYMBOL("node_online_map", node_online_map);
WRITE_SYMBOL("node_states", node_states);
@@ -1430,6 +1434,8 @@ write_vmcoreinfo_data(void)
WRITE_MEMBER_OFFSET("node_memblk_s.size", node_memblk_s.size);
WRITE_MEMBER_OFFSET("node_memblk_s.nid", node_memblk_s.nid);
WRITE_MEMBER_OFFSET("vm_struct.addr", vm_struct.addr);
+ WRITE_MEMBER_OFFSET("vmap_area.va_start", vmap_area.va_start);
+ WRITE_MEMBER_OFFSET("vmap_area.list", vmap_area.list);
WRITE_MEMBER_OFFSET("log.ts_nsec", log.ts_nsec);
WRITE_MEMBER_OFFSET("log.len", log.len);
WRITE_MEMBER_OFFSET("log.text_len", log.text_len);
@@ -1684,6 +1690,7 @@ read_vmcoreinfo(void)
READ_SYMBOL("swapper_pg_dir", swapper_pg_dir);
READ_SYMBOL("init_level4_pgt", init_level4_pgt);
READ_SYMBOL("vmlist", vmlist);
+ READ_SYMBOL("vmap_area_list", vmap_area_list);
READ_SYMBOL("phys_base", phys_base);
READ_SYMBOL("node_online_map", node_online_map);
READ_SYMBOL("node_states", node_states);
@@ -1739,6 +1746,8 @@ read_vmcoreinfo(void)
READ_MEMBER_OFFSET("node_memblk_s.size", node_memblk_s.size);
READ_MEMBER_OFFSET("node_memblk_s.nid", node_memblk_s.nid);
READ_MEMBER_OFFSET("vm_struct.addr", vm_struct.addr);
+ READ_MEMBER_OFFSET("vmap_area.va_start", vmap_area.va_start);
+ READ_MEMBER_OFFSET("vmap_area.list", vmap_area.list);
READ_MEMBER_OFFSET("log.ts_nsec", log.ts_nsec);
READ_MEMBER_OFFSET("log.len", log.len);
READ_MEMBER_OFFSET("log.text_len", log.text_len);
diff --git a/makedumpfile.h b/makedumpfile.h
index de329f3..36cb9fd 100644
--- a/makedumpfile.h
+++ b/makedumpfile.h
@@ -1096,6 +1096,7 @@ struct symbol_table {
unsigned long long swapper_pg_dir;
unsigned long long init_level4_pgt;
unsigned long long vmlist;
+ unsigned long long vmap_area_list;
unsigned long long phys_base;
unsigned long long node_online_map;
unsigned long long node_states;
@@ -1233,6 +1234,10 @@ struct offset_table {
struct vm_struct {
long addr;
} vm_struct;
+ struct vmap_area {
+ long va_start;
+ long list;
+ } vmap_area;
/*
* for Xen extraction
--
1.8.0.2
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox