LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* Re: [PATCH] powerpc: Add udbg-immortal kernel option
From: Olof Johansson @ 2006-06-07  2:24 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev list, Paul Mackerras
In-Reply-To: <1149645981.27572.96.camel@localhost.localdomain>

On Wed, Jun 07, 2006 at 12:06:20PM +1000, Benjamin Herrenschmidt wrote:
>  		return;
> +	if (strstr(saved_command_line, "udbg-immortal")) {
> +		printk(KERN_INFO "early console immortal !\n");
> +		return;
> +	}

So it's YOU who add spaces before punctuation all over arch/powerpc!
Please fix. ;-)


-Olof

^ permalink raw reply

* [PATCH] powerpc: Add udbg-immortal kernel option
From: Benjamin Herrenschmidt @ 2006-06-07  2:06 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: linuxppc-dev list

When debugging early kernel crashes that happen after console_init() and
before a proper console driver takes over, we often have to go hack into
udbg.c to prevent it from unregistering so we can "see" what is
happening. This patch adds a kernel command line option "udbg-immortal"
instead to avoid having to modify the kernel.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

---
Paul: this is not 2.6.17 material

Index: linux-work/arch/powerpc/kernel/udbg.c
===================================================================
--- linux-work.orig/arch/powerpc/kernel/udbg.c	2006-05-31 14:13:59.000000000 +1000
+++ linux-work/arch/powerpc/kernel/udbg.c	2006-05-31 14:48:07.000000000 +1000
@@ -14,6 +14,7 @@
 #include <linux/types.h>
 #include <linux/sched.h>
 #include <linux/console.h>
+#include <linux/init.h>
 #include <asm/processor.h>
 #include <asm/udbg.h>
 
@@ -141,12 +142,14 @@ static int early_console_initialized;
 
 void __init disable_early_printk(void)
 {
-#if 1
 	if (!early_console_initialized)
 		return;
+	if (strstr(saved_command_line, "udbg-immortal")) {
+		printk(KERN_INFO "early console immortal !\n");
+		return;
+	}
 	unregister_console(&udbg_console);
 	early_console_initialized = 0;
-#endif
 }
 
 /* called by setup_system */

^ permalink raw reply

* [PATCH] powerpc: Fix cell blade detection
From: Benjamin Herrenschmidt @ 2006-06-07  2:04 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: linuxppc-dev list

From: Arnd Bergmann <arnd@arndb.de>

The IBM Cell blade firmware might confuse the kernel to think it's a
pSeries machine. This fixes it for now. With a bit of luck, the firmware
will be updated to avoid that in the future but currently that patch is
needed.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

Index: linux-work/arch/powerpc/kernel/prom_init.c
===================================================================
--- linux-work.orig/arch/powerpc/kernel/prom_init.c	2006-06-06 11:59:20.000000000 +1000
+++ linux-work/arch/powerpc/kernel/prom_init.c	2006-06-06 12:04:42.000000000 +1000
@@ -1623,6 +1623,15 @@ static int __init prom_find_machine_type
 			if (strstr(p, RELOC("Power Macintosh")) ||
 			    strstr(p, RELOC("MacRISC")))
 				return PLATFORM_POWERMAC;
+#ifdef CONFIG_PPC64
+			/* We must make sure we don't detect the IBM Cell
+			 * blades as pSeries due to some firmware issues,
+			 * so we do it here.
+			 */
+			if (strstr(p, RELOC("IBM,CBEA")) ||
+			    strstr(p, RELOC("IBM,CPBW-1.0")))
+				return PLATFORM_GENERIC;
+#endif /* CONFIG_PPC64 */
 			i += sl + 1;
 		}
 	}
Index: linux-work/arch/powerpc/platforms/cell/setup.c
===================================================================
--- linux-work.orig/arch/powerpc/platforms/cell/setup.c	2006-05-11 11:45:08.000000000 +1000
+++ linux-work/arch/powerpc/platforms/cell/setup.c	2006-06-06 12:03:03.000000000 +1000
@@ -125,14 +125,13 @@ static void __init cell_init_early(void)
 
 static int __init cell_probe(void)
 {
-	/* XXX This is temporary, the Cell maintainer will come up with
-	 * more appropriate detection logic
-	 */
 	unsigned long root = of_get_flat_dt_root();
-	if (!of_flat_dt_is_compatible(root, "IBM,CPBW-1.0"))
-		return 0;
 
-	return 1;
+	if (of_flat_dt_is_compatible(root, "IBM,CBEA") ||
+	    of_flat_dt_is_compatible(root, "IBM,CPBW-1.0"))
+		return 1;
+
+	return 0;
 }
 
 /*
Index: linux-work/arch/powerpc/platforms/pseries/setup.c
===================================================================
--- linux-work.orig/arch/powerpc/platforms/pseries/setup.c	2006-06-05 17:55:31.000000000 +1000
+++ linux-work/arch/powerpc/platforms/pseries/setup.c	2006-06-06 13:05:21.000000000 +1000
@@ -399,6 +399,7 @@ static int __init pSeries_probe_hypertas
 
 static int __init pSeries_probe(void)
 {
+	unsigned long root = of_get_flat_dt_root();
  	char *dtype = of_get_flat_dt_prop(of_get_flat_dt_root(),
  					  "device_type", NULL);
  	if (dtype == NULL)
@@ -406,6 +407,13 @@ static int __init pSeries_probe(void)
  	if (strcmp(dtype, "chrp"))
 		return 0;
 
+	/* Cell blades firmware claims to be chrp while it's not. Until this
+	 * is fixed, we need to avoid those here.
+	 */
+	if (of_flat_dt_is_compatible(root, "IBM,CPBW-1.0") ||
+	    of_flat_dt_is_compatible(root, "IBM,CBEA"))
+		return 0;
+
 	DBG("pSeries detected, looking for LPAR capability...\n");
 
 	/* Now try to figure out if we are running on LPAR */

^ permalink raw reply

* [PATCH] powerpc: Fix call to ibm,client-architecture-support
From: Benjamin Herrenschmidt @ 2006-06-07  2:01 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: linuxppc-dev list

The code in prom_init.c calling the firmware
ibm,client-architecture-support on pSeries has a bug where it fails to
properly pass the instance handle of the firmware object when trying to
call a method. Result ranges from the call doing nothing to the firmware
crashing. (Found by Segher, thanks !)

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

Index: linux-work/arch/powerpc/kernel/prom_init.c
===================================================================
--- linux-work.orig/arch/powerpc/kernel/prom_init.c	2006-05-30 13:00:51.000000000 +1000
+++ linux-work/arch/powerpc/kernel/prom_init.c	2006-06-06 11:59:20.000000000 +1000
@@ -822,6 +822,7 @@ static void __init prom_send_capabilitie
 		/* try calling the ibm,client-architecture-support method */
 		if (call_prom_ret("call-method", 3, 2, &ret,
 				  ADDR("ibm,client-architecture-support"),
+				  root,
 				  ADDR(ibm_architecture_vec)) == 0) {
 			/* the call exists... */
 			if (ret)

^ permalink raw reply

* [PATCH] powerpc: oprofile support for POWER6
From: Michael Neuling @ 2006-06-07  1:23 UTC (permalink / raw)
  To: paulus; +Cc: linuxppc-dev

POWER6 moves some of the MMCRA bits and also requires some bits to be
cleared each PMU interrupt.

Signed-off-by: Michael Neuling <mikey@neuling.org>
Acked-by: Anton Blanchard <anton@samba.org>
---
Paul: for your post 2.6.17 queue

 arch/powerpc/kernel/cputable.c          |   10 +++++++-
 arch/powerpc/oprofile/op_model_power4.c |   37 ++++++++++++--------------------
 include/asm-powerpc/cputable.h          |   11 ++++++---
 include/asm-powerpc/reg.h               |    4 +++
 4 files changed, 36 insertions(+), 26 deletions(-)

Index: linux-2.6-powerpc/arch/powerpc/kernel/cputable.c
===================================================================
--- linux-2.6-powerpc.orig/arch/powerpc/kernel/cputable.c
+++ linux-2.6-powerpc/arch/powerpc/kernel/cputable.c
@@ -236,6 +236,8 @@ struct cpu_spec	cpu_specs[] = {
 		.num_pmcs		= 6,
 		.oprofile_cpu_type	= "ppc64/power5",
 		.oprofile_type		= PPC_OPROFILE_POWER4,
+		.oprofile_mmcra_sihv	= MMCRA_SIHV,
+		.oprofile_mmcra_sipr	= MMCRA_SIPR,
 		.platform		= "power5",
 	},
 	{	/* Power5 GS */
@@ -249,6 +251,8 @@ struct cpu_spec	cpu_specs[] = {
 		.num_pmcs		= 6,
 		.oprofile_cpu_type	= "ppc64/power5+",
 		.oprofile_type		= PPC_OPROFILE_POWER4,
+		.oprofile_mmcra_sihv	= MMCRA_SIHV,
+		.oprofile_mmcra_sipr	= MMCRA_SIPR,
 		.platform		= "power5+",
 	},
 	{	/* Power6 */
@@ -259,9 +263,13 @@ struct cpu_spec	cpu_specs[] = {
 		.cpu_user_features	= COMMON_USER_POWER6,
 		.icache_bsize		= 128,
 		.dcache_bsize		= 128,
-		.num_pmcs		= 6,
+		.num_pmcs		= 8,
 		.oprofile_cpu_type	= "ppc64/power6",
 		.oprofile_type		= PPC_OPROFILE_POWER4,
+ 		.oprofile_mmcra_sihv	= POWER6_MMCRA_SIHV,
+ 		.oprofile_mmcra_sipr	= POWER6_MMCRA_SIPR,
+ 		.oprofile_mmcra_clear	= POWER6_MMCRA_THRM |
+ 			POWER6_MMCRA_OTHER,
 		.platform		= "power6",
 	},
 	{	/* Cell Broadband Engine */
Index: linux-2.6-powerpc/arch/powerpc/oprofile/op_model_power4.c
===================================================================
--- linux-2.6-powerpc.orig/arch/powerpc/oprofile/op_model_power4.c
+++ linux-2.6-powerpc/arch/powerpc/oprofile/op_model_power4.c
@@ -24,10 +24,6 @@
 static unsigned long reset_value[OP_MAX_COUNTER];
 
 static int oprofile_running;
-static int mmcra_has_sihv;
-/* Unfortunately these bits vary between CPUs */
-static unsigned long mmcra_sihv = MMCRA_SIHV;
-static unsigned long mmcra_sipr = MMCRA_SIPR;
 
 /* mmcr values are set in power4_reg_setup, used in power4_cpu_setup */
 static u32 mmcr0_val;
@@ -41,16 +37,6 @@ static void power4_reg_setup(struct op_c
 	int i;
 
 	/*
-	 * SIHV / SIPR bits are only implemented on POWER4+ (GQ) and above.
-	 * However we disable it on all POWER4 until we verify it works
-	 * (I was seeing some strange behaviour last time I tried).
-	 *
-	 * It has been verified to work on POWER5 so we enable it there.
-	 */
-	if (cpu_has_feature(CPU_FTR_MMCRA_SIHV))
-		mmcra_has_sihv = 1;
-
-	/*
 	 * The performance counter event settings are given in the mmcr0,
 	 * mmcr1 and mmcra values passed from the user in the
 	 * op_system_config structure (sys variable).
@@ -202,18 +188,19 @@ static unsigned long get_pc(struct pt_re
 	unsigned long mmcra;
 
 	/* Cant do much about it */
-	if (!mmcra_has_sihv)
+	if (!cur_cpu_spec->oprofile_mmcra_sihv)
 		return pc;
 
 	mmcra = mfspr(SPRN_MMCRA);
 
 	/* Were we in the hypervisor? */
-	if (firmware_has_feature(FW_FEATURE_LPAR) && (mmcra & mmcra_sihv))
+	if (firmware_has_feature(FW_FEATURE_LPAR) &&
+	    (mmcra & cur_cpu_spec->oprofile_mmcra_sihv))
 		/* function descriptor madness */
 		return *((unsigned long *)hypervisor_bucket);
 
 	/* We were in userspace, nothing to do */
-	if (mmcra & mmcra_sipr)
+	if (mmcra & cur_cpu_spec->oprofile_mmcra_sipr)
 		return pc;
 
 #ifdef CONFIG_PPC_RTAS
@@ -235,15 +222,14 @@ static unsigned long get_pc(struct pt_re
 	return pc;
 }
 
-static int get_kernel(unsigned long pc)
+static int get_kernel(unsigned long pc, unsigned long mmcra)
 {
 	int is_kernel;
 
-	if (!mmcra_has_sihv) {
+	if (!cur_cpu_spec->oprofile_mmcra_sihv) {
 		is_kernel = is_kernel_addr(pc);
 	} else {
-		unsigned long mmcra = mfspr(SPRN_MMCRA);
-		is_kernel = ((mmcra & mmcra_sipr) == 0);
+		is_kernel = ((mmcra & cur_cpu_spec->oprofile_mmcra_sipr) == 0);
 	}
 
 	return is_kernel;
@@ -257,9 +243,12 @@ static void power4_handle_interrupt(stru
 	int val;
 	int i;
 	unsigned int mmcr0;
+	unsigned long mmcra;
+
+	mmcra = mfspr(SPRN_MMCRA);
 
 	pc = get_pc(regs);
-	is_kernel = get_kernel(pc);
+	is_kernel = get_kernel(pc, mmcra);
 
 	/* set the PMM bit (see comment below) */
 	mtmsrd(mfmsr() | MSR_PMM);
@@ -287,6 +276,10 @@ static void power4_handle_interrupt(stru
 	 */
 	mmcr0 &= ~MMCR0_PMAO;
 
+	/* Clear the appropriate bits in the MMCRA */
+	mmcra &= ~cur_cpu_spec->oprofile_mmcra_clear;
+	mtspr(SPRN_MMCRA, mmcra);
+
 	/*
 	 * now clear the freeze bit, counting will not start until we
 	 * rfid from this exception, because only at that point will
Index: linux-2.6-powerpc/include/asm-powerpc/cputable.h
===================================================================
--- linux-2.6-powerpc.orig/include/asm-powerpc/cputable.h
+++ linux-2.6-powerpc/include/asm-powerpc/cputable.h
@@ -69,6 +69,13 @@ struct cpu_spec {
 	/* Processor specific oprofile operations */
 	enum powerpc_oprofile_type oprofile_type;
 
+	/* Bit locations inside the mmcra change */
+	unsigned long	oprofile_mmcra_sihv;
+	unsigned long	oprofile_mmcra_sipr;
+
+	/* Bits to clear during an oprofile exception */
+	unsigned long	oprofile_mmcra_clear;
+
 	/* Name of processor class, for the ELF AT_PLATFORM entry */
 	char		*platform;
 };
@@ -117,7 +124,6 @@ extern void do_cpu_ftr_fixups(unsigned l
 #define CPU_FTR_SMT			ASM_CONST(0x0000010000000000)
 #define CPU_FTR_COHERENT_ICACHE		ASM_CONST(0x0000020000000000)
 #define CPU_FTR_LOCKLESS_TLBIE		ASM_CONST(0x0000040000000000)
-#define CPU_FTR_MMCRA_SIHV		ASM_CONST(0x0000080000000000)
 #define CPU_FTR_CI_LARGE_PAGE		ASM_CONST(0x0000100000000000)
 #define CPU_FTR_PAUSE_ZERO		ASM_CONST(0x0000200000000000)
 #define CPU_FTR_PURR			ASM_CONST(0x0000400000000000)
@@ -134,7 +140,6 @@ extern void do_cpu_ftr_fixups(unsigned l
 #define CPU_FTR_SMT			ASM_CONST(0x0)
 #define CPU_FTR_COHERENT_ICACHE		ASM_CONST(0x0)
 #define CPU_FTR_LOCKLESS_TLBIE		ASM_CONST(0x0)
-#define CPU_FTR_MMCRA_SIHV		ASM_CONST(0x0)
 #define CPU_FTR_CI_LARGE_PAGE		ASM_CONST(0x0)
 #define CPU_FTR_PURR			ASM_CONST(0x0)
 #endif
@@ -320,7 +325,7 @@ extern void do_cpu_ftr_fixups(unsigned l
 	    CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | \
 	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
 	    CPU_FTR_COHERENT_ICACHE | CPU_FTR_LOCKLESS_TLBIE | \
-	    CPU_FTR_MMCRA_SIHV | CPU_FTR_PURR)
+	    CPU_FTR_PURR)
 #define CPU_FTRS_POWER6 (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \
 	    CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | \
 	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
Index: linux-2.6-powerpc/include/asm-powerpc/reg.h
===================================================================
--- linux-2.6-powerpc.orig/include/asm-powerpc/reg.h
+++ linux-2.6-powerpc/include/asm-powerpc/reg.h
@@ -443,6 +443,10 @@
 #define   MMCRA_SIHV	0x10000000UL /* state of MSR HV when SIAR set */
 #define   MMCRA_SIPR	0x08000000UL /* state of MSR PR when SIAR set */
 #define   MMCRA_SAMPLE_ENABLE 0x00000001UL /* enable sampling */
+#define   POWER6_MMCRA_SIHV   0x0000040000000000ULL
+#define   POWER6_MMCRA_SIPR   0x0000020000000000ULL
+#define   POWER6_MMCRA_THRM	0x00000020UL
+#define   POWER6_MMCRA_OTHER	0x0000000EUL
 #define SPRN_PMC1	787
 #define SPRN_PMC2	788
 #define SPRN_PMC3	789

^ permalink raw reply

* Re: Collecting hypervisor call stats
From: Christopher Yeoh @ 2006-06-07  1:08 UTC (permalink / raw)
  To: Mike Kravetz; +Cc: Chris Yeoh, Bryan Rosenburg, linuxppc-dev
In-Reply-To: <20060606164646.GA3161@w-mikek2.ibm.com>

At 2006/6/6 09:46-0700  Mike Kravetz writes:
> On Thu, Jun 01, 2006 at 03:12:15PM +1000, Christopher Yeoh wrote:
> > Here's a patch we've used for collecting hcall counts and times.
> 
> Thanks for the patch/code Chris!  I'm using this as a basis for something
> that we may want to merge into the tree.  Just a couple of questions.
> 
> Your 'wrappers' have the following general form:
> 
> 
> Can you explain the need for barrier(s) before and after the call to the
> real routine?  It usually takes me a couple days of thought to figure out
> exactly where these are needed. :)

Ah oops, it turns out I was wrong and they're not necessary after all.

> The use of get_cpu_var/put_cpu_var result in disabling/enabling preemption.
> I can understand why this would be desirable to assure the accuracy of the
> statistics.  But, I was wondering if the desired accuracy is worth the added
> overhead.  My thought was to make these as lightweight as possible and
> sacrifice some accuracy if necessary.  After all, no 'internal decisions' are
> being made because of this data.  It is simply exposed to user land.
> Thoughts?

For what we were using them for at the time we weren't really
concerned about a small degradation in performance and were more
interested in accuracy. I guess I'd recommend doing some benchmarking
to see what difference they really make.

Chris
-- 
cyeoh@au.ibm.com
IBM OzLabs Linux Development Group
Canberra, Australia

^ permalink raw reply

* Re: [PATCH 0/5] Sizing zones and holes in an architecture independent manner V7
From: Andrew Morton @ 2006-06-06 23:43 UTC (permalink / raw)
  To: Mel Gorman
  Cc: davej, tony.luck, linux-mm, mel, ak, bob.picco, linux-kernel,
	linuxppc-dev
In-Reply-To: <20060606134710.21419.48239.sendpatchset@skynet.skynet.ie>

On Tue,  6 Jun 2006 14:47:10 +0100 (IST)
Mel Gorman <mel@csn.ul.ie> wrote:

> This is V7 of the patchset to size zones and memory holes in an
> architecture-independent manner.

I hope this won't deprive me of my 4 kbyte highmem zone.

I won't merge these patches for rc6-mm1 - we already have a few problems in
this area which I don't think anyone understands yet.

^ permalink raw reply

* RE: [PATCH/2.6.17-rc4 4/10]Powerpc:  Add tsi108 pic support
From: Benjamin Herrenschmidt @ 2006-06-06 23:08 UTC (permalink / raw)
  To: Alexandre Bounine; +Cc: linuxppc-dev list, Paul Mackerras, Yang Xin-Xin-r48390
In-Reply-To: <8A1F97E8A7ACE847B1DB69DFDCBC6E807D634E@caribou.pc.tundra.com>

On Tue, 2006-06-06 at 10:45 -0400, Alexandre Bounine wrote:

> We have a level-signalled irq from the cascaded PCI interrupt controller. If I do EOI at 
> this time, level request will not have chance to be cleared (unless all PCI interrupts have
> an SA_INTERRUPT flag) and result in recurring interrupts. 

Hrm... Ok, when the cascade is a 8259 or an MPIC, we don't have that
problem despite the output also being level... I think that's because
the cascade handler itself will mask the cascade interrupt (on MPIC,
reading the irq does an ack which will mask that priority level). If
your cascaded controller doesn't act this way, you may need something a
bit different in your cascade handler rather than changing mpic.

However, I wouldn't bother too much. As I said, this is all changing a
lot at the moment as I'm porting powerpc to Ingo Molnar and Thomas
Gleixner's new "genirq" layer. Cascade handling will be different and
taken out of MPIC, so you'll be able to implement it the way you want
(with much greater control on what happens) without changing the MPIC
driver.

I'll have patches posted on the list in a few days hopefully.

> I chose to have an individual flag instead of checking model ID to avoid multiple checks within ISR
> (in case if we have more than one mpic version requiring this option). I also expect that it may be
> useful for any external level-signalling cascades connected to MPIC.      

As I said above, I think it can just go away with the port to genirq.

> Motivation is the same as above - I just do not want to have multiple ID checks here. I agree that it is
> driven by mpic type (model ID) only. I can remove this one if you do not expect any
> new "broken" MPICs on horizon.  

Well, I do expect broken ones but not with that specific issue :)

Cheers,
Ben.

^ permalink raw reply

* 82xx CPM commands
From: Rune Torgersen @ 2006-06-06 22:13 UTC (permalink / raw)
  To: linuxppc-embedded

Hi.

Because of some weird system hangs we have seen, I am wondering if the
CPM command register may need a lock around it.

In one of our drivers we can end up writing a CPM command every
millisecond on worst case during an error condition.
Sometimes during that error condition the whole CPU locks up. (or rather
it spins in an interrupt somewhere).

After looking at all the other places in the kernel the CPM command
register gets written, I am convinced that there do exist conditions
where we might accidentally write to the command register while some
other thread/irq is writing/waiting for command completion.

The code in all places looks like:

	cp->cp_cpcr = mk_cr_cmd(SOME COMMAND);
	while (cp->cp_cpcr & CPM_CR_FLG);

I think we might need a lock around it.
local_irq_save() would work, but would lock the whole CPU while the CPM
command completes (which can take 200 CPM clock cycles, avg 40 according
to the manual)

Easiest way would probably be to have an (inline) function that does the
lock/write/wait/unlock.

There are a couple of places that do a udelay between the write and the
wait.

^ permalink raw reply

* Re: ppc85xx DMA
From: Naru Sundar @ 2006-06-06 21:38 UTC (permalink / raw)
  To: Liu Dave-r63238; +Cc: linuxppc-embedded
In-Reply-To: <20060606211610.GD27078@fulcrummicro.com>

Ah hah.  I got it working. 

Turns out using the ioremapped address was bad, I had to use the actual
nonremapped physical address.

Thanks for the info!

-naru

On Tue, Jun 06, 2006 at 02:16:10PM -0700, Naru Sundar wrote:
> To clarify, I am definitely using the physical addresses.  virt_to_bus and
> virt_to_phys result in the same value.  My transfer completes and I see
> BCR go back to 0, but I can't manage to actually see any data difference
> at the destination side.
> 
> The destination address is an ioremapped region that I pass through
> virt_to_phys.  The src is kmalloc'd memory that has GFP_DMA set.
> 
> On Tue, Jun 06, 2006 at 11:55:48AM -0700, Naru Sundar wrote:
> > On Tue, Jun 06, 2006 at 09:39:29AM +0800, Liu Dave-r63238 wrote:
> > > What is the DMA transfer mode? Is direct or chaining mode?
> > 
> > Direct mode.  I fixed an error with my bit ordering for the configuration
> > registers, and now the transfer seems to complete, but I don't see any
> > actual data showing up in the destination register that I am writing to.
> > 
> > > Did you ioremap the DMA register space?
> > 
> > Yes, I can write the destination address manually.  So I am thinking my addresses
> > are wrong.
> > 
> > For the source and dest address I used:
> > 
> > dma_map_single(NULL, ptr, len, DMA_TO_DEVICE)
> > 
> > (which effectively does a virt_to_bus on ppc and so should just return to me
> > the bus address used by the dma).
> > 
> > -naru
> > _______________________________________________
> > Linuxppc-embedded mailing list
> > Linuxppc-embedded@ozlabs.org
> > https://ozlabs.org/mailman/listinfo/linuxppc-embedded
> _______________________________________________
> Linuxppc-embedded mailing list
> Linuxppc-embedded@ozlabs.org
> https://ozlabs.org/mailman/listinfo/linuxppc-embedded

^ permalink raw reply

* Re: Base address of executables - weirdness?
From: H. Peter Anvin @ 2006-06-06 21:21 UTC (permalink / raw)
  To: Andreas Schwab; +Cc: linuxppc-dev
In-Reply-To: <jeirne56kn.fsf@sykes.suse.de>

Andreas Schwab wrote:
> "H. Peter Anvin" <hpa@zytor.com> writes:
> 
>> Until recently, binaries linked with ld defaulted to a base address of 
>> 0x10000000+SIZEOF_HEADERS.  However, recently I've gotten a couple of 
>> reports -- and I've been able to confirm this on my FC5 system -- that 
>> some versions of ld links at 0x01800000+SIZEOF_HEADERS.
> 
> You are probably using the wrong linker emulation.  There are three
> emulations enabled when building binutils for ppc-linux, but only the
> elf32ppclinux emulation is the right one that uses 0x10000000 for the base
> address.
> 

Hm.  Well, it's using the default one, but perhaps I should try to specify an explicit -m 
option.  Sure enough, that did the trick.

THANKS!

	-hpa

^ permalink raw reply

* Re: ppc85xx DMA
From: Naru Sundar @ 2006-06-06 21:16 UTC (permalink / raw)
  To: Liu Dave-r63238; +Cc: linuxppc-embedded
In-Reply-To: <20060606185548.GB27078@fulcrummicro.com>

To clarify, I am definitely using the physical addresses.  virt_to_bus and
virt_to_phys result in the same value.  My transfer completes and I see
BCR go back to 0, but I can't manage to actually see any data difference
at the destination side.

The destination address is an ioremapped region that I pass through
virt_to_phys.  The src is kmalloc'd memory that has GFP_DMA set.

On Tue, Jun 06, 2006 at 11:55:48AM -0700, Naru Sundar wrote:
> On Tue, Jun 06, 2006 at 09:39:29AM +0800, Liu Dave-r63238 wrote:
> > What is the DMA transfer mode? Is direct or chaining mode?
> 
> Direct mode.  I fixed an error with my bit ordering for the configuration
> registers, and now the transfer seems to complete, but I don't see any
> actual data showing up in the destination register that I am writing to.
> 
> > Did you ioremap the DMA register space?
> 
> Yes, I can write the destination address manually.  So I am thinking my addresses
> are wrong.
> 
> For the source and dest address I used:
> 
> dma_map_single(NULL, ptr, len, DMA_TO_DEVICE)
> 
> (which effectively does a virt_to_bus on ppc and so should just return to me
> the bus address used by the dma).
> 
> -naru
> _______________________________________________
> Linuxppc-embedded mailing list
> Linuxppc-embedded@ozlabs.org
> https://ozlabs.org/mailman/listinfo/linuxppc-embedded

^ permalink raw reply

* Re: Base address of executables - weirdness?
From: Andreas Schwab @ 2006-06-06 21:15 UTC (permalink / raw)
  To: H. Peter Anvin; +Cc: linuxppc-dev
In-Reply-To: <4485A279.4050403@zytor.com>

"H. Peter Anvin" <hpa@zytor.com> writes:

> Until recently, binaries linked with ld defaulted to a base address of 
> 0x10000000+SIZEOF_HEADERS.  However, recently I've gotten a couple of 
> reports -- and I've been able to confirm this on my FC5 system -- that 
> some versions of ld links at 0x01800000+SIZEOF_HEADERS.

You are probably using the wrong linker emulation.  There are three
emulations enabled when building binutils for ppc-linux, but only the
elf32ppclinux emulation is the right one that uses 0x10000000 for the base
address.

Andreas.

-- 
Andreas Schwab, SuSE Labs, schwab@suse.de
SuSE Linux Products GmbH, Maxfeldstraße 5, 90409 Nürnberg, Germany
PGP key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
"And now for something completely different."

^ permalink raw reply

* Re: eth0: tx queue full
From: Wolfgang Denk @ 2006-06-06 20:53 UTC (permalink / raw)
  To: salvatore cusenza; +Cc: linuxppc-embedded
In-Reply-To: <9252a64b0606060113v696adbb7ib43ad95836c0724b@mail.gmail.com>

In message <9252a64b0606060113v696adbb7ib43ad95836c0724b@mail.gmail.com> you wrote:
> 
> At runtime during the usual life of my board (MPC852 and linux-2.4.20 Denk's
> distribution)
>  I have experienced the following crash:

2.4.20 is at least 3.5 years old. Please use recent code.

Best regards,

Wolfgang Denk

-- 
Software Engineering:  Embedded and Realtime Systems,  Embedded Linux
Phone: (+49)-8142-66989-10 Fax: (+49)-8142-66989-80 Email: wd@denx.de
Brain fried - Core dumped

^ permalink raw reply

* 2.4 kernel scheduling (?) problems
From: Tobias Netzel @ 2006-06-06 18:29 UTC (permalink / raw)
  To: linuxppc-dev

Hello all,

I'm new to this list - hoping you will help me with the 2.4 kernel, 
although it's old now.
I'm improving hardware support for the NuBus PMacs. The NuBus Pmacs are 
68k Macs with a PPC CPU and a different bus bridge/memory controller.
The NuBus PMac port is something like a hack to the PPC architecture 
using some things from the PMac platform.
An open firmware device tree is emulated as far as needed. We use the 
same PMU driver (although I hacked it a bit because we directly route 
the PMU interrupts), ADB and RTC driver and the same functions to 
calibrate the decrementer using the VIA timer.

The problem I got is that for example during SCSI transfers (I'm using 
an old scanner) neither the X screen gets updated nor does the system 
respond to any interrupts but the NMI.
Debugging messages through the serial port are still sent. So I have to 
wait until the whole transfer is done. The kernel only receives 
interrupts after a SCSI command has finished and before a new one is 
sent. The data from the scanner is transferred in blocks of 32 kB.
The behaviour is similar when I do a performance test using "dd" or 
"hdparm" on the IDE CD-ROM drive. Burning CDs using that IDE drive 
works but causes the same problem as when scanning with the SCSI 
scanner.
With the IDE hard disk I don't get this problem.
When I run "top" during those problem transfers the CPU utilization by 
the system is higher than 95% ("top" hardly gets updated) - I doubt 
that this is necessary as on the NuBus PMacs the PPC CPUs (and 
especially the 217 MHz G3 with 512 kB L2 cache I'm using) should be 
idle most of the time waiting for the slow 33 MHz system bus.
The strange thing is that the CPU misses all interrupts (except the 
NMI) although interrupts aren't turned off in the CPU (otherwise the 
PMU would shut us down and the NMI wouldn't work). I also tried to use 
a timer to poll the interrupt controllers but the interrupt handling 
routines also only find one pending interrupt in 10 seconds even when I 
constantly move the mouse and hit keys.
At first I thought this was something caused by the hardware but in 
MacOS 9 the SCSI driver doesn't block anything.

But is it possible that this is because of something like scheduling 
problems of the kernel?
And if so might updating to the 2.6 kernel fix that issue?

Tobias

^ permalink raw reply

* Re: Base address of executables - weirdness?
From: Linas Vepstas @ 2006-06-06 17:33 UTC (permalink / raw)
  To: H. Peter Anvin; +Cc: linuxppc-dev
In-Reply-To: <4485A279.4050403@zytor.com>

On Tue, Jun 06, 2006 at 08:42:49AM -0700, H. Peter Anvin wrote:
> I'm trying to track down an odd issue with klibc on ppc32.
> 
> Until recently, binaries linked with ld defaulted to a base address of 
> 0x10000000+SIZEOF_HEADERS.  However, recently I've gotten a couple of 
> reports -- and I've been able to confirm this on my FC5 system -- that 
> some versions of ld links at 0x01800000+SIZEOF_HEADERS.  Needless to 
> say, this is more than a bit confusing, *especially* since "ld -verbose" 
> still reports:
> 
>      PROVIDE (__executable_start = 0x10000000); . = 0x10000000 + 
> SIZEOF_HEADERS;
> 
> ... at the top of the linker script.
> 
> I'm rather baffled.  Has anyone else seen this, and/or have any other 
> explanation?

Googling "0x01800000 linux ppc" brings up some interesting but old hits.

However, I swear I saw someone suggest a patch last week that changed
0x10000000 to 0x01800000 somewhere, (vmlinux.lds ??) as a proposed cure
for a bug. Sorry, I deleted it.

--linas 

^ permalink raw reply

* RE: [PATCH/2.6.17-rc4 4/10]Powerpc:  Add tsi108 pic support
From: Alexandre Bounine @ 2006-06-06 18:58 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Zang Roy-r61911
  Cc: linuxppc-dev list, Paul Mackerras, Yang Xin-Xin-r48390

I forgot to mention another argument in favor of adding separate =
MPIC_SPV_EOI and MPIC_CASC_NOEOI flags:

If we have MPIC with "broken" logic but standard register map we can use =
model ID =3D 0 for
the standard MPIC without creating additional data structure.

Regards,

Alex.

-----Original Message-----
From: Alexandre Bounine=20
Sent: Tuesday, June 06, 2006 10:46 AM
To: 'Benjamin Herrenschmidt'; Zang Roy-r61911
Cc: Kumar Gala; linuxppc-dev list; Yang Xin-Xin-r48390; Paul Mackerras
Subject: RE: [PATCH/2.6.17-rc4 4/10]Powerpc: Add tsi108 pic support

> -----Original Message-----
> From: Benjamin Herrenschmidt [mailto:benh@kernel.crashing.org]
> Sent: Tuesday, June 06, 2006 6:17 AM
> To: Zang Roy-r61911
> Cc: Alexandre Bounine; Kumar Gala; linuxppc-dev list; Yang
> Xin-Xin-r48390; Paul Mackerras
> Subject: RE: [PATCH/2.6.17-rc4 4/10]Powerpc: Add tsi108 pic support
>=20
>=20
> On Tue, 2006-06-06 at 17:43 +0800, Zang Roy-r61911 wrote:
>=20
> > Update Tsi108 implementation of MPIC.
> > Any comment?=20
> >=20
> > Integrate Tundra Semiconductor tsi108 host bridge interrupt=20
> controller=20
> > to mpic arch.
>=20
> Looks much better :) Still a few things...=20
>

Sounds good. We are moving in the right direction :)
=20
> > +	mpic =3D mpic_alloc(mpic_paddr,
> > +			MPIC_PRIMARY | MPIC_BIG_ENDIAN |=20
> MPIC_WANTS_RESET |
> > +			MPIC_SPV_EOI | MPIC_CASC_NOEOI |=20
> > +			MPIC_MOD_ID(MPIC_ID_TSI108),
> > +			0, /* num_sources used */
> > +			TSI108_IRQ_BASE,
> > +			0, /* num_sources used */
> > +			NR_IRQS - 4 /* XXXX */,
> > +			mpc7448_hpc2_pic_initsenses,
> > +			sizeof(mpc7448_hpc2_pic_initsenses),=20
> "Tsi108_PIC");
>=20
> That's a hell lot of new flags... I'm not sure we need that many or a
> single TSI108 one that encloses all the new ones. Also, I'm=20
> not sure we
> need that model ID encoding thing. Let's do things simple, besides, I
> don't want to encourage HW folks into doing the same kind of=20
> contraption
> in the future

More details in comments below.

>(btw, tell the TSI folks for me that they had a BAD BAD
> BAD idea to muck around with the base design that way, especially
> changing the register map in incompatible ways for no good reason).
>=20

Done!

> > +	/* Configure MPIC outputs to CPU0 */
> > +	tsi108_write_reg(TSI108_MPIC_OFFSET + 0x30c, 0);
> >  }
>=20
> It doesn't use the standard multiple processor outputs mecanism of
> MPIC ?
> =20
> > +static struct mpic_info mpic_infos[] =3D {
> > +	[0] =3D {	/* Original OpenPIC compatible MPIC */
> > +	.greg_base	=3D MPIC_GREG_BASE,
> > +	.greg_frr0	=3D MPIC_GREG_FEATURE_0,
> > +	.greg_config0	=3D MPIC_GREG_GLOBAL_CONF_0,
> > +	.greg_vendor_id	=3D MPIC_GREG_VENDOR_ID,
> > +	.greg_ipi_vp0	=3D MPIC_GREG_IPI_VECTOR_PRI_0,
> > +	.greg_ipi_stride	=3D MPIC_GREG_IPI_STRIDE,
> > +	.greg_spurious	=3D MPIC_GREG_SPURIOUS,
> > +	.greg_tfrr	=3D MPIC_GREG_TIMER_FREQ,
> > +
>=20
>    .../...
>=20
> It's a bit sad to have to go all the way to doing such tables, but I
> suspect it's probably the best way to handle it at this=20
> point.

> Send more
> nastygrams to the HW folks for me.
>=20

Done:)

> >  	mpic->num_sources =3D 0; /* so far */
> >  	mpic->senses =3D senses;
> >  	mpic->senses_count =3D senses_count;
> > +	mpic->hw_set =3D &mpic_infos[MPIC_GET_MOD_ID(flags)];
>=20
> Well... the model ID thing might not be that a bad idea in=20
> the end :) I
> need to think about it. I might have to deal with yet another=20
> MPIC that
> has another regiser map (yeah yeah, TSI aren't the only ones=20
> to not get
> it)...=20
>

I'll tell this to HW guys as well :)=20

>   .../...
>=20
> > @@ -963,7 +1043,7 @@ int mpic_get_one_irq(struct mpic *mpic,=20
> >  {
> >  	u32 irq;
> > =20
> > -	irq =3D mpic_cpu_read(MPIC_CPU_INTACK) & MPIC_VECPRI_VECTOR_MASK;
> > +	irq =3D mpic_cpu_read(mpic->hw_set->cpu_intack) &=20
> mpic->hw_set->irq_vpr_vector;
> >  #ifdef DEBUG_LOW
> >  	DBG("%s: get_one_irq(): %d\n", mpic->name, irq);
> >  #endif
> > @@ -972,11 +1052,18 @@ #ifdef DEBUG_LOW
> >  		DBG("%s: cascading ...\n", mpic->name);
> >  #endif
> >  		irq =3D mpic->cascade(regs, mpic->cascade_data);
> > -		mpic_eoi(mpic);
> > +#ifdef DEBUG_LOW
> > +		DBG("%s: cascaded irq: %d\n", mpic->name, irq);
> > +#endif
> > +		if (!(mpic->flags & MPIC_CASC_NOEOI))
> > +			mpic_eoi(mpic);
> >  		return irq;
> >  	}
>=20
> Can you tell me why you need the above ? (Why you aren't EOI'ing the
> cascade ?) Note that the cascade handling is going away from=20
> mpic anyway
> with the port to genirq that I'll publish later this week for=20
> 2.6.18 and
> it will almost be handled as a normal interrupt...
>=20

We have a level-signalled irq from the cascaded PCI interrupt =
controller. If I do EOI at=20
this time, level request will not have chance to be cleared (unless all =
PCI interrupts have
an SA_INTERRUPT flag) and result in recurring interrupts.=20

I chose to have an individual flag instead of checking model ID to avoid =
multiple checks within ISR (in case if we have more than one mpic =
version requiring this option). I also expect that it may be useful for =
any external level-signalling cascades connected to MPIC.     =20

> > -	if (unlikely(irq =3D=3D MPIC_VEC_SPURRIOUS))
> > +	if (unlikely(irq =3D=3D MPIC_VEC_SPURRIOUS)) {
> > +		if (mpic->flags & MPIC_SPV_EOI)
> > +			mpic_eoi(mpic);
> >  		return -1;
> > +	}
>=20
> I think the above thing could just test the model ID. It's=20
> unlikely that
> another implementation need the same "feature", so just test the model
> ID rather than adding a flag and if we ever have another=20
> model with the
> same "feature", then we'll go back to adding a flag :)
>=20

Motivation is the same as above - I just do not want to have multiple ID =
checks here. I agree that it is driven by mpic type (model ID) only. I =
can remove this one if you do not expect any
new "broken" MPICs on horizon. =20

> Cheers,
> Ben.
>=20
Thanks for your feedback,
Alex.
>=20
>=20

^ permalink raw reply

* Re: ppc85xx DMA
From: Naru Sundar @ 2006-06-06 18:55 UTC (permalink / raw)
  To: Liu Dave-r63238; +Cc: linuxppc-embedded
In-Reply-To: <9FCDBA58F226D911B202000BDBAD4673026FD940@zch01exm40.ap.freescale.net>

On Tue, Jun 06, 2006 at 09:39:29AM +0800, Liu Dave-r63238 wrote:
> What is the DMA transfer mode? Is direct or chaining mode?

Direct mode.  I fixed an error with my bit ordering for the configuration
registers, and now the transfer seems to complete, but I don't see any
actual data showing up in the destination register that I am writing to.

> Did you ioremap the DMA register space?

Yes, I can write the destination address manually.  So I am thinking my addresses
are wrong.

For the source and dest address I used:

dma_map_single(NULL, ptr, len, DMA_TO_DEVICE)

(which effectively does a virt_to_bus on ppc and so should just return to me
the bus address used by the dma).

-naru

^ permalink raw reply

* Re: Base address of executables - weirdness?
From: H. Peter Anvin @ 2006-06-06 18:21 UTC (permalink / raw)
  To: Linas Vepstas; +Cc: linuxppc-dev
In-Reply-To: <20060606173343.GE9294@austin.ibm.com>

Linas Vepstas wrote:
> On Tue, Jun 06, 2006 at 08:42:49AM -0700, H. Peter Anvin wrote:
>> I'm trying to track down an odd issue with klibc on ppc32.
>>
>> Until recently, binaries linked with ld defaulted to a base address of 
>> 0x10000000+SIZEOF_HEADERS.  However, recently I've gotten a couple of 
>> reports -- and I've been able to confirm this on my FC5 system -- that 
>> some versions of ld links at 0x01800000+SIZEOF_HEADERS.  Needless to 
>> say, this is more than a bit confusing, *especially* since "ld -verbose" 
>> still reports:
>>
>>      PROVIDE (__executable_start = 0x10000000); . = 0x10000000 + 
>> SIZEOF_HEADERS;
>>
>> ... at the top of the linker script.
>>
>> I'm rather baffled.  Has anyone else seen this, and/or have any other 
>> explanation?
> 
> Googling "0x01800000 linux ppc" brings up some interesting but old hits.
> 
> However, I swear I saw someone suggest a patch last week that changed
> 0x10000000 to 0x01800000 somewhere, (vmlinux.lds ??) as a proposed cure
> for a bug. Sorry, I deleted it.
> 

Well, it's worse than I previously surmised.  I can't seem to find any combination of 
options which work on both affected and unaffected binutils.  This is a real mess.

	-hpa

^ permalink raw reply

* RE: Intercept System call using Kernel  module is 2.6 kernel
From: Jenkins, Clive @ 2006-06-06 17:14 UTC (permalink / raw)
  To: Meswani, Mitesh, linuxppc-dev

>        x=mitesh_func();
>        printf("mitesh_func returned %d\n",x);

The first thing would be to change your user-space program
to print the error number from errno after your "system call".

        x=mitesh_func();
        printf("mitesh_func returned %d, errno=%d\n",x,errno);

Or you can use perror() -- look it up.

Clive
 =20


4) I verify from the system logs that when I insmod the kernel module I
get all the print statements. I verified from the logs  that the address
of the sys_call_table is correctly passed and from /proc/kallsyms I can
see that my function mitesh_func has been defined and has the address as
indicated in the logs.=20

The problem is that when I execute my user app I expect to see two
things:=20
 a) I should see a message in the log "Executing mitesh_func..." and=20
 b) A return value of 2=20

However I get an error value -1 returned.=20

Any help and ideas are highly appreciated. =20

Thank you in advance,=20
Mitesh=20

^ permalink raw reply

* Re: Intercept System call using Kernel  module is 2.6 kernel
From: Arnd Bergmann @ 2006-06-06 17:48 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Meswani, Mitesh
In-Reply-To: <C26C730943E01145B4F89E37FE0A022002BBC7A6@itdsrvmail02.utep.edu>

Am Tuesday 06 June 2006 18:25 schrieb Meswani, Mitesh:
> Any help and ideas are highly appreciated.

Tell your professor that the task you were given is=20

a) pointless, as you wouldn't use this kind of thing to
   solve an actual problem other than bad OS design
   homework.
b) not a correct approach regarding maintainability, since
   you can't tell for an arbitrary kernel version if
   the particular syscall you're abusing is now used for
   something else.

As a replacement task, choose one or more of the following:

- implement a syscall by _recompiling_ the kernel and call
  that from your user application.
- write a misc device driver that exposes a device to
  do ioctl() on.
- create a file in each of sysfs, procfs and debugfs to
  do your operation on, using read() and write().
- use a netlink socket for a two way communication with
  a kernel module.

	Arnd <><

^ permalink raw reply

* Re: process starvation with 2.6 scheduler
From: Thiago Galesi @ 2006-06-06 17:09 UTC (permalink / raw)
  To: Kallol Biswas; +Cc: linuxppc-dev
In-Reply-To: <478F19F21671F04298A2116393EEC3D527421D@sjc1exm08.pmc_nt.nt.pmc-sierra.bc.ca>

Did you try it with a _real_ CPU? My bet is that the timer interrupt
is overwhelming the CPU (even at 100Hz, 400kHz is too slow).

-- 
-
Thiago Galesi

^ permalink raw reply

* Re: Collecting hypervisor call stats
From: Mike Kravetz @ 2006-06-06 16:46 UTC (permalink / raw)
  To: Christopher Yeoh; +Cc: Chris Yeoh, Bryan Rosenburg, linuxppc-dev
In-Reply-To: <17534.30511.192632.558778@localhost.localdomain>

On Thu, Jun 01, 2006 at 03:12:15PM +1000, Christopher Yeoh wrote:
> Here's a patch we've used for collecting hcall counts and times.

Thanks for the patch/code Chris!  I'm using this as a basis for something
that we may want to merge into the tree.  Just a couple of questions.

Your 'wrappers' have the following general form:

> +long plpar_hcall(unsigned long opcode, unsigned long arg1,
> +			unsigned long arg2, unsigned long arg3,
> +			unsigned long arg4, unsigned long *out1,
> +			unsigned long *out2, unsigned long *out3)
> +{
> +    long retcode;
> +    unsigned long t_entry;
> +    int opcode_index;
> +    
> +    opcode_index = map_hcall_to_index(opcode);
> +    
> +    t_entry = mfspr(SPRN_PURR);
> +    barrier();
> +    
> +    retcode = plpar_hcall_real(opcode, arg1, arg2, arg3, arg4,
> +			       out1, out2, out3);
> +    
> +    barrier();
> +    get_cpu_var(hcall_type_count)[opcode_index]++;
> +    put_cpu_var(hcall_type_count);
> +    get_cpu_var(hcall_type_time)[opcode_index] += mfspr(SPRN_PURR) - t_entry;
> +    put_cpu_var(hcall_type_time);
> +    
> +    return retcode;
> +};

Can you explain the need for barrier(s) before and after the call to the
real routine?  It usually takes me a couple days of thought to figure out
exactly where these are needed. :)

The use of get_cpu_var/put_cpu_var result in disabling/enabling preemption.
I can understand why this would be desirable to assure the accuracy of the
statistics.  But, I was wondering if the desired accuracy is worth the added
overhead.  My thought was to make these as lightweight as possible and
sacrifice some accuracy if necessary.  After all, no 'internal decisions' are
being made because of this data.  It is simply exposed to user land.
Thoughts?

Thanks,
-- 
Mike

^ permalink raw reply

* Re: Intercept System call using Kernel  module is 2.6 kernel
From: Jeff.Fellin @ 2006-06-06 17:02 UTC (permalink / raw)
  To: mmeswani; +Cc: linuxppc-dev, linuxppc-dev-bounces+jeff.fellin=rflelect.com

                                                                                                                                     
                      "Meswani, Mitesh" <mmeswani@utep.edu>                                                                          
                      Sent by:                                             To:       <linuxppc-dev@ozlabs.org>                       
                      linuxppc-dev-bounces+jeff.fellin=rflelect.com        cc:                                                       
                      @ozlabs.org                                          Subject:  Intercept System call using Kernel  module is   
                                                                            2.6 kernel                                               
                                                                                                                                     
                      06/06/2006 12:25                                                                                               
                                                                                                                                     
                                                                                                                                     










>Hello


>I am attempting to run some user code with kernel space permission. I am
using the ppc64 kernel version >2.6.16-rc4-3-ppc64 for IBM Power5
processors.
>In this kernel module I am trying to implement a function that can be
called from user space.
>
>I have found through various posts that using unused system calls and
replacing them temporarily can acheive this >objective.
>
>This is what I am doing, but its not working, please bear with the
slightly long code that follows:
>
>1) since the 2.6 kernel does not export sys_call_table, I grep it from the
boot image
First sign what you are doing is not a good idea. There are better methods
of this
1) device driver interface with read/write/ioctl interface
2) procfs files from a module/driver
3) sysfs files from a module/driver

SNIP
>
>The problem is that when I execute my user app I expect to see two things:
 >a) I should see a message in the log "Executing mitesh_func..." and
> b) A return value of 2
>However I get an error value -1 returned.
An indication of thinking of system calls vs other methods is wrong!.

>Any help and ideas are highly appreciated.
Don't add your own or hijack system calls

Thank you in advance,
Mitesh
 _______________________________________________
Linuxppc-dev mailing list
Linuxppc-dev@ozlabs.org
https://ozlabs.org/mailman/listinfo/linuxppc-dev

^ permalink raw reply

* Intercept System call using Kernel  module is 2.6 kernel
From: Meswani, Mitesh @ 2006-06-06 16:25 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <C26C730943E01145B4F89E37FE0A022002BBC7A2@itdsrvmail02.utep.edu>

[-- Attachment #1: Type: text/plain, Size: 2792 bytes --]

 
 
Hello 
 
 
I am attempting to run some user code with kernel space permission. I am using the ppc64 kernel version 2.6.16-rc4-3-ppc64 for IBM Power5 processors. 
In this kernel module I am trying to implement a function that can be called from user space. 
 
I have found through various posts that using unused system calls and replacing them temporarily can achieve this objective. 
 
This is what I am doing, but it's not working, please bear with the slightly long code that follows: 
 
1) since the 2.6 kernel does not export sys_call_table, I grep it from the boot image
 
2) Next I write the kernel module as : 
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/syscalls.h>
unsigned long **sctable;
void *org_func;  /***** Copy of the original calls address ********/

asmlinkage int mitesh_func(void)   
{ 
        printk(KERN_ALERT "Executing mitesh_func...\n"); 
        return 2;
} 

int init_module(void)
{
 unsigned long ptr;
 unsigned long *p;
 ptr = 0x23203404;  /*** some hard coded addresses from grepping for sys_call_table *****/
  p = (unsigned long *)ptr;
  sctable = (unsigned long **)p;
  printk("The address of the system call table is: 0x%x\n",&sctable[0]);
  printk("The address of syscall #137 is: 0x%x\n",sctable[137]);

org_func = (void *) (sctable[137]);  /**** Store the original sys call ****/
 printk("Original func address 0x%x stored \n",org_func);
 sctable[137] = (void *) mitesh_func;  /**** replace with mitesh_func ****/
printk("The new sys call address is 0x%x and stored as : 0x%x\n",mitesh_func, sctable[137]);

  return 0; 
}
void cleanup_module(void)

{
        sctable[137] = (void *) org_func; 
        printk("Upon module unload the sctable #137 address is :0x%x\n",sctable[137]);
        printk("Module is unloaded!\n");
}

3) My user app looks like this:
#include <stdio.h> 
#include <errno.h> 
#include <asm-ppc64/unistd.h> 
#define __NR_mitesh_func 137 
 
_syscall0(int, mitesh_func); 
void main() 
{
        int x=0; 
        x=mitesh_func(); 
        printf("mitesh_func returned %d\n",x);
}  

 
4) I verify from the system logs that when I insmod the kernel module I get all the print statements. I verified from the logs  that the address of the sys_call_table is correctly passed and from /proc/kallsyms I can see that my function mitesh_func has been defined and has the address as indicated in the logs. 
 
The problem is that when I execute my user app I expect to see two things: 
 a) I should see a message in the log "Executing mitesh_func..." and 
 b) A return value of 2 
 
However I get an error value -1 returned. 
 
Any help and ideas are highly appreciated.  
 
Thank you in advance, 
Mitesh 
 

[-- Attachment #2: Type: text/html, Size: 5523 bytes --]

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox