* [PATCH] ia64 oprofile support for 2.6.0-test4
@ 2003-08-26 20:42 Will Cohen
2003-08-26 20:58 ` David Mosberger
` (7 more replies)
0 siblings, 8 replies; 9+ messages in thread
From: Will Cohen @ 2003-08-26 20:42 UTC (permalink / raw)
To: linux-ia64
[-- Attachment #1: Type: text/plain, Size: 498 bytes --]
I have revised the oprofile patch for ia64 to provide just the basic
timer interrupt mechanism to avoid interfering with the perfmon 2.0
support. I have verified that the oprofile ia64 patch applies cleanly and
builds a working kernel with the 2.6.0-test4 kernel and the
linux-2.6.0-test4-ia64-030826.diff.bz2 patch.
More work is required to get oprofile to work with perfmon 2.0.
However, the patch as currently implemented should not cause
problems for the perfmon 2.0 support.
-Will
[-- Attachment #2: oprof20030825d.patch --]
[-- Type: text/plain, Size: 6219 bytes --]
--- linux-2.6.0-test4-bk2oprof/arch/ia64/kernel/time.c.orig 2003-08-22 19:53:07.000000000 -0400
+++ linux-2.6.0-test4-bk2oprof/arch/ia64/kernel/time.c 2003-08-26 15:15:35.568906131 -0400
@@ -18,6 +18,7 @@
#include <linux/interrupt.h>
#include <linux/efi.h>
#include <linux/timex.h>
+#include <linux/profile.h>
#include <asm/delay.h>
#include <asm/hw_irq.h>
@@ -39,29 +40,6 @@
#endif
static void
-do_profile (unsigned long ip)
-{
- extern cpumask_t prof_cpu_mask;
-
- if (!prof_buffer)
- return;
-
- if (!cpu_isset(smp_processor_id(), prof_cpu_mask))
- return;
-
- ip -= (unsigned long) _stext;
- ip >>= prof_shift;
- /*
- * Don't ignore out-of-bounds IP values silently, put them into the last
- * histogram slot, so if present, they will show up as a sharp peak.
- */
- if (ip > prof_len - 1)
- ip = prof_len - 1;
-
- atomic_inc((atomic_t *) &prof_buffer[ip]);
-}
-
-static void
itc_reset (void)
{
}
@@ -199,6 +177,47 @@
tv->tv_usec = usec;
}
+/*
+ * The profiling function is SMP safe. (nothing can mess
+ * around with "current", and the profiling counters are
+ * updated with atomic operations). This is especially
+ * useful with a profiling multiplier != 1
+ */
+static inline void
+ia64_do_profile(struct pt_regs * regs)
+{
+ unsigned long eip;
+ extern unsigned long prof_cpu_mask;
+
+ profile_hook(regs);
+
+ if (user_mode(regs))
+ return;
+
+ if (!prof_buffer)
+ return;
+
+ eip = instruction_pointer(regs);
+
+ /*
+ * Only measure the CPUs specified by /proc/irq/prof_cpu_mask.
+ * (default is all CPUs.)
+ */
+ if (!((1<<smp_processor_id()) & prof_cpu_mask))
+ return;
+
+ eip -= (unsigned long) &_stext;
+ eip >>= prof_shift;
+ /*
+ * Don't ignore out-of-bounds EIP values silently,
+ * put them into the last histogram slot, so if
+ * present, they will show up as a sharp peak.
+ */
+ if (eip > prof_len-1)
+ eip = prof_len-1;
+ atomic_inc((atomic_t *)&prof_buffer[eip]);
+}
+
static irqreturn_t
timer_interrupt (int irq, void *dev_id, struct pt_regs *regs)
{
@@ -210,14 +229,9 @@
printk(KERN_ERR "Oops: timer tick before it's due (itc=%lx,itm=%lx)\n",
ia64_get_itc(), new_itm);
+ ia64_do_profile(regs);
+
while (1) {
- /*
- * Do kernel PC profiling here. We multiply the instruction number by
- * four so that we can use a prof_shift of 2 to get instruction-level
- * instead of just bundle-level accuracy.
- */
- if (!user_mode(regs))
- do_profile(regs->cr_iip + 4*ia64_psr(regs)->ri);
#ifdef CONFIG_SMP
smp_do_timer(regs);
--- linux-2.6.0-test4-bk2oprof/arch/ia64/Makefile.orig 2003-08-22 19:51:04.000000000 -0400
+++ linux-2.6.0-test4-bk2oprof/arch/ia64/Makefile 2003-08-25 14:35:58.000000000 -0400
@@ -65,6 +65,7 @@
drivers-$(CONFIG_IA64_HP_SIM) += arch/ia64/hp/sim/
drivers-$(CONFIG_IA64_HP_ZX1) += arch/ia64/hp/common/ arch/ia64/hp/zx1/
drivers-$(CONFIG_IA64_GENERIC) += arch/ia64/hp/common/ arch/ia64/hp/zx1/ arch/ia64/hp/sim/
+drivers-$(CONFIG_OPROFILE) += arch/ia64/oprofile/
boot := arch/ia64/hp/sim/boot
--- /dev/null 2003-08-22 16:30:19.000000000 -0400
+++ linux-2.6.0-test4-bk2oprof/arch/ia64/oprofile/init.c 2003-08-25 15:58:25.000000000 -0400
@@ -0,0 +1,25 @@
+/**
+ * @file init.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@movementarian.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/oprofile.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+
+extern void timer_init(struct oprofile_operations ** ops);
+
+int __init oprofile_arch_init(struct oprofile_operations ** ops)
+{
+ return -ENODEV;
+}
+
+
+void oprofile_arch_exit(void)
+{
+}
--- /dev/null 2003-08-22 16:30:19.000000000 -0400
+++ linux-2.6.0-test4-bk2oprof/arch/ia64/oprofile/Kconfig 2003-08-25 14:35:58.000000000 -0400
@@ -0,0 +1,23 @@
+
+menu "Profiling support"
+ depends on EXPERIMENTAL
+
+config PROFILING
+ bool "Profiling support (EXPERIMENTAL)"
+ help
+ Say Y here to enable the extended profiling support mechanisms used
+ by profilers such as OProfile.
+
+
+config OPROFILE
+ tristate "OProfile system profiling (EXPERIMENTAL)"
+ depends on PROFILING
+ help
+ OProfile is a profiling system capable of profiling the
+ whole system, include the kernel, kernel modules, libraries,
+ and applications.
+
+ If unsure, say N.
+
+endmenu
+
--- /dev/null 2003-08-22 16:30:19.000000000 -0400
+++ linux-2.6.0-test4-bk2oprof/arch/ia64/oprofile/Makefile 2003-08-25 14:35:58.000000000 -0400
@@ -0,0 +1,9 @@
+obj-$(CONFIG_OPROFILE) += oprofile.o
+
+DRIVER_OBJS := $(addprefix ../../../drivers/oprofile/, \
+ oprof.o cpu_buffer.o buffer_sync.o \
+ event_buffer.o oprofile_files.o \
+ oprofilefs.o oprofile_stats.o \
+ timer_int.o )
+
+oprofile-y := $(DRIVER_OBJS) init.o
--- linux-2.6.0-test4-bk2oprof/arch/ia64/Kconfig.orig 2003-08-25 11:29:46.000000000 -0400
+++ linux-2.6.0-test4-bk2oprof/arch/ia64/Kconfig 2003-08-25 14:35:58.000000000 -0400
@@ -589,6 +589,8 @@
source "arch/ia64/hp/sim/Kconfig"
+source "arch/ia64/oprofile/Kconfig"
+
menu "Kernel hacking"
--- linux-2.6.0-test4-bk2oprof/include/asm-ia64/hw_irq.h.orig 2003-08-22 19:55:39.000000000 -0400
+++ linux-2.6.0-test4-bk2oprof/include/asm-ia64/hw_irq.h 2003-08-26 15:13:40.369970010 -0400
@@ -9,6 +9,7 @@
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/types.h>
+#include <linux/profile.h>
#include <asm/machvec.h>
#include <asm/ptrace.h>
--- linux-2.6.0-test4-bk2oprof/include/asm-ia64/ptrace.h.orig 2003-08-22 19:57:23.000000000 -0400
+++ linux-2.6.0-test4-bk2oprof/include/asm-ia64/ptrace.h 2003-08-26 15:12:29.157256063 -0400
@@ -223,6 +223,13 @@
};
#ifdef __KERNEL__
+/*
+ * We use the ia64_psr(regs)->ri to determine which of the three
+ * instructions in bundle took the sample. The instructions in the
+ * ia64 do not fall on nice four byte boundaries, so there is no point
+ * in multiplying ia64_psr(regs)->ri by 4.
+ */
+#define instruction_pointer(regs) ((regs)->cr_iip + ia64_psr(regs)->ri)
/* given a pointer to a task_struct, return the user's pt_regs */
# define ia64_task_regs(t) (((struct pt_regs *) ((char *) (t) + IA64_STK_OFFSET)) - 1)
# define ia64_psr(regs) ((struct ia64_psr *) &(regs)->cr_ipsr)
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] ia64 oprofile support for 2.6.0-test4
2003-08-26 20:42 [PATCH] ia64 oprofile support for 2.6.0-test4 Will Cohen
@ 2003-08-26 20:58 ` David Mosberger
2003-08-26 21:19 ` Will Cohen
` (6 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: David Mosberger @ 2003-08-26 20:58 UTC (permalink / raw)
To: linux-ia64
>>>>> On Tue, 26 Aug 2003 16:42:27 -0400, Will Cohen <wcohen@redhat.com> said:
Will> + eip = instruction_pointer(regs);
eip? How about calling it "ip", which is the register name and what's
used everywhere else in the ia64 tree.
Will> +/*
Will> + * We use the ia64_psr(regs)->ri to determine which of the three
Will> + * instructions in bundle took the sample. The instructions in the
Will> + * ia64 do not fall on nice four byte boundaries, so there is no point
Will> + * in multiplying ia64_psr(regs)->ri by 4.
Will> + */
Will> +#define instruction_pointer(regs) ((regs)->cr_iip + ia64_psr(regs)->ri)
How are you going to get instruction-level precision with this?
Given this:
Will> - ip >>= prof_shift;
you'd have to use a prof_shift of 0, which is wasteful. If you
multiply ri by 4, you can use a prof_shift of 2, reducing the
histogram size by a factor of 4 while still getting instruction-level
accuracy.
I can see why you don't want to do the multiply-by-four in
instruction_pointer(), but if that's what you want to avoid, I think
ia64_do_profile() should do it so we can get the desired
effect.
--david
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] ia64 oprofile support for 2.6.0-test4
2003-08-26 20:42 [PATCH] ia64 oprofile support for 2.6.0-test4 Will Cohen
2003-08-26 20:58 ` David Mosberger
@ 2003-08-26 21:19 ` Will Cohen
2003-08-26 21:51 ` Will Cohen
` (5 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Will Cohen @ 2003-08-26 21:19 UTC (permalink / raw)
To: linux-ia64
David Mosberger wrote:
>>>>>>On Tue, 26 Aug 2003 16:42:27 -0400, Will Cohen <wcohen@redhat.com> said:
>>>>>
>
> Will> + eip = instruction_pointer(regs);
>
> eip? How about calling it "ip", which is the register name and what's
> used everywhere else in the ia64 tree.
>
> Will> +/*
> Will> + * We use the ia64_psr(regs)->ri to determine which of the three
> Will> + * instructions in bundle took the sample. The instructions in the
> Will> + * ia64 do not fall on nice four byte boundaries, so there is no point
> Will> + * in multiplying ia64_psr(regs)->ri by 4.
> Will> + */
> Will> +#define instruction_pointer(regs) ((regs)->cr_iip + ia64_psr(regs)->ri)
>
> How are you going to get instruction-level precision with this?
I looked at how gdb handles this. It appears that, for setting breakpoints
and disassembling code, gdb handles the instructions in the bundle in this
manner: it puts the instruction number (0, 1, or 2) in the low bits of
the address.
> Given this:
>
> Will> - ip >>= prof_shift;
>
> you'd have to use a prof_shift of 0, which is wasteful. If you
> multiply ri by 4, you can use a prof_shift of 2, reducing the
> histogram size by a factor of 4 while still getting instruction-level
> accuracy.
> I can see why you don't want to do the multiply-by-four in
> instruction_pointer(), but if that's what you want to avoid, I think
> ia64_do_profile() should do it so we can get the desired
> effect.
>
> --david
For OProfile it didn't make a difference, but for the histograms it is
wasteful. I didn't think about the impact on the traditional histogram.
There are two different ways that the instructions within a bundle are
being handled: gdb and the kernel profiling code. I chose the gdb approach.
Do the histogram analysis tools currently handle the fiction of bundle
instruction 0 at address ending in 0, bundle instruction 1 ending in 4,
and bundle instruction 2 in 8? GDB doesn't.
-Will
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] ia64 oprofile support for 2.6.0-test4
2003-08-26 20:42 [PATCH] ia64 oprofile support for 2.6.0-test4 Will Cohen
2003-08-26 20:58 ` David Mosberger
2003-08-26 21:19 ` Will Cohen
@ 2003-08-26 21:51 ` Will Cohen
2003-08-26 21:58 ` David Mosberger
` (4 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Will Cohen @ 2003-08-26 21:51 UTC (permalink / raw)
To: linux-ia64
[-- Attachment #1: Type: text/plain, Size: 1385 bytes --]
I have revised the patch based on the comments below:
1) use ip instead of eip
2) multiply ri by 4, to get more compact histograms
-Will
David Mosberger wrote:
>>>>>>On Tue, 26 Aug 2003 16:42:27 -0400, Will Cohen <wcohen@redhat.com> said:
>>>>>
>
> Will> + eip = instruction_pointer(regs);
>
> eip? How about calling it "ip", which is the register name and what's
> used everywhere else in the ia64 tree.
>
> Will> +/*
> Will> + * We use the ia64_psr(regs)->ri to determine which of the three
> Will> + * instructions in bundle took the sample. The instructions in the
> Will> + * ia64 do not fall on nice four byte boundaries, so there is no point
> Will> + * in multiplying ia64_psr(regs)->ri by 4.
> Will> + */
> Will> +#define instruction_pointer(regs) ((regs)->cr_iip + ia64_psr(regs)->ri)
>
> How are you going to get instruction-level precision with this?
>
> Given this:
>
> Will> - ip >>= prof_shift;
>
> you'd have to use a prof_shift of 0, which is wasteful. If you
> multiply ri by 4, you can use a prof_shift of 2, reducing the
> histogram size by a factor of 4 while still getting instruction-level
> accuracy.
>
> I can see why you don't want to do the multiply-by-four in
> instruction_pointer(), but if that's what you want to avoid, I think
> ia64_do_profile() should do it so we can get the desired
> effect.
>
> --david
[-- Attachment #2: oprof20030825e.patch --]
[-- Type: text/plain, Size: 6259 bytes --]
--- linux-2.6.0-test4-bk2oprof/arch/ia64/kernel/time.c.orig 2003-08-22 19:53:07.000000000 -0400
+++ linux-2.6.0-test4-bk2oprof/arch/ia64/kernel/time.c 2003-08-26 17:33:13.112032287 -0400
@@ -18,6 +18,7 @@
#include <linux/interrupt.h>
#include <linux/efi.h>
#include <linux/timex.h>
+#include <linux/profile.h>
#include <asm/delay.h>
#include <asm/hw_irq.h>
@@ -39,29 +40,6 @@
#endif
static void
-do_profile (unsigned long ip)
-{
- extern cpumask_t prof_cpu_mask;
-
- if (!prof_buffer)
- return;
-
- if (!cpu_isset(smp_processor_id(), prof_cpu_mask))
- return;
-
- ip -= (unsigned long) _stext;
- ip >>= prof_shift;
- /*
- * Don't ignore out-of-bounds IP values silently, put them into the last
- * histogram slot, so if present, they will show up as a sharp peak.
- */
- if (ip > prof_len - 1)
- ip = prof_len - 1;
-
- atomic_inc((atomic_t *) &prof_buffer[ip]);
-}
-
-static void
itc_reset (void)
{
}
@@ -199,6 +177,47 @@
tv->tv_usec = usec;
}
+/*
+ * The profiling function is SMP safe. (nothing can mess
+ * around with "current", and the profiling counters are
+ * updated with atomic operations). This is especially
+ * useful with a profiling multiplier != 1
+ */
+static inline void
+ia64_do_profile(struct pt_regs * regs)
+{
+ unsigned long ip;
+ extern unsigned long prof_cpu_mask;
+
+ profile_hook(regs);
+
+ if (user_mode(regs))
+ return;
+
+ if (!prof_buffer)
+ return;
+
+ ip = instruction_pointer(regs);
+
+ /*
+ * Only measure the CPUs specified by /proc/irq/prof_cpu_mask.
+ * (default is all CPUs.)
+ */
+ if (!((1<<smp_processor_id()) & prof_cpu_mask))
+ return;
+
+ ip -= (unsigned long) &_stext;
+ ip >>= prof_shift;
+ /*
+ * Don't ignore out-of-bounds IP values silently,
+ * put them into the last histogram slot, so if
+ * present, they will show up as a sharp peak.
+ */
+ if (ip > prof_len-1)
+ ip = prof_len-1;
+ atomic_inc((atomic_t *)&prof_buffer[ip]);
+}
+
static irqreturn_t
timer_interrupt (int irq, void *dev_id, struct pt_regs *regs)
{
@@ -210,14 +229,9 @@
printk(KERN_ERR "Oops: timer tick before it's due (itc=%lx,itm=%lx)\n",
ia64_get_itc(), new_itm);
+ ia64_do_profile(regs);
+
while (1) {
- /*
- * Do kernel PC profiling here. We multiply the instruction number by
- * four so that we can use a prof_shift of 2 to get instruction-level
- * instead of just bundle-level accuracy.
- */
- if (!user_mode(regs))
- do_profile(regs->cr_iip + 4*ia64_psr(regs)->ri);
#ifdef CONFIG_SMP
smp_do_timer(regs);
--- linux-2.6.0-test4-bk2oprof/arch/ia64/Makefile.orig 2003-08-22 19:51:04.000000000 -0400
+++ linux-2.6.0-test4-bk2oprof/arch/ia64/Makefile 2003-08-25 14:35:58.000000000 -0400
@@ -65,6 +65,7 @@
drivers-$(CONFIG_IA64_HP_SIM) += arch/ia64/hp/sim/
drivers-$(CONFIG_IA64_HP_ZX1) += arch/ia64/hp/common/ arch/ia64/hp/zx1/
drivers-$(CONFIG_IA64_GENERIC) += arch/ia64/hp/common/ arch/ia64/hp/zx1/ arch/ia64/hp/sim/
+drivers-$(CONFIG_OPROFILE) += arch/ia64/oprofile/
boot := arch/ia64/hp/sim/boot
--- /dev/null 2003-08-22 16:30:19.000000000 -0400
+++ linux-2.6.0-test4-bk2oprof/arch/ia64/oprofile/init.c 2003-08-25 15:58:25.000000000 -0400
@@ -0,0 +1,25 @@
+/**
+ * @file init.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@movementarian.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/oprofile.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+
+extern void timer_init(struct oprofile_operations ** ops);
+
+int __init oprofile_arch_init(struct oprofile_operations ** ops)
+{
+ return -ENODEV;
+}
+
+
+void oprofile_arch_exit(void)
+{
+}
--- /dev/null 2003-08-22 16:30:19.000000000 -0400
+++ linux-2.6.0-test4-bk2oprof/arch/ia64/oprofile/Kconfig 2003-08-25 14:35:58.000000000 -0400
@@ -0,0 +1,23 @@
+
+menu "Profiling support"
+ depends on EXPERIMENTAL
+
+config PROFILING
+ bool "Profiling support (EXPERIMENTAL)"
+ help
+ Say Y here to enable the extended profiling support mechanisms used
+ by profilers such as OProfile.
+
+
+config OPROFILE
+ tristate "OProfile system profiling (EXPERIMENTAL)"
+ depends on PROFILING
+ help
+ OProfile is a profiling system capable of profiling the
+ whole system, include the kernel, kernel modules, libraries,
+ and applications.
+
+ If unsure, say N.
+
+endmenu
+
--- /dev/null 2003-08-22 16:30:19.000000000 -0400
+++ linux-2.6.0-test4-bk2oprof/arch/ia64/oprofile/Makefile 2003-08-25 14:35:58.000000000 -0400
@@ -0,0 +1,9 @@
+obj-$(CONFIG_OPROFILE) += oprofile.o
+
+DRIVER_OBJS := $(addprefix ../../../drivers/oprofile/, \
+ oprof.o cpu_buffer.o buffer_sync.o \
+ event_buffer.o oprofile_files.o \
+ oprofilefs.o oprofile_stats.o \
+ timer_int.o )
+
+oprofile-y := $(DRIVER_OBJS) init.o
--- linux-2.6.0-test4-bk2oprof/arch/ia64/Kconfig.orig 2003-08-25 11:29:46.000000000 -0400
+++ linux-2.6.0-test4-bk2oprof/arch/ia64/Kconfig 2003-08-25 14:35:58.000000000 -0400
@@ -589,6 +589,8 @@
source "arch/ia64/hp/sim/Kconfig"
+source "arch/ia64/oprofile/Kconfig"
+
menu "Kernel hacking"
--- linux-2.6.0-test4-bk2oprof/include/asm-ia64/hw_irq.h.orig 2003-08-22 19:55:39.000000000 -0400
+++ linux-2.6.0-test4-bk2oprof/include/asm-ia64/hw_irq.h 2003-08-26 15:13:40.000000000 -0400
@@ -9,6 +9,7 @@
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/types.h>
+#include <linux/profile.h>
#include <asm/machvec.h>
#include <asm/ptrace.h>
--- linux-2.6.0-test4-bk2oprof/include/asm-ia64/ptrace.h.orig 2003-08-22 19:57:23.000000000 -0400
+++ linux-2.6.0-test4-bk2oprof/include/asm-ia64/ptrace.h 2003-08-26 17:40:59.829146472 -0400
@@ -223,6 +223,14 @@
};
#ifdef __KERNEL__
+/*
+ * We use the ia64_psr(regs)->ri to determine which of the three
+ * instructions in bundle (16 bytes) took the sample. The instructions
+ * in the ia64 do not fall on nice four byte boundaries. However, to
+ * save space in the histogram, the instructions are mapped to 4
+ * byte boundaries.
+ */
+#define instruction_pointer(regs) ((regs)->cr_iip + 4*ia64_psr(regs)->ri)
/* given a pointer to a task_struct, return the user's pt_regs */
# define ia64_task_regs(t) (((struct pt_regs *) ((char *) (t) + IA64_STK_OFFSET)) - 1)
# define ia64_psr(regs) ((struct ia64_psr *) &(regs)->cr_ipsr)
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] ia64 oprofile support for 2.6.0-test4
2003-08-26 20:42 [PATCH] ia64 oprofile support for 2.6.0-test4 Will Cohen
` (2 preceding siblings ...)
2003-08-26 21:51 ` Will Cohen
@ 2003-08-26 21:58 ` David Mosberger
2003-08-26 22:02 ` Will Cohen
` (3 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: David Mosberger @ 2003-08-26 21:58 UTC (permalink / raw)
To: linux-ia64
>>>>> On Tue, 26 Aug 2003 17:19:14 -0400, Will Cohen <wcohen@redhat.com> said:
Will> For OProfile it didn't make a difference, but for the
Will> histograms it is wasteful. I didn't think about the impact on
Will> the traditional histogram.
You can't just break existing data formats for absolutely no reason.
Will> Do the histogram analysis tools currently handle
Yes. For example, I use "readprofile -b" from time to time to get
instruction-level histograms.
Will> the fiction of bundle instruction 0 at address ending in 0,
Will> bundle instruction 1 ending in 4, and bundle instruction 2 in
Will> 8? GDB doesn't.
And encoding the slot number in bits 0 and 1 is somehow less
fictitious? ;-)
--david
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] ia64 oprofile support for 2.6.0-test4
2003-08-26 20:42 [PATCH] ia64 oprofile support for 2.6.0-test4 Will Cohen
` (3 preceding siblings ...)
2003-08-26 21:58 ` David Mosberger
@ 2003-08-26 22:02 ` Will Cohen
2003-08-26 22:05 ` David Mosberger
` (2 subsequent siblings)
7 siblings, 0 replies; 9+ messages in thread
From: Will Cohen @ 2003-08-26 22:02 UTC (permalink / raw)
To: linux-ia64
David Mosberger wrote:
>>>>>>On Tue, 26 Aug 2003 17:19:14 -0400, Will Cohen <wcohen@redhat.com> said:
>>>>>
>
> Will> For OProfile it didn't make a difference, but for the
> Will> histograms it is wasteful. I didn't think about the impact on
> Will> the traditional histogram.
>
> You can't just break existing data formats for absolutely no reason.
>
> Will> Do the histogram analysis tools currently handle
>
> Yes. For example, I use "readprofile -b" from time to time to get
> instruction-level histograms.
>
> Will> the fiction of bundle instruction 0 at address ending in 0,
> Will> bundle instruction 1 ending in 4, and bundle instruction 2 in
> Will> 8? GDB doesn't.
>
> And encoding the slot number in bits 0 and 1 is somehow less
> fictitious? ;-)
They are equally fictitious. I just happened to pick the wrong one. I have
changed the patch to be compatible with the existing data format used
for the kernel profiling.
-Will
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] ia64 oprofile support for 2.6.0-test4
2003-08-26 20:42 [PATCH] ia64 oprofile support for 2.6.0-test4 Will Cohen
` (4 preceding siblings ...)
2003-08-26 22:02 ` Will Cohen
@ 2003-08-26 22:05 ` David Mosberger
2003-08-27 13:57 ` Will Cohen
2003-08-28 23:35 ` David Mosberger
7 siblings, 0 replies; 9+ messages in thread
From: David Mosberger @ 2003-08-26 22:05 UTC (permalink / raw)
To: linux-ia64
>>>>> On Tue, 26 Aug 2003 17:51:15 -0400, Will Cohen <wcohen@redhat.com> said:
Will> I have revised the patch based on the comments below: 1) use
Will> ip instead of eip 2) multiply ri by 4, to get more compact
Will> histograms
Hmmh, I'm not sure I like this patch better. Like I mentioned in the
earlier mail, it does make sense to encode the slot number in bits 0
and 1 for instruction_pointer(). That is the canonical representation
used by IA-64 Linux (and ELF, gdb, etc.). The traditional histogram
is a special case, because there it is more useful to get the slot
number bits close to the bundle-address bits, so I think we should
special-case this in ia64_do_profile() instead. Perhaps something
along the lines of:
ip = instruction_pointer(regs);
/* for histogram, encode slot bits in address bits 2 and 3: */
slot = ip & 3;
ip = (ip & ~3UL) + 4*slot;
--david
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] ia64 oprofile support for 2.6.0-test4
2003-08-26 20:42 [PATCH] ia64 oprofile support for 2.6.0-test4 Will Cohen
` (5 preceding siblings ...)
2003-08-26 22:05 ` David Mosberger
@ 2003-08-27 13:57 ` Will Cohen
2003-08-28 23:35 ` David Mosberger
7 siblings, 0 replies; 9+ messages in thread
From: Will Cohen @ 2003-08-27 13:57 UTC (permalink / raw)
To: linux-ia64
[-- Attachment #1: Type: text/plain, Size: 1101 bytes --]
Revised the instruction_pointer to return the canonical ip and
ia64_do_profile() to adjust the resulting ip value for the histogram.
-Will
David Mosberger wrote:
>>>>>>On Tue, 26 Aug 2003 17:51:15 -0400, Will Cohen <wcohen@redhat.com> said:
>>>>>
>
> Will> I have revised the patch based on the comments below: 1) use
> Will> ip instead of eip 2) multiply ri by 4, to get more compact
> Will> histograms
>
> Hmmh, I'm not sure I like this patch better. Like I mentioned in the
> earlier mail, it does make sense to encode the slot number in bits 0
> and 1 for instruction_pointer(). That is the canonical representation
> used by IA-64 Linux (and ELF, gdb, etc.). The traditional histogram
> is a special case, because there it is more useful to get the slot
> number bits close to the bundle-address bits, so I think we should
> special-case this in ia64_do_profile() instead. Perhaps something
> along the lines of:
>
> ip = instruction_pointer(regs);
> /* for histogram, encode slot bits in address bits 2 and 3: */
> slot = ip & 3;
> ip = (ip & ~3UL) + 4*slot;
>
> --david
[-- Attachment #2: oprof20030825f.patch --]
[-- Type: text/plain, Size: 6326 bytes --]
--- linux-2.6.0-test4-bk2oprof/arch/ia64/kernel/time.c.orig 2003-08-22 19:53:07.000000000 -0400
+++ linux-2.6.0-test4-bk2oprof/arch/ia64/kernel/time.c 2003-08-27 09:42:42.731374187 -0400
@@ -18,6 +18,7 @@
#include <linux/interrupt.h>
#include <linux/efi.h>
#include <linux/timex.h>
+#include <linux/profile.h>
#include <asm/delay.h>
#include <asm/hw_irq.h>
@@ -39,29 +40,6 @@
#endif
static void
-do_profile (unsigned long ip)
-{
- extern cpumask_t prof_cpu_mask;
-
- if (!prof_buffer)
- return;
-
- if (!cpu_isset(smp_processor_id(), prof_cpu_mask))
- return;
-
- ip -= (unsigned long) _stext;
- ip >>= prof_shift;
- /*
- * Don't ignore out-of-bounds IP values silently, put them into the last
- * histogram slot, so if present, they will show up as a sharp peak.
- */
- if (ip > prof_len - 1)
- ip = prof_len - 1;
-
- atomic_inc((atomic_t *) &prof_buffer[ip]);
-}
-
-static void
itc_reset (void)
{
}
@@ -199,6 +177,52 @@
tv->tv_usec = usec;
}
+/*
+ * The profiling function is SMP safe. (nothing can mess
+ * around with "current", and the profiling counters are
+ * updated with atomic operations). This is especially
+ * useful with a profiling multiplier != 1
+ */
+static inline void
+ia64_do_profile(struct pt_regs * regs)
+{
+ unsigned long ip, slot;
+ extern unsigned long prof_cpu_mask;
+
+ profile_hook(regs);
+
+ if (user_mode(regs))
+ return;
+
+ if (!prof_buffer)
+ return;
+
+ ip = instruction_pointer(regs);
+ /* Conserve space in histogram by encoding slot bits in address
+ * bits 2 and 3 rather than bits 0 and 1.
+ */
+ slot = ip & 3;
+ ip = (ip & ~3UL) + 4*slot;
+
+ /*
+ * Only measure the CPUs specified by /proc/irq/prof_cpu_mask.
+ * (default is all CPUs.)
+ */
+ if (!((1<<smp_processor_id()) & prof_cpu_mask))
+ return;
+
+ ip -= (unsigned long) &_stext;
+ ip >>= prof_shift;
+ /*
+ * Don't ignore out-of-bounds IP values silently,
+ * put them into the last histogram slot, so if
+ * present, they will show up as a sharp peak.
+ */
+ if (ip > prof_len-1)
+ ip = prof_len-1;
+ atomic_inc((atomic_t *)&prof_buffer[ip]);
+}
+
static irqreturn_t
timer_interrupt (int irq, void *dev_id, struct pt_regs *regs)
{
@@ -210,14 +234,9 @@
printk(KERN_ERR "Oops: timer tick before it's due (itc=%lx,itm=%lx)\n",
ia64_get_itc(), new_itm);
+ ia64_do_profile(regs);
+
while (1) {
- /*
- * Do kernel PC profiling here. We multiply the instruction number by
- * four so that we can use a prof_shift of 2 to get instruction-level
- * instead of just bundle-level accuracy.
- */
- if (!user_mode(regs))
- do_profile(regs->cr_iip + 4*ia64_psr(regs)->ri);
#ifdef CONFIG_SMP
smp_do_timer(regs);
--- linux-2.6.0-test4-bk2oprof/arch/ia64/Makefile.orig 2003-08-22 19:51:04.000000000 -0400
+++ linux-2.6.0-test4-bk2oprof/arch/ia64/Makefile 2003-08-25 14:35:58.000000000 -0400
@@ -65,6 +65,7 @@
drivers-$(CONFIG_IA64_HP_SIM) += arch/ia64/hp/sim/
drivers-$(CONFIG_IA64_HP_ZX1) += arch/ia64/hp/common/ arch/ia64/hp/zx1/
drivers-$(CONFIG_IA64_GENERIC) += arch/ia64/hp/common/ arch/ia64/hp/zx1/ arch/ia64/hp/sim/
+drivers-$(CONFIG_OPROFILE) += arch/ia64/oprofile/
boot := arch/ia64/hp/sim/boot
--- /dev/null 2003-08-22 16:30:19.000000000 -0400
+++ linux-2.6.0-test4-bk2oprof/arch/ia64/oprofile/init.c 2003-08-25 15:58:25.000000000 -0400
@@ -0,0 +1,25 @@
+/**
+ * @file init.c
+ *
+ * @remark Copyright 2002 OProfile authors
+ * @remark Read the file COPYING
+ *
+ * @author John Levon <levon@movementarian.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/oprofile.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+
+extern void timer_init(struct oprofile_operations ** ops);
+
+int __init oprofile_arch_init(struct oprofile_operations ** ops)
+{
+ return -ENODEV;
+}
+
+
+void oprofile_arch_exit(void)
+{
+}
--- /dev/null 2003-08-22 16:30:19.000000000 -0400
+++ linux-2.6.0-test4-bk2oprof/arch/ia64/oprofile/Kconfig 2003-08-25 14:35:58.000000000 -0400
@@ -0,0 +1,23 @@
+
+menu "Profiling support"
+ depends on EXPERIMENTAL
+
+config PROFILING
+ bool "Profiling support (EXPERIMENTAL)"
+ help
+ Say Y here to enable the extended profiling support mechanisms used
+ by profilers such as OProfile.
+
+
+config OPROFILE
+ tristate "OProfile system profiling (EXPERIMENTAL)"
+ depends on PROFILING
+ help
+ OProfile is a profiling system capable of profiling the
+ whole system, include the kernel, kernel modules, libraries,
+ and applications.
+
+ If unsure, say N.
+
+endmenu
+
--- /dev/null 2003-08-22 16:30:19.000000000 -0400
+++ linux-2.6.0-test4-bk2oprof/arch/ia64/oprofile/Makefile 2003-08-25 14:35:58.000000000 -0400
@@ -0,0 +1,9 @@
+obj-$(CONFIG_OPROFILE) += oprofile.o
+
+DRIVER_OBJS := $(addprefix ../../../drivers/oprofile/, \
+ oprof.o cpu_buffer.o buffer_sync.o \
+ event_buffer.o oprofile_files.o \
+ oprofilefs.o oprofile_stats.o \
+ timer_int.o )
+
+oprofile-y := $(DRIVER_OBJS) init.o
--- linux-2.6.0-test4-bk2oprof/arch/ia64/Kconfig.orig 2003-08-25 11:29:46.000000000 -0400
+++ linux-2.6.0-test4-bk2oprof/arch/ia64/Kconfig 2003-08-25 14:35:58.000000000 -0400
@@ -589,6 +589,8 @@
source "arch/ia64/hp/sim/Kconfig"
+source "arch/ia64/oprofile/Kconfig"
+
menu "Kernel hacking"
--- linux-2.6.0-test4-bk2oprof/include/asm-ia64/hw_irq.h.orig 2003-08-22 19:55:39.000000000 -0400
+++ linux-2.6.0-test4-bk2oprof/include/asm-ia64/hw_irq.h 2003-08-26 15:13:40.000000000 -0400
@@ -9,6 +9,7 @@
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/types.h>
+#include <linux/profile.h>
#include <asm/machvec.h>
#include <asm/ptrace.h>
--- linux-2.6.0-test4-bk2oprof/include/asm-ia64/ptrace.h.orig 2003-08-22 19:57:23.000000000 -0400
+++ linux-2.6.0-test4-bk2oprof/include/asm-ia64/ptrace.h 2003-08-27 09:44:09.313087486 -0400
@@ -223,6 +223,12 @@
};
#ifdef __KERNEL__
+/*
+ * We use the ia64_psr(regs)->ri to determine which of the three
+ * instructions in bundle (16 bytes) took the sample. Generate
+ * the canonical representation by adding to instruction pointer.
+ */
+#define instruction_pointer(regs) ((regs)->cr_iip + ia64_psr(regs)->ri)
/* given a pointer to a task_struct, return the user's pt_regs */
# define ia64_task_regs(t) (((struct pt_regs *) ((char *) (t) + IA64_STK_OFFSET)) - 1)
# define ia64_psr(regs) ((struct ia64_psr *) &(regs)->cr_ipsr)
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] ia64 oprofile support for 2.6.0-test4
2003-08-26 20:42 [PATCH] ia64 oprofile support for 2.6.0-test4 Will Cohen
` (6 preceding siblings ...)
2003-08-27 13:57 ` Will Cohen
@ 2003-08-28 23:35 ` David Mosberger
7 siblings, 0 replies; 9+ messages in thread
From: David Mosberger @ 2003-08-28 23:35 UTC (permalink / raw)
To: linux-ia64
>>>>> On Wed, 27 Aug 2003 09:57:52 -0400, Will Cohen <wcohen@redhat.com> said:
Will> Revised the instruction_pointer to return the canonical ip and
Will> ia64_do_profile() to adjust the resulting ip value for the
Will> histogram.
Thanks, I applied the patch now, with the following minor changes:
- replaced open-coded prof_cpu_mask test with "cpu_isset()" (otherwise
things don't compile with >64 CPUs)
- fixed some extra blank lines/whitespace issues
I'll push the updated tree a bit later today. Once you have had a chance
to test it, please let me know if I broke something.
Thanks,
--david
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2003-08-28 23:35 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2003-08-26 20:42 [PATCH] ia64 oprofile support for 2.6.0-test4 Will Cohen
2003-08-26 20:58 ` David Mosberger
2003-08-26 21:19 ` Will Cohen
2003-08-26 21:51 ` Will Cohen
2003-08-26 21:58 ` David Mosberger
2003-08-26 22:02 ` Will Cohen
2003-08-26 22:05 ` David Mosberger
2003-08-27 13:57 ` Will Cohen
2003-08-28 23:35 ` David Mosberger
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox