public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/4] perf-counter bits
@ 2009-05-15 13:19 Peter Zijlstra
  2009-05-15 13:19 ` [PATCH 1/4] perf_counter: remove perf_disable/enable exports Peter Zijlstra
                   ` (3 more replies)
  0 siblings, 4 replies; 13+ messages in thread
From: Peter Zijlstra @ 2009-05-15 13:19 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Paul Mackerras, Corey Ashford, linux-kernel, Peter Zijlstra

pending perf counter bits
-- 


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 1/4] perf_counter: remove perf_disable/enable exports
  2009-05-15 13:19 [PATCH 0/4] perf-counter bits Peter Zijlstra
@ 2009-05-15 13:19 ` Peter Zijlstra
  2009-05-15 14:43   ` [tip:perfcounters/core] " tip-bot for Peter Zijlstra
  2009-05-15 13:19 ` [PATCH 2/4] perf_counter: per user mlock gift Peter Zijlstra
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 13+ messages in thread
From: Peter Zijlstra @ 2009-05-15 13:19 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Paul Mackerras, Corey Ashford, linux-kernel, Peter Zijlstra

[-- Attachment #1: perf_counter-remove-export.patch --]
[-- Type: text/plain, Size: 825 bytes --]

Now that ACPI idle doesn't use it anymore, remove the exports.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Paul Mackerras <paulus@samba.org>
CC: Corey Ashford <cjashfor@linux.vnet.ibm.com>
---
 kernel/perf_counter.c |    2 --
 1 file changed, 2 deletions(-)

Index: linux-2.6/kernel/perf_counter.c
===================================================================
--- linux-2.6.orig/kernel/perf_counter.c
+++ linux-2.6/kernel/perf_counter.c
@@ -90,14 +90,12 @@ void perf_disable(void)
 	__perf_disable();
 	hw_perf_disable();
 }
-EXPORT_SYMBOL_GPL(perf_disable); /* ACPI idle */
 
 void perf_enable(void)
 {
 	if (__perf_enable())
 		hw_perf_enable();
 }
-EXPORT_SYMBOL_GPL(perf_enable); /* ACPI idle */
 
 static void
 list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)

-- 


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 2/4] perf_counter: per user mlock gift
  2009-05-15 13:19 [PATCH 0/4] perf-counter bits Peter Zijlstra
  2009-05-15 13:19 ` [PATCH 1/4] perf_counter: remove perf_disable/enable exports Peter Zijlstra
@ 2009-05-15 13:19 ` Peter Zijlstra
  2009-05-15 14:43   ` [tip:perfcounters/core] " tip-bot for Peter Zijlstra
  2009-05-15 13:19 ` [PATCH 3/4] perf_counter: frequency based adaptive irq_period Peter Zijlstra
  2009-05-15 13:19 ` [PATCH 4/4] perf_counter: update perf-top to use the new freq interface Peter Zijlstra
  3 siblings, 1 reply; 13+ messages in thread
From: Peter Zijlstra @ 2009-05-15 13:19 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Paul Mackerras, Corey Ashford, linux-kernel, Peter Zijlstra

[-- Attachment #1: perf_counter-mlock.patch --]
[-- Type: text/plain, Size: 3284 bytes --]

Instead of a per-process mlock gift for perf-counters, use a per-user gift
so that there is less of a DoS potential.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Paul Mackerras <paulus@samba.org>
CC: Corey Ashford <cjashfor@linux.vnet.ibm.com>
---
 include/linux/sched.h |    4 ++++
 kernel/perf_counter.c |   24 ++++++++++++++++--------
 2 files changed, 20 insertions(+), 8 deletions(-)

Index: linux-2.6/kernel/perf_counter.c
===================================================================
--- linux-2.6.orig/kernel/perf_counter.c
+++ linux-2.6/kernel/perf_counter.c
@@ -45,7 +45,7 @@ static atomic_t nr_munmap_tracking __rea
 static atomic_t nr_comm_tracking __read_mostly;
 
 int sysctl_perf_counter_priv __read_mostly; /* do we need to be privileged */
-int sysctl_perf_counter_mlock __read_mostly = 128; /* 'free' kb per counter */
+int sysctl_perf_counter_mlock __read_mostly = 512; /* 'free' kb per user */
 
 /*
  * Lock for (sysadmin-configurable) counter reservations:
@@ -1522,6 +1522,9 @@ static void perf_mmap_close(struct vm_ar
 
 	if (atomic_dec_and_mutex_lock(&counter->mmap_count,
 				      &counter->mmap_mutex)) {
+		struct user_struct *user = current_user();
+
+		atomic_long_sub(counter->data->nr_pages + 1, &user->locked_vm);
 		vma->vm_mm->locked_vm -= counter->data->nr_locked;
 		perf_mmap_data_free(counter);
 		mutex_unlock(&counter->mmap_mutex);
@@ -1537,11 +1540,13 @@ static struct vm_operations_struct perf_
 static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct perf_counter *counter = file->private_data;
+	struct user_struct *user = current_user();
 	unsigned long vma_size;
 	unsigned long nr_pages;
+	unsigned long user_locked, user_lock_limit;
 	unsigned long locked, lock_limit;
+	long user_extra, extra;
 	int ret = 0;
-	long extra;
 
 	if (!(vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_WRITE))
 		return -EINVAL;
@@ -1569,15 +1574,17 @@ static int perf_mmap(struct file *file, 
 		goto unlock;
 	}
 
-	extra = nr_pages /* + 1 only account the data pages */;
-	extra -= sysctl_perf_counter_mlock >> (PAGE_SHIFT - 10);
-	if (extra < 0)
-		extra = 0;
-
-	locked = vma->vm_mm->locked_vm + extra;
+	user_extra = nr_pages + 1;
+	user_lock_limit = sysctl_perf_counter_mlock >> (PAGE_SHIFT - 10);
+	user_locked = atomic_long_read(&user->locked_vm) + user_extra;
+
+	extra = 0;
+	if (user_locked > user_lock_limit)
+		extra = user_locked - user_lock_limit;
 
 	lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
 	lock_limit >>= PAGE_SHIFT;
+	locked = vma->vm_mm->locked_vm + extra;
 
 	if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
 		ret = -EPERM;
@@ -1590,6 +1597,7 @@ static int perf_mmap(struct file *file, 
 		goto unlock;
 
 	atomic_set(&counter->mmap_count, 1);
+	atomic_long_add(user_extra, &user->locked_vm);
 	vma->vm_mm->locked_vm += extra;
 	counter->data->nr_locked = extra;
 unlock:
Index: linux-2.6/include/linux/sched.h
===================================================================
--- linux-2.6.orig/include/linux/sched.h
+++ linux-2.6/include/linux/sched.h
@@ -675,6 +675,10 @@ struct user_struct {
 	struct work_struct work;
 #endif
 #endif
+
+#ifdef CONFIG_PERF_COUNTERS
+	atomic_long_t locked_vm;
+#endif
 };
 
 extern int uids_sysfs_init(void);

-- 


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 3/4] perf_counter: frequency based adaptive irq_period
  2009-05-15 13:19 [PATCH 0/4] perf-counter bits Peter Zijlstra
  2009-05-15 13:19 ` [PATCH 1/4] perf_counter: remove perf_disable/enable exports Peter Zijlstra
  2009-05-15 13:19 ` [PATCH 2/4] perf_counter: per user mlock gift Peter Zijlstra
@ 2009-05-15 13:19 ` Peter Zijlstra
  2009-05-15 13:35   ` Ingo Molnar
                     ` (2 more replies)
  2009-05-15 13:19 ` [PATCH 4/4] perf_counter: update perf-top to use the new freq interface Peter Zijlstra
  3 siblings, 3 replies; 13+ messages in thread
From: Peter Zijlstra @ 2009-05-15 13:19 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Paul Mackerras, Corey Ashford, linux-kernel, Peter Zijlstra

[-- Attachment #1: perf_counter-freq.patch --]
[-- Type: text/plain, Size: 8951 bytes --]

Instead of specifying the irq_period for a counter, provide a target interrupt
frequency and dynamically adapt the irq_period to match this frequency.

Paul, please verify ppc64, I only did a very quick pass over that code.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Paul Mackerras <paulus@samba.org>
CC: Corey Ashford <cjashfor@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/perf_counter.c |   13 +++----
 arch/x86/kernel/cpu/perf_counter.c |    9 +----
 include/linux/perf_counter.h       |   10 ++++-
 kernel/perf_counter.c              |   63 +++++++++++++++++++++++++++++--------
 4 files changed, 68 insertions(+), 27 deletions(-)

Index: linux-2.6/arch/x86/kernel/cpu/perf_counter.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/perf_counter.c
+++ linux-2.6/arch/x86/kernel/cpu/perf_counter.c
@@ -286,11 +286,8 @@ static int __hw_perf_counter_init(struct
 		hwc->nmi = 1;
 	}
 
-	hwc->irq_period	= hw_event->irq_period;
-	if ((s64)hwc->irq_period <= 0 || hwc->irq_period > x86_pmu.max_period)
-		hwc->irq_period = x86_pmu.max_period;
-
-	atomic64_set(&hwc->period_left, hwc->irq_period);
+	atomic64_set(&hwc->period_left,
+			min(x86_pmu.max_period, hwc->irq_period));
 
 	/*
 	 * Raw event type provide the config in the event structure
@@ -458,7 +455,7 @@ x86_perf_counter_set_period(struct perf_
 			     struct hw_perf_counter *hwc, int idx)
 {
 	s64 left = atomic64_read(&hwc->period_left);
-	s64 period = hwc->irq_period;
+	s64 period = min(x86_pmu.max_period, hwc->irq_period);
 	int err;
 
 	/*
Index: linux-2.6/include/linux/perf_counter.h
===================================================================
--- linux-2.6.orig/include/linux/perf_counter.h
+++ linux-2.6/include/linux/perf_counter.h
@@ -130,7 +130,11 @@ struct perf_counter_hw_event {
 	 */
 	__u64			config;
 
-	__u64			irq_period;
+	union {
+		__u64		irq_period;
+		__u64		irq_freq;
+	};
+
 	__u32			record_type;
 	__u32			read_format;
 
@@ -146,8 +150,9 @@ struct perf_counter_hw_event {
 				mmap           :  1, /* include mmap data     */
 				munmap         :  1, /* include munmap data   */
 				comm	       :  1, /* include comm data     */
+				freq           :  1, /* use freq, not period  */
 
-				__reserved_1   : 52;
+				__reserved_1   : 51;
 
 	__u32			extra_config_len;
 	__u32			wakeup_events;	/* wakeup every n events */
@@ -337,6 +342,7 @@ struct hw_perf_counter {
 	atomic64_t			prev_count;
 	u64				irq_period;
 	atomic64_t			period_left;
+	u64				interrupts;
 #endif
 };
 
Index: linux-2.6/kernel/perf_counter.c
===================================================================
--- linux-2.6.orig/kernel/perf_counter.c
+++ linux-2.6/kernel/perf_counter.c
@@ -1046,6 +1046,38 @@ int perf_counter_task_enable(void)
 	return 0;
 }
 
+void perf_adjust_freq(struct perf_counter_context *ctx)
+{
+	struct perf_counter *counter;
+	u64 irq_period;
+	u64 events, period;
+	s64 delta;
+
+	spin_lock(&ctx->lock);
+	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+		if (counter->state != PERF_COUNTER_STATE_ACTIVE)
+			continue;
+
+		if (!counter->hw_event.freq || !counter->hw_event.irq_freq)
+			continue;
+
+		events = HZ * counter->hw.interrupts * counter->hw.irq_period;
+		period = div64_u64(events, counter->hw_event.irq_freq);
+
+		delta = (s64)(1 + period - counter->hw.irq_period);
+		delta >>= 1;
+
+		irq_period = counter->hw.irq_period + delta;
+
+		if (!irq_period)
+			irq_period = 1;
+
+		counter->hw.irq_period = irq_period;
+		counter->hw.interrupts = 0;
+	}
+	spin_unlock(&ctx->lock);
+}
+
 /*
  * Round-robin a context's counters:
  */
@@ -1081,6 +1113,9 @@ void perf_counter_task_tick(struct task_
 	cpuctx = &per_cpu(perf_cpu_context, cpu);
 	ctx = &curr->perf_counter_ctx;
 
+	perf_adjust_freq(&cpuctx->ctx);
+	perf_adjust_freq(ctx);
+
 	perf_counter_cpu_sched_out(cpuctx);
 	__perf_counter_task_sched_out(ctx);
 
@@ -2382,6 +2417,8 @@ int perf_counter_overflow(struct perf_co
 	int events = atomic_read(&counter->event_limit);
 	int ret = 0;
 
+	counter->hw.interrupts++;
+
 	/*
 	 * XXX event_limit might not quite work as expected on inherited
 	 * counters
@@ -2450,6 +2487,7 @@ static enum hrtimer_restart perf_swcount
 	enum hrtimer_restart ret = HRTIMER_RESTART;
 	struct perf_counter *counter;
 	struct pt_regs *regs;
+	u64 period;
 
 	counter	= container_of(hrtimer, struct perf_counter, hw.hrtimer);
 	counter->pmu->read(counter);
@@ -2468,7 +2506,8 @@ static enum hrtimer_restart perf_swcount
 			ret = HRTIMER_NORESTART;
 	}
 
-	hrtimer_forward_now(hrtimer, ns_to_ktime(counter->hw.irq_period));
+	period = max_t(u64, 10000, counter->hw.irq_period);
+	hrtimer_forward_now(hrtimer, ns_to_ktime(period));
 
 	return ret;
 }
@@ -2629,8 +2668,9 @@ static int cpu_clock_perf_counter_enable
 	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	hwc->hrtimer.function = perf_swcounter_hrtimer;
 	if (hwc->irq_period) {
+		u64 period = max_t(u64, 10000, hwc->irq_period);
 		__hrtimer_start_range_ns(&hwc->hrtimer,
-				ns_to_ktime(hwc->irq_period), 0,
+				ns_to_ktime(period), 0,
 				HRTIMER_MODE_REL, 0);
 	}
 
@@ -2679,8 +2719,9 @@ static int task_clock_perf_counter_enabl
 	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	hwc->hrtimer.function = perf_swcounter_hrtimer;
 	if (hwc->irq_period) {
+		u64 period = max_t(u64, 10000, hwc->irq_period);
 		__hrtimer_start_range_ns(&hwc->hrtimer,
-				ns_to_ktime(hwc->irq_period), 0,
+				ns_to_ktime(period), 0,
 				HRTIMER_MODE_REL, 0);
 	}
 
@@ -2811,9 +2852,7 @@ static const struct pmu *tp_perf_counter
 
 static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
 {
-	struct perf_counter_hw_event *hw_event = &counter->hw_event;
 	const struct pmu *pmu = NULL;
-	struct hw_perf_counter *hwc = &counter->hw;
 
 	/*
 	 * Software counters (currently) can't in general distinguish
@@ -2826,8 +2865,6 @@ static const struct pmu *sw_perf_counter
 	case PERF_COUNT_CPU_CLOCK:
 		pmu = &perf_ops_cpu_clock;
 
-		if (hw_event->irq_period && hw_event->irq_period < 10000)
-			hw_event->irq_period = 10000;
 		break;
 	case PERF_COUNT_TASK_CLOCK:
 		/*
@@ -2839,8 +2876,6 @@ static const struct pmu *sw_perf_counter
 		else
 			pmu = &perf_ops_cpu_clock;
 
-		if (hw_event->irq_period && hw_event->irq_period < 10000)
-			hw_event->irq_period = 10000;
 		break;
 	case PERF_COUNT_PAGE_FAULTS:
 	case PERF_COUNT_PAGE_FAULTS_MIN:
@@ -2854,9 +2889,6 @@ static const struct pmu *sw_perf_counter
 		break;
 	}
 
-	if (pmu)
-		hwc->irq_period = hw_event->irq_period;
-
 	return pmu;
 }
 
@@ -2872,6 +2904,7 @@ perf_counter_alloc(struct perf_counter_h
 {
 	const struct pmu *pmu;
 	struct perf_counter *counter;
+	struct hw_perf_counter *hwc;
 	long err;
 
 	counter = kzalloc(sizeof(*counter), gfpflags);
@@ -2907,6 +2940,12 @@ perf_counter_alloc(struct perf_counter_h
 
 	pmu = NULL;
 
+	hwc = &counter->hw;
+	if (hw_event->freq && hw_event->irq_freq)
+		hwc->irq_period = TICK_NSEC / hw_event->irq_freq;
+	else
+		hwc->irq_period = hw_event->irq_period;
+
 	/*
 	 * we currently do not support PERF_RECORD_GROUP on inherited counters
 	 */
Index: linux-2.6/arch/powerpc/kernel/perf_counter.c
===================================================================
--- linux-2.6.orig/arch/powerpc/kernel/perf_counter.c
+++ linux-2.6/arch/powerpc/kernel/perf_counter.c
@@ -534,7 +534,7 @@ void hw_perf_enable(void)
 			continue;
 		}
 		val = 0;
-		if (counter->hw_event.irq_period) {
+		if (counter->hw.irq_period) {
 			left = atomic64_read(&counter->hw.period_left);
 			if (left < 0x80000000L)
 				val = 0x80000000L - left;
@@ -829,8 +829,6 @@ const struct pmu *hw_perf_counter_init(s
 
 	if (!ppmu)
 		return ERR_PTR(-ENXIO);
-	if ((s64)counter->hw_event.irq_period < 0)
-		return ERR_PTR(-EINVAL);
 	if (!perf_event_raw(&counter->hw_event)) {
 		ev = perf_event_id(&counter->hw_event);
 		if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
@@ -901,7 +899,7 @@ const struct pmu *hw_perf_counter_init(s
 
 	counter->hw.config = events[n];
 	counter->hw.counter_base = cflags[n];
-	atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period);
+	atomic64_set(&counter->hw.period_left, counter->hw.irq_period);
 
 	/*
 	 * See if we need to reserve the PMU.
@@ -934,6 +932,7 @@ const struct pmu *hw_perf_counter_init(s
 static void record_and_restart(struct perf_counter *counter, long val,
 			       struct pt_regs *regs, int nmi)
 {
+	u64 period = counter->hw.irq_period;
 	s64 prev, delta, left;
 	int record = 0;
 
@@ -948,11 +947,11 @@ static void record_and_restart(struct pe
 	 */
 	val = 0;
 	left = atomic64_read(&counter->hw.period_left) - delta;
-	if (counter->hw_event.irq_period) {
+	if (period) {
 		if (left <= 0) {
-			left += counter->hw_event.irq_period;
+			left += period;
 			if (left <= 0)
-				left = counter->hw_event.irq_period;
+				left = period;
 			record = 1;
 		}
 		if (left < 0x80000000L)

-- 


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 4/4] perf_counter: update perf-top to use the new freq interface
  2009-05-15 13:19 [PATCH 0/4] perf-counter bits Peter Zijlstra
                   ` (2 preceding siblings ...)
  2009-05-15 13:19 ` [PATCH 3/4] perf_counter: frequency based adaptive irq_period Peter Zijlstra
@ 2009-05-15 13:19 ` Peter Zijlstra
  2009-05-15 13:33   ` Ingo Molnar
  2009-05-15 14:43   ` [tip:perfcounters/core] perf top: update " tip-bot for Peter Zijlstra
  3 siblings, 2 replies; 13+ messages in thread
From: Peter Zijlstra @ 2009-05-15 13:19 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Paul Mackerras, Corey Ashford, linux-kernel, Peter Zijlstra

[-- Attachment #1: perf_counter-tool-freq.patch --]
[-- Type: text/plain, Size: 1836 bytes --]

Provide perf top -F as alternative to -c.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Paul Mackerras <paulus@samba.org>
CC: Corey Ashford <cjashfor@linux.vnet.ibm.com>
---
 Documentation/perf_counter/builtin-top.c |    6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

Index: linux-2.6/Documentation/perf_counter/builtin-top.c
===================================================================
--- linux-2.6.orig/Documentation/perf_counter/builtin-top.c
+++ linux-2.6/Documentation/perf_counter/builtin-top.c
@@ -98,6 +98,7 @@ static unsigned int		page_size;
 static unsigned int		mmap_pages			=  16;
 static int			use_mmap			= 0;
 static int			use_munmap			= 0;
+static int			freq				= 0;
 
 static char			*vmlinux;
 
@@ -846,9 +847,10 @@ static void process_options(int argc, ch
 			{"stat",	no_argument,		NULL, 'S'},
 			{"vmlinux",	required_argument,	NULL, 'x'},
 			{"zero",	no_argument,		NULL, 'z'},
+			{"freq",	required_argument,	NULL, 'F'},
 			{NULL,		0,			NULL,  0 }
 		};
-		int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hln:m:p:r:s:Sx:zMU",
+		int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hln:m:p:r:s:Sx:zMUF:",
 				    long_options, &option_index);
 		if (c == -1)
 			break;
@@ -889,6 +891,7 @@ static void process_options(int argc, ch
 		case 'm': mmap_pages			=   atoi(optarg); break;
 		case 'M': use_mmap			=              1; break;
 		case 'U': use_munmap			=              1; break;
+		case 'F': freq = 1; default_interval	=   atoi(optarg); break;
 		default: error = 1; break;
 		}
 	}
@@ -1075,6 +1078,7 @@ int cmd_top(int argc, char **argv, const
 			hw_event.nmi		= nmi;
 			hw_event.mmap		= use_mmap;
 			hw_event.munmap		= use_munmap;
+			hw_event.freq		= freq;
 
 			fd[i][counter] = sys_perf_counter_open(&hw_event, tid, cpu, group_fd, 0);
 			if (fd[i][counter] < 0) {

-- 


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 4/4] perf_counter: update perf-top to use the new freq interface
  2009-05-15 13:19 ` [PATCH 4/4] perf_counter: update perf-top to use the new freq interface Peter Zijlstra
@ 2009-05-15 13:33   ` Ingo Molnar
  2009-05-15 14:43   ` [tip:perfcounters/core] perf top: update " tip-bot for Peter Zijlstra
  1 sibling, 0 replies; 13+ messages in thread
From: Ingo Molnar @ 2009-05-15 13:33 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Paul Mackerras, Corey Ashford, linux-kernel,
	Arnaldo Carvalho de Melo


* Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:

> Provide perf top -F as alternative to -c.
> 
> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
> CC: Paul Mackerras <paulus@samba.org>
> CC: Corey Ashford <cjashfor@linux.vnet.ibm.com>
> ---
>  Documentation/perf_counter/builtin-top.c |    6 +++++-
>  1 file changed, 5 insertions(+), 1 deletion(-)
> 
> Index: linux-2.6/Documentation/perf_counter/builtin-top.c
> ===================================================================
> --- linux-2.6.orig/Documentation/perf_counter/builtin-top.c
> +++ linux-2.6/Documentation/perf_counter/builtin-top.c
> @@ -98,6 +98,7 @@ static unsigned int		page_size;
>  static unsigned int		mmap_pages			=  16;
>  static int			use_mmap			= 0;
>  static int			use_munmap			= 0;
> +static int			freq				= 0;
>  
>  static char			*vmlinux;
>  
> @@ -846,9 +847,10 @@ static void process_options(int argc, ch
>  			{"stat",	no_argument,		NULL, 'S'},
>  			{"vmlinux",	required_argument,	NULL, 'x'},
>  			{"zero",	no_argument,		NULL, 'z'},
> +			{"freq",	required_argument,	NULL, 'F'},
>  			{NULL,		0,			NULL,  0 }
>  		};
> -		int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hln:m:p:r:s:Sx:zMU",
> +		int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hln:m:p:r:s:Sx:zMUF:",
>  				    long_options, &option_index);
>  		if (c == -1)
>  			break;
> @@ -889,6 +891,7 @@ static void process_options(int argc, ch
>  		case 'm': mmap_pages			=   atoi(optarg); break;
>  		case 'M': use_mmap			=              1; break;
>  		case 'U': use_munmap			=              1; break;
> +		case 'F': freq = 1; default_interval	=   atoi(optarg); break;
>  		default: error = 1; break;
>  		}
>  	}
> @@ -1075,6 +1078,7 @@ int cmd_top(int argc, char **argv, const
>  			hw_event.nmi		= nmi;
>  			hw_event.mmap		= use_mmap;
>  			hw_event.munmap		= use_munmap;
> +			hw_event.freq		= freq;
>  
>  			fd[i][counter] = sys_perf_counter_open(&hw_event, tid, cpu, group_fd, 0);
>  			if (fd[i][counter] < 0) {

this frequency-based profiling is nice. It's a lot more intuitive to 
users than rigid defaults of 'one IRQ per 100,000 cycles'.

So i think perf-top should be changed to have -F enabled by default, 
with a default 10 KHz frequency for all counters.

But for that we need another fix for this: currently the histogram 
is 'number of interrupts' based, which gets skewed with frequency 
based profiling.

A correct sorting key would be a normalized histogram, along 'number 
of hardware events', which could be measured as deltas between 
interrupts, like this:

	counter_val: 1200000       [ IRQ ] -> { 1200000, RIP-1 }
                 .
                 .
                 .
        counter_val: 1250000       [ IRQ ] -> { 1250000, RIP-2 }
                 .
                 .
                 .
        counter_val: 1260000       [ IRQ ] -> { 1260000, RIP-3 }

look at how the delta between the first and the second IRQ was 50000 
cycles, while the delta between the second and third IRQ was just 
10000 cycles - because the frequency adjustment code shortened the 
period.

So in the histogram, RIP-2 should get 50,000 cycles, and RIP-3 
should get 10,000 cycles.

With the current scheme both would get +1 event credited - which is 
wrong.

Agreed?

	Ingo

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 3/4] perf_counter: frequency based adaptive irq_period
  2009-05-15 13:19 ` [PATCH 3/4] perf_counter: frequency based adaptive irq_period Peter Zijlstra
@ 2009-05-15 13:35   ` Ingo Molnar
  2009-05-15 13:37   ` Peter Zijlstra
  2009-05-15 14:43   ` [tip:perfcounters/core] perf_counter: frequency based adaptive irq_period tip-bot for Peter Zijlstra
  2 siblings, 0 replies; 13+ messages in thread
From: Ingo Molnar @ 2009-05-15 13:35 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Paul Mackerras, Corey Ashford, linux-kernel, Thomas Gleixner,
	Arnaldo Carvalho de Melo


* Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:

> @@ -2907,6 +2940,12 @@ perf_counter_alloc(struct perf_counter_h
>  
>  	pmu = NULL;
>  
> +	hwc = &counter->hw;
> +	if (hw_event->freq && hw_event->irq_freq)
> +		hwc->irq_period = TICK_NSEC / hw_event->irq_freq;

this needs to use a 64-bit do_div method too, otherwise we get this 
on 32-bit:

 kernel/built-in.o: In function `perf_counter_alloc':
 perf_counter.c:(.text+0x7ddc7): undefined reference to `__udivdi3'

	Ingo

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 3/4] perf_counter: frequency based adaptive irq_period
  2009-05-15 13:19 ` [PATCH 3/4] perf_counter: frequency based adaptive irq_period Peter Zijlstra
  2009-05-15 13:35   ` Ingo Molnar
@ 2009-05-15 13:37   ` Peter Zijlstra
  2009-05-15 14:43     ` [tip:perfcounters/core] perf_counter: frequency based adaptive irq_period, 32-bit fix tip-bot for Peter Zijlstra
  2009-05-15 14:43   ` [tip:perfcounters/core] perf_counter: frequency based adaptive irq_period tip-bot for Peter Zijlstra
  2 siblings, 1 reply; 13+ messages in thread
From: Peter Zijlstra @ 2009-05-15 13:37 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Paul Mackerras, Corey Ashford, linux-kernel

On Fri, 2009-05-15 at 15:19 +0200, Peter Zijlstra wrote:
> plain text document attachment (perf_counter-freq.patch)
> Instead of specifying the irq_period for a counter, provide a target interrupt
> frequency and dynamically adapt the irq_period to match this frequency.
> 
> Paul, please verify ppc64, I only did a very quick pass over that code.
> 
> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
> CC: Paul Mackerras <paulus@samba.org>
> CC: Corey Ashford <cjashfor@linux.vnet.ibm.com>
> ---

> @@ -2907,6 +2940,12 @@ perf_counter_alloc(struct perf_counter_h
>  
>  	pmu = NULL;
>  
> +	hwc = &counter->hw;
> +	if (hw_event->freq && hw_event->irq_freq)
> +		hwc->irq_period = TICK_NSEC / hw_event->irq_freq;
> +	else
> +		hwc->irq_period = hw_event->irq_period;
> +



kernel/built-in.o: In function `perf_counter_alloc':
perf_counter.c:(.text+0x7ddc7): undefined reference to `__udivdi3'

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---

Index: linux-2.6/kernel/perf_counter.c
===================================================================
--- linux-2.6.orig/kernel/perf_counter.c
+++ linux-2.6/kernel/perf_counter.c
@@ -2942,7 +2942,7 @@ perf_counter_alloc(struct perf_counter_h
 
 	hwc = &counter->hw;
 	if (hw_event->freq && hw_event->irq_freq)
-		hwc->irq_period = TICK_NSEC / hw_event->irq_freq;
+		hwc->irq_period = div64_u64(TICK_NSEC, hw_event->irq_freq);
 	else
 		hwc->irq_period = hw_event->irq_period;
 



^ permalink raw reply	[flat|nested] 13+ messages in thread

* [tip:perfcounters/core] perf_counter: remove perf_disable/enable exports
  2009-05-15 13:19 ` [PATCH 1/4] perf_counter: remove perf_disable/enable exports Peter Zijlstra
@ 2009-05-15 14:43   ` tip-bot for Peter Zijlstra
  0 siblings, 0 replies; 13+ messages in thread
From: tip-bot for Peter Zijlstra @ 2009-05-15 14:43 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, acme, paulus, hpa, mingo, a.p.zijlstra, tglx,
	cjashfor, mingo

Commit-ID:  548e1ddf255b4ebfb4ef20c08936fd8d4deb3bd9
Gitweb:     http://git.kernel.org/tip/548e1ddf255b4ebfb4ef20c08936fd8d4deb3bd9
Author:     Peter Zijlstra <a.p.zijlstra@chello.nl>
AuthorDate: Fri, 15 May 2009 15:19:26 +0200
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Fri, 15 May 2009 15:26:55 +0200

perf_counter: remove perf_disable/enable exports

Now that ACPI idle doesn't use it anymore, remove the exports.

[ Impact: remove dead code/data ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <20090515132018.429826617@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>


---
 kernel/perf_counter.c |    2 --
 1 files changed, 0 insertions(+), 2 deletions(-)

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index e814ff0..0173738 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -90,14 +90,12 @@ void perf_disable(void)
 	__perf_disable();
 	hw_perf_disable();
 }
-EXPORT_SYMBOL_GPL(perf_disable); /* ACPI idle */
 
 void perf_enable(void)
 {
 	if (__perf_enable())
 		hw_perf_enable();
 }
-EXPORT_SYMBOL_GPL(perf_enable); /* ACPI idle */
 
 static void
 list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [tip:perfcounters/core] perf_counter: per user mlock gift
  2009-05-15 13:19 ` [PATCH 2/4] perf_counter: per user mlock gift Peter Zijlstra
@ 2009-05-15 14:43   ` tip-bot for Peter Zijlstra
  0 siblings, 0 replies; 13+ messages in thread
From: tip-bot for Peter Zijlstra @ 2009-05-15 14:43 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, acme, paulus, hpa, mingo, a.p.zijlstra, tglx,
	cjashfor, mingo

Commit-ID:  789f90fcf6b0b54e655740e9396c954378542c79
Gitweb:     http://git.kernel.org/tip/789f90fcf6b0b54e655740e9396c954378542c79
Author:     Peter Zijlstra <a.p.zijlstra@chello.nl>
AuthorDate: Fri, 15 May 2009 15:19:27 +0200
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Fri, 15 May 2009 15:26:56 +0200

perf_counter: per user mlock gift

Instead of a per-process mlock gift for perf-counters, use a
per-user gift so that there is less of a DoS potential.

[ Impact: allow less worst-case unprivileged memory consumption ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <20090515132018.496182835@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>


---
 include/linux/sched.h |    4 ++++
 kernel/perf_counter.c |   22 +++++++++++++++-------
 2 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index d185758..ff59d12 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -674,6 +674,10 @@ struct user_struct {
 	struct work_struct work;
 #endif
 #endif
+
+#ifdef CONFIG_PERF_COUNTERS
+	atomic_long_t locked_vm;
+#endif
 };
 
 extern int uids_sysfs_init(void);
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 0173738..93f4a0e 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -45,7 +45,7 @@ static atomic_t nr_munmap_tracking __read_mostly;
 static atomic_t nr_comm_tracking __read_mostly;
 
 int sysctl_perf_counter_priv __read_mostly; /* do we need to be privileged */
-int sysctl_perf_counter_mlock __read_mostly = 128; /* 'free' kb per counter */
+int sysctl_perf_counter_mlock __read_mostly = 512; /* 'free' kb per user */
 
 /*
  * Lock for (sysadmin-configurable) counter reservations:
@@ -1522,6 +1522,9 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 
 	if (atomic_dec_and_mutex_lock(&counter->mmap_count,
 				      &counter->mmap_mutex)) {
+		struct user_struct *user = current_user();
+
+		atomic_long_sub(counter->data->nr_pages + 1, &user->locked_vm);
 		vma->vm_mm->locked_vm -= counter->data->nr_locked;
 		perf_mmap_data_free(counter);
 		mutex_unlock(&counter->mmap_mutex);
@@ -1537,11 +1540,13 @@ static struct vm_operations_struct perf_mmap_vmops = {
 static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct perf_counter *counter = file->private_data;
+	struct user_struct *user = current_user();
 	unsigned long vma_size;
 	unsigned long nr_pages;
+	unsigned long user_locked, user_lock_limit;
 	unsigned long locked, lock_limit;
+	long user_extra, extra;
 	int ret = 0;
-	long extra;
 
 	if (!(vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_WRITE))
 		return -EINVAL;
@@ -1569,15 +1574,17 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 		goto unlock;
 	}
 
-	extra = nr_pages /* + 1 only account the data pages */;
-	extra -= sysctl_perf_counter_mlock >> (PAGE_SHIFT - 10);
-	if (extra < 0)
-		extra = 0;
+	user_extra = nr_pages + 1;
+	user_lock_limit = sysctl_perf_counter_mlock >> (PAGE_SHIFT - 10);
+	user_locked = atomic_long_read(&user->locked_vm) + user_extra;
 
-	locked = vma->vm_mm->locked_vm + extra;
+	extra = 0;
+	if (user_locked > user_lock_limit)
+		extra = user_locked - user_lock_limit;
 
 	lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
 	lock_limit >>= PAGE_SHIFT;
+	locked = vma->vm_mm->locked_vm + extra;
 
 	if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
 		ret = -EPERM;
@@ -1590,6 +1597,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 		goto unlock;
 
 	atomic_set(&counter->mmap_count, 1);
+	atomic_long_add(user_extra, &user->locked_vm);
 	vma->vm_mm->locked_vm += extra;
 	counter->data->nr_locked = extra;
 unlock:

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [tip:perfcounters/core] perf_counter: frequency based adaptive irq_period
  2009-05-15 13:19 ` [PATCH 3/4] perf_counter: frequency based adaptive irq_period Peter Zijlstra
  2009-05-15 13:35   ` Ingo Molnar
  2009-05-15 13:37   ` Peter Zijlstra
@ 2009-05-15 14:43   ` tip-bot for Peter Zijlstra
  2 siblings, 0 replies; 13+ messages in thread
From: tip-bot for Peter Zijlstra @ 2009-05-15 14:43 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, acme, paulus, hpa, mingo, a.p.zijlstra, tglx,
	cjashfor, mingo

Commit-ID:  60db5e09c13109b13830cc9dcae688003fd39e79
Gitweb:     http://git.kernel.org/tip/60db5e09c13109b13830cc9dcae688003fd39e79
Author:     Peter Zijlstra <a.p.zijlstra@chello.nl>
AuthorDate: Fri, 15 May 2009 15:19:28 +0200
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Fri, 15 May 2009 15:26:56 +0200

perf_counter: frequency based adaptive irq_period

Instead of specifying the irq_period for a counter, provide a target interrupt
frequency and dynamically adapt the irq_period to match this frequency.

[ Impact: new perf-counter attribute/feature ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <20090515132018.646195868@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>


---
 arch/powerpc/kernel/perf_counter.c |   13 +++----
 arch/x86/kernel/cpu/perf_counter.c |    9 ++---
 include/linux/perf_counter.h       |   10 ++++-
 kernel/perf_counter.c              |   63 +++++++++++++++++++++++++++++-------
 4 files changed, 68 insertions(+), 27 deletions(-)

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index bb1b463..db8d5ca 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -534,7 +534,7 @@ void hw_perf_enable(void)
 			continue;
 		}
 		val = 0;
-		if (counter->hw_event.irq_period) {
+		if (counter->hw.irq_period) {
 			left = atomic64_read(&counter->hw.period_left);
 			if (left < 0x80000000L)
 				val = 0x80000000L - left;
@@ -829,8 +829,6 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 
 	if (!ppmu)
 		return ERR_PTR(-ENXIO);
-	if ((s64)counter->hw_event.irq_period < 0)
-		return ERR_PTR(-EINVAL);
 	if (!perf_event_raw(&counter->hw_event)) {
 		ev = perf_event_id(&counter->hw_event);
 		if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
@@ -901,7 +899,7 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 
 	counter->hw.config = events[n];
 	counter->hw.counter_base = cflags[n];
-	atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period);
+	atomic64_set(&counter->hw.period_left, counter->hw.irq_period);
 
 	/*
 	 * See if we need to reserve the PMU.
@@ -934,6 +932,7 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 static void record_and_restart(struct perf_counter *counter, long val,
 			       struct pt_regs *regs, int nmi)
 {
+	u64 period = counter->hw.irq_period;
 	s64 prev, delta, left;
 	int record = 0;
 
@@ -948,11 +947,11 @@ static void record_and_restart(struct perf_counter *counter, long val,
 	 */
 	val = 0;
 	left = atomic64_read(&counter->hw.period_left) - delta;
-	if (counter->hw_event.irq_period) {
+	if (period) {
 		if (left <= 0) {
-			left += counter->hw_event.irq_period;
+			left += period;
 			if (left <= 0)
-				left = counter->hw_event.irq_period;
+				left = period;
 			record = 1;
 		}
 		if (left < 0x80000000L)
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 5a7f718..886dcf3 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -286,11 +286,8 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 		hwc->nmi = 1;
 	}
 
-	hwc->irq_period	= hw_event->irq_period;
-	if ((s64)hwc->irq_period <= 0 || hwc->irq_period > x86_pmu.max_period)
-		hwc->irq_period = x86_pmu.max_period;
-
-	atomic64_set(&hwc->period_left, hwc->irq_period);
+	atomic64_set(&hwc->period_left,
+			min(x86_pmu.max_period, hwc->irq_period));
 
 	/*
 	 * Raw event type provide the config in the event structure
@@ -458,7 +455,7 @@ x86_perf_counter_set_period(struct perf_counter *counter,
 			     struct hw_perf_counter *hwc, int idx)
 {
 	s64 left = atomic64_read(&hwc->period_left);
-	s64 period = hwc->irq_period;
+	s64 period = min(x86_pmu.max_period, hwc->irq_period);
 	int err;
 
 	/*
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index e543ecc..004b6e1 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -130,7 +130,11 @@ struct perf_counter_hw_event {
 	 */
 	__u64			config;
 
-	__u64			irq_period;
+	union {
+		__u64		irq_period;
+		__u64		irq_freq;
+	};
+
 	__u32			record_type;
 	__u32			read_format;
 
@@ -146,8 +150,9 @@ struct perf_counter_hw_event {
 				mmap           :  1, /* include mmap data     */
 				munmap         :  1, /* include munmap data   */
 				comm	       :  1, /* include comm data     */
+				freq           :  1, /* use freq, not period  */
 
-				__reserved_1   : 52;
+				__reserved_1   : 51;
 
 	__u32			extra_config_len;
 	__u32			wakeup_events;	/* wakeup every n events */
@@ -337,6 +342,7 @@ struct hw_perf_counter {
 	atomic64_t			prev_count;
 	u64				irq_period;
 	atomic64_t			period_left;
+	u64				interrupts;
 #endif
 };
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 93f4a0e..0ad1db4 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1046,6 +1046,38 @@ int perf_counter_task_enable(void)
 	return 0;
 }
 
+void perf_adjust_freq(struct perf_counter_context *ctx)
+{
+	struct perf_counter *counter;
+	u64 irq_period;
+	u64 events, period;
+	s64 delta;
+
+	spin_lock(&ctx->lock);
+	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+		if (counter->state != PERF_COUNTER_STATE_ACTIVE)
+			continue;
+
+		if (!counter->hw_event.freq || !counter->hw_event.irq_freq)
+			continue;
+
+		events = HZ * counter->hw.interrupts * counter->hw.irq_period;
+		period = div64_u64(events, counter->hw_event.irq_freq);
+
+		delta = (s64)(1 + period - counter->hw.irq_period);
+		delta >>= 1;
+
+		irq_period = counter->hw.irq_period + delta;
+
+		if (!irq_period)
+			irq_period = 1;
+
+		counter->hw.irq_period = irq_period;
+		counter->hw.interrupts = 0;
+	}
+	spin_unlock(&ctx->lock);
+}
+
 /*
  * Round-robin a context's counters:
  */
@@ -1081,6 +1113,9 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
 	cpuctx = &per_cpu(perf_cpu_context, cpu);
 	ctx = &curr->perf_counter_ctx;
 
+	perf_adjust_freq(&cpuctx->ctx);
+	perf_adjust_freq(ctx);
+
 	perf_counter_cpu_sched_out(cpuctx);
 	__perf_counter_task_sched_out(ctx);
 
@@ -2382,6 +2417,8 @@ int perf_counter_overflow(struct perf_counter *counter,
 	int events = atomic_read(&counter->event_limit);
 	int ret = 0;
 
+	counter->hw.interrupts++;
+
 	/*
 	 * XXX event_limit might not quite work as expected on inherited
 	 * counters
@@ -2450,6 +2487,7 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
 	enum hrtimer_restart ret = HRTIMER_RESTART;
 	struct perf_counter *counter;
 	struct pt_regs *regs;
+	u64 period;
 
 	counter	= container_of(hrtimer, struct perf_counter, hw.hrtimer);
 	counter->pmu->read(counter);
@@ -2468,7 +2506,8 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
 			ret = HRTIMER_NORESTART;
 	}
 
-	hrtimer_forward_now(hrtimer, ns_to_ktime(counter->hw.irq_period));
+	period = max_t(u64, 10000, counter->hw.irq_period);
+	hrtimer_forward_now(hrtimer, ns_to_ktime(period));
 
 	return ret;
 }
@@ -2629,8 +2668,9 @@ static int cpu_clock_perf_counter_enable(struct perf_counter *counter)
 	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	hwc->hrtimer.function = perf_swcounter_hrtimer;
 	if (hwc->irq_period) {
+		u64 period = max_t(u64, 10000, hwc->irq_period);
 		__hrtimer_start_range_ns(&hwc->hrtimer,
-				ns_to_ktime(hwc->irq_period), 0,
+				ns_to_ktime(period), 0,
 				HRTIMER_MODE_REL, 0);
 	}
 
@@ -2679,8 +2719,9 @@ static int task_clock_perf_counter_enable(struct perf_counter *counter)
 	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	hwc->hrtimer.function = perf_swcounter_hrtimer;
 	if (hwc->irq_period) {
+		u64 period = max_t(u64, 10000, hwc->irq_period);
 		__hrtimer_start_range_ns(&hwc->hrtimer,
-				ns_to_ktime(hwc->irq_period), 0,
+				ns_to_ktime(period), 0,
 				HRTIMER_MODE_REL, 0);
 	}
 
@@ -2811,9 +2852,7 @@ static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
 
 static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
 {
-	struct perf_counter_hw_event *hw_event = &counter->hw_event;
 	const struct pmu *pmu = NULL;
-	struct hw_perf_counter *hwc = &counter->hw;
 
 	/*
 	 * Software counters (currently) can't in general distinguish
@@ -2826,8 +2865,6 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
 	case PERF_COUNT_CPU_CLOCK:
 		pmu = &perf_ops_cpu_clock;
 
-		if (hw_event->irq_period && hw_event->irq_period < 10000)
-			hw_event->irq_period = 10000;
 		break;
 	case PERF_COUNT_TASK_CLOCK:
 		/*
@@ -2839,8 +2876,6 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
 		else
 			pmu = &perf_ops_cpu_clock;
 
-		if (hw_event->irq_period && hw_event->irq_period < 10000)
-			hw_event->irq_period = 10000;
 		break;
 	case PERF_COUNT_PAGE_FAULTS:
 	case PERF_COUNT_PAGE_FAULTS_MIN:
@@ -2854,9 +2889,6 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
 		break;
 	}
 
-	if (pmu)
-		hwc->irq_period = hw_event->irq_period;
-
 	return pmu;
 }
 
@@ -2872,6 +2904,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 {
 	const struct pmu *pmu;
 	struct perf_counter *counter;
+	struct hw_perf_counter *hwc;
 	long err;
 
 	counter = kzalloc(sizeof(*counter), gfpflags);
@@ -2907,6 +2940,12 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 
 	pmu = NULL;
 
+	hwc = &counter->hw;
+	if (hw_event->freq && hw_event->irq_freq)
+		hwc->irq_period = TICK_NSEC / hw_event->irq_freq;
+	else
+		hwc->irq_period = hw_event->irq_period;
+
 	/*
 	 * we currently do not support PERF_RECORD_GROUP on inherited counters
 	 */

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [tip:perfcounters/core] perf top: update to use the new freq interface
  2009-05-15 13:19 ` [PATCH 4/4] perf_counter: update perf-top to use the new freq interface Peter Zijlstra
  2009-05-15 13:33   ` Ingo Molnar
@ 2009-05-15 14:43   ` tip-bot for Peter Zijlstra
  1 sibling, 0 replies; 13+ messages in thread
From: tip-bot for Peter Zijlstra @ 2009-05-15 14:43 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, acme, paulus, hpa, mingo, a.p.zijlstra, tglx,
	cjashfor, mingo

Commit-ID:  f5456a6b056b709282e87a68b4c1b81ac2e866fa
Gitweb:     http://git.kernel.org/tip/f5456a6b056b709282e87a68b4c1b81ac2e866fa
Author:     Peter Zijlstra <a.p.zijlstra@chello.nl>
AuthorDate: Fri, 15 May 2009 15:19:29 +0200
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Fri, 15 May 2009 15:26:57 +0200

perf top: update to use the new freq interface

Provide perf top -F as alternative to -c.

[ Impact: new 'perf top' feature ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <20090515132018.707922166@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>


---
 Documentation/perf_counter/builtin-top.c |    6 +++++-
 1 files changed, 5 insertions(+), 1 deletions(-)

diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c
index b1549dd..814b2e4 100644
--- a/Documentation/perf_counter/builtin-top.c
+++ b/Documentation/perf_counter/builtin-top.c
@@ -98,6 +98,7 @@ static unsigned int		page_size;
 static unsigned int		mmap_pages			=  16;
 static int			use_mmap			= 0;
 static int			use_munmap			= 0;
+static int			freq				= 0;
 
 static char			*vmlinux;
 
@@ -846,9 +847,10 @@ static void process_options(int argc, char **argv)
 			{"stat",	no_argument,		NULL, 'S'},
 			{"vmlinux",	required_argument,	NULL, 'x'},
 			{"zero",	no_argument,		NULL, 'z'},
+			{"freq",	required_argument,	NULL, 'F'},
 			{NULL,		0,			NULL,  0 }
 		};
-		int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hln:m:p:r:s:Sx:zMU",
+		int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hln:m:p:r:s:Sx:zMUF:",
 				    long_options, &option_index);
 		if (c == -1)
 			break;
@@ -889,6 +891,7 @@ static void process_options(int argc, char **argv)
 		case 'm': mmap_pages			=   atoi(optarg); break;
 		case 'M': use_mmap			=              1; break;
 		case 'U': use_munmap			=              1; break;
+		case 'F': freq = 1; default_interval	=   atoi(optarg); break;
 		default: error = 1; break;
 		}
 	}
@@ -1075,6 +1078,7 @@ int cmd_top(int argc, char **argv, const char *prefix)
 			hw_event.nmi		= nmi;
 			hw_event.mmap		= use_mmap;
 			hw_event.munmap		= use_munmap;
+			hw_event.freq		= freq;
 
 			fd[i][counter] = sys_perf_counter_open(&hw_event, tid, cpu, group_fd, 0);
 			if (fd[i][counter] < 0) {

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [tip:perfcounters/core] perf_counter: frequency based adaptive irq_period, 32-bit fix
  2009-05-15 13:37   ` Peter Zijlstra
@ 2009-05-15 14:43     ` tip-bot for Peter Zijlstra
  0 siblings, 0 replies; 13+ messages in thread
From: tip-bot for Peter Zijlstra @ 2009-05-15 14:43 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, acme, paulus, hpa, mingo, a.p.zijlstra, tglx,
	cjashfor, mingo

Commit-ID:  2e569d36729c8105ae066a9b105068305442cc77
Gitweb:     http://git.kernel.org/tip/2e569d36729c8105ae066a9b105068305442cc77
Author:     Peter Zijlstra <a.p.zijlstra@chello.nl>
AuthorDate: Fri, 15 May 2009 15:37:47 +0200
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Fri, 15 May 2009 15:40:25 +0200

perf_counter: frequency based adaptive irq_period, 32-bit fix

fix:

  kernel/built-in.o: In function `perf_counter_alloc':
  perf_counter.c:(.text+0x7ddc7): undefined reference to `__udivdi3'

[ Impact: build fix on 32-bit systems ]

Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <1242394667.6642.1887.camel@laptop>
Signed-off-by: Ingo Molnar <mingo@elte.hu>


---
 kernel/perf_counter.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 0ad1db4..728a595 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -2942,7 +2942,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 
 	hwc = &counter->hw;
 	if (hw_event->freq && hw_event->irq_freq)
-		hwc->irq_period = TICK_NSEC / hw_event->irq_freq;
+		hwc->irq_period = div64_u64(TICK_NSEC, hw_event->irq_freq);
 	else
 		hwc->irq_period = hw_event->irq_period;
 

^ permalink raw reply related	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2009-05-15 14:45 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-05-15 13:19 [PATCH 0/4] perf-counter bits Peter Zijlstra
2009-05-15 13:19 ` [PATCH 1/4] perf_counter: remove perf_disable/enable exports Peter Zijlstra
2009-05-15 14:43   ` [tip:perfcounters/core] " tip-bot for Peter Zijlstra
2009-05-15 13:19 ` [PATCH 2/4] perf_counter: per user mlock gift Peter Zijlstra
2009-05-15 14:43   ` [tip:perfcounters/core] " tip-bot for Peter Zijlstra
2009-05-15 13:19 ` [PATCH 3/4] perf_counter: frequency based adaptive irq_period Peter Zijlstra
2009-05-15 13:35   ` Ingo Molnar
2009-05-15 13:37   ` Peter Zijlstra
2009-05-15 14:43     ` [tip:perfcounters/core] perf_counter: frequency based adaptive irq_period, 32-bit fix tip-bot for Peter Zijlstra
2009-05-15 14:43   ` [tip:perfcounters/core] perf_counter: frequency based adaptive irq_period tip-bot for Peter Zijlstra
2009-05-15 13:19 ` [PATCH 4/4] perf_counter: update perf-top to use the new freq interface Peter Zijlstra
2009-05-15 13:33   ` Ingo Molnar
2009-05-15 14:43   ` [tip:perfcounters/core] perf top: update " tip-bot for Peter Zijlstra

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox