All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/4] perf-counter bits
@ 2009-05-15 13:19 Peter Zijlstra
  2009-05-15 13:19 ` [PATCH 1/4] perf_counter: remove perf_disable/enable exports Peter Zijlstra
                   ` (3 more replies)
  0 siblings, 4 replies; 13+ messages in thread
From: Peter Zijlstra @ 2009-05-15 13:19 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Paul Mackerras, Corey Ashford, linux-kernel, Peter Zijlstra

pending perf counter bits
-- 


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 1/4] perf_counter: remove perf_disable/enable exports
  2009-05-15 13:19 [PATCH 0/4] perf-counter bits Peter Zijlstra
@ 2009-05-15 13:19 ` Peter Zijlstra
  2009-05-15 14:43   ` [tip:perfcounters/core] " tip-bot for Peter Zijlstra
  2009-05-15 13:19 ` [PATCH 2/4] perf_counter: per user mlock gift Peter Zijlstra
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 13+ messages in thread
From: Peter Zijlstra @ 2009-05-15 13:19 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Paul Mackerras, Corey Ashford, linux-kernel, Peter Zijlstra

[-- Attachment #1: perf_counter-remove-export.patch --]
[-- Type: text/plain, Size: 825 bytes --]

Now that ACPI idle doesn't use it anymore, remove the exports.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Paul Mackerras <paulus@samba.org>
CC: Corey Ashford <cjashfor@linux.vnet.ibm.com>
---
 kernel/perf_counter.c |    2 --
 1 file changed, 2 deletions(-)

Index: linux-2.6/kernel/perf_counter.c
===================================================================
--- linux-2.6.orig/kernel/perf_counter.c
+++ linux-2.6/kernel/perf_counter.c
@@ -90,14 +90,12 @@ void perf_disable(void)
 	__perf_disable();
 	hw_perf_disable();
 }
-EXPORT_SYMBOL_GPL(perf_disable); /* ACPI idle */
 
 void perf_enable(void)
 {
 	if (__perf_enable())
 		hw_perf_enable();
 }
-EXPORT_SYMBOL_GPL(perf_enable); /* ACPI idle */
 
 static void
 list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)

-- 


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 2/4] perf_counter: per user mlock gift
  2009-05-15 13:19 [PATCH 0/4] perf-counter bits Peter Zijlstra
  2009-05-15 13:19 ` [PATCH 1/4] perf_counter: remove perf_disable/enable exports Peter Zijlstra
@ 2009-05-15 13:19 ` Peter Zijlstra
  2009-05-15 14:43   ` [tip:perfcounters/core] " tip-bot for Peter Zijlstra
  2009-05-15 13:19 ` [PATCH 3/4] perf_counter: frequency based adaptive irq_period Peter Zijlstra
  2009-05-15 13:19 ` [PATCH 4/4] perf_counter: update perf-top to use the new freq interface Peter Zijlstra
  3 siblings, 1 reply; 13+ messages in thread
From: Peter Zijlstra @ 2009-05-15 13:19 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Paul Mackerras, Corey Ashford, linux-kernel, Peter Zijlstra

[-- Attachment #1: perf_counter-mlock.patch --]
[-- Type: text/plain, Size: 3284 bytes --]

Instead of a per-process mlock gift for perf-counters, use a per-user gift
so that there is less of a DoS potential.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Paul Mackerras <paulus@samba.org>
CC: Corey Ashford <cjashfor@linux.vnet.ibm.com>
---
 include/linux/sched.h |    4 ++++
 kernel/perf_counter.c |   24 ++++++++++++++++--------
 2 files changed, 20 insertions(+), 8 deletions(-)

Index: linux-2.6/kernel/perf_counter.c
===================================================================
--- linux-2.6.orig/kernel/perf_counter.c
+++ linux-2.6/kernel/perf_counter.c
@@ -45,7 +45,7 @@ static atomic_t nr_munmap_tracking __rea
 static atomic_t nr_comm_tracking __read_mostly;
 
 int sysctl_perf_counter_priv __read_mostly; /* do we need to be privileged */
-int sysctl_perf_counter_mlock __read_mostly = 128; /* 'free' kb per counter */
+int sysctl_perf_counter_mlock __read_mostly = 512; /* 'free' kb per user */
 
 /*
  * Lock for (sysadmin-configurable) counter reservations:
@@ -1522,6 +1522,9 @@ static void perf_mmap_close(struct vm_ar
 
 	if (atomic_dec_and_mutex_lock(&counter->mmap_count,
 				      &counter->mmap_mutex)) {
+		struct user_struct *user = current_user();
+
+		atomic_long_sub(counter->data->nr_pages + 1, &user->locked_vm);
 		vma->vm_mm->locked_vm -= counter->data->nr_locked;
 		perf_mmap_data_free(counter);
 		mutex_unlock(&counter->mmap_mutex);
@@ -1537,11 +1540,13 @@ static struct vm_operations_struct perf_
 static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct perf_counter *counter = file->private_data;
+	struct user_struct *user = current_user();
 	unsigned long vma_size;
 	unsigned long nr_pages;
+	unsigned long user_locked, user_lock_limit;
 	unsigned long locked, lock_limit;
+	long user_extra, extra;
 	int ret = 0;
-	long extra;
 
 	if (!(vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_WRITE))
 		return -EINVAL;
@@ -1569,15 +1574,17 @@ static int perf_mmap(struct file *file, 
 		goto unlock;
 	}
 
-	extra = nr_pages /* + 1 only account the data pages */;
-	extra -= sysctl_perf_counter_mlock >> (PAGE_SHIFT - 10);
-	if (extra < 0)
-		extra = 0;
-
-	locked = vma->vm_mm->locked_vm + extra;
+	user_extra = nr_pages + 1;
+	user_lock_limit = sysctl_perf_counter_mlock >> (PAGE_SHIFT - 10);
+	user_locked = atomic_long_read(&user->locked_vm) + user_extra;
+
+	extra = 0;
+	if (user_locked > user_lock_limit)
+		extra = user_locked - user_lock_limit;
 
 	lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
 	lock_limit >>= PAGE_SHIFT;
+	locked = vma->vm_mm->locked_vm + extra;
 
 	if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
 		ret = -EPERM;
@@ -1590,6 +1597,7 @@ static int perf_mmap(struct file *file, 
 		goto unlock;
 
 	atomic_set(&counter->mmap_count, 1);
+	atomic_long_add(user_extra, &user->locked_vm);
 	vma->vm_mm->locked_vm += extra;
 	counter->data->nr_locked = extra;
 unlock:
Index: linux-2.6/include/linux/sched.h
===================================================================
--- linux-2.6.orig/include/linux/sched.h
+++ linux-2.6/include/linux/sched.h
@@ -675,6 +675,10 @@ struct user_struct {
 	struct work_struct work;
 #endif
 #endif
+
+#ifdef CONFIG_PERF_COUNTERS
+	atomic_long_t locked_vm;
+#endif
 };
 
 extern int uids_sysfs_init(void);

-- 


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 3/4] perf_counter: frequency based adaptive irq_period
  2009-05-15 13:19 [PATCH 0/4] perf-counter bits Peter Zijlstra
  2009-05-15 13:19 ` [PATCH 1/4] perf_counter: remove perf_disable/enable exports Peter Zijlstra
  2009-05-15 13:19 ` [PATCH 2/4] perf_counter: per user mlock gift Peter Zijlstra
@ 2009-05-15 13:19 ` Peter Zijlstra
  2009-05-15 13:35   ` Ingo Molnar
                     ` (2 more replies)
  2009-05-15 13:19 ` [PATCH 4/4] perf_counter: update perf-top to use the new freq interface Peter Zijlstra
  3 siblings, 3 replies; 13+ messages in thread
From: Peter Zijlstra @ 2009-05-15 13:19 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Paul Mackerras, Corey Ashford, linux-kernel, Peter Zijlstra

[-- Attachment #1: perf_counter-freq.patch --]
[-- Type: text/plain, Size: 8951 bytes --]

Instead of specifying the irq_period for a counter, provide a target interrupt
frequency and dynamically adapt the irq_period to match this frequency.

Paul, please verify ppc64, I only did a very quick pass over that code.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Paul Mackerras <paulus@samba.org>
CC: Corey Ashford <cjashfor@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/perf_counter.c |   13 +++----
 arch/x86/kernel/cpu/perf_counter.c |    9 +----
 include/linux/perf_counter.h       |   10 ++++-
 kernel/perf_counter.c              |   63 +++++++++++++++++++++++++++++--------
 4 files changed, 68 insertions(+), 27 deletions(-)

Index: linux-2.6/arch/x86/kernel/cpu/perf_counter.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/perf_counter.c
+++ linux-2.6/arch/x86/kernel/cpu/perf_counter.c
@@ -286,11 +286,8 @@ static int __hw_perf_counter_init(struct
 		hwc->nmi = 1;
 	}
 
-	hwc->irq_period	= hw_event->irq_period;
-	if ((s64)hwc->irq_period <= 0 || hwc->irq_period > x86_pmu.max_period)
-		hwc->irq_period = x86_pmu.max_period;
-
-	atomic64_set(&hwc->period_left, hwc->irq_period);
+	atomic64_set(&hwc->period_left,
+			min(x86_pmu.max_period, hwc->irq_period));
 
 	/*
 	 * Raw event type provide the config in the event structure
@@ -458,7 +455,7 @@ x86_perf_counter_set_period(struct perf_
 			     struct hw_perf_counter *hwc, int idx)
 {
 	s64 left = atomic64_read(&hwc->period_left);
-	s64 period = hwc->irq_period;
+	s64 period = min(x86_pmu.max_period, hwc->irq_period);
 	int err;
 
 	/*
Index: linux-2.6/include/linux/perf_counter.h
===================================================================
--- linux-2.6.orig/include/linux/perf_counter.h
+++ linux-2.6/include/linux/perf_counter.h
@@ -130,7 +130,11 @@ struct perf_counter_hw_event {
 	 */
 	__u64			config;
 
-	__u64			irq_period;
+	union {
+		__u64		irq_period;
+		__u64		irq_freq;
+	};
+
 	__u32			record_type;
 	__u32			read_format;
 
@@ -146,8 +150,9 @@ struct perf_counter_hw_event {
 				mmap           :  1, /* include mmap data     */
 				munmap         :  1, /* include munmap data   */
 				comm	       :  1, /* include comm data     */
+				freq           :  1, /* use freq, not period  */
 
-				__reserved_1   : 52;
+				__reserved_1   : 51;
 
 	__u32			extra_config_len;
 	__u32			wakeup_events;	/* wakeup every n events */
@@ -337,6 +342,7 @@ struct hw_perf_counter {
 	atomic64_t			prev_count;
 	u64				irq_period;
 	atomic64_t			period_left;
+	u64				interrupts;
 #endif
 };
 
Index: linux-2.6/kernel/perf_counter.c
===================================================================
--- linux-2.6.orig/kernel/perf_counter.c
+++ linux-2.6/kernel/perf_counter.c
@@ -1046,6 +1046,38 @@ int perf_counter_task_enable(void)
 	return 0;
 }
 
+void perf_adjust_freq(struct perf_counter_context *ctx)
+{
+	struct perf_counter *counter;
+	u64 irq_period;
+	u64 events, period;
+	s64 delta;
+
+	spin_lock(&ctx->lock);
+	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+		if (counter->state != PERF_COUNTER_STATE_ACTIVE)
+			continue;
+
+		if (!counter->hw_event.freq || !counter->hw_event.irq_freq)
+			continue;
+
+		events = HZ * counter->hw.interrupts * counter->hw.irq_period;
+		period = div64_u64(events, counter->hw_event.irq_freq);
+
+		delta = (s64)(1 + period - counter->hw.irq_period);
+		delta >>= 1;
+
+		irq_period = counter->hw.irq_period + delta;
+
+		if (!irq_period)
+			irq_period = 1;
+
+		counter->hw.irq_period = irq_period;
+		counter->hw.interrupts = 0;
+	}
+	spin_unlock(&ctx->lock);
+}
+
 /*
  * Round-robin a context's counters:
  */
@@ -1081,6 +1113,9 @@ void perf_counter_task_tick(struct task_
 	cpuctx = &per_cpu(perf_cpu_context, cpu);
 	ctx = &curr->perf_counter_ctx;
 
+	perf_adjust_freq(&cpuctx->ctx);
+	perf_adjust_freq(ctx);
+
 	perf_counter_cpu_sched_out(cpuctx);
 	__perf_counter_task_sched_out(ctx);
 
@@ -2382,6 +2417,8 @@ int perf_counter_overflow(struct perf_co
 	int events = atomic_read(&counter->event_limit);
 	int ret = 0;
 
+	counter->hw.interrupts++;
+
 	/*
 	 * XXX event_limit might not quite work as expected on inherited
 	 * counters
@@ -2450,6 +2487,7 @@ static enum hrtimer_restart perf_swcount
 	enum hrtimer_restart ret = HRTIMER_RESTART;
 	struct perf_counter *counter;
 	struct pt_regs *regs;
+	u64 period;
 
 	counter	= container_of(hrtimer, struct perf_counter, hw.hrtimer);
 	counter->pmu->read(counter);
@@ -2468,7 +2506,8 @@ static enum hrtimer_restart perf_swcount
 			ret = HRTIMER_NORESTART;
 	}
 
-	hrtimer_forward_now(hrtimer, ns_to_ktime(counter->hw.irq_period));
+	period = max_t(u64, 10000, counter->hw.irq_period);
+	hrtimer_forward_now(hrtimer, ns_to_ktime(period));
 
 	return ret;
 }
@@ -2629,8 +2668,9 @@ static int cpu_clock_perf_counter_enable
 	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	hwc->hrtimer.function = perf_swcounter_hrtimer;
 	if (hwc->irq_period) {
+		u64 period = max_t(u64, 10000, hwc->irq_period);
 		__hrtimer_start_range_ns(&hwc->hrtimer,
-				ns_to_ktime(hwc->irq_period), 0,
+				ns_to_ktime(period), 0,
 				HRTIMER_MODE_REL, 0);
 	}
 
@@ -2679,8 +2719,9 @@ static int task_clock_perf_counter_enabl
 	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	hwc->hrtimer.function = perf_swcounter_hrtimer;
 	if (hwc->irq_period) {
+		u64 period = max_t(u64, 10000, hwc->irq_period);
 		__hrtimer_start_range_ns(&hwc->hrtimer,
-				ns_to_ktime(hwc->irq_period), 0,
+				ns_to_ktime(period), 0,
 				HRTIMER_MODE_REL, 0);
 	}
 
@@ -2811,9 +2852,7 @@ static const struct pmu *tp_perf_counter
 
 static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
 {
-	struct perf_counter_hw_event *hw_event = &counter->hw_event;
 	const struct pmu *pmu = NULL;
-	struct hw_perf_counter *hwc = &counter->hw;
 
 	/*
 	 * Software counters (currently) can't in general distinguish
@@ -2826,8 +2865,6 @@ static const struct pmu *sw_perf_counter
 	case PERF_COUNT_CPU_CLOCK:
 		pmu = &perf_ops_cpu_clock;
 
-		if (hw_event->irq_period && hw_event->irq_period < 10000)
-			hw_event->irq_period = 10000;
 		break;
 	case PERF_COUNT_TASK_CLOCK:
 		/*
@@ -2839,8 +2876,6 @@ static const struct pmu *sw_perf_counter
 		else
 			pmu = &perf_ops_cpu_clock;
 
-		if (hw_event->irq_period && hw_event->irq_period < 10000)
-			hw_event->irq_period = 10000;
 		break;
 	case PERF_COUNT_PAGE_FAULTS:
 	case PERF_COUNT_PAGE_FAULTS_MIN:
@@ -2854,9 +2889,6 @@ static const struct pmu *sw_perf_counter
 		break;
 	}
 
-	if (pmu)
-		hwc->irq_period = hw_event->irq_period;
-
 	return pmu;
 }
 
@@ -2872,6 +2904,7 @@ perf_counter_alloc(struct perf_counter_h
 {
 	const struct pmu *pmu;
 	struct perf_counter *counter;
+	struct hw_perf_counter *hwc;
 	long err;
 
 	counter = kzalloc(sizeof(*counter), gfpflags);
@@ -2907,6 +2940,12 @@ perf_counter_alloc(struct perf_counter_h
 
 	pmu = NULL;
 
+	hwc = &counter->hw;
+	if (hw_event->freq && hw_event->irq_freq)
+		hwc->irq_period = TICK_NSEC / hw_event->irq_freq;
+	else
+		hwc->irq_period = hw_event->irq_period;
+
 	/*
 	 * we currently do not support PERF_RECORD_GROUP on inherited counters
 	 */
Index: linux-2.6/arch/powerpc/kernel/perf_counter.c
===================================================================
--- linux-2.6.orig/arch/powerpc/kernel/perf_counter.c
+++ linux-2.6/arch/powerpc/kernel/perf_counter.c
@@ -534,7 +534,7 @@ void hw_perf_enable(void)
 			continue;
 		}
 		val = 0;
-		if (counter->hw_event.irq_period) {
+		if (counter->hw.irq_period) {
 			left = atomic64_read(&counter->hw.period_left);
 			if (left < 0x80000000L)
 				val = 0x80000000L - left;
@@ -829,8 +829,6 @@ const struct pmu *hw_perf_counter_init(s
 
 	if (!ppmu)
 		return ERR_PTR(-ENXIO);
-	if ((s64)counter->hw_event.irq_period < 0)
-		return ERR_PTR(-EINVAL);
 	if (!perf_event_raw(&counter->hw_event)) {
 		ev = perf_event_id(&counter->hw_event);
 		if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
@@ -901,7 +899,7 @@ const struct pmu *hw_perf_counter_init(s
 
 	counter->hw.config = events[n];
 	counter->hw.counter_base = cflags[n];
-	atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period);
+	atomic64_set(&counter->hw.period_left, counter->hw.irq_period);
 
 	/*
 	 * See if we need to reserve the PMU.
@@ -934,6 +932,7 @@ const struct pmu *hw_perf_counter_init(s
 static void record_and_restart(struct perf_counter *counter, long val,
 			       struct pt_regs *regs, int nmi)
 {
+	u64 period = counter->hw.irq_period;
 	s64 prev, delta, left;
 	int record = 0;
 
@@ -948,11 +947,11 @@ static void record_and_restart(struct pe
 	 */
 	val = 0;
 	left = atomic64_read(&counter->hw.period_left) - delta;
-	if (counter->hw_event.irq_period) {
+	if (period) {
 		if (left <= 0) {
-			left += counter->hw_event.irq_period;
+			left += period;
 			if (left <= 0)
-				left = counter->hw_event.irq_period;
+				left = period;
 			record = 1;
 		}
 		if (left < 0x80000000L)

-- 


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 4/4] perf_counter: update perf-top to use the new freq interface
  2009-05-15 13:19 [PATCH 0/4] perf-counter bits Peter Zijlstra
                   ` (2 preceding siblings ...)
  2009-05-15 13:19 ` [PATCH 3/4] perf_counter: frequency based adaptive irq_period Peter Zijlstra
@ 2009-05-15 13:19 ` Peter Zijlstra
  2009-05-15 13:33   ` Ingo Molnar
  2009-05-15 14:43   ` [tip:perfcounters/core] perf top: update " tip-bot for Peter Zijlstra
  3 siblings, 2 replies; 13+ messages in thread
From: Peter Zijlstra @ 2009-05-15 13:19 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Paul Mackerras, Corey Ashford, linux-kernel, Peter Zijlstra

[-- Attachment #1: perf_counter-tool-freq.patch --]
[-- Type: text/plain, Size: 1836 bytes --]

Provide perf top -F as alternative to -c.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Paul Mackerras <paulus@samba.org>
CC: Corey Ashford <cjashfor@linux.vnet.ibm.com>
---
 Documentation/perf_counter/builtin-top.c |    6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

Index: linux-2.6/Documentation/perf_counter/builtin-top.c
===================================================================
--- linux-2.6.orig/Documentation/perf_counter/builtin-top.c
+++ linux-2.6/Documentation/perf_counter/builtin-top.c
@@ -98,6 +98,7 @@ static unsigned int		page_size;
 static unsigned int		mmap_pages			=  16;
 static int			use_mmap			= 0;
 static int			use_munmap			= 0;
+static int			freq				= 0;
 
 static char			*vmlinux;
 
@@ -846,9 +847,10 @@ static void process_options(int argc, ch
 			{"stat",	no_argument,		NULL, 'S'},
 			{"vmlinux",	required_argument,	NULL, 'x'},
 			{"zero",	no_argument,		NULL, 'z'},
+			{"freq",	required_argument,	NULL, 'F'},
 			{NULL,		0,			NULL,  0 }
 		};
-		int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hln:m:p:r:s:Sx:zMU",
+		int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hln:m:p:r:s:Sx:zMUF:",
 				    long_options, &option_index);
 		if (c == -1)
 			break;
@@ -889,6 +891,7 @@ static void process_options(int argc, ch
 		case 'm': mmap_pages			=   atoi(optarg); break;
 		case 'M': use_mmap			=              1; break;
 		case 'U': use_munmap			=              1; break;
+		case 'F': freq = 1; default_interval	=   atoi(optarg); break;
 		default: error = 1; break;
 		}
 	}
@@ -1075,6 +1078,7 @@ int cmd_top(int argc, char **argv, const
 			hw_event.nmi		= nmi;
 			hw_event.mmap		= use_mmap;
 			hw_event.munmap		= use_munmap;
+			hw_event.freq		= freq;
 
 			fd[i][counter] = sys_perf_counter_open(&hw_event, tid, cpu, group_fd, 0);
 			if (fd[i][counter] < 0) {

-- 


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 4/4] perf_counter: update perf-top to use the new freq interface
  2009-05-15 13:19 ` [PATCH 4/4] perf_counter: update perf-top to use the new freq interface Peter Zijlstra
@ 2009-05-15 13:33   ` Ingo Molnar
  2009-05-15 14:43   ` [tip:perfcounters/core] perf top: update " tip-bot for Peter Zijlstra
  1 sibling, 0 replies; 13+ messages in thread
From: Ingo Molnar @ 2009-05-15 13:33 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Paul Mackerras, Corey Ashford, linux-kernel,
	Arnaldo Carvalho de Melo


* Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:

> Provide perf top -F as alternative to -c.
> 
> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
> CC: Paul Mackerras <paulus@samba.org>
> CC: Corey Ashford <cjashfor@linux.vnet.ibm.com>
> ---
>  Documentation/perf_counter/builtin-top.c |    6 +++++-
>  1 file changed, 5 insertions(+), 1 deletion(-)
> 
> Index: linux-2.6/Documentation/perf_counter/builtin-top.c
> ===================================================================
> --- linux-2.6.orig/Documentation/perf_counter/builtin-top.c
> +++ linux-2.6/Documentation/perf_counter/builtin-top.c
> @@ -98,6 +98,7 @@ static unsigned int		page_size;
>  static unsigned int		mmap_pages			=  16;
>  static int			use_mmap			= 0;
>  static int			use_munmap			= 0;
> +static int			freq				= 0;
>  
>  static char			*vmlinux;
>  
> @@ -846,9 +847,10 @@ static void process_options(int argc, ch
>  			{"stat",	no_argument,		NULL, 'S'},
>  			{"vmlinux",	required_argument,	NULL, 'x'},
>  			{"zero",	no_argument,		NULL, 'z'},
> +			{"freq",	required_argument,	NULL, 'F'},
>  			{NULL,		0,			NULL,  0 }
>  		};
> -		int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hln:m:p:r:s:Sx:zMU",
> +		int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hln:m:p:r:s:Sx:zMUF:",
>  				    long_options, &option_index);
>  		if (c == -1)
>  			break;
> @@ -889,6 +891,7 @@ static void process_options(int argc, ch
>  		case 'm': mmap_pages			=   atoi(optarg); break;
>  		case 'M': use_mmap			=              1; break;
>  		case 'U': use_munmap			=              1; break;
> +		case 'F': freq = 1; default_interval	=   atoi(optarg); break;
>  		default: error = 1; break;
>  		}
>  	}
> @@ -1075,6 +1078,7 @@ int cmd_top(int argc, char **argv, const
>  			hw_event.nmi		= nmi;
>  			hw_event.mmap		= use_mmap;
>  			hw_event.munmap		= use_munmap;
> +			hw_event.freq		= freq;
>  
>  			fd[i][counter] = sys_perf_counter_open(&hw_event, tid, cpu, group_fd, 0);
>  			if (fd[i][counter] < 0) {

this frequency-based profiling is nice. It's a lot more intuitive to 
users than rigid defaults of 'one IRQ per 100,000 cycles'.

So I think perf-top should be changed to have -F enabled by default, 
with a default 10 KHz frequency for all counters.

But for that we need another fix for this: currently the histogram 
is 'number of interrupts' based, which gets skewed with frequency 
based profiling.

A correct sorting key would be a normalized histogram, along 'number 
of hardware events', which could be measured as deltas between 
interrupts, like this:

	counter_val: 1200000       [ IRQ ] -> { 1200000, RIP-1 }
                 .
                 .
                 .
        counter_val: 1250000       [ IRQ ] -> { 1250000, RIP-2 }
                 .
                 .
                 .
        counter_val: 1260000       [ IRQ ] -> { 1260000, RIP-3 }

look at how the delta between the first and the second IRQ was 50000 
cycles, while the delta between the second and third IRQ was just 
10000 cycles - because the frequency adjustment code shortened the 
period.

So in the histogram, RIP-2 should get 50,000 cycles, and RIP-3 
should get 10,000 cycles.

With the current scheme both would get +1 event credited - which is 
wrong.

Agreed?

	Ingo

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 3/4] perf_counter: frequency based adaptive irq_period
  2009-05-15 13:19 ` [PATCH 3/4] perf_counter: frequency based adaptive irq_period Peter Zijlstra
@ 2009-05-15 13:35   ` Ingo Molnar
  2009-05-15 13:37   ` Peter Zijlstra
  2009-05-15 14:43   ` [tip:perfcounters/core] perf_counter: frequency based adaptive irq_period tip-bot for Peter Zijlstra
  2 siblings, 0 replies; 13+ messages in thread
From: Ingo Molnar @ 2009-05-15 13:35 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Paul Mackerras, Corey Ashford, linux-kernel, Thomas Gleixner,
	Arnaldo Carvalho de Melo


* Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:

> @@ -2907,6 +2940,12 @@ perf_counter_alloc(struct perf_counter_h
>  
>  	pmu = NULL;
>  
> +	hwc = &counter->hw;
> +	if (hw_event->freq && hw_event->irq_freq)
> +		hwc->irq_period = TICK_NSEC / hw_event->irq_freq;

this needs to use a 64-bit do_div method too, otherwise we get this 
on 32-bit:

 kernel/built-in.o: In function `perf_counter_alloc':
 perf_counter.c:(.text+0x7ddc7): undefined reference to `__udivdi3'

	Ingo

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 3/4] perf_counter: frequency based adaptive irq_period
  2009-05-15 13:19 ` [PATCH 3/4] perf_counter: frequency based adaptive irq_period Peter Zijlstra
  2009-05-15 13:35   ` Ingo Molnar
@ 2009-05-15 13:37   ` Peter Zijlstra
  2009-05-15 14:43     ` [tip:perfcounters/core] perf_counter: frequency based adaptive irq_period, 32-bit fix tip-bot for Peter Zijlstra
  2009-05-15 14:43   ` [tip:perfcounters/core] perf_counter: frequency based adaptive irq_period tip-bot for Peter Zijlstra
  2 siblings, 1 reply; 13+ messages in thread
From: Peter Zijlstra @ 2009-05-15 13:37 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Paul Mackerras, Corey Ashford, linux-kernel

On Fri, 2009-05-15 at 15:19 +0200, Peter Zijlstra wrote:
> plain text document attachment (perf_counter-freq.patch)
> Instead of specifying the irq_period for a counter, provide a target interrupt
> frequency and dynamically adapt the irq_period to match this frequency.
> 
> Paul, please verify ppc64, I only did a very quick pass over that code.
> 
> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
> CC: Paul Mackerras <paulus@samba.org>
> CC: Corey Ashford <cjashfor@linux.vnet.ibm.com>
> ---

> @@ -2907,6 +2940,12 @@ perf_counter_alloc(struct perf_counter_h
>  
>  	pmu = NULL;
>  
> +	hwc = &counter->hw;
> +	if (hw_event->freq && hw_event->irq_freq)
> +		hwc->irq_period = TICK_NSEC / hw_event->irq_freq;
> +	else
> +		hwc->irq_period = hw_event->irq_period;
> +



kernel/built-in.o: In function `perf_counter_alloc':
perf_counter.c:(.text+0x7ddc7): undefined reference to `__udivdi3'

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---

Index: linux-2.6/kernel/perf_counter.c
===================================================================
--- linux-2.6.orig/kernel/perf_counter.c
+++ linux-2.6/kernel/perf_counter.c
@@ -2942,7 +2942,7 @@ perf_counter_alloc(struct perf_counter_h
 
 	hwc = &counter->hw;
 	if (hw_event->freq && hw_event->irq_freq)
-		hwc->irq_period = TICK_NSEC / hw_event->irq_freq;
+		hwc->irq_period = div64_u64(TICK_NSEC, hw_event->irq_freq);
 	else
 		hwc->irq_period = hw_event->irq_period;
 



^ permalink raw reply	[flat|nested] 13+ messages in thread

* [tip:perfcounters/core] perf_counter: remove perf_disable/enable exports
  2009-05-15 13:19 ` [PATCH 1/4] perf_counter: remove perf_disable/enable exports Peter Zijlstra
@ 2009-05-15 14:43   ` tip-bot for Peter Zijlstra
  0 siblings, 0 replies; 13+ messages in thread
From: tip-bot for Peter Zijlstra @ 2009-05-15 14:43 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, acme, paulus, hpa, mingo, a.p.zijlstra, tglx,
	cjashfor, mingo

Commit-ID:  548e1ddf255b4ebfb4ef20c08936fd8d4deb3bd9
Gitweb:     http://git.kernel.org/tip/548e1ddf255b4ebfb4ef20c08936fd8d4deb3bd9
Author:     Peter Zijlstra <a.p.zijlstra@chello.nl>
AuthorDate: Fri, 15 May 2009 15:19:26 +0200
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Fri, 15 May 2009 15:26:55 +0200

perf_counter: remove perf_disable/enable exports

Now that ACPI idle doesn't use it anymore, remove the exports.

[ Impact: remove dead code/data ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <20090515132018.429826617@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>


---
 kernel/perf_counter.c |    2 --
 1 files changed, 0 insertions(+), 2 deletions(-)

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index e814ff0..0173738 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -90,14 +90,12 @@ void perf_disable(void)
 	__perf_disable();
 	hw_perf_disable();
 }
-EXPORT_SYMBOL_GPL(perf_disable); /* ACPI idle */
 
 void perf_enable(void)
 {
 	if (__perf_enable())
 		hw_perf_enable();
 }
-EXPORT_SYMBOL_GPL(perf_enable); /* ACPI idle */
 
 static void
 list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [tip:perfcounters/core] perf_counter: per user mlock gift
  2009-05-15 13:19 ` [PATCH 2/4] perf_counter: per user mlock gift Peter Zijlstra
@ 2009-05-15 14:43   ` tip-bot for Peter Zijlstra
  0 siblings, 0 replies; 13+ messages in thread
From: tip-bot for Peter Zijlstra @ 2009-05-15 14:43 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, acme, paulus, hpa, mingo, a.p.zijlstra, tglx,
	cjashfor, mingo

Commit-ID:  789f90fcf6b0b54e655740e9396c954378542c79
Gitweb:     http://git.kernel.org/tip/789f90fcf6b0b54e655740e9396c954378542c79
Author:     Peter Zijlstra <a.p.zijlstra@chello.nl>
AuthorDate: Fri, 15 May 2009 15:19:27 +0200
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Fri, 15 May 2009 15:26:56 +0200

perf_counter: per user mlock gift

Instead of a per-process mlock gift for perf-counters, use a
per-user gift so that there is less of a DoS potential.

[ Impact: allow less worst-case unprivileged memory consumption ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <20090515132018.496182835@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>


---
 include/linux/sched.h |    4 ++++
 kernel/perf_counter.c |   22 +++++++++++++++-------
 2 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index d185758..ff59d12 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -674,6 +674,10 @@ struct user_struct {
 	struct work_struct work;
 #endif
 #endif
+
+#ifdef CONFIG_PERF_COUNTERS
+	atomic_long_t locked_vm;
+#endif
 };
 
 extern int uids_sysfs_init(void);
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 0173738..93f4a0e 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -45,7 +45,7 @@ static atomic_t nr_munmap_tracking __read_mostly;
 static atomic_t nr_comm_tracking __read_mostly;
 
 int sysctl_perf_counter_priv __read_mostly; /* do we need to be privileged */
-int sysctl_perf_counter_mlock __read_mostly = 128; /* 'free' kb per counter */
+int sysctl_perf_counter_mlock __read_mostly = 512; /* 'free' kb per user */
 
 /*
  * Lock for (sysadmin-configurable) counter reservations:
@@ -1522,6 +1522,9 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 
 	if (atomic_dec_and_mutex_lock(&counter->mmap_count,
 				      &counter->mmap_mutex)) {
+		struct user_struct *user = current_user();
+
+		atomic_long_sub(counter->data->nr_pages + 1, &user->locked_vm);
 		vma->vm_mm->locked_vm -= counter->data->nr_locked;
 		perf_mmap_data_free(counter);
 		mutex_unlock(&counter->mmap_mutex);
@@ -1537,11 +1540,13 @@ static struct vm_operations_struct perf_mmap_vmops = {
 static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	struct perf_counter *counter = file->private_data;
+	struct user_struct *user = current_user();
 	unsigned long vma_size;
 	unsigned long nr_pages;
+	unsigned long user_locked, user_lock_limit;
 	unsigned long locked, lock_limit;
+	long user_extra, extra;
 	int ret = 0;
-	long extra;
 
 	if (!(vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_WRITE))
 		return -EINVAL;
@@ -1569,15 +1574,17 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 		goto unlock;
 	}
 
-	extra = nr_pages /* + 1 only account the data pages */;
-	extra -= sysctl_perf_counter_mlock >> (PAGE_SHIFT - 10);
-	if (extra < 0)
-		extra = 0;
+	user_extra = nr_pages + 1;
+	user_lock_limit = sysctl_perf_counter_mlock >> (PAGE_SHIFT - 10);
+	user_locked = atomic_long_read(&user->locked_vm) + user_extra;
 
-	locked = vma->vm_mm->locked_vm + extra;
+	extra = 0;
+	if (user_locked > user_lock_limit)
+		extra = user_locked - user_lock_limit;
 
 	lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
 	lock_limit >>= PAGE_SHIFT;
+	locked = vma->vm_mm->locked_vm + extra;
 
 	if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
 		ret = -EPERM;
@@ -1590,6 +1597,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 		goto unlock;
 
 	atomic_set(&counter->mmap_count, 1);
+	atomic_long_add(user_extra, &user->locked_vm);
 	vma->vm_mm->locked_vm += extra;
 	counter->data->nr_locked = extra;
 unlock:

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [tip:perfcounters/core] perf_counter: frequency based adaptive irq_period
  2009-05-15 13:19 ` [PATCH 3/4] perf_counter: frequency based adaptive irq_period Peter Zijlstra
  2009-05-15 13:35   ` Ingo Molnar
  2009-05-15 13:37   ` Peter Zijlstra
@ 2009-05-15 14:43   ` tip-bot for Peter Zijlstra
  2 siblings, 0 replies; 13+ messages in thread
From: tip-bot for Peter Zijlstra @ 2009-05-15 14:43 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, acme, paulus, hpa, mingo, a.p.zijlstra, tglx,
	cjashfor, mingo

Commit-ID:  60db5e09c13109b13830cc9dcae688003fd39e79
Gitweb:     http://git.kernel.org/tip/60db5e09c13109b13830cc9dcae688003fd39e79
Author:     Peter Zijlstra <a.p.zijlstra@chello.nl>
AuthorDate: Fri, 15 May 2009 15:19:28 +0200
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Fri, 15 May 2009 15:26:56 +0200

perf_counter: frequency based adaptive irq_period

Instead of specifying the irq_period for a counter, provide a target interrupt
frequency and dynamically adapt the irq_period to match this frequency.

[ Impact: new perf-counter attribute/feature ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <20090515132018.646195868@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>


---
 arch/powerpc/kernel/perf_counter.c |   13 +++----
 arch/x86/kernel/cpu/perf_counter.c |    9 ++---
 include/linux/perf_counter.h       |   10 ++++-
 kernel/perf_counter.c              |   63 +++++++++++++++++++++++++++++-------
 4 files changed, 68 insertions(+), 27 deletions(-)

diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index bb1b463..db8d5ca 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -534,7 +534,7 @@ void hw_perf_enable(void)
 			continue;
 		}
 		val = 0;
-		if (counter->hw_event.irq_period) {
+		if (counter->hw.irq_period) {
 			left = atomic64_read(&counter->hw.period_left);
 			if (left < 0x80000000L)
 				val = 0x80000000L - left;
@@ -829,8 +829,6 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 
 	if (!ppmu)
 		return ERR_PTR(-ENXIO);
-	if ((s64)counter->hw_event.irq_period < 0)
-		return ERR_PTR(-EINVAL);
 	if (!perf_event_raw(&counter->hw_event)) {
 		ev = perf_event_id(&counter->hw_event);
 		if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0)
@@ -901,7 +899,7 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 
 	counter->hw.config = events[n];
 	counter->hw.counter_base = cflags[n];
-	atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period);
+	atomic64_set(&counter->hw.period_left, counter->hw.irq_period);
 
 	/*
 	 * See if we need to reserve the PMU.
@@ -934,6 +932,7 @@ const struct pmu *hw_perf_counter_init(struct perf_counter *counter)
 static void record_and_restart(struct perf_counter *counter, long val,
 			       struct pt_regs *regs, int nmi)
 {
+	u64 period = counter->hw.irq_period;
 	s64 prev, delta, left;
 	int record = 0;
 
@@ -948,11 +947,11 @@ static void record_and_restart(struct perf_counter *counter, long val,
 	 */
 	val = 0;
 	left = atomic64_read(&counter->hw.period_left) - delta;
-	if (counter->hw_event.irq_period) {
+	if (period) {
 		if (left <= 0) {
-			left += counter->hw_event.irq_period;
+			left += period;
 			if (left <= 0)
-				left = counter->hw_event.irq_period;
+				left = period;
 			record = 1;
 		}
 		if (left < 0x80000000L)
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 5a7f718..886dcf3 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -286,11 +286,8 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 		hwc->nmi = 1;
 	}
 
-	hwc->irq_period	= hw_event->irq_period;
-	if ((s64)hwc->irq_period <= 0 || hwc->irq_period > x86_pmu.max_period)
-		hwc->irq_period = x86_pmu.max_period;
-
-	atomic64_set(&hwc->period_left, hwc->irq_period);
+	atomic64_set(&hwc->period_left,
+			min(x86_pmu.max_period, hwc->irq_period));
 
 	/*
 	 * Raw event type provide the config in the event structure
@@ -458,7 +455,7 @@ x86_perf_counter_set_period(struct perf_counter *counter,
 			     struct hw_perf_counter *hwc, int idx)
 {
 	s64 left = atomic64_read(&hwc->period_left);
-	s64 period = hwc->irq_period;
+	s64 period = min(x86_pmu.max_period, hwc->irq_period);
 	int err;
 
 	/*
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index e543ecc..004b6e1 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -130,7 +130,11 @@ struct perf_counter_hw_event {
 	 */
 	__u64			config;
 
-	__u64			irq_period;
+	union {
+		__u64		irq_period;
+		__u64		irq_freq;
+	};
+
 	__u32			record_type;
 	__u32			read_format;
 
@@ -146,8 +150,9 @@ struct perf_counter_hw_event {
 				mmap           :  1, /* include mmap data     */
 				munmap         :  1, /* include munmap data   */
 				comm	       :  1, /* include comm data     */
+				freq           :  1, /* use freq, not period  */
 
-				__reserved_1   : 52;
+				__reserved_1   : 51;
 
 	__u32			extra_config_len;
 	__u32			wakeup_events;	/* wakeup every n events */
@@ -337,6 +342,7 @@ struct hw_perf_counter {
 	atomic64_t			prev_count;
 	u64				irq_period;
 	atomic64_t			period_left;
+	u64				interrupts;
 #endif
 };
 
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 93f4a0e..0ad1db4 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -1046,6 +1046,38 @@ int perf_counter_task_enable(void)
 	return 0;
 }
 
+void perf_adjust_freq(struct perf_counter_context *ctx)
+{
+	struct perf_counter *counter;
+	u64 irq_period;
+	u64 events, period;
+	s64 delta;
+
+	spin_lock(&ctx->lock);
+	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+		if (counter->state != PERF_COUNTER_STATE_ACTIVE)
+			continue;
+
+		if (!counter->hw_event.freq || !counter->hw_event.irq_freq)
+			continue;
+
+		events = HZ * counter->hw.interrupts * counter->hw.irq_period;
+		period = div64_u64(events, counter->hw_event.irq_freq);
+
+		delta = (s64)(1 + period - counter->hw.irq_period);
+		delta >>= 1;
+
+		irq_period = counter->hw.irq_period + delta;
+
+		if (!irq_period)
+			irq_period = 1;
+
+		counter->hw.irq_period = irq_period;
+		counter->hw.interrupts = 0;
+	}
+	spin_unlock(&ctx->lock);
+}
+
 /*
  * Round-robin a context's counters:
  */
@@ -1081,6 +1113,9 @@ void perf_counter_task_tick(struct task_struct *curr, int cpu)
 	cpuctx = &per_cpu(perf_cpu_context, cpu);
 	ctx = &curr->perf_counter_ctx;
 
+	perf_adjust_freq(&cpuctx->ctx);
+	perf_adjust_freq(ctx);
+
 	perf_counter_cpu_sched_out(cpuctx);
 	__perf_counter_task_sched_out(ctx);
 
@@ -2382,6 +2417,8 @@ int perf_counter_overflow(struct perf_counter *counter,
 	int events = atomic_read(&counter->event_limit);
 	int ret = 0;
 
+	counter->hw.interrupts++;
+
 	/*
 	 * XXX event_limit might not quite work as expected on inherited
 	 * counters
@@ -2450,6 +2487,7 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
 	enum hrtimer_restart ret = HRTIMER_RESTART;
 	struct perf_counter *counter;
 	struct pt_regs *regs;
+	u64 period;
 
 	counter	= container_of(hrtimer, struct perf_counter, hw.hrtimer);
 	counter->pmu->read(counter);
@@ -2468,7 +2506,8 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
 			ret = HRTIMER_NORESTART;
 	}
 
-	hrtimer_forward_now(hrtimer, ns_to_ktime(counter->hw.irq_period));
+	period = max_t(u64, 10000, counter->hw.irq_period);
+	hrtimer_forward_now(hrtimer, ns_to_ktime(period));
 
 	return ret;
 }
@@ -2629,8 +2668,9 @@ static int cpu_clock_perf_counter_enable(struct perf_counter *counter)
 	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	hwc->hrtimer.function = perf_swcounter_hrtimer;
 	if (hwc->irq_period) {
+		u64 period = max_t(u64, 10000, hwc->irq_period);
 		__hrtimer_start_range_ns(&hwc->hrtimer,
-				ns_to_ktime(hwc->irq_period), 0,
+				ns_to_ktime(period), 0,
 				HRTIMER_MODE_REL, 0);
 	}
 
@@ -2679,8 +2719,9 @@ static int task_clock_perf_counter_enable(struct perf_counter *counter)
 	hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	hwc->hrtimer.function = perf_swcounter_hrtimer;
 	if (hwc->irq_period) {
+		u64 period = max_t(u64, 10000, hwc->irq_period);
 		__hrtimer_start_range_ns(&hwc->hrtimer,
-				ns_to_ktime(hwc->irq_period), 0,
+				ns_to_ktime(period), 0,
 				HRTIMER_MODE_REL, 0);
 	}
 
@@ -2811,9 +2852,7 @@ static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
 
 static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
 {
-	struct perf_counter_hw_event *hw_event = &counter->hw_event;
 	const struct pmu *pmu = NULL;
-	struct hw_perf_counter *hwc = &counter->hw;
 
 	/*
 	 * Software counters (currently) can't in general distinguish
@@ -2826,8 +2865,6 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
 	case PERF_COUNT_CPU_CLOCK:
 		pmu = &perf_ops_cpu_clock;
 
-		if (hw_event->irq_period && hw_event->irq_period < 10000)
-			hw_event->irq_period = 10000;
 		break;
 	case PERF_COUNT_TASK_CLOCK:
 		/*
@@ -2839,8 +2876,6 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
 		else
 			pmu = &perf_ops_cpu_clock;
 
-		if (hw_event->irq_period && hw_event->irq_period < 10000)
-			hw_event->irq_period = 10000;
 		break;
 	case PERF_COUNT_PAGE_FAULTS:
 	case PERF_COUNT_PAGE_FAULTS_MIN:
@@ -2854,9 +2889,6 @@ static const struct pmu *sw_perf_counter_init(struct perf_counter *counter)
 		break;
 	}
 
-	if (pmu)
-		hwc->irq_period = hw_event->irq_period;
-
 	return pmu;
 }
 
@@ -2872,6 +2904,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 {
 	const struct pmu *pmu;
 	struct perf_counter *counter;
+	struct hw_perf_counter *hwc;
 	long err;
 
 	counter = kzalloc(sizeof(*counter), gfpflags);
@@ -2907,6 +2940,12 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 
 	pmu = NULL;
 
+	hwc = &counter->hw;
+	if (hw_event->freq && hw_event->irq_freq)
+		hwc->irq_period = TICK_NSEC / hw_event->irq_freq;
+	else
+		hwc->irq_period = hw_event->irq_period;
+
 	/*
 	 * we currently do not support PERF_RECORD_GROUP on inherited counters
 	 */

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [tip:perfcounters/core] perf top: update to use the new freq interface
  2009-05-15 13:19 ` [PATCH 4/4] perf_counter: update perf-top to use the new freq interface Peter Zijlstra
  2009-05-15 13:33   ` Ingo Molnar
@ 2009-05-15 14:43   ` tip-bot for Peter Zijlstra
  1 sibling, 0 replies; 13+ messages in thread
From: tip-bot for Peter Zijlstra @ 2009-05-15 14:43 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, acme, paulus, hpa, mingo, a.p.zijlstra, tglx,
	cjashfor, mingo

Commit-ID:  f5456a6b056b709282e87a68b4c1b81ac2e866fa
Gitweb:     http://git.kernel.org/tip/f5456a6b056b709282e87a68b4c1b81ac2e866fa
Author:     Peter Zijlstra <a.p.zijlstra@chello.nl>
AuthorDate: Fri, 15 May 2009 15:19:29 +0200
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Fri, 15 May 2009 15:26:57 +0200

perf top: update to use the new freq interface

Provide perf top -F as alternative to -c.

[ Impact: new 'perf top' feature ]

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
LKML-Reference: <20090515132018.707922166@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>


---
 Documentation/perf_counter/builtin-top.c |    6 +++++-
 1 files changed, 5 insertions(+), 1 deletions(-)

diff --git a/Documentation/perf_counter/builtin-top.c b/Documentation/perf_counter/builtin-top.c
index b1549dd..814b2e4 100644
--- a/Documentation/perf_counter/builtin-top.c
+++ b/Documentation/perf_counter/builtin-top.c
@@ -98,6 +98,7 @@ static unsigned int		page_size;
 static unsigned int		mmap_pages			=  16;
 static int			use_mmap			= 0;
 static int			use_munmap			= 0;
+static int			freq				= 0;
 
 static char			*vmlinux;
 
@@ -846,9 +847,10 @@ static void process_options(int argc, char **argv)
 			{"stat",	no_argument,		NULL, 'S'},
 			{"vmlinux",	required_argument,	NULL, 'x'},
 			{"zero",	no_argument,		NULL, 'z'},
+			{"freq",	required_argument,	NULL, 'F'},
 			{NULL,		0,			NULL,  0 }
 		};
-		int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hln:m:p:r:s:Sx:zMU",
+		int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hln:m:p:r:s:Sx:zMUF:",
 				    long_options, &option_index);
 		if (c == -1)
 			break;
@@ -889,6 +891,7 @@ static void process_options(int argc, char **argv)
 		case 'm': mmap_pages			=   atoi(optarg); break;
 		case 'M': use_mmap			=              1; break;
 		case 'U': use_munmap			=              1; break;
+		case 'F': freq = 1; default_interval	=   atoi(optarg); break;
 		default: error = 1; break;
 		}
 	}
@@ -1075,6 +1078,7 @@ int cmd_top(int argc, char **argv, const char *prefix)
 			hw_event.nmi		= nmi;
 			hw_event.mmap		= use_mmap;
 			hw_event.munmap		= use_munmap;
+			hw_event.freq		= freq;
 
 			fd[i][counter] = sys_perf_counter_open(&hw_event, tid, cpu, group_fd, 0);
 			if (fd[i][counter] < 0) {

^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [tip:perfcounters/core] perf_counter: frequency based adaptive irq_period, 32-bit fix
  2009-05-15 13:37   ` Peter Zijlstra
@ 2009-05-15 14:43     ` tip-bot for Peter Zijlstra
  0 siblings, 0 replies; 13+ messages in thread
From: tip-bot for Peter Zijlstra @ 2009-05-15 14:43 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, acme, paulus, hpa, mingo, a.p.zijlstra, tglx,
	cjashfor, mingo

Commit-ID:  2e569d36729c8105ae066a9b105068305442cc77
Gitweb:     http://git.kernel.org/tip/2e569d36729c8105ae066a9b105068305442cc77
Author:     Peter Zijlstra <a.p.zijlstra@chello.nl>
AuthorDate: Fri, 15 May 2009 15:37:47 +0200
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Fri, 15 May 2009 15:40:25 +0200

perf_counter: frequency based adaptive irq_period, 32-bit fix

fix:

  kernel/built-in.o: In function `perf_counter_alloc':
  perf_counter.c:(.text+0x7ddc7): undefined reference to `__udivdi3'

[ Impact: build fix on 32-bit systems ]

Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
LKML-Reference: <1242394667.6642.1887.camel@laptop>
Signed-off-by: Ingo Molnar <mingo@elte.hu>


---
 kernel/perf_counter.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 0ad1db4..728a595 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -2942,7 +2942,7 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
 
 	hwc = &counter->hw;
 	if (hw_event->freq && hw_event->irq_freq)
-		hwc->irq_period = TICK_NSEC / hw_event->irq_freq;
+		hwc->irq_period = div64_u64(TICK_NSEC, hw_event->irq_freq);
 	else
 		hwc->irq_period = hw_event->irq_period;
 

^ permalink raw reply related	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2009-05-15 14:45 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-05-15 13:19 [PATCH 0/4] perf-counter bits Peter Zijlstra
2009-05-15 13:19 ` [PATCH 1/4] perf_counter: remove perf_disable/enable exports Peter Zijlstra
2009-05-15 14:43   ` [tip:perfcounters/core] " tip-bot for Peter Zijlstra
2009-05-15 13:19 ` [PATCH 2/4] perf_counter: per user mlock gift Peter Zijlstra
2009-05-15 14:43   ` [tip:perfcounters/core] " tip-bot for Peter Zijlstra
2009-05-15 13:19 ` [PATCH 3/4] perf_counter: frequency based adaptive irq_period Peter Zijlstra
2009-05-15 13:35   ` Ingo Molnar
2009-05-15 13:37   ` Peter Zijlstra
2009-05-15 14:43     ` [tip:perfcounters/core] perf_counter: frequency based adaptive irq_period, 32-bit fix tip-bot for Peter Zijlstra
2009-05-15 14:43   ` [tip:perfcounters/core] perf_counter: frequency based adaptive irq_period tip-bot for Peter Zijlstra
2009-05-15 13:19 ` [PATCH 4/4] perf_counter: update perf-top to use the new freq interface Peter Zijlstra
2009-05-15 13:33   ` Ingo Molnar
2009-05-15 14:43   ` [tip:perfcounters/core] perf top: update " tip-bot for Peter Zijlstra

This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.