From: Martin Peschke <mp3@de.ibm.com>
To: linux-kernel@vger.kernel.org
Cc: Andrew Morton <akpm@linux-foundation.org>,
linux-s390@vger.kernel.org, tglx@timesys.com,
d.pensator@gmail.com, mingo@redhat.com
Subject: [RFC] [Patch 4/4] timer_stats slimmed down: using statistics infrastucture
Date: Sat, 14 Apr 2007 12:10:01 +0200 [thread overview]
Message-ID: <1176545401.5462.112.camel@dix> (raw)
This patch implements timer_stats based on the statistics
infrastructure.
Here is some sample output, which even somewhat
resembles /proc/timer_stats.
(>80 char lines because of symbol names, sorry for line breaks).
It reads:
<statistic name> <basket> <hits> <label>
with label being:
<pid> <task name> <start_fn>(<stop_fn>)
[root@t2930041 timer_stats]# cat data
expired missed 0x0
expired - 5381 1019 multipathd do_nanosleep (hrtimer_wakeup)
expired - 2707 1 swapper queue_delayed_work_on
(delayed_work_timer_fn)
expired - 2707 1 swapper queue_delayed_work_on
(delayed_work_timer_fn)
expired - 2707 1 swapper queue_delayed_work_on
(delayed_work_timer_fn)
expired - 2707 1 swapper queue_delayed_work_on
(delayed_work_timer_fn)
expired - 1353 1 swapper neigh_table_init_no_netlink
(neigh_periodic_timer)
expired - 1083 0 swapper page_writeback_init (wb_timer_fn)
expired - 1083 1021 multipathd schedule_timeout
(process_timeout)
expired - 1081 1 init schedule_timeout
(process_timeout)
expired - 677 1 swapper neigh_table_init_no_netlink
(neigh_periodic_timer)
expired - 278 0 swapper sk_reset_timer (tcp_delack_timer)
expired - 193 1 swapper nfsd_export_init
(delayed_work_timer_fn)
expired - 181 975 syslogd do_setitimer (it_real_fn)
expired - 104 1296 sshd sk_reset_timer (tcp_write_timer)
expired - 90 1 swapper ip_rt_init (rt_check_expire)
expired - 90 1244 crond do_nanosleep (hrtimer_wakeup)
expired - 54 896 ip __netdev_watchdog_up
(dev_watchdog)
expired - 44 1 swapper inet_initpeers
(peer_check_expire)
expired - 44 0 swapper addrconf_verify (addrconf_verify)
.
.
.
expired - 1 1368 sadc schedule_timeout
(process_timeout)
expired - 1 1370 sadc blk_plug_device
(blk_unplug_timeout)
expired - 1 1370 sadc schedule_timeout
(process_timeout)
# cat /debug/timer_stats/definition
name=expired state=on units=timer/occurrence type=sparse entries=1024
data=[6920660.417313] started=[6920660.417313] stopped=[ 0.000000]
Signed-off-by: Martin Peschke <mp3@de.ibm.com>
---
time/timer_stats.c | 385 ++++++++---------------------------------------------
timer.c | 2
2 files changed, 59 insertions(+), 328 deletions(-)
Index: linux/kernel/time/timer_stats.c
===================================================================
--- linux.orig/kernel/time/timer_stats.c
+++ linux/kernel/time/timer_stats.c
@@ -26,197 +26,95 @@
* the pid and cmdline from the owner process if applicable.
*
* Start/stop data collection:
- * # echo 1[0] >/proc/timer_stats
+ * # echo state=on[off] > /debug/timer_stats/definition
*
* Display the information collected so far:
- * # cat /proc/timer_stats
+ * # cat /debug/timer_stats/data
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
-#include <linux/proc_fs.h>
#include <linux/module.h>
-#include <linux/spinlock.h>
#include <linux/sched.h>
#include <linux/seq_file.h>
#include <linux/kallsyms.h>
-
-#include <asm/uaccess.h>
+#include <linux/statistic.h>
/*
* This is our basic unit of interest: a timer expiry event identified
* by the timer, its start/expire functions and the PID of the task that
* started the timer. We count the number of times an event happens:
*/
-struct entry {
- /*
- * Hash list:
- */
- struct entry *next;
-
- /*
- * Hash keys:
- */
- void *timer;
- void *start_func;
- void *expire_func;
- pid_t pid;
-
- /*
- * Number of timeout events:
- */
- unsigned long count;
-
+struct tstat_key {
+ void *timer;
+ void *start_func;
+ void *expire_func;
+ pid_t pid;
/*
* We save the command-line string to preserve
* this information past task exit:
*/
- char comm[TASK_COMM_LEN + 1];
-
-} ____cacheline_aligned_in_smp;
-
-/*
- * Spinlock protecting the tables - not taken during lookup:
- */
-static DEFINE_SPINLOCK(table_lock);
-
-/*
- * Per-CPU lookup locks for fast hash lookup:
- */
-static DEFINE_PER_CPU(spinlock_t, lookup_lock);
-
-/*
- * Mutex to serialize state changes with show-stats activities:
- */
-static DEFINE_MUTEX(show_mutex);
-
-/*
- * Collection status, active/inactive:
- */
-static int __read_mostly active;
-
-/*
- * Beginning/end timestamps of measurement:
- */
-static ktime_t time_start, time_stop;
-
-/*
- * tstat entry structs only get allocated while collection is
- * active and never freed during that time - this simplifies
- * things quite a bit.
- *
- * They get freed when a new collection period is started.
- */
-#define MAX_ENTRIES_BITS 10
-#define MAX_ENTRIES (1UL << MAX_ENTRIES_BITS)
-
-static unsigned long nr_entries;
-static struct entry entries[MAX_ENTRIES];
-
-static atomic_t overflow_count;
-
-static void reset_entries(void)
-{
- nr_entries = 0;
- memset(entries, 0, sizeof(entries));
- atomic_set(&overflow_count, 0);
-}
+ char comm[TASK_COMM_LEN + 1];
+};
-static struct entry *alloc_entry(void)
+static void tstat_print_symbol(struct seq_file *m, unsigned long addr)
{
- if (nr_entries >= MAX_ENTRIES)
- return NULL;
-
- return entries + nr_entries++;
-}
-
-/*
- * The entries are in a hash-table, for fast lookup:
- */
-#define TSTAT_HASH_BITS (MAX_ENTRIES_BITS - 1)
-#define TSTAT_HASH_SIZE (1UL << TSTAT_HASH_BITS)
-#define TSTAT_HASH_MASK (TSTAT_HASH_SIZE - 1)
-
-#define __tstat_hashfn(entry) \
- (((unsigned long)(entry)->timer ^ \
- (unsigned long)(entry)->start_func ^ \
- (unsigned long)(entry)->expire_func ^ \
- (unsigned long)(entry)->pid ) & TSTAT_HASH_MASK)
-
-#define tstat_hashentry(entry) (tstat_hash_table + __tstat_hashfn(entry))
-
-static struct entry *tstat_hash_table[TSTAT_HASH_SIZE] __read_mostly;
+ char symname[KSYM_NAME_LEN+1];
-static int match_entries(struct entry *entry1, struct entry *entry2)
-{
- return entry1->timer == entry2->timer &&
- entry1->start_func == entry2->start_func &&
- entry1->expire_func == entry2->expire_func &&
- entry1->pid == entry2->pid;
+ if (lookup_symbol_name(addr, symname) < 0)
+ seq_printf(m, "<%p>", (void *)addr);
+ else
+ seq_printf(m, "%s", symname);
}
-/*
- * Look up whether an entry matching this item is present
- * in the hash already. Must be called with irqs off and the
- * lookup lock held:
- */
-static struct entry *tstat_lookup(struct entry *entry, char *comm)
+static void tstat_label(struct statistic_interface *interface, int i,
+ void *_key, struct seq_file *m)
{
- struct entry **head, *curr, *prev;
+ struct tstat_key *key = _key;
- head = tstat_hashentry(entry);
- curr = *head;
+ seq_printf(m, "\t%5d %-16s ", key->pid, key->comm);
+ tstat_print_symbol(m, (unsigned long)key->start_func);
+ seq_puts(m, " (");
+ tstat_print_symbol(m, (unsigned long)key->expire_func);
+ seq_puts(m, ")");
+}
- /*
- * The fastpath is when the entry is already hashed,
- * we do this with the lookup lock held, but with the
- * table lock not held:
- */
- while (curr) {
- if (match_entries(curr, entry))
- return curr;
+enum tstat_enum {
+ TSTAT_EXPIRED,
+ _TSTAT_NUMBER,
+};
- curr = curr->next;
+static struct statistic_info tstat_info[] = {
+ [TSTAT_EXPIRED] = {
+ .name = "expired",
+ .x_unit = "timer",
+ .y_unit = "occurrence",
+ .defaults = "type=sparse entries=1024",
+ .flags = STATISTIC_FLAGS_KEY,
+ .key_size = sizeof(struct tstat_key)
}
- /*
- * Slowpath: allocate, set up and link a new hash entry:
- */
- prev = NULL;
- curr = *head;
-
- spin_lock(&table_lock);
- /*
- * Make sure we have not raced with another CPU:
- */
- while (curr) {
- if (match_entries(curr, entry))
- goto out_unlock;
+};
- prev = curr;
- curr = curr->next;
- }
+struct statistic tstat_stat[_TSTAT_NUMBER];
- curr = alloc_entry();
- if (curr) {
- *curr = *entry;
- curr->count = 0;
- memcpy(curr->comm, comm, TASK_COMM_LEN);
- if (prev)
- prev->next = curr;
- else
- *head = curr;
- curr->next = NULL;
- }
- out_unlock:
- spin_unlock(&table_lock);
+struct statistic_interface tstat_interface = {
+ .stat = tstat_stat,
+ .info = tstat_info,
+ .number = _TSTAT_NUMBER,
+ .label = tstat_label,
+};
- return curr;
+static int __init tstat_init(void)
+{
+ statistic_create(&tstat_interface, "timer_stats");
+ return 0;
}
+__initcall(tstat_init);
/**
- * timer_stats_update_stats - Update the statistics for a timer.
+ * tstat_inc - Update the statistics for a timer.
* @timer: pointer to either a timer_list or a hrtimer
* @pid: the pid of the task which set up the timer
* @startf: pointer to the function which did the timer setup
@@ -229,179 +127,14 @@ static struct entry *tstat_lookup(struct
void timer_stats_update_stats(void *timer, pid_t pid, void *startf,
void *timerf, char * comm)
{
- /*
- * It doesnt matter which lock we take:
- */
- spinlock_t *lock = &per_cpu(lookup_lock, raw_smp_processor_id());
- struct entry *entry, input;
- unsigned long flags;
-
- input.timer = timer;
- input.start_func = startf;
- input.expire_func = timerf;
- input.pid = pid;
-
- spin_lock_irqsave(lock, flags);
- if (!active)
- goto out_unlock;
-
- entry = tstat_lookup(&input, comm);
- if (likely(entry))
- entry->count++;
- else
- atomic_inc(&overflow_count);
-
- out_unlock:
- spin_unlock_irqrestore(lock, flags);
-}
-
-static void print_name_offset(struct seq_file *m, unsigned long addr)
-{
- char symname[KSYM_NAME_LEN+1];
-
- if (lookup_symbol_name(addr, symname) < 0)
- seq_printf(m, "<%p>", (void *)addr);
- else
- seq_printf(m, "%s", symname);
-}
-
-static int tstats_show(struct seq_file *m, void *v)
-{
- struct timespec period;
- struct entry *entry;
- unsigned long ms;
- long events = 0;
- ktime_t time;
- int i;
-
- mutex_lock(&show_mutex);
- /*
- * If still active then calculate up to now:
- */
- if (active)
- time_stop = ktime_get();
-
- time = ktime_sub(time_stop, time_start);
-
- period = ktime_to_timespec(time);
- ms = period.tv_nsec / 1000000;
-
- seq_puts(m, "Timer Stats Version: v0.1\n");
- seq_printf(m, "Sample period: %ld.%03ld s\n", period.tv_sec, ms);
- if (atomic_read(&overflow_count))
- seq_printf(m, "Overflow: %d entries\n",
- atomic_read(&overflow_count));
-
- for (i = 0; i < nr_entries; i++) {
- entry = entries + i;
- seq_printf(m, "%4lu, %5d %-16s ",
- entry->count, entry->pid, entry->comm);
-
- print_name_offset(m, (unsigned long)entry->start_func);
- seq_puts(m, " (");
- print_name_offset(m, (unsigned long)entry->expire_func);
- seq_puts(m, ")\n");
-
- events += entry->count;
- }
-
- ms += period.tv_sec * 1000;
- if (!ms)
- ms = 1;
-
- if (events && period.tv_sec)
- seq_printf(m, "%ld total events, %ld.%ld events/sec\n", events,
- events / period.tv_sec, events * 1000 / ms);
- else
- seq_printf(m, "%ld total events\n", events);
-
- mutex_unlock(&show_mutex);
-
- return 0;
-}
-
-/*
- * After a state change, make sure all concurrent lookup/update
- * activities have stopped:
- */
-static void sync_access(void)
-{
- unsigned long flags;
- int cpu;
-
- for_each_online_cpu(cpu) {
- spin_lock_irqsave(&per_cpu(lookup_lock, cpu), flags);
- /* nothing */
- spin_unlock_irqrestore(&per_cpu(lookup_lock, cpu), flags);
- }
-}
-
-static ssize_t tstats_write(struct file *file, const char __user *buf,
- size_t count, loff_t *offs)
-{
- char ctl[2];
+ struct tstat_key key;
- if (count != 2 || *offs)
- return -EINVAL;
+ memset(&key, 0, sizeof(struct tstat_key)); /* FIXME: get rid of it? */
+ key.timer = timer;
+ key.start_func = startf;
+ key.expire_func = timerf;
+ key.pid = pid;
+ memcpy(key.comm, comm, TASK_COMM_LEN);
- if (copy_from_user(ctl, buf, count))
- return -EFAULT;
-
- mutex_lock(&show_mutex);
- switch (ctl[0]) {
- case '0':
- if (active) {
- active = 0;
- time_stop = ktime_get();
- sync_access();
- }
- break;
- case '1':
- if (!active) {
- reset_entries();
- time_start = ktime_get();
- active = 1;
- }
- break;
- default:
- count = -EINVAL;
- }
- mutex_unlock(&show_mutex);
-
- return count;
-}
-
-static int tstats_open(struct inode *inode, struct file *filp)
-{
- return single_open(filp, tstats_show, NULL);
-}
-
-static struct file_operations tstats_fops = {
- .open = tstats_open,
- .read = seq_read,
- .write = tstats_write,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
-void __init init_timer_stats(void)
-{
- int cpu;
-
- for_each_possible_cpu(cpu)
- spin_lock_init(&per_cpu(lookup_lock, cpu));
-}
-
-static int __init init_tstats_procfs(void)
-{
- struct proc_dir_entry *pe;
-
- pe = create_proc_entry("timer_stats", 0644, NULL);
- if (!pe)
- return -ENOMEM;
-
- pe->proc_fops = &tstats_fops;
-
- return 0;
+ statistic_inc_as_key(STAT_SPARSE, tstat_stat, TSTAT_EXPIRED, &key);
}
-__initcall(init_tstats_procfs);
Index: linux/kernel/timer.c
===================================================================
--- linux.orig/kernel/timer.c
+++ linux/kernel/timer.c
@@ -1315,8 +1315,6 @@ void __init init_timers(void)
int err = timer_cpu_notify(&timers_nb, (unsigned long)CPU_UP_PREPARE,
(void *)(long)smp_processor_id());
- init_timer_stats();
-
BUG_ON(err == NOTIFY_BAD);
register_cpu_notifier(&timers_nb);
open_softirq(TIMER_SOFTIRQ, run_timer_softirq, NULL);
reply other threads:[~2007-04-14 10:10 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1176545401.5462.112.camel@dix \
--to=mp3@de.ibm.com \
--cc=akpm@linux-foundation.org \
--cc=d.pensator@gmail.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-s390@vger.kernel.org \
--cc=mingo@redhat.com \
--cc=tglx@timesys.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.