linux-perf-users.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/6] Add time-of-day option to perf events
@ 2011-06-07 23:53 David Ahern
  2011-06-07 23:55 ` [PATCH 1/6] trace: add tracepoints to timekeeping code - xtime changes David Ahern
                   ` (6 more replies)
  0 siblings, 7 replies; 26+ messages in thread
From: David Ahern @ 2011-06-07 23:53 UTC (permalink / raw)
  To: linux-perf-users, linux-kernel
  Cc: acme, mingo, peterz, fweisbec, paulus, tglx, David Ahern

Another attempt at printing time-of-day strings on perf events - this
time with tracepoints in the timekeeping code to catch updates to
xtime.

David Ahern (6):
  trace: add tracepoints to timekeeping code - xtime changes
  perf utils: export parse_single_tracepoint_event
  perf: add reference time event
  perf record: add time-of-day option
  perf script: pass trace event to print_trace_event
  perf script: add time-of-day option for displaying events

 include/trace/events/timekeeping.h       |   43 ++++++++
 kernel/time/timekeeping.c                |    8 ++
 tools/perf/Documentation/perf-record.txt |    3 +
 tools/perf/Documentation/perf-script.txt |    8 ++-
 tools/perf/builtin-record.c              |  102 +++++++++++++++++++-
 tools/perf/builtin-script.c              |  157 +++++++++++++++++++++++++++++-
 tools/perf/util/event.c                  |    1 +
 tools/perf/util/event.h                  |    8 ++
 tools/perf/util/parse-events.c           |   13 +--
 tools/perf/util/parse-events.h           |   12 +++
 tools/perf/util/session.c                |    4 +
 tools/perf/util/session.h                |    3 +-
 tools/perf/util/trace-event-parse.c      |   12 +--
 tools/perf/util/trace-event.h            |    2 +-
 14 files changed, 350 insertions(+), 26 deletions(-)
 create mode 100644 include/trace/events/timekeeping.h

-- 
1.7.5.2

^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH 1/6] trace: add tracepoints to timekeeping code - xtime changes
  2011-06-07 23:53 [PATCH 0/6] Add time-of-day option to perf events David Ahern
@ 2011-06-07 23:55 ` David Ahern
  2011-06-17 13:23   ` Frederic Weisbecker
  2011-06-07 23:55 ` [PATCH 2/6] perf utils: export parse_single_tracepoint_event David Ahern
                   ` (5 subsequent siblings)
  6 siblings, 1 reply; 26+ messages in thread
From: David Ahern @ 2011-06-07 23:55 UTC (permalink / raw)
  To: linux-perf-users, linux-kernel
  Cc: acme, mingo, peterz, fweisbec, paulus, tglx, johnstul, rostedt,
	David Ahern

Trace points in timekeeping.c where xtime is modified by a user
or ntp.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/trace/events/timekeeping.h |   43 ++++++++++++++++++++++++++++++++++++
 kernel/time/timekeeping.c          |    8 ++++++
 2 files changed, 51 insertions(+), 0 deletions(-)
 create mode 100644 include/trace/events/timekeeping.h

diff --git a/include/trace/events/timekeeping.h b/include/trace/events/timekeeping.h
new file mode 100644
index 0000000..3d5d083
--- /dev/null
+++ b/include/trace/events/timekeeping.h
@@ -0,0 +1,43 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM timekeeping
+
+#if !defined(_TRACE_TIMEKEEP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_TIMEKEEP_H
+
+#include <linux/tracepoint.h>
+#include <linux/time.h>
+
+DECLARE_EVENT_CLASS(tod_template,
+
+	TP_PROTO(const struct timespec *tv),
+
+	TP_ARGS(tv),
+
+	TP_STRUCT__entry(
+		__field( __kernel_time_t,	tv_sec)
+		__field( long,			tv_nsec)
+	),
+
+	TP_fast_assign(
+		__entry->tv_sec  = tv->tv_sec;
+		__entry->tv_nsec = tv->tv_nsec;
+	),
+
+	TP_printk("tv_sec=%ld tv_nsec=%ld", __entry->tv_sec, __entry->tv_nsec)
+);
+
+DEFINE_EVENT(tod_template, settimeofday,
+	TP_PROTO(const struct timespec *tv),
+	TP_ARGS(tv));
+
+DEFINE_EVENT(tod_template, timekeeping_inject_offset,
+	TP_PROTO(const struct timespec *tv),
+	TP_ARGS(tv));
+
+DEFINE_EVENT(tod_template, timekeeping_inject_sleeptime,
+	TP_PROTO(const struct timespec *tv),
+	TP_ARGS(tv));
+#endif /*  _TRACE_TIMEKEEP_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 342408c..fda1af1 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -21,6 +21,9 @@
 #include <linux/tick.h>
 #include <linux/stop_machine.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/timekeeping.h>
+
 /* Structure holding internal timekeeping values. */
 struct timekeeper {
 	/* Current clocksource used for timekeeping. */
@@ -361,6 +364,8 @@ int do_settimeofday(const struct timespec *tv)
 	if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
 		return -EINVAL;
 
+	trace_settimeofday(tv);
+
 	write_seqlock_irqsave(&xtime_lock, flags);
 
 	timekeeping_forward_now();
@@ -406,6 +411,8 @@ int timekeeping_inject_offset(struct timespec *ts)
 	timekeeping_forward_now();
 
 	xtime = timespec_add(xtime, *ts);
+	trace_timekeeping_inject_offset(&xtime);
+
 	wall_to_monotonic = timespec_sub(wall_to_monotonic, *ts);
 
 	timekeeper.ntp_error = 0;
@@ -605,6 +612,7 @@ static struct timespec timekeeping_suspend_time;
 static void __timekeeping_inject_sleeptime(struct timespec *delta)
 {
 	xtime = timespec_add(xtime, *delta);
+	trace_timekeeping_inject_sleeptime(&xtime);
 	wall_to_monotonic = timespec_sub(wall_to_monotonic, *delta);
 	total_sleep_time = timespec_add(total_sleep_time, *delta);
 }
-- 
1.7.5.2

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 2/6] perf utils: export parse_single_tracepoint_event
  2011-06-07 23:53 [PATCH 0/6] Add time-of-day option to perf events David Ahern
  2011-06-07 23:55 ` [PATCH 1/6] trace: add tracepoints to timekeeping code - xtime changes David Ahern
@ 2011-06-07 23:55 ` David Ahern
  2011-06-07 23:55 ` [PATCH 3/6] perf: add reference time event David Ahern
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 26+ messages in thread
From: David Ahern @ 2011-06-07 23:55 UTC (permalink / raw)
  To: linux-perf-users, linux-kernel
  Cc: acme, mingo, peterz, fweisbec, paulus, tglx, David Ahern

For use by perf-record in later patch. Allow strp arg to be NULL
as well.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 tools/perf/util/parse-events.c |   13 ++++---------
 tools/perf/util/parse-events.h |   12 ++++++++++++
 2 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 41982c3..c15a701 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -19,12 +19,6 @@ struct event_symbol {
 	const char	*alias;
 };
 
-enum event_result {
-	EVT_FAILED,
-	EVT_HANDLED,
-	EVT_HANDLED_ALL
-};
-
 char debugfs_path[MAXPATHLEN];
 
 #define CHW(x) .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_##x
@@ -432,8 +426,8 @@ parse_generic_hw_event(const char **str, struct perf_event_attr *attr)
 	return EVT_HANDLED;
 }
 
-static enum event_result
-parse_single_tracepoint_event(char *sys_name,
+enum event_result
+parse_single_tracepoint_event(const char *sys_name,
 			      const char *evt_name,
 			      unsigned int evt_length,
 			      struct perf_event_attr *attr,
@@ -460,7 +454,8 @@ parse_single_tracepoint_event(char *sys_name,
 	id = atoll(id_buf);
 	attr->config = id;
 	attr->type = PERF_TYPE_TRACEPOINT;
-	*strp += strlen(sys_name) + evt_length + 1; /* + 1 for the ':' */
+	if (strp)
+		*strp += strlen(sys_name) + evt_length + 1; /* + 1 for the ':' */
 
 	attr->sample_type |= PERF_SAMPLE_RAW;
 	attr->sample_type |= PERF_SAMPLE_TIME;
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 746d3fc..e65f724 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -17,6 +17,12 @@ struct tracepoint_path {
 	struct tracepoint_path *next;
 };
 
+enum event_result {
+	EVT_FAILED,
+	EVT_HANDLED,
+	EVT_HANDLED_ALL
+};
+
 extern struct tracepoint_path *tracepoint_id_to_path(u64 config);
 extern bool have_tracepoints(struct list_head *evlist);
 
@@ -26,6 +32,12 @@ extern const char *__event_name(int type, u64 config);
 
 extern int parse_events(const struct option *opt, const char *str, int unset);
 extern int parse_filter(const struct option *opt, const char *str, int unset);
+extern enum event_result
+parse_single_tracepoint_event(const char *sys_name,
+			      const char *evt_name,
+			      unsigned int evt_length,
+			      struct perf_event_attr *attr,
+			      const char **strp);
 
 #define EVENTS_HELP_MAX (128*1024)
 
-- 
1.7.5.2

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 3/6] perf: add reference time event
  2011-06-07 23:53 [PATCH 0/6] Add time-of-day option to perf events David Ahern
  2011-06-07 23:55 ` [PATCH 1/6] trace: add tracepoints to timekeeping code - xtime changes David Ahern
  2011-06-07 23:55 ` [PATCH 2/6] perf utils: export parse_single_tracepoint_event David Ahern
@ 2011-06-07 23:55 ` David Ahern
  2011-06-17 13:32   ` Frederic Weisbecker
  2011-06-07 23:56 ` [PATCH 4/6] perf record: add time-of-day option David Ahern
                   ` (3 subsequent siblings)
  6 siblings, 1 reply; 26+ messages in thread
From: David Ahern @ 2011-06-07 23:55 UTC (permalink / raw)
  To: linux-perf-users, linux-kernel
  Cc: acme, mingo, peterz, fweisbec, paulus, tglx, David Ahern

For initial perf_clock to time-of-day correlation.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 tools/perf/util/event.c   |    1 +
 tools/perf/util/event.h   |    8 ++++++++
 tools/perf/util/session.c |    4 ++++
 tools/perf/util/session.h |    3 ++-
 4 files changed, 15 insertions(+), 1 deletions(-)

diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 3c1b8a6..1a89a04 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -24,6 +24,7 @@ static const char *perf_event__names[] = {
 	[PERF_RECORD_HEADER_TRACING_DATA]	= "TRACING_DATA",
 	[PERF_RECORD_HEADER_BUILD_ID]		= "BUILD_ID",
 	[PERF_RECORD_FINISHED_ROUND]		= "FINISHED_ROUND",
+	[PERF_RECORD_REFTIME]			= "REF_TIME",
 };
 
 const char *perf_event__name(unsigned int id)
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 1d7f664..f481f90 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -98,6 +98,7 @@ enum perf_user_event_type { /* above any possible kernel type */
 	PERF_RECORD_HEADER_TRACING_DATA		= 66,
 	PERF_RECORD_HEADER_BUILD_ID		= 67,
 	PERF_RECORD_FINISHED_ROUND		= 68,
+	PERF_RECORD_REFTIME			= 69,
 	PERF_RECORD_HEADER_MAX
 };
 
@@ -124,6 +125,12 @@ struct tracing_data_event {
 	u32 size;
 };
 
+struct reftime_event {
+	struct perf_event_header header;
+	struct timeval tv;
+	u64 nsec;
+};
+
 union perf_event {
 	struct perf_event_header	header;
 	struct ip_event			ip;
@@ -137,6 +144,7 @@ union perf_event {
 	struct event_type_event		event_type;
 	struct tracing_data_event	tracing_data;
 	struct build_id_event		build_id;
+	struct reftime_event		reftime;
 };
 
 void perf_event__print_totals(void);
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index b723f21..322632c 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -792,6 +792,10 @@ static int perf_session__process_user_event(struct perf_session *session, union
 		return ops->build_id(event, session);
 	case PERF_RECORD_FINISHED_ROUND:
 		return ops->finished_round(event, session, ops);
+	case PERF_RECORD_REFTIME:
+		if (ops->reftime)
+			return ops->reftime(event, session);
+		return -EINVAL;
 	default:
 		return -EINVAL;
 	}
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index de4178d..9728701 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -81,7 +81,8 @@ struct perf_event_ops {
 	event_synth_op	attr,
 			event_type,
 			tracing_data,
-			build_id;
+			build_id,
+			reftime;
 	event_op2	finished_round;
 	bool		ordered_samples;
 	bool		ordering_requires_timestamps;
-- 
1.7.5.2

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 4/6] perf record: add time-of-day option
  2011-06-07 23:53 [PATCH 0/6] Add time-of-day option to perf events David Ahern
                   ` (2 preceding siblings ...)
  2011-06-07 23:55 ` [PATCH 3/6] perf: add reference time event David Ahern
@ 2011-06-07 23:56 ` David Ahern
  2011-06-17 14:14   ` Frederic Weisbecker
  2011-06-07 23:56 ` [PATCH 5/6] perf script: pass trace event to print_trace_event David Ahern
                   ` (2 subsequent siblings)
  6 siblings, 1 reply; 26+ messages in thread
From: David Ahern @ 2011-06-07 23:56 UTC (permalink / raw)
  To: linux-perf-users, linux-kernel
  Cc: acme, mingo, peterz, fweisbec, paulus, tglx, David Ahern

Use reftime event for initial correlation of perf_clock to
time-of-day. Add timekeeping trace events to event list to
capture jumps in time-of-day.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 tools/perf/Documentation/perf-record.txt |    3 +
 tools/perf/builtin-record.c              |  102 +++++++++++++++++++++++++++++-
 2 files changed, 104 insertions(+), 1 deletions(-)

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 5a520f8..e4c87ba 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -148,6 +148,9 @@ an empty cgroup (monitor all the time) using, e.g., -G foo,,bar. Cgroups must ha
 corresponding events, i.e., they always refer to events defined earlier on the command
 line.
 
+--tod::
+Collect data for displaying time-of-day strings when printing events.
+
 SEE ALSO
 --------
 linkperf:perf-stat[1], linkperf:perf-list[1]
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 8e2c857..4f8d5f2 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -32,6 +32,9 @@
 
 #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
 
+#define TRACE_TOD_SUBSYSTEM      "timekeeping"
+#define TRACE_TOD_SUBSYSTEM_LEN  11
+
 enum write_mode_t {
 	WRITE_FORCE,
 	WRITE_APPEND
@@ -65,6 +68,8 @@ static bool			sample_address			=  false;
 static bool			sample_time			=  false;
 static bool			no_buildid			=  false;
 static bool			no_buildid_cache		=  false;
+static bool			want_tod			=  false;
+static u64 			tod_sample_type;
 static struct perf_evlist	*evsel_list;
 
 static long			samples				=      0;
@@ -215,6 +220,9 @@ static void config_attr(struct perf_evsel *evsel, struct perf_evlist *evlist)
 		attr->sample_type	|= PERF_SAMPLE_CPU;
 	}
 
+	if (want_tod)
+		attr->sample_type	|= tod_sample_type;
+
 	if (nodelay) {
 		attr->watermark = 0;
 		attr->wakeup_events = 1;
@@ -248,6 +256,86 @@ static bool perf_evlist__equal(struct perf_evlist *evlist,
 	return true;
 }
 
+static int perf_event__synthesize_reftime(perf_event__handler_t process)
+{
+	union perf_event ev;
+	struct timespec tp;
+
+	memset(&ev, 0, sizeof(ev));
+
+	if (gettimeofday(&ev.reftime.tv, NULL) != 0) {
+		error("gettimeofday failed.\n");
+		return -1;
+	}
+	if (clock_gettime(CLOCK_MONOTONIC, &tp) != 0) {
+		error("clock_gettime(CLOCK_MONOTONIC) failed.\n");
+		return -1;
+	}
+	ev.reftime.nsec = (u64) tp.tv_sec * NSEC_PER_SEC + (u64) tp.tv_nsec;
+
+	ev.header.type = PERF_RECORD_REFTIME;
+	ev.header.size = sizeof(ev.reftime);
+
+	return process(&ev, NULL, session);
+}
+
+static int add_timeofday_events(void)
+{
+	int rc, i, len;
+	struct perf_event_attr attr;
+	struct perf_evsel *evsel;
+
+	/* events that modify xtime */
+	const char *tod_events[] = {"settimeofday",
+				    "timekeeping_inject_offset",
+				    "timekeeping_inject_sleeptime",
+				    NULL};
+
+	i = 0;
+	rc = -1;
+	while (tod_events[i]) {
+		memset(&attr, 0, sizeof(attr));
+		len = strlen(tod_events[i]);
+
+		if (parse_single_tracepoint_event(TRACE_TOD_SUBSYSTEM,
+			tod_events[i], len, &attr, NULL) == EVT_FAILED) {
+			error("Failed to parse event %s\n", tod_events[i]);
+			goto out;
+		}
+
+		evsel = perf_evsel__new(&attr, evsel_list->nr_entries);
+		if (evsel == NULL)
+			return -1;
+
+		perf_evlist__add(evsel_list, evsel);
+
+		/* +2 for ':' delimiter and string terminator */
+		evsel->name = calloc(TRACE_TOD_SUBSYSTEM_LEN + len + 2, 1);
+		if (!evsel->name)
+			return -1;
+
+		sprintf(evsel->name, "timekeeping:%s", tod_events[i]);
+
+		tod_sample_type |= attr.sample_type;
+
+		++i;
+	}
+
+	/*
+	 * right now sample_type for all samples needs to be the same.
+	 * tracepoints are collected at sample period 1 and hence do not
+	 * request the period with the sample. However, default for record
+	 * is cycles at a frequency. So, until this sample_type mess is
+	 * fixed....
+	 */
+	if (freq)
+		tod_sample_type |= PERF_SAMPLE_PERIOD;
+
+	rc = 0;
+out:
+	return rc;
+}
+
 static void open_counters(struct perf_evlist *evlist)
 {
 	struct perf_evsel *pos;
@@ -289,7 +377,8 @@ try_again:
 				 * Old kernel, no attr->sample_id_type_all field
 				 */
 				sample_id_all_avail = false;
-				if (!sample_time && !raw_samples && !time_needed)
+				if (!sample_time && !raw_samples &&
+						!time_needed && !want_tod)
 					attr->sample_type &= ~PERF_SAMPLE_TIME;
 
 				goto retry_sample_id;
@@ -629,6 +718,11 @@ static int __cmd_record(int argc, const char **argv)
 		}
 	}
 
+	if (want_tod &&
+	    (perf_event__synthesize_reftime(process_synthesized_event) != 0))
+		error("Failed to create reftime event. "
+		      "Cannot generate wall-clock timestamps\n");
+
 	machine = perf_session__find_host_machine(session);
 	if (!machine) {
 		pr_err("Couldn't find native kernel information.\n");
@@ -787,6 +881,8 @@ const struct option record_options[] = {
 	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
 		     "monitor event in cgroup name only",
 		     parse_cgroups),
+	OPT_BOOLEAN(0, "tod", &want_tod,
+		    "Collect data for time-of-day strings"),
 	OPT_END()
 };
 
@@ -842,6 +938,10 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
 		goto out_symbol_exit;
 	}
 
+	if (want_tod && (add_timeofday_events() != 0))
+		error("Failed to add timekeeping events to event list.\n"
+		      "Cannot generate wall-clock timestamps\n");
+
 	if (target_pid != -1)
 		target_tid = target_pid;
 
-- 
1.7.5.2

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 5/6] perf script: pass trace event to print_trace_event
  2011-06-07 23:53 [PATCH 0/6] Add time-of-day option to perf events David Ahern
                   ` (3 preceding siblings ...)
  2011-06-07 23:56 ` [PATCH 4/6] perf record: add time-of-day option David Ahern
@ 2011-06-07 23:56 ` David Ahern
  2011-06-07 23:56 ` [PATCH 6/6] perf script: add time-of-day option for displaying events David Ahern
  2011-06-14 18:42 ` [PATCH 0/6] Add time-of-day option to perf events David Ahern
  6 siblings, 0 replies; 26+ messages in thread
From: David Ahern @ 2011-06-07 23:56 UTC (permalink / raw)
  To: linux-perf-users, linux-kernel
  Cc: acme, mingo, peterz, fweisbec, paulus, tglx, David Ahern

Next patch needs parsed trace event in perf-script. No since
extracting it twice.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 tools/perf/builtin-script.c         |   16 ++++++++++++++--
 tools/perf/util/trace-event-parse.c |   12 +-----------
 tools/perf/util/trace-event.h       |    2 +-
 3 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 3056b45..c61ae26 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -362,14 +362,26 @@ static void process_event(union perf_event *event __unused,
 			  struct thread *thread)
 {
 	struct perf_event_attr *attr = &evsel->attr;
+	struct event *tr_event = NULL;
 
 	if (output[attr->type].fields == 0)
 		return;
 
+	if (attr->type == PERF_TYPE_TRACEPOINT) {
+		int type;
+
+		type = trace_parse_common_type(sample->raw_data);
+		tr_event = trace_find_event(type);
+		if (!tr_event) {
+			warning("ug! no event found for type %d", type);
+			return;
+		}
+	}
+
 	print_sample_start(sample, thread, attr);
 
-	if (PRINT_FIELD(TRACE))
-		print_trace_event(sample->cpu, sample->raw_data,
+	if ((attr->type == PERF_TYPE_TRACEPOINT) && PRINT_FIELD(TRACE))
+		print_trace_event(tr_event, sample->cpu, sample->raw_data,
 				  sample->raw_size);
 
 	if (PRINT_FIELD(ADDR))
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index 1e88485..1f4dccd 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -2912,20 +2912,10 @@ pretty_print_func_graph(void *data, int size, struct event *event,
 	printf("\n");
 }
 
-void print_trace_event(int cpu, void *data, int size)
+void print_trace_event(struct event *event, int cpu, void *data, int size)
 {
-	struct event *event;
-	int type;
 	int pid;
 
-	type = trace_parse_common_type(data);
-
-	event = trace_find_event(type);
-	if (!event) {
-		warning("ug! no event found for type %d", type);
-		return;
-	}
-
 	pid = trace_parse_common_pid(data);
 
 	if (event->flags & (EVENT_FL_ISFUNCENT | EVENT_FL_ISFUNCRET))
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index f674dda..34ec9f3 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -177,7 +177,7 @@ void print_printk(void);
 
 int parse_ftrace_file(char *buf, unsigned long size);
 int parse_event_file(char *buf, unsigned long size, char *sys);
-void print_trace_event(int cpu, void *data, int size);
+void print_trace_event(struct event *event, int cpu, void *data, int size);
 
 extern int file_bigendian;
 extern int host_bigendian;
-- 
1.7.5.2

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 6/6] perf script: add time-of-day option for displaying events
  2011-06-07 23:53 [PATCH 0/6] Add time-of-day option to perf events David Ahern
                   ` (4 preceding siblings ...)
  2011-06-07 23:56 ` [PATCH 5/6] perf script: pass trace event to print_trace_event David Ahern
@ 2011-06-07 23:56 ` David Ahern
  2011-06-14 18:42 ` [PATCH 0/6] Add time-of-day option to perf events David Ahern
  6 siblings, 0 replies; 26+ messages in thread
From: David Ahern @ 2011-06-07 23:56 UTC (permalink / raw)
  To: linux-perf-users, linux-kernel
  Cc: acme, mingo, peterz, fweisbec, paulus, tglx, David Ahern

Default format is %H:%M:%S and the format can be changed via the --tod argument.
Timekeeping tracepoints are parsed to update the correlation between
perf_clock and time-of-day.

e.g.,
perf script -i perf-cs-tod.data  -f 'tod,comm,tid,cpu,event,ip'

14:06:24.860431         rcuc3    21 [003] context-switches:  ffffffff81040b13
14:06:24.860448   kworker/3:1    83 [003] context-switches:  ffffffff81040b13
14:06:24.860606       swapper     0 [011] context-switches:  ffffffff81040b13
14:06:24.860610  ksoftirqd/11    54 [011] context-switches:  ffffffff81040b13

And capturing a time change:

14:06:24.860606       swapper     0 [011] context-switches:  ffffffff81040b13
14:06:24.860610  ksoftirqd/11    54 [011] context-switches:  ffffffff81040b13
13:50:00.000000          date  5412 [011] settimeofday:  ffffffff81071891
13:50:00.000127       swapper     0 [003] context-switches:  ffffffff81040b13
13:50:00.000137         rcuc3    21 [003] context-switches:  ffffffff81040b13

And it's reset by ntpd:
14:09:26.750894   kworker/1:1    81 [001] context-switches:  ffffffff81040b13
14:09:26.758159       swapper     0 [000] context-switches:  ffffffff81040b13
14:25:54.391283          ntpd  5465 [000] settimeofday:  ffffffff81071891
14:25:54.391296       swapper     0 [008] context-switches:  ffffffff81040b13
14:25:54.391363      rsyslogd  1642 [008] context-switches:  ffffffff81040b13

Another example with user formatted time:

perf script -i perf-cs-tod.data  --tod "%D-%T" -v -f 'tod,comm,tid,event,ip'

06/07/11-14:06:24.860606       swapper     0 context-switches:  ffffffff81040b13
06/07/11-14:06:24.860610  ksoftirqd/11    54 context-switches:  ffffffff81040b13
06/07/11-13:50:00.000000          date  5412 settimeofday:  ffffffff81071891
06/07/11-13:50:00.000127       swapper     0 context-switches:  ffffffff81040b13
06/07/11-13:50:00.000137         rcuc3    21 context-switches:  ffffffff81040b13

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 tools/perf/Documentation/perf-script.txt |    8 ++-
 tools/perf/builtin-script.c              |  141 ++++++++++++++++++++++++++++++
 2 files changed, 148 insertions(+), 1 deletions(-)

diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index c6068cb..bd30279 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -115,7 +115,7 @@ OPTIONS
 -f::
 --fields::
         Comma separated list of fields to print. Options are:
-        comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr.
+        comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, tod.
         Field list can be prepended with the type, trace, sw or hw,
         to indicate to which event type the field list applies.
         e.g., -f sw:comm,tid,time,ip,sym  and -f trace:time,cpu,trace
@@ -182,6 +182,12 @@ OPTIONS
 --hide-call-graph::
         When printing symbols do not display call chain.
 
+--tod=<format>::
+        Add time-of-day to output using specified format string. Format
+        string is passed to strftime, so any format recognized by it
+        can be used (see man strftime). Default format is "%H:%M:%S".
+        Microseocnds are appended to the time string.
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-script-perl[1],
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index c61ae26..94dc7b0 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -21,6 +21,8 @@ static u64			last_timestamp;
 static u64			nr_unordered;
 extern const struct option	record_options[];
 static bool			no_callchain;
+static char 			default_tod_fmt[] = "%H:%M:%S";
+static char 			*tod_fmt = default_tod_fmt;
 
 enum perf_output_field {
 	PERF_OUTPUT_COMM            = 1U << 0,
@@ -34,6 +36,7 @@ enum perf_output_field {
 	PERF_OUTPUT_SYM             = 1U << 8,
 	PERF_OUTPUT_DSO             = 1U << 9,
 	PERF_OUTPUT_ADDR            = 1U << 10,
+	PERF_OUTPUT_TIMEOFDAY       = 1U << 11,
 };
 
 struct output_option {
@@ -51,6 +54,7 @@ struct output_option {
 	{.str = "sym",   .field = PERF_OUTPUT_SYM},
 	{.str = "dso",   .field = PERF_OUTPUT_DSO},
 	{.str = "addr",  .field = PERF_OUTPUT_ADDR},
+	{.str = "tod",   .field = PERF_OUTPUT_TIMEOFDAY},
 };
 
 /* default set to maintain compatibility with current format */
@@ -209,6 +213,13 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
 				       PERF_OUTPUT_CPU))
 		return -EINVAL;
 
+	if (PRINT_FIELD(TIMEOFDAY) &&
+		perf_event_attr__check_stype(attr, PERF_SAMPLE_TIME, "TIME",
+				       PERF_OUTPUT_TIME)) {
+		pr_err("Samples do not contain timestamps.\n");
+		pr_err("Was --tod used with perf-record?\n");
+	}
+
 	return 0;
 }
 
@@ -243,6 +254,80 @@ static int perf_session__check_output_opt(struct perf_session *session)
 	return 0;
 }
 
+static struct timeval tv_ref;
+static u64 timestamp_ref;
+static const char *timeofday_str(u64 timestamp)
+{
+	struct tm ltime;
+	u64 dt;
+	struct timeval tv_dt, tv_res;
+	static char tstr[64];
+
+	if (timestamp_ref == 0) {
+		static bool show_msg = true;
+		if (show_msg) {
+			warning("Reference clock event not seen; "
+				"Cannot generate time-of-day strings until then.\n"
+				"Was --tod used with perf-record?\n");
+			show_msg = false;
+		}
+		snprintf(tstr, sizeof(tstr), "%16" PRId64, timestamp);
+		return tstr;
+	} else if (timestamp == 0) {
+		snprintf(tstr, sizeof(tstr), "%16" PRId64, timestamp);
+		return tstr;
+	}
+	if (timestamp > timestamp_ref) {
+		dt = timestamp - timestamp_ref;
+		tv_dt.tv_sec = (time_t) (dt / NSEC_PER_SEC);
+		tv_dt.tv_usec = (dt - tv_dt.tv_sec * NSEC_PER_SEC) / 1000;
+		timeradd(&tv_ref, &tv_dt, &tv_res);
+	} else {
+		dt = timestamp_ref - timestamp;
+		tv_dt.tv_sec = (time_t) (dt / NSEC_PER_SEC);
+		tv_dt.tv_usec = (dt - tv_dt.tv_sec * NSEC_PER_SEC) / 1000;
+		timersub(&tv_ref, &tv_dt, &tv_res);
+	}
+
+	if (localtime_r(&tv_res.tv_sec, &ltime) == NULL) {
+		snprintf(tstr, sizeof(tstr), "%15" PRId64, timestamp);
+	} else {
+		char date[64];
+		strftime(date, sizeof(date), tod_fmt, &ltime);
+		snprintf(tstr, sizeof(tstr), "%s.%06ld", date, tv_res.tv_usec);
+	}
+
+	return tstr;
+}
+
+static void process_timeofday_event(struct event *event,
+				    struct perf_sample *sample)
+{
+	unsigned long val;
+
+	if (strcmp(event->name, "settimeofday") &&
+	    strcmp(event->name, "timekeeping_inject_offset") &&
+		strcmp(event->name, "timekeeping_inject_sleeptime"))
+		return;
+
+	val = raw_field_value(event, "tv_sec", sample->raw_data);
+	tv_ref.tv_sec = val;
+
+	val = raw_field_value(event, "tv_nsec", sample->raw_data);
+	tv_ref.tv_usec = val / NSECS_PER_USEC;
+
+	timestamp_ref = sample->time;
+}
+
+static int perf_event__process_reftime(union perf_event *event,
+				       struct perf_session *session __used)
+{
+	tv_ref = event->reftime.tv;
+	timestamp_ref = event->reftime.nsec;
+
+	return 0;
+}
+
 static void print_sample_start(struct perf_sample *sample,
 			       struct thread *thread,
 			       struct perf_event_attr *attr)
@@ -254,6 +339,9 @@ static void print_sample_start(struct perf_sample *sample,
 	unsigned long usecs;
 	unsigned long long nsecs;
 
+	if (PRINT_FIELD(TIMEOFDAY))
+		printf("%s ", timeofday_str(sample->time));
+
 	if (PRINT_FIELD(COMM)) {
 		if (latency_format)
 			printf("%8.8s ", thread->comm);
@@ -376,6 +464,8 @@ static void process_event(union perf_event *event __unused,
 			warning("ug! no event found for type %d", type);
 			return;
 		}
+
+		process_timeofday_event(tr_event, sample);
 	}
 
 	print_sample_start(sample, thread, attr);
@@ -481,6 +571,7 @@ static struct perf_event_ops event_ops = {
 	.event_type	 = perf_event__process_event_type,
 	.tracing_data	 = perf_event__process_tracing_data,
 	.build_id	 = perf_event__process_build_id,
+	.reftime     = perf_event__process_reftime,
 	.ordered_samples = true,
 	.ordering_requires_timestamps = true,
 };
@@ -764,6 +855,49 @@ out:
 	return rc;
 }
 
+static int parse_tod_format(const struct option *opt __used,
+			    const char *arg, int unset __used)
+{
+	int i;
+	char date[128];
+	size_t rc;
+	struct tm ltime;
+
+	if (strlen(arg) == 0) {
+		pr_debug("Time-of-day strings will be suppressed\n");
+		goto out;
+	}
+
+	/* test input string for validity and length of output */
+	localtime_r(0, &ltime);
+	rc = strftime(date, sizeof(date), arg, &ltime);
+	if (rc == 0) {
+		fprintf(stderr, "Invalid format string for time-of-day\n");
+		return -EINVAL;
+	}
+
+out:
+	for (i = 0; i < PERF_TYPE_MAX; ++i) {
+		if (output[i].fields == 0)
+			continue;
+		if (strlen(arg))
+			output[i].fields |= PERF_OUTPUT_TIMEOFDAY;
+		else
+			output[i].fields &= ~PERF_OUTPUT_TIMEOFDAY;
+	}
+
+	if (tod_fmt != default_tod_fmt)
+		free(tod_fmt);
+
+	tod_fmt = strdup(arg);
+	if (!tod_fmt) {
+		fprintf(stderr, "Failed to copy time-of-day format string\n");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
 /* Helper function for filesystems that return a dent->d_type DT_UNKNOWN */
 static int is_directory(const char *base_path, const struct dirent *dent)
 {
@@ -1087,6 +1221,9 @@ static const struct option options[] = {
 	OPT_CALLBACK('f', "fields", NULL, "str",
 		     "comma separated output fields prepend with 'type:'. Valid types: hw,sw,trace,raw. Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,addr",
 		     parse_output_fields),
+	OPT_CALLBACK(0, "tod", NULL, "str",
+		     "Format for time-of-day strings. Option is passed to strftime; microseconds are appended. Default is %H:%M:%S.",
+		     parse_tod_format),
 
 	OPT_END()
 };
@@ -1325,6 +1462,10 @@ int cmd_script(int argc, const char **argv, const char *prefix __used)
 
 	perf_session__delete(session);
 	cleanup_scripting();
+
+	if (tod_fmt != default_tod_fmt)
+		free(tod_fmt);
+
 out:
 	return err;
 }
-- 
1.7.5.2

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH 0/6] Add time-of-day option to perf events
  2011-06-07 23:53 [PATCH 0/6] Add time-of-day option to perf events David Ahern
                   ` (5 preceding siblings ...)
  2011-06-07 23:56 ` [PATCH 6/6] perf script: add time-of-day option for displaying events David Ahern
@ 2011-06-14 18:42 ` David Ahern
  6 siblings, 0 replies; 26+ messages in thread
From: David Ahern @ 2011-06-14 18:42 UTC (permalink / raw)
  To: linux-perf-users, linux-kernel, mingo, peterz, fweisbec, tglx
  Cc: acme, paulus

ping?

On 06/07/2011 05:53 PM, David Ahern wrote:
> Another attempt at printing time-of-day strings on perf events - this
> time with tracepoints in the timekeeping code to catch updates to
> xtime.
> 
> David Ahern (6):
>   trace: add tracepoints to timekeeping code - xtime changes
>   perf utils: export parse_single_tracepoint_event
>   perf: add reference time event
>   perf record: add time-of-day option
>   perf script: pass trace event to print_trace_event
>   perf script: add time-of-day option for displaying events
> 
>  include/trace/events/timekeeping.h       |   43 ++++++++
>  kernel/time/timekeeping.c                |    8 ++
>  tools/perf/Documentation/perf-record.txt |    3 +
>  tools/perf/Documentation/perf-script.txt |    8 ++-
>  tools/perf/builtin-record.c              |  102 +++++++++++++++++++-
>  tools/perf/builtin-script.c              |  157 +++++++++++++++++++++++++++++-
>  tools/perf/util/event.c                  |    1 +
>  tools/perf/util/event.h                  |    8 ++
>  tools/perf/util/parse-events.c           |   13 +--
>  tools/perf/util/parse-events.h           |   12 +++
>  tools/perf/util/session.c                |    4 +
>  tools/perf/util/session.h                |    3 +-
>  tools/perf/util/trace-event-parse.c      |   12 +--
>  tools/perf/util/trace-event.h            |    2 +-
>  14 files changed, 350 insertions(+), 26 deletions(-)
>  create mode 100644 include/trace/events/timekeeping.h
> 

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/6] trace: add tracepoints to timekeeping code - xtime changes
  2011-06-07 23:55 ` [PATCH 1/6] trace: add tracepoints to timekeeping code - xtime changes David Ahern
@ 2011-06-17 13:23   ` Frederic Weisbecker
  2011-06-17 14:13     ` David Ahern
  0 siblings, 1 reply; 26+ messages in thread
From: Frederic Weisbecker @ 2011-06-17 13:23 UTC (permalink / raw)
  To: David Ahern
  Cc: linux-perf-users, linux-kernel, acme, mingo, peterz, paulus, tglx,
	johnstul, rostedt

On Tue, Jun 07, 2011 at 05:55:03PM -0600, David Ahern wrote:
> Trace points in timekeeping.c where xtime is modified by a user
> or ntp.
> 
> Signed-off-by: David Ahern <dsahern@gmail.com>
> ---
>  include/trace/events/timekeeping.h |   43 ++++++++++++++++++++++++++++++++++++
>  kernel/time/timekeeping.c          |    8 ++++++
>  2 files changed, 51 insertions(+), 0 deletions(-)
>  create mode 100644 include/trace/events/timekeeping.h
> 
> diff --git a/include/trace/events/timekeeping.h b/include/trace/events/timekeeping.h
> new file mode 100644
> index 0000000..3d5d083
> --- /dev/null
> +++ b/include/trace/events/timekeeping.h
> @@ -0,0 +1,43 @@
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM timekeeping
> +
> +#if !defined(_TRACE_TIMEKEEP_H) || defined(TRACE_HEADER_MULTI_READ)
> +#define _TRACE_TIMEKEEP_H
> +
> +#include <linux/tracepoint.h>
> +#include <linux/time.h>
> +
> +DECLARE_EVENT_CLASS(tod_template,
> +
> +	TP_PROTO(const struct timespec *tv),
> +
> +	TP_ARGS(tv),
> +
> +	TP_STRUCT__entry(
> +		__field( __kernel_time_t,	tv_sec)
> +		__field( long,			tv_nsec)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->tv_sec  = tv->tv_sec;
> +		__entry->tv_nsec = tv->tv_nsec;
> +	),
> +
> +	TP_printk("tv_sec=%ld tv_nsec=%ld", __entry->tv_sec, __entry->tv_nsec)
> +);
> +
> +DEFINE_EVENT(tod_template, settimeofday,
> +	TP_PROTO(const struct timespec *tv),
> +	TP_ARGS(tv));
> +
> +DEFINE_EVENT(tod_template, timekeeping_inject_offset,
> +	TP_PROTO(const struct timespec *tv),
> +	TP_ARGS(tv));
> +
> +DEFINE_EVENT(tod_template, timekeeping_inject_sleeptime,
> +	TP_PROTO(const struct timespec *tv),
> +	TP_ARGS(tv));

Does the fact it's any of the three way of updating xtime make any
difference from the user point of view?

If not can we rather factorize that in a single settimeofday tracepoint?
Or update_time_of_day if we don't want to confuse the user with the
syscall.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/6] perf: add reference time event
  2011-06-07 23:55 ` [PATCH 3/6] perf: add reference time event David Ahern
@ 2011-06-17 13:32   ` Frederic Weisbecker
  2011-06-17 14:04     ` David Ahern
  0 siblings, 1 reply; 26+ messages in thread
From: Frederic Weisbecker @ 2011-06-17 13:32 UTC (permalink / raw)
  To: David Ahern
  Cc: linux-perf-users, linux-kernel, acme, mingo, peterz, paulus, tglx

On Tue, Jun 07, 2011 at 05:55:46PM -0600, David Ahern wrote:
> For initial perf_clock to time-of-day correlation.
> 
> Signed-off-by: David Ahern <dsahern@gmail.com>
> ---
>  tools/perf/util/event.c   |    1 +
>  tools/perf/util/event.h   |    8 ++++++++
>  tools/perf/util/session.c |    4 ++++
>  tools/perf/util/session.h |    3 ++-
>  4 files changed, 15 insertions(+), 1 deletions(-)
> 
> diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
> index 3c1b8a6..1a89a04 100644
> --- a/tools/perf/util/event.c
> +++ b/tools/perf/util/event.c
> @@ -24,6 +24,7 @@ static const char *perf_event__names[] = {
>  	[PERF_RECORD_HEADER_TRACING_DATA]	= "TRACING_DATA",
>  	[PERF_RECORD_HEADER_BUILD_ID]		= "BUILD_ID",
>  	[PERF_RECORD_FINISHED_ROUND]		= "FINISHED_ROUND",
> +	[PERF_RECORD_REFTIME]			= "REF_TIME",
>  };
>  
>  const char *perf_event__name(unsigned int id)
> diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
> index 1d7f664..f481f90 100644
> --- a/tools/perf/util/event.h
> +++ b/tools/perf/util/event.h
> @@ -98,6 +98,7 @@ enum perf_user_event_type { /* above any possible kernel type */
>  	PERF_RECORD_HEADER_TRACING_DATA		= 66,
>  	PERF_RECORD_HEADER_BUILD_ID		= 67,
>  	PERF_RECORD_FINISHED_ROUND		= 68,
> +	PERF_RECORD_REFTIME			= 69,

We would like to avoid adding more custom events like these. They were very convenient
but they steal the kernel event type space. They are deemed for removal in the long term.

Another idea to achieve what you want would be to create a new perf event header feature,
like HEADER_TRACE_INFO or HEADER_BUILD_ID are. Then use that to create a space in the perf
file to save that couple of clocks initial values.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/6] perf: add reference time event
  2011-06-17 13:32   ` Frederic Weisbecker
@ 2011-06-17 14:04     ` David Ahern
  2011-06-17 14:17       ` Frederic Weisbecker
  0 siblings, 1 reply; 26+ messages in thread
From: David Ahern @ 2011-06-17 14:04 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: linux-perf-users, linux-kernel, acme, mingo, peterz, paulus, tglx



On 06/17/2011 07:32 AM, Frederic Weisbecker wrote:
> On Tue, Jun 07, 2011 at 05:55:46PM -0600, David Ahern wrote:
>> For initial perf_clock to time-of-day correlation.
>>
>> Signed-off-by: David Ahern <dsahern@gmail.com>
>> ---
>>  tools/perf/util/event.c   |    1 +
>>  tools/perf/util/event.h   |    8 ++++++++
>>  tools/perf/util/session.c |    4 ++++
>>  tools/perf/util/session.h |    3 ++-
>>  4 files changed, 15 insertions(+), 1 deletions(-)
>>
>> diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
>> index 3c1b8a6..1a89a04 100644
>> --- a/tools/perf/util/event.c
>> +++ b/tools/perf/util/event.c
>> @@ -24,6 +24,7 @@ static const char *perf_event__names[] = {
>>  	[PERF_RECORD_HEADER_TRACING_DATA]	= "TRACING_DATA",
>>  	[PERF_RECORD_HEADER_BUILD_ID]		= "BUILD_ID",
>>  	[PERF_RECORD_FINISHED_ROUND]		= "FINISHED_ROUND",
>> +	[PERF_RECORD_REFTIME]			= "REF_TIME",
>>  };
>>  
>>  const char *perf_event__name(unsigned int id)
>> diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
>> index 1d7f664..f481f90 100644
>> --- a/tools/perf/util/event.h
>> +++ b/tools/perf/util/event.h
>> @@ -98,6 +98,7 @@ enum perf_user_event_type { /* above any possible kernel type */
>>  	PERF_RECORD_HEADER_TRACING_DATA		= 66,
>>  	PERF_RECORD_HEADER_BUILD_ID		= 67,
>>  	PERF_RECORD_FINISHED_ROUND		= 68,
>> +	PERF_RECORD_REFTIME			= 69,
> 
> We would like to avoid adding more custom events like these. They were very convenient
> but they steal the kernel event type space. They are deemed for removal in the long term.
> 
> Another idea to achieve what you want would be to create a new perf event header feature,
> like HEADER_TRACE_INFO or HEADER_BUILD_ID are. Then use that to create a space in the perf
> file to save that couple of clocks initial values.

you mean like this:
https://lkml.org/lkml/2010/12/7/813

David

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/6] trace: add tracepoints to timekeeping code - xtime changes
  2011-06-17 13:23   ` Frederic Weisbecker
@ 2011-06-17 14:13     ` David Ahern
  2011-06-17 14:19       ` Frederic Weisbecker
  2011-08-15  4:03       ` David Ahern
  0 siblings, 2 replies; 26+ messages in thread
From: David Ahern @ 2011-06-17 14:13 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: linux-perf-users, linux-kernel, acme, mingo, peterz, paulus, tglx,
	johnstul, rostedt



On 06/17/2011 07:23 AM, Frederic Weisbecker wrote:
> On Tue, Jun 07, 2011 at 05:55:03PM -0600, David Ahern wrote:
>> Trace points in timekeeping.c where xtime is modified by a user
>> or ntp.
>>
>> Signed-off-by: David Ahern <dsahern@gmail.com>
>> ---
>>  include/trace/events/timekeeping.h |   43 ++++++++++++++++++++++++++++++++++++
>>  kernel/time/timekeeping.c          |    8 ++++++
>>  2 files changed, 51 insertions(+), 0 deletions(-)
>>  create mode 100644 include/trace/events/timekeeping.h
>>
>> diff --git a/include/trace/events/timekeeping.h b/include/trace/events/timekeeping.h
>> new file mode 100644
>> index 0000000..3d5d083
>> --- /dev/null
>> +++ b/include/trace/events/timekeeping.h
>> @@ -0,0 +1,43 @@
>> +#undef TRACE_SYSTEM
>> +#define TRACE_SYSTEM timekeeping
>> +
>> +#if !defined(_TRACE_TIMEKEEP_H) || defined(TRACE_HEADER_MULTI_READ)
>> +#define _TRACE_TIMEKEEP_H
>> +
>> +#include <linux/tracepoint.h>
>> +#include <linux/time.h>
>> +
>> +DECLARE_EVENT_CLASS(tod_template,
>> +
>> +	TP_PROTO(const struct timespec *tv),
>> +
>> +	TP_ARGS(tv),
>> +
>> +	TP_STRUCT__entry(
>> +		__field( __kernel_time_t,	tv_sec)
>> +		__field( long,			tv_nsec)
>> +	),
>> +
>> +	TP_fast_assign(
>> +		__entry->tv_sec  = tv->tv_sec;
>> +		__entry->tv_nsec = tv->tv_nsec;
>> +	),
>> +
>> +	TP_printk("tv_sec=%ld tv_nsec=%ld", __entry->tv_sec, __entry->tv_nsec)
>> +);
>> +
>> +DEFINE_EVENT(tod_template, settimeofday,
>> +	TP_PROTO(const struct timespec *tv),
>> +	TP_ARGS(tv));
>> +
>> +DEFINE_EVENT(tod_template, timekeeping_inject_offset,
>> +	TP_PROTO(const struct timespec *tv),
>> +	TP_ARGS(tv));
>> +
>> +DEFINE_EVENT(tod_template, timekeeping_inject_sleeptime,
>> +	TP_PROTO(const struct timespec *tv),
>> +	TP_ARGS(tv));
> 
> Does the fact it's any of the three way of updating xtime make any
> difference from the user point of view?

This use case only cares that xtime is updated.

> 
> If not can we rather factorize that in a single settimeofday tracepoint?
> Or update_time_of_day if we don't want to confuse the user with the
> syscall.

Peter and Thomas expressed interest in timekeeping tracepoints. How the
update happens might be wanted. If desired I can consolidate xtime = *tv
into a single update function and put the trace point there.

David

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 4/6] perf record: add time-of-day option
  2011-06-07 23:56 ` [PATCH 4/6] perf record: add time-of-day option David Ahern
@ 2011-06-17 14:14   ` Frederic Weisbecker
  2011-06-17 14:23     ` David Ahern
  0 siblings, 1 reply; 26+ messages in thread
From: Frederic Weisbecker @ 2011-06-17 14:14 UTC (permalink / raw)
  To: David Ahern
  Cc: linux-perf-users, linux-kernel, acme, mingo, peterz, paulus, tglx

On Tue, Jun 07, 2011 at 05:56:04PM -0600, David Ahern wrote:
> Use reftime event for initial correlation of perf_clock to
> time-of-day. Add timekeeping trace events to event list to
> capture jumps in time-of-day.
> 
> Signed-off-by: David Ahern <dsahern@gmail.com>
> ---
>  tools/perf/Documentation/perf-record.txt |    3 +
>  tools/perf/builtin-record.c              |  102 +++++++++++++++++++++++++++++-
>  2 files changed, 104 insertions(+), 1 deletions(-)
> 
> diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
> index 5a520f8..e4c87ba 100644
> --- a/tools/perf/Documentation/perf-record.txt
> +++ b/tools/perf/Documentation/perf-record.txt
> @@ -148,6 +148,9 @@ an empty cgroup (monitor all the time) using, e.g., -G foo,,bar. Cgroups must ha
>  corresponding events, i.e., they always refer to events defined earlier on the command
>  line.
>  
> +--tod::
> +Collect data for displaying time-of-day strings when printing events.
> +
>  SEE ALSO
>  --------
>  linkperf:perf-stat[1], linkperf:perf-list[1]
> diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
> index 8e2c857..4f8d5f2 100644
> --- a/tools/perf/builtin-record.c
> +++ b/tools/perf/builtin-record.c
> @@ -32,6 +32,9 @@
>  
>  #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
>  
> +#define TRACE_TOD_SUBSYSTEM      "timekeeping"
> +#define TRACE_TOD_SUBSYSTEM_LEN  11
> +
>  enum write_mode_t {
>  	WRITE_FORCE,
>  	WRITE_APPEND
> @@ -65,6 +68,8 @@ static bool			sample_address			=  false;
>  static bool			sample_time			=  false;
>  static bool			no_buildid			=  false;
>  static bool			no_buildid_cache		=  false;
> +static bool			want_tod			=  false;
> +static u64 			tod_sample_type;
>  static struct perf_evlist	*evsel_list;
>  
>  static long			samples				=      0;
> @@ -215,6 +220,9 @@ static void config_attr(struct perf_evsel *evsel, struct perf_evlist *evlist)
>  		attr->sample_type	|= PERF_SAMPLE_CPU;
>  	}
>  
> +	if (want_tod)
> +		attr->sample_type	|= tod_sample_type;
> +
>  	if (nodelay) {
>  		attr->watermark = 0;
>  		attr->wakeup_events = 1;
> @@ -248,6 +256,86 @@ static bool perf_evlist__equal(struct perf_evlist *evlist,
>  	return true;
>  }
>  
> +static int perf_event__synthesize_reftime(perf_event__handler_t process)
> +{
> +	union perf_event ev;
> +	struct timespec tp;
> +
> +	memset(&ev, 0, sizeof(ev));
> +
> +	if (gettimeofday(&ev.reftime.tv, NULL) != 0) {
> +		error("gettimeofday failed.\n");
> +		return -1;
> +	}
> +	if (clock_gettime(CLOCK_MONOTONIC, &tp) != 0) {
> +		error("clock_gettime(CLOCK_MONOTONIC) failed.\n");
> +		return -1;
> +	}
> +	ev.reftime.nsec = (u64) tp.tv_sec * NSEC_PER_SEC + (u64) tp.tv_nsec;
> +
> +	ev.header.type = PERF_RECORD_REFTIME;
> +	ev.header.size = sizeof(ev.reftime);
> +
> +	return process(&ev, NULL, session);
> +}
> +
> +static int add_timeofday_events(void)
> +{
> +	int rc, i, len;
> +	struct perf_event_attr attr;
> +	struct perf_evsel *evsel;
> +
> +	/* events that modify xtime */
> +	const char *tod_events[] = {"settimeofday",
> +				    "timekeeping_inject_offset",
> +				    "timekeeping_inject_sleeptime",
> +				    NULL};
> +
> +	i = 0;
> +	rc = -1;
> +	while (tod_events[i]) {
> +		memset(&attr, 0, sizeof(attr));
> +		len = strlen(tod_events[i]);
> +
> +		if (parse_single_tracepoint_event(TRACE_TOD_SUBSYSTEM,
> +			tod_events[i], len, &attr, NULL) == EVT_FAILED) {
> +			error("Failed to parse event %s\n", tod_events[i]);
> +			goto out;
> +		}
> +
> +		evsel = perf_evsel__new(&attr, evsel_list->nr_entries);
> +		if (evsel == NULL)
> +			return -1;
> +
> +		perf_evlist__add(evsel_list, evsel);
> +
> +		/* +2 for ':' delimiter and string terminator */
> +		evsel->name = calloc(TRACE_TOD_SUBSYSTEM_LEN + len + 2, 1);
> +		if (!evsel->name)
> +			return -1;
> +
> +		sprintf(evsel->name, "timekeeping:%s", tod_events[i]);
> +
> +		tod_sample_type |= attr.sample_type;
> +
> +		++i;
> +	}
> +
> +	/*
> +	 * right now sample_type for all samples needs to be the same.
> +	 * tracepoints are collected at sample period 1 and hence do not
> +	 * request the period with the sample. However, default for record
> +	 * is cycles at a frequency. So, until this sample_type mess is
> +	 * fixed....
> +	 */
> +	if (freq)
> +		tod_sample_type |= PERF_SAMPLE_PERIOD;
> +
> +	rc = 0;
> +out:
> +	return rc;
> +}

So I feel uncomfortable with this tod_sample_type hack. I think we can't really continue
with this fixed sample_type per session given the kind of hacks that involves.

One thing we could do is to split session->sample_type into an array with one sample
type per event type (hardware, breakpoint, software, tracepoint).

And then each builtin tool can provide their constraints on top of these values:

- builtin-report wants sample_type[HARDWARE] == sample_type[SOFTWARE] == sample_type[TRACEPOINT] == sample_type[BREAKPOINT]
  although that may be tunable by the time but we can start with that.
- builtin-script has no specific constraints, except that sample_type[i] meets what the user passed as a parameter
- etc..

Constraints can probably default to sample_type[i] == sample_type[i+1] to mimic the current behaviour. Then tools
can override that.

What do you think?

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/6] perf: add reference time event
  2011-06-17 14:04     ` David Ahern
@ 2011-06-17 14:17       ` Frederic Weisbecker
  2011-06-17 14:28         ` David Ahern
  2011-07-11  4:20         ` David Ahern
  0 siblings, 2 replies; 26+ messages in thread
From: Frederic Weisbecker @ 2011-06-17 14:17 UTC (permalink / raw)
  To: David Ahern
  Cc: linux-perf-users, linux-kernel, acme, mingo, peterz, paulus, tglx

On Fri, Jun 17, 2011 at 08:04:59AM -0600, David Ahern wrote:
> 
> 
> On 06/17/2011 07:32 AM, Frederic Weisbecker wrote:
> > On Tue, Jun 07, 2011 at 05:55:46PM -0600, David Ahern wrote:
> >> For initial perf_clock to time-of-day correlation.
> >>
> >> Signed-off-by: David Ahern <dsahern@gmail.com>
> >> ---
> >>  tools/perf/util/event.c   |    1 +
> >>  tools/perf/util/event.h   |    8 ++++++++
> >>  tools/perf/util/session.c |    4 ++++
> >>  tools/perf/util/session.h |    3 ++-
> >>  4 files changed, 15 insertions(+), 1 deletions(-)
> >>
> >> diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
> >> index 3c1b8a6..1a89a04 100644
> >> --- a/tools/perf/util/event.c
> >> +++ b/tools/perf/util/event.c
> >> @@ -24,6 +24,7 @@ static const char *perf_event__names[] = {
> >>  	[PERF_RECORD_HEADER_TRACING_DATA]	= "TRACING_DATA",
> >>  	[PERF_RECORD_HEADER_BUILD_ID]		= "BUILD_ID",
> >>  	[PERF_RECORD_FINISHED_ROUND]		= "FINISHED_ROUND",
> >> +	[PERF_RECORD_REFTIME]			= "REF_TIME",
> >>  };
> >>  
> >>  const char *perf_event__name(unsigned int id)
> >> diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
> >> index 1d7f664..f481f90 100644
> >> --- a/tools/perf/util/event.h
> >> +++ b/tools/perf/util/event.h
> >> @@ -98,6 +98,7 @@ enum perf_user_event_type { /* above any possible kernel type */
> >>  	PERF_RECORD_HEADER_TRACING_DATA		= 66,
> >>  	PERF_RECORD_HEADER_BUILD_ID		= 67,
> >>  	PERF_RECORD_FINISHED_ROUND		= 68,
> >> +	PERF_RECORD_REFTIME			= 69,
> > 
> > We would like to avoid adding more custom events like these. They were very convenient
> > but they steal the kernel event type space. They are deemed for removal in the long term.
> > 
> > Another idea to achieve what you want would be to create a new perf event header feature,
> > like HEADER_TRACE_INFO or HEADER_BUILD_ID are. Then use that to create a space in the perf
> > file to save that couple of clocks initial values.
> 
> you mean like this:
> https://lkml.org/lkml/2010/12/7/813
> 
> David

Exactly, why did you change?

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/6] trace: add tracepoints to timekeeping code - xtime changes
  2011-06-17 14:13     ` David Ahern
@ 2011-06-17 14:19       ` Frederic Weisbecker
  2011-08-15  4:03       ` David Ahern
  1 sibling, 0 replies; 26+ messages in thread
From: Frederic Weisbecker @ 2011-06-17 14:19 UTC (permalink / raw)
  To: David Ahern, Thomas Gleixner, Peter Zijlstra
  Cc: linux-perf-users, linux-kernel, acme, mingo, paulus, johnstul,
	rostedt

On Fri, Jun 17, 2011 at 08:13:19AM -0600, David Ahern wrote:
> 
> 
> On 06/17/2011 07:23 AM, Frederic Weisbecker wrote:
> > On Tue, Jun 07, 2011 at 05:55:03PM -0600, David Ahern wrote:
> >> Trace points in timekeeping.c where xtime is modified by a user
> >> or ntp.
> >>
> >> Signed-off-by: David Ahern <dsahern@gmail.com>
> >> ---
> >>  include/trace/events/timekeeping.h |   43 ++++++++++++++++++++++++++++++++++++
> >>  kernel/time/timekeeping.c          |    8 ++++++
> >>  2 files changed, 51 insertions(+), 0 deletions(-)
> >>  create mode 100644 include/trace/events/timekeeping.h
> >>
> >> diff --git a/include/trace/events/timekeeping.h b/include/trace/events/timekeeping.h
> >> new file mode 100644
> >> index 0000000..3d5d083
> >> --- /dev/null
> >> +++ b/include/trace/events/timekeeping.h
> >> @@ -0,0 +1,43 @@
> >> +#undef TRACE_SYSTEM
> >> +#define TRACE_SYSTEM timekeeping
> >> +
> >> +#if !defined(_TRACE_TIMEKEEP_H) || defined(TRACE_HEADER_MULTI_READ)
> >> +#define _TRACE_TIMEKEEP_H
> >> +
> >> +#include <linux/tracepoint.h>
> >> +#include <linux/time.h>
> >> +
> >> +DECLARE_EVENT_CLASS(tod_template,
> >> +
> >> +	TP_PROTO(const struct timespec *tv),
> >> +
> >> +	TP_ARGS(tv),
> >> +
> >> +	TP_STRUCT__entry(
> >> +		__field( __kernel_time_t,	tv_sec)
> >> +		__field( long,			tv_nsec)
> >> +	),
> >> +
> >> +	TP_fast_assign(
> >> +		__entry->tv_sec  = tv->tv_sec;
> >> +		__entry->tv_nsec = tv->tv_nsec;
> >> +	),
> >> +
> >> +	TP_printk("tv_sec=%ld tv_nsec=%ld", __entry->tv_sec, __entry->tv_nsec)
> >> +);
> >> +
> >> +DEFINE_EVENT(tod_template, settimeofday,
> >> +	TP_PROTO(const struct timespec *tv),
> >> +	TP_ARGS(tv));
> >> +
> >> +DEFINE_EVENT(tod_template, timekeeping_inject_offset,
> >> +	TP_PROTO(const struct timespec *tv),
> >> +	TP_ARGS(tv));
> >> +
> >> +DEFINE_EVENT(tod_template, timekeeping_inject_sleeptime,
> >> +	TP_PROTO(const struct timespec *tv),
> >> +	TP_ARGS(tv));
> > 
> > Does the fact it's any of the three way of updating xtime make any
> > difference from the user point of view?
> 
> This use case only cares that xtime is updated.
> 
> > 
> > If not can we rather factorize that in a single settimeofday tracepoint?
> > Or update_time_of_day if we don't want to confuse the user with the
> > syscall.
> 
> Peter and Thomas expressed interest in timekeeping tracepoints. How the
> update happens might be wanted. If desired I can consolidate xtime = *tv
> into a single update function and put the trace point there.

Peter, Thomas, any opinion about that?

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 4/6] perf record: add time-of-day option
  2011-06-17 14:14   ` Frederic Weisbecker
@ 2011-06-17 14:23     ` David Ahern
  2011-06-17 15:15       ` Frederic Weisbecker
  0 siblings, 1 reply; 26+ messages in thread
From: David Ahern @ 2011-06-17 14:23 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: linux-perf-users, linux-kernel, acme, mingo, peterz, paulus, tglx

On 06/17/2011 08:14 AM, Frederic Weisbecker wrote:
> 
> So I feel uncomfortable with this tod_sample_type hack. I think we can't really continue
> with this fixed sample_type per session given the kind of hacks that involves.
> 
> One thing we could do is to split session->sample_type into an array with one sample
> type per event type (hardware, breakpoint, software, tracepoint).
> 
> And then each builtin tool can provide their constraints on top of these values:
> 
> - builtin-report wants sample_type[HARDWARE] == sample_type[SOFTWARE] == sample_type[TRACEPOINT] == sample_type[BREAKPOINT]
>   although that may be tunable by the time but we can start with that.
> - builtin-script has no specific constraints, except that sample_type[i] meets what the user passed as a parameter
> - etc..
> 
> Constraints can probably default to sample_type[i] == sample_type[i+1] to mimic the current behaviour. Then tools
> can override that.
> 
> What do you think?

I started working on sample_type refactoring right after sending this
patchset (though I got sidetracked). Each evsel in the list has a
perf_attr struct which has a sample_type. Why not use that which allows
events to have their own sample type - versus a type per event type?

I'll see if I can get back to it in the next few days and get a better
idea of the pain involved with the refactoring.

David

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/6] perf: add reference time event
  2011-06-17 14:17       ` Frederic Weisbecker
@ 2011-06-17 14:28         ` David Ahern
  2011-07-11  4:20         ` David Ahern
  1 sibling, 0 replies; 26+ messages in thread
From: David Ahern @ 2011-06-17 14:28 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: linux-perf-users, linux-kernel, acme, mingo, peterz, paulus, tglx

On 06/17/2011 08:17 AM, Frederic Weisbecker wrote:
>>> We would like to avoid adding more custom events like these. They were very convenient
>>> but they steal the kernel event type space. They are deemed for removal in the long term.
>>>
>>> Another idea to achieve what you want would be to create a new perf event header feature,
>>> like HEADER_TRACE_INFO or HEADER_BUILD_ID are. Then use that to create a space in the perf
>>> file to save that couple of clocks initial values.
>>
>> you mean like this:
>> https://lkml.org/lkml/2010/12/7/813
>>
>> David
> 
> Exactly, why did you change?

As I recall suggestions on adding new clock events and using those for
periodic samples. Once we have the initial correlation (the point of
this patch) and updates to xtime (the new tracepoints) periodic events
should not be needed.

I'll go back to this option -- and address comments from that round as well.

David

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 4/6] perf record: add time-of-day option
  2011-06-17 14:23     ` David Ahern
@ 2011-06-17 15:15       ` Frederic Weisbecker
  2011-08-15  4:24         ` David Ahern
  0 siblings, 1 reply; 26+ messages in thread
From: Frederic Weisbecker @ 2011-06-17 15:15 UTC (permalink / raw)
  To: David Ahern
  Cc: linux-perf-users, linux-kernel, acme, mingo, peterz, paulus, tglx

On Fri, Jun 17, 2011 at 08:23:01AM -0600, David Ahern wrote:
> On 06/17/2011 08:14 AM, Frederic Weisbecker wrote:
> > 
> > So I feel uncomfortable with this tod_sample_type hack. I think we can't really continue
> > with this fixed sample_type per session given the kind of hacks that involves.
> > 
> > One thing we could do is to split session->sample_type into an array with one sample
> > type per event type (hardware, breakpoint, software, tracepoint).
> > 
> > And then each builtin tool can provide their constraints on top of these values:
> > 
> > - builtin-report wants sample_type[HARDWARE] == sample_type[SOFTWARE] == sample_type[TRACEPOINT] == sample_type[BREAKPOINT]
> >   although that may be tunable by the time but we can start with that.
> > - builtin-script has no specific constraints, except that sample_type[i] meets what the user passed as a parameter
> > - etc..
> > 
> > Constraints can probably default to sample_type[i] == sample_type[i+1] to mimic the current behaviour. Then tools
> > can override that.
> > 
> > What do you think?
> 
> I started working on sample_type refactoring right after sending this
> patchset (though I got sidetracked). Each evsel in the list has a
> perf_attr struct which has a sample_type. Why not use that which allows
> events to have their own sample type - versus a type per event type?

This can make sense, I can figure out some cases where such granularity can be
useful. Branch recording doesn't care about recording period for example I think.

> 
> I'll see if I can get back to it in the next few days and get a better
> idea of the pain involved with the refactoring.

Thanks a lot :)

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/6] perf: add reference time event
  2011-06-17 14:17       ` Frederic Weisbecker
  2011-06-17 14:28         ` David Ahern
@ 2011-07-11  4:20         ` David Ahern
  2011-07-12 14:30           ` Frederic Weisbecker
  1 sibling, 1 reply; 26+ messages in thread
From: David Ahern @ 2011-07-11  4:20 UTC (permalink / raw)
  To: Frederic Weisbecker, peterz
  Cc: linux-perf-users, linux-kernel, acme, mingo, paulus, tglx

On 06/17/2011 08:17 AM, Frederic Weisbecker wrote:
> On Fri, Jun 17, 2011 at 08:04:59AM -0600, David Ahern wrote:
>>
>>
>> On 06/17/2011 07:32 AM, Frederic Weisbecker wrote:
>>> On Tue, Jun 07, 2011 at 05:55:46PM -0600, David Ahern wrote:
>>>> For initial perf_clock to time-of-day correlation.
>>>>
>>>> Signed-off-by: David Ahern <dsahern@gmail.com>
>>>> ---
>>>>  tools/perf/util/event.c   |    1 +
>>>>  tools/perf/util/event.h   |    8 ++++++++
>>>>  tools/perf/util/session.c |    4 ++++
>>>>  tools/perf/util/session.h |    3 ++-
>>>>  4 files changed, 15 insertions(+), 1 deletions(-)
>>>>
>>>> diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
>>>> index 3c1b8a6..1a89a04 100644
>>>> --- a/tools/perf/util/event.c
>>>> +++ b/tools/perf/util/event.c
>>>> @@ -24,6 +24,7 @@ static const char *perf_event__names[] = {
>>>>  	[PERF_RECORD_HEADER_TRACING_DATA]	= "TRACING_DATA",
>>>>  	[PERF_RECORD_HEADER_BUILD_ID]		= "BUILD_ID",
>>>>  	[PERF_RECORD_FINISHED_ROUND]		= "FINISHED_ROUND",
>>>> +	[PERF_RECORD_REFTIME]			= "REF_TIME",
>>>>  };
>>>>  
>>>>  const char *perf_event__name(unsigned int id)
>>>> diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
>>>> index 1d7f664..f481f90 100644
>>>> --- a/tools/perf/util/event.h
>>>> +++ b/tools/perf/util/event.h
>>>> @@ -98,6 +98,7 @@ enum perf_user_event_type { /* above any possible kernel type */
>>>>  	PERF_RECORD_HEADER_TRACING_DATA		= 66,
>>>>  	PERF_RECORD_HEADER_BUILD_ID		= 67,
>>>>  	PERF_RECORD_FINISHED_ROUND		= 68,
>>>> +	PERF_RECORD_REFTIME			= 69,
>>>
>>> We would like to avoid adding more custom events like these. They were very convenient
>>> but they steal the kernel event type space. They are deemed for removal in the long term.
>>>
>>> Another idea to achieve what you want would be to create a new perf event header feature,
>>> like HEADER_TRACE_INFO or HEADER_BUILD_ID are. Then use that to create a space in the perf
>>> file to save that couple of clocks initial values.
>>
>> you mean like this:
>> https://lkml.org/lkml/2010/12/7/813
>>
>> David
> 
> Exactly, why did you change?

Finally getting back to this.

The answer to the 'why' is that putting a reference timestamp in the
header field does not work for file appends across reboots. ie., the case:
perf record --tod ...
reboot
perf record -A --tod ...

perf_clock timestamps change across reboots so the reference time
created by the first invocation is not valid for the append case. The
discussion then drifted towards having a kernel side event which per
past patch sets has its own issues.

So to summarize the options proposed to date and issues with the proposals:
1. reference timestamp in header
   - does not work for appends across reboots

2. synthesized events
   - preference against them

3. kernel side event
   - cannot generate an initial sample (with counter value and
perf_clock timestamp) on demand - e.g., start of session; a proposal to
use an ioctl to add one to the event stream was shot down

At this point the only idea that comes to mind is to use a combination
of 2 and 3: add the kernel side clock event
(https://lkml.org/lkml/2011/2/18/11), read the realtime clock counter,
read the monotonic clock timestamp (ie., perf_clock value), and
synthesize a perf sample that is written to the file. The append case
(with mismatch in --tod options between record invocations) would be
handled by having the kernel side clock event in the event list
(perf_evlist__equal would fail if --tod was not used for all invocations).

David

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/6] perf: add reference time event
  2011-07-11  4:20         ` David Ahern
@ 2011-07-12 14:30           ` Frederic Weisbecker
  2011-07-12 16:35             ` David Ahern
  2011-08-04 15:10             ` David Ahern
  0 siblings, 2 replies; 26+ messages in thread
From: Frederic Weisbecker @ 2011-07-12 14:30 UTC (permalink / raw)
  To: David Ahern, Ingo Molnar, Peter Zijlstra,
	Arnaldo Carvalho de Melo
  Cc: linux-perf-users, linux-kernel, paulus, tglx

On Sun, Jul 10, 2011 at 10:20:29PM -0600, David Ahern wrote:
> On 06/17/2011 08:17 AM, Frederic Weisbecker wrote:
> > On Fri, Jun 17, 2011 at 08:04:59AM -0600, David Ahern wrote:
> >>
> >>
> >> On 06/17/2011 07:32 AM, Frederic Weisbecker wrote:
> >>> On Tue, Jun 07, 2011 at 05:55:46PM -0600, David Ahern wrote:
> >>>> For initial perf_clock to time-of-day correlation.
> >>>>
> >>>> Signed-off-by: David Ahern <dsahern@gmail.com>
> >>>> ---
> >>>>  tools/perf/util/event.c   |    1 +
> >>>>  tools/perf/util/event.h   |    8 ++++++++
> >>>>  tools/perf/util/session.c |    4 ++++
> >>>>  tools/perf/util/session.h |    3 ++-
> >>>>  4 files changed, 15 insertions(+), 1 deletions(-)
> >>>>
> >>>> diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
> >>>> index 3c1b8a6..1a89a04 100644
> >>>> --- a/tools/perf/util/event.c
> >>>> +++ b/tools/perf/util/event.c
> >>>> @@ -24,6 +24,7 @@ static const char *perf_event__names[] = {
> >>>>  	[PERF_RECORD_HEADER_TRACING_DATA]	= "TRACING_DATA",
> >>>>  	[PERF_RECORD_HEADER_BUILD_ID]		= "BUILD_ID",
> >>>>  	[PERF_RECORD_FINISHED_ROUND]		= "FINISHED_ROUND",
> >>>> +	[PERF_RECORD_REFTIME]			= "REF_TIME",
> >>>>  };
> >>>>  
> >>>>  const char *perf_event__name(unsigned int id)
> >>>> diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
> >>>> index 1d7f664..f481f90 100644
> >>>> --- a/tools/perf/util/event.h
> >>>> +++ b/tools/perf/util/event.h
> >>>> @@ -98,6 +98,7 @@ enum perf_user_event_type { /* above any possible kernel type */
> >>>>  	PERF_RECORD_HEADER_TRACING_DATA		= 66,
> >>>>  	PERF_RECORD_HEADER_BUILD_ID		= 67,
> >>>>  	PERF_RECORD_FINISHED_ROUND		= 68,
> >>>> +	PERF_RECORD_REFTIME			= 69,
> >>>
> >>> We would like to avoid adding more custom events like these. They were very convenient
> >>> but they steal the kernel event type space. They are deemed for removal in the long term.
> >>>
> >>> Another idea to achieve what you want would be to create a new perf event header feature,
> >>> like HEADER_TRACE_INFO or HEADER_BUILD_ID are. Then use that to create a space in the perf
> >>> file to save that couple of clocks initial values.
> >>
> >> you mean like this:
> >> https://lkml.org/lkml/2010/12/7/813
> >>
> >> David
> > 
> > Exactly, why did you change?
> 
> Finally getting back to this.
> 
> The answer to the 'why' is that putting a reference timestamp in the
> header field does not work for file appends across reboots. ie., the case:
> perf record --tod ...
> reboot
> perf record -A --tod ...

Damn append mode. I doubt that thing is really used. And it just complexifies
everything. It might be wise to get rid of it?

Ingo, Peter, Arnaldo?
 
> perf_clock timestamps change across reboots so the reference time
> created by the first invocation is not valid for the append case. The
> discussion then drifted towards having a kernel side event which per
> past patch sets has its own issues.
> 
> So to summarize the options proposed to date and issues with the proposals:
> 1. reference timestamp in header
>    - does not work for appends across reboots
> 
> 2. synthesized events
>    - preference against them
> 
> 3. kernel side event
>    - cannot generate an initial sample (with counter value and
> perf_clock timestamp) on demand - e.g., start of session; a proposal to
> use an ioctl to add one to the event stream was shot down
> 
> At this point the only idea that comes to mind is to use a combination
> of 2 and 3: add the kernel side clock event
> (https://lkml.org/lkml/2011/2/18/11), read the realtime clock counter,
> read the monotonic clock timestamp (ie., perf_clock value), and
> synthesize a perf sample that is written to the file. The append case
> (with mismatch in --tod options between record invocations) would be
> handled by having the kernel side clock event in the event list
> (perf_evlist__equal would fail if --tod was not used for all invocations).

Actually you first have to face a deeper problem. events are not stored
in order in the flow, but they are sorted from perf_session__process_events().

The bunch of sorted events is flushed periodically and sent to the consumer.

See flush_sample_queue().

And this sorting is made on top of the sample->time timestamps. So events
are first sorted on sample->time and only afterward you have access to your
gtod tracepoint samples. But if that gtod sample has been taken after a reboot
then its sample->time is not consistant with the rest. It is not well sorted
and thus the reftime won't be updated at the right moment.

So the problem is that reftime update already depends on a consistant cpu
timestamp.

I can't think about a sane way to work around that. Sorting on gtod + cpu timestamp
is not a solution because gtod can change.

I'd rather propose to refuse append mode as long as we have any timestamp. That includes
gtod but also sample timestamps. They are buggy if we reboot.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/6] perf: add reference time event
  2011-07-12 14:30           ` Frederic Weisbecker
@ 2011-07-12 16:35             ` David Ahern
  2011-07-12 17:03               ` Frederic Weisbecker
  2011-08-04 15:10             ` David Ahern
  1 sibling, 1 reply; 26+ messages in thread
From: David Ahern @ 2011-07-12 16:35 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: Ingo Molnar, Peter Zijlstra, Arnaldo Carvalho de Melo,
	linux-perf-users, linux-kernel, paulus, tglx

On 07/12/2011 08:30 AM, Frederic Weisbecker wrote:
> Actually you first have to face a deeper problem. events are not stored
> in order in the flow, but they are sorted from perf_session__process_events().
> 
> The bunch of sorted events is flushed periodically and sent to the consumer.
> 
> See flush_sample_queue().
> 
> And this sorting is made on top of the sample->time timestamps. So events
> are first sorted on sample->time and only afterward you have access to your
> gtod tracepoint samples. But if that gtod sample has been taken after a reboot
> then its sample->time is not consistant with the rest. It is not well sorted
> and thus the reftime won't be updated at the right moment.

I was aware of this function and the buffering /sorting it does; I have
similar code in my local analysis tool (for older perf userspace). I had
not stopped to consider the impacts on the append mode since I never use
that option.

> So the problem is that reftime update already depends on a consistant cpu
> timestamp.
> 
> I can't think about a sane way to work around that. Sorting on gtod + cpu timestamp
> is not a solution because gtod can change.
> 
> I'd rather propose to refuse append mode as long as we have any timestamp. That includes
> gtod but also sample timestamps. They are buggy if we reboot.

I'm fine with restricting append mode if samples have timestamps. In
that case the time of day reference can be added to the header per the
original patch set. The change in behavior does impact a lot of record
use cases (system wide, raw).

David

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/6] perf: add reference time event
  2011-07-12 16:35             ` David Ahern
@ 2011-07-12 17:03               ` Frederic Weisbecker
  0 siblings, 0 replies; 26+ messages in thread
From: Frederic Weisbecker @ 2011-07-12 17:03 UTC (permalink / raw)
  To: David Ahern
  Cc: Ingo Molnar, Peter Zijlstra, Arnaldo Carvalho de Melo,
	linux-perf-users, linux-kernel, paulus, tglx

On Tue, Jul 12, 2011 at 10:35:21AM -0600, David Ahern wrote:
> On 07/12/2011 08:30 AM, Frederic Weisbecker wrote:
> > Actually you first have to face a deeper problem. events are not stored
> > in order in the flow, but they are sorted from perf_session__process_events().
> > 
> > The bunch of sorted events is flushed periodically and sent to the consumer.
> > 
> > See flush_sample_queue().
> > 
> > And this sorting is made on top of the sample->time timestamps. So events
> > are first sorted on sample->time and only afterward you have access to your
> > gtod tracepoint samples. But if that gtod sample has been taken after a reboot
> > then its sample->time is not consistant with the rest. It is not well sorted
> > and thus the reftime won't be updated at the right moment.
> 
> I was aware of this function and the buffering /sorting it does; I have
> similar code in my local analysis tool (for older perf userspace). I had
> not stopped to consider the impacts on the append mode since I never use
> that option.
> 
> > So the problem is that reftime update already depends on a consistant cpu
> > timestamp.
> > 
> > I can't think about a sane way to work around that. Sorting on gtod + cpu timestamp
> > is not a solution because gtod can change.
> > 
> > I'd rather propose to refuse append mode as long as we have any timestamp. That includes
> > gtod but also sample timestamps. They are buggy if we reboot.
> 
> I'm fine with restricting append mode if samples have timestamps. In
> that case the time of day reference can be added to the header per the
> original patch set. The change in behavior does impact a lot of record
> use cases (system wide, raw).

Yep, sounds good.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/6] perf: add reference time event
  2011-07-12 14:30           ` Frederic Weisbecker
  2011-07-12 16:35             ` David Ahern
@ 2011-08-04 15:10             ` David Ahern
       [not found]               ` <20110808193033.GA2744@ghostprotocols.net>
  1 sibling, 1 reply; 26+ messages in thread
From: David Ahern @ 2011-08-04 15:10 UTC (permalink / raw)
  To: Frederic Weisbecker, Ingo Molnar, Peter Zijlstra,
	Arnaldo Carvalho de Melo
  Cc: linux-perf-users, linux-kernel, paulus, tglx

On 07/12/2011 08:30 AM, Frederic Weisbecker wrote:
> On Sun, Jul 10, 2011 at 10:20:29PM -0600, David Ahern wrote:
>> On 06/17/2011 08:17 AM, Frederic Weisbecker wrote:
>>> On Fri, Jun 17, 2011 at 08:04:59AM -0600, David Ahern wrote:
>>>>
>>>>
>>>> On 06/17/2011 07:32 AM, Frederic Weisbecker wrote:
>>>>> On Tue, Jun 07, 2011 at 05:55:46PM -0600, David Ahern wrote:
>>>>>> For initial perf_clock to time-of-day correlation.
>>>>>>
>>>>>> Signed-off-by: David Ahern <dsahern@gmail.com>
>>>>>> ---
>>>>>>  tools/perf/util/event.c   |    1 +
>>>>>>  tools/perf/util/event.h   |    8 ++++++++
>>>>>>  tools/perf/util/session.c |    4 ++++
>>>>>>  tools/perf/util/session.h |    3 ++-
>>>>>>  4 files changed, 15 insertions(+), 1 deletions(-)
>>>>>>
>>>>>> diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
>>>>>> index 3c1b8a6..1a89a04 100644
>>>>>> --- a/tools/perf/util/event.c
>>>>>> +++ b/tools/perf/util/event.c
>>>>>> @@ -24,6 +24,7 @@ static const char *perf_event__names[] = {
>>>>>>  	[PERF_RECORD_HEADER_TRACING_DATA]	= "TRACING_DATA",
>>>>>>  	[PERF_RECORD_HEADER_BUILD_ID]		= "BUILD_ID",
>>>>>>  	[PERF_RECORD_FINISHED_ROUND]		= "FINISHED_ROUND",
>>>>>> +	[PERF_RECORD_REFTIME]			= "REF_TIME",
>>>>>>  };
>>>>>>  
>>>>>>  const char *perf_event__name(unsigned int id)
>>>>>> diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
>>>>>> index 1d7f664..f481f90 100644
>>>>>> --- a/tools/perf/util/event.h
>>>>>> +++ b/tools/perf/util/event.h
>>>>>> @@ -98,6 +98,7 @@ enum perf_user_event_type { /* above any possible kernel type */
>>>>>>  	PERF_RECORD_HEADER_TRACING_DATA		= 66,
>>>>>>  	PERF_RECORD_HEADER_BUILD_ID		= 67,
>>>>>>  	PERF_RECORD_FINISHED_ROUND		= 68,
>>>>>> +	PERF_RECORD_REFTIME			= 69,
>>>>>
>>>>> We would like to avoid adding more custom events like these. They were very convenient
>>>>> but they steal the kernel event type space. They are deemed for removal in the long term.
>>>>>
>>>>> Another idea to achieve what you want would be to create a new perf event header feature,
>>>>> like HEADER_TRACE_INFO or HEADER_BUILD_ID are. Then use that to create a space in the perf
>>>>> file to save that couple of clocks initial values.
>>>>
>>>> you mean like this:
>>>> https://lkml.org/lkml/2010/12/7/813
>>>>
>>>> David
>>>
>>> Exactly, why did you change?
>>
>> Finally getting back to this.
>>
>> The answer to the 'why' is that putting a reference timestamp in the
>> header field does not work for file appends across reboots. ie., the case:
>> perf record --tod ...
>> reboot
>> perf record -A --tod ...
> 
> Damn append mode. I doubt that thing is really used. And it just complexifies
> everything. It might be wise to get rid of it?
> 
> Ingo, Peter, Arnaldo?
>  
>> perf_clock timestamps change across reboots so the reference time
>> created by the first invocation is not valid for the append case. The
>> discussion then drifted towards having a kernel side event which per
>> past patch sets has its own issues.
>>
>> So to summarize the options proposed to date and issues with the proposals:
>> 1. reference timestamp in header
>>    - does not work for appends across reboots
>>
>> 2. synthesized events
>>    - preference against them
>>
>> 3. kernel side event
>>    - cannot generate an initial sample (with counter value and
>> perf_clock timestamp) on demand - e.g., start of session; a proposal to
>> use an ioctl to add one to the event stream was shot down
>>
>> At this point the only idea that comes to mind is to use a combination
>> of 2 and 3: add the kernel side clock event
>> (https://lkml.org/lkml/2011/2/18/11), read the realtime clock counter,
>> read the monotonic clock timestamp (ie., perf_clock value), and
>> synthesize a perf sample that is written to the file. The append case
>> (with mismatch in --tod options between record invocations) would be
>> handled by having the kernel side clock event in the event list
>> (perf_evlist__equal would fail if --tod was not used for all invocations).
> 
> Actually you first have to face a deeper problem. events are not stored
> in order in the flow, but they are sorted from perf_session__process_events().
> 
> The bunch of sorted events is flushed periodically and sent to the consumer.
> 
> See flush_sample_queue().
> 
> And this sorting is made on top of the sample->time timestamps. So events
> are first sorted on sample->time and only afterward you have access to your
> gtod tracepoint samples. But if that gtod sample has been taken after a reboot
> then its sample->time is not consistant with the rest. It is not well sorted
> and thus the reftime won't be updated at the right moment.
> 
> So the problem is that reftime update already depends on a consistant cpu
> timestamp.
> 
> I can't think about a sane way to work around that. Sorting on gtod + cpu timestamp
> is not a solution because gtod can change.
> 
> I'd rather propose to refuse append mode as long as we have any timestamp. That includes
> gtod but also sample timestamps. They are buggy if we reboot.

Arnaldo's sending patches, so I take it he's dug out from backlog. ;-)

Any objections to not allowing append mode for perf-record if samples
contain timestamps?

David

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/6] trace: add tracepoints to timekeeping code - xtime changes
  2011-06-17 14:13     ` David Ahern
  2011-06-17 14:19       ` Frederic Weisbecker
@ 2011-08-15  4:03       ` David Ahern
  1 sibling, 0 replies; 26+ messages in thread
From: David Ahern @ 2011-08-15  4:03 UTC (permalink / raw)
  To: peterz, tglx
  Cc: Frederic Weisbecker, linux-perf-users, linux-kernel, acme, mingo,
	paulus, johnstul, rostedt

Thomas/Peter:

On 06/17/2011 08:13 AM, David Ahern wrote:
> 
> 
> On 06/17/2011 07:23 AM, Frederic Weisbecker wrote:
>> On Tue, Jun 07, 2011 at 05:55:03PM -0600, David Ahern wrote:
>>> Trace points in timekeeping.c where xtime is modified by a user
>>> or ntp.
>>>
>>> Signed-off-by: David Ahern <dsahern@gmail.com>
>>> ---
>>>  include/trace/events/timekeeping.h |   43 ++++++++++++++++++++++++++++++++++++
>>>  kernel/time/timekeeping.c          |    8 ++++++
>>>  2 files changed, 51 insertions(+), 0 deletions(-)
>>>  create mode 100644 include/trace/events/timekeeping.h
>>>
>>> diff --git a/include/trace/events/timekeeping.h b/include/trace/events/timekeeping.h
>>> new file mode 100644
>>> index 0000000..3d5d083
>>> --- /dev/null
>>> +++ b/include/trace/events/timekeeping.h
>>> @@ -0,0 +1,43 @@
>>> +#undef TRACE_SYSTEM
>>> +#define TRACE_SYSTEM timekeeping
>>> +
>>> +#if !defined(_TRACE_TIMEKEEP_H) || defined(TRACE_HEADER_MULTI_READ)
>>> +#define _TRACE_TIMEKEEP_H
>>> +
>>> +#include <linux/tracepoint.h>
>>> +#include <linux/time.h>
>>> +
>>> +DECLARE_EVENT_CLASS(tod_template,
>>> +
>>> +	TP_PROTO(const struct timespec *tv),
>>> +
>>> +	TP_ARGS(tv),
>>> +
>>> +	TP_STRUCT__entry(
>>> +		__field( __kernel_time_t,	tv_sec)
>>> +		__field( long,			tv_nsec)
>>> +	),
>>> +
>>> +	TP_fast_assign(
>>> +		__entry->tv_sec  = tv->tv_sec;
>>> +		__entry->tv_nsec = tv->tv_nsec;
>>> +	),
>>> +
>>> +	TP_printk("tv_sec=%ld tv_nsec=%ld", __entry->tv_sec, __entry->tv_nsec)
>>> +);
>>> +
>>> +DEFINE_EVENT(tod_template, settimeofday,
>>> +	TP_PROTO(const struct timespec *tv),
>>> +	TP_ARGS(tv));
>>> +
>>> +DEFINE_EVENT(tod_template, timekeeping_inject_offset,
>>> +	TP_PROTO(const struct timespec *tv),
>>> +	TP_ARGS(tv));
>>> +
>>> +DEFINE_EVENT(tod_template, timekeeping_inject_sleeptime,
>>> +	TP_PROTO(const struct timespec *tv),
>>> +	TP_ARGS(tv));
>>
>> Does the fact it's any of the three way of updating xtime make any
>> difference from the user point of view?
> 
> This use case only cares that xtime is updated.
> 
>>
>> If not can we rather factorize that in a single settimeofday tracepoint?
>> Or update_time_of_day if we don't want to confuse the user with the
>> syscall.
> 
> Peter and Thomas expressed interest in timekeeping tracepoints. How the
> update happens might be wanted. If desired I can consolidate xtime = *tv
> into a single update function and put the trace point there.

Any comments on the proposed trace points in the timekeeping code? I am
working on a new spin of the patches and would like to know if there are
changes needed here too.

Thanks,
David

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 3/6] perf: add reference time event
       [not found]               ` <20110808193033.GA2744@ghostprotocols.net>
@ 2011-08-15  4:06                 ` David Ahern
  0 siblings, 0 replies; 26+ messages in thread
From: David Ahern @ 2011-08-15  4:06 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo, Ingo Molnar
  Cc: Frederic Weisbecker, Peter Zijlstra, linux-perf-users,
	linux-kernel, paulus, tglx

Ingo:

On 08/08/2011 01:30 PM, Arnaldo Carvalho de Melo wrote:
>>>> The answer to the 'why' is that putting a reference timestamp in the
>>>> header field does not work for file appends across reboots. ie., the case:
>>>> perf record --tod ...
>>>> reboot
>>>> perf record -A --tod ...
>>>
>>> Damn append mode. I doubt that thing is really used. And it just complexifies
>>> everything. It might be wise to get rid of it?
>>>
>>> Ingo, Peter, Arnaldo?
>>>  
>>>> perf_clock timestamps change across reboots so the reference time
>>>> created by the first invocation is not valid for the append case. The
>>>> discussion then drifted towards having a kernel side event which per
>>>> past patch sets has its own issues.
>>>>
>>>> So to summarize the options proposed to date and issues with the proposals:
>>>> 1. reference timestamp in header
>>>>    - does not work for appends across reboots
>>>>
>>>> 2. synthesized events
>>>>    - preference against them
>>>>
>>>> 3. kernel side event
>>>>    - cannot generate an initial sample (with counter value and
>>>> perf_clock timestamp) on demand - e.g., start of session; a proposal to
>>>> use an ioctl to add one to the event stream was shot down
>>>>
>>>> At this point the only idea that comes to mind is to use a combination
>>>> of 2 and 3: add the kernel side clock event
>>>> (https://lkml.org/lkml/2011/2/18/11), read the realtime clock counter,
>>>> read the monotonic clock timestamp (ie., perf_clock value), and
>>>> synthesize a perf sample that is written to the file. The append case
>>>> (with mismatch in --tod options between record invocations) would be
>>>> handled by having the kernel side clock event in the event list
>>>> (perf_evlist__equal would fail if --tod was not used for all invocations).
>>>
>>> Actually you first have to face a deeper problem. events are not stored
>>> in order in the flow, but they are sorted from perf_session__process_events().
>>>
>>> The bunch of sorted events is flushed periodically and sent to the consumer.
>>>
>>> See flush_sample_queue().
>>>
>>> And this sorting is made on top of the sample->time timestamps. So events
>>> are first sorted on sample->time and only afterward you have access to your
>>> gtod tracepoint samples. But if that gtod sample has been taken after a reboot
>>> then its sample->time is not consistant with the rest. It is not well sorted
>>> and thus the reftime won't be updated at the right moment.
>>>
>>> So the problem is that reftime update already depends on a consistant cpu
>>> timestamp.
>>>
>>> I can't think about a sane way to work around that. Sorting on gtod + cpu timestamp
>>> is not a solution because gtod can change.
>>>
>>> I'd rather propose to refuse append mode as long as we have any timestamp. That includes
>>> gtod but also sample timestamps. They are buggy if we reboot.
>>
>> Arnaldo's sending patches, so I take it he's dug out from backlog. ;-)
>>
>> Any objections to not allowing append mode for perf-record if samples
>> contain timestamps?
> 
> I never used append mode, but having these restrictions on append mode
> seems to be counter intuitive, either we make timestamps work with
> append mode or we remove append mode completely.
> 
> Ingo?
> 
> - Arnaldo

Any opinion on prohibiting append mode if samples contain timestamps? To
summarize perf_clock is reset on reboots which affects sample ordering
for the append case. We can either remove the append option or not allow
it if samples have timestamps.

Thanks,
David

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 4/6] perf record: add time-of-day option
  2011-06-17 15:15       ` Frederic Weisbecker
@ 2011-08-15  4:24         ` David Ahern
  0 siblings, 0 replies; 26+ messages in thread
From: David Ahern @ 2011-08-15  4:24 UTC (permalink / raw)
  To: Frederic Weisbecker, acme
  Cc: linux-perf-users, linux-kernel, mingo, peterz, paulus, tglx

On 06/17/2011 09:15 AM, Frederic Weisbecker wrote:
> On Fri, Jun 17, 2011 at 08:23:01AM -0600, David Ahern wrote:
>> On 06/17/2011 08:14 AM, Frederic Weisbecker wrote:
>>>
>>> So I feel uncomfortable with this tod_sample_type hack. I think we can't really continue
>>> with this fixed sample_type per session given the kind of hacks that involves.
>>>
>>> One thing we could do is to split session->sample_type into an array with one sample
>>> type per event type (hardware, breakpoint, software, tracepoint).
>>>
>>> And then each builtin tool can provide their constraints on top of these values:
>>>
>>> - builtin-report wants sample_type[HARDWARE] == sample_type[SOFTWARE] == sample_type[TRACEPOINT] == sample_type[BREAKPOINT]
>>>   although that may be tunable by the time but we can start with that.
>>> - builtin-script has no specific constraints, except that sample_type[i] meets what the user passed as a parameter
>>> - etc..
>>>
>>> Constraints can probably default to sample_type[i] == sample_type[i+1] to mimic the current behaviour. Then tools
>>> can override that.
>>>
>>> What do you think?
>>
>> I started working on sample_type refactoring right after sending this
>> patchset (though I got sidetracked). Each evsel in the list has a
>> perf_attr struct which has a sample_type. Why not use that which allows
>> events to have their own sample type - versus a type per event type?
> 
> This can make sense, I can figure out some cases where such granularity can be
> useful. Branch recording doesn't care about recording period for example I think.
> 
>>
>> I'll see if I can get back to it in the next few days and get a better
>> idea of the pain involved with the refactoring.
> 
> Thanks a lot :)

Coming back to this one ....

From what I can see sample_type has to be a global per perf session and
all samples have to use the same sample_type or a change is needed to
the API/ABI.

The perf_event_header does not have any information that uniquely
associates it with a specific event type. Right now
perf_evlist__id2evsel() is used to associate a sample with a specific
event (evsel) in the list, but that function requires a parsed sample.
To parse a sample we need the sample_type. So, the sample_type has to be
a global and the same for all samples.

David

^ permalink raw reply	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2011-08-15  4:24 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-06-07 23:53 [PATCH 0/6] Add time-of-day option to perf events David Ahern
2011-06-07 23:55 ` [PATCH 1/6] trace: add tracepoints to timekeeping code - xtime changes David Ahern
2011-06-17 13:23   ` Frederic Weisbecker
2011-06-17 14:13     ` David Ahern
2011-06-17 14:19       ` Frederic Weisbecker
2011-08-15  4:03       ` David Ahern
2011-06-07 23:55 ` [PATCH 2/6] perf utils: export parse_single_tracepoint_event David Ahern
2011-06-07 23:55 ` [PATCH 3/6] perf: add reference time event David Ahern
2011-06-17 13:32   ` Frederic Weisbecker
2011-06-17 14:04     ` David Ahern
2011-06-17 14:17       ` Frederic Weisbecker
2011-06-17 14:28         ` David Ahern
2011-07-11  4:20         ` David Ahern
2011-07-12 14:30           ` Frederic Weisbecker
2011-07-12 16:35             ` David Ahern
2011-07-12 17:03               ` Frederic Weisbecker
2011-08-04 15:10             ` David Ahern
     [not found]               ` <20110808193033.GA2744@ghostprotocols.net>
2011-08-15  4:06                 ` David Ahern
2011-06-07 23:56 ` [PATCH 4/6] perf record: add time-of-day option David Ahern
2011-06-17 14:14   ` Frederic Weisbecker
2011-06-17 14:23     ` David Ahern
2011-06-17 15:15       ` Frederic Weisbecker
2011-08-15  4:24         ` David Ahern
2011-06-07 23:56 ` [PATCH 5/6] perf script: pass trace event to print_trace_event David Ahern
2011-06-07 23:56 ` [PATCH 6/6] perf script: add time-of-day option for displaying events David Ahern
2011-06-14 18:42 ` [PATCH 0/6] Add time-of-day option to perf events David Ahern

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).