All of lore.kernel.org
 help / color / mirror / Atom feed
From: Steven Rostedt <rostedt@goodmis.org>
To: linux-kernel@vger.kernel.org
Cc: Ingo Molnar <mingo@kernel.org>,
	Andrew Morton <akpm@linux-foundation.org>
Subject: [for-next][PATCH 7/7] tracing: Add NMI tracing in hwlat detector
Date: Tue, 06 Sep 2016 09:39:39 -0400	[thread overview]
Message-ID: <20160906134001.215778024@goodmis.org> (raw)
In-Reply-To: 20160906133932.473440583@goodmis.org

[-- Attachment #1: 0007-tracing-Add-NMI-tracing-in-hwlat-detector.patch --]
[-- Type: text/plain, Size: 8452 bytes --]

From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>

As NMIs can also cause latency when interrupts are disabled, the hwlat
detectory has no way to know if the latency it detects is from an NMI or an
SMI or some other hardware glitch.

As ftrace_nmi_enter/exit() funtions are no longer used (except for sh, which
isn't supported anymore), I converted those to "arch_ftrace_nmi_enter/exit"
and use ftrace_nmi_enter/exit() to check if hwlat detector is tracing or
not, and if so, it calls into the hwlat utility.

Since the hwlat detector only has a single kthread that is spinning with
interrupts disabled, it marks what CPU it is on, and if the NMI callback
happens on that CPU, it records the time spent in that NMI. This is added to
the output that is generated by the hwlat detector as:

 #3     inner/outer(us):    9/9     ts:1470836488.206734548
 #4     inner/outer(us):    0/8     ts:1470836497.140808588
 #5     inner/outer(us):    0/6     ts:1470836499.140825168 nmi-total:5 nmi-count:1
 #6     inner/outer(us):    9/9     ts:1470836501.140841748

All time is still tracked in microseconds.

The NMI information is only shown when an NMI occurred during the sample.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/sh/kernel/ftrace.c      |  4 ++--
 include/linux/ftrace_irq.h   | 31 +++++++++++++++++++++++----
 kernel/trace/trace_entries.h |  8 +++++--
 kernel/trace/trace_hwlat.c   | 51 ++++++++++++++++++++++++++++++++++++++++++++
 kernel/trace/trace_output.c  | 16 +++++++++++++-
 5 files changed, 101 insertions(+), 9 deletions(-)

diff --git a/arch/sh/kernel/ftrace.c b/arch/sh/kernel/ftrace.c
index 38993e09ef03..deb9eba3a43d 100644
--- a/arch/sh/kernel/ftrace.c
+++ b/arch/sh/kernel/ftrace.c
@@ -139,7 +139,7 @@ static void ftrace_mod_code(void)
 		clear_mod_flag();
 }
 
-void ftrace_nmi_enter(void)
+void arch_ftrace_nmi_enter(void)
 {
 	if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) {
 		smp_rmb();
@@ -150,7 +150,7 @@ void ftrace_nmi_enter(void)
 	smp_mb();
 }
 
-void ftrace_nmi_exit(void)
+void arch_ftrace_nmi_exit(void)
 {
 	/* Finish all executions before clearing nmi_running */
 	smp_mb();
diff --git a/include/linux/ftrace_irq.h b/include/linux/ftrace_irq.h
index dca7bf8cffe2..4ec2c9b205f2 100644
--- a/include/linux/ftrace_irq.h
+++ b/include/linux/ftrace_irq.h
@@ -3,11 +3,34 @@
 
 
 #ifdef CONFIG_FTRACE_NMI_ENTER
-extern void ftrace_nmi_enter(void);
-extern void ftrace_nmi_exit(void);
+extern void arch_ftrace_nmi_enter(void);
+extern void arch_ftrace_nmi_exit(void);
 #else
-static inline void ftrace_nmi_enter(void) { }
-static inline void ftrace_nmi_exit(void) { }
+static inline void arch_ftrace_nmi_enter(void) { }
+static inline void arch_ftrace_nmi_exit(void) { }
 #endif
 
+#ifdef CONFIG_HWLAT_TRACER
+extern bool trace_hwlat_callback_enabled;
+extern void trace_hwlat_callback(bool enter);
+#endif
+
+static inline void ftrace_nmi_enter(void)
+{
+#ifdef CONFIG_HWLAT_TRACER
+	if (trace_hwlat_callback_enabled)
+		trace_hwlat_callback(true);
+#endif
+	arch_ftrace_nmi_enter();
+}
+
+static inline void ftrace_nmi_exit(void)
+{
+	arch_ftrace_nmi_exit();
+#ifdef CONFIG_HWLAT_TRACER
+	if (trace_hwlat_callback_enabled)
+		trace_hwlat_callback(false);
+#endif
+}
+
 #endif /* _LINUX_FTRACE_IRQ_H */
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index 70d47dd99359..d1cc37e78f99 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -330,18 +330,22 @@ FTRACE_ENTRY(hwlat, hwlat_entry,
 	F_STRUCT(
 		__field(	u64,			duration	)
 		__field(	u64,			outer_duration	)
+		__field(	u64,			nmi_total_ts	)
 		__field_struct( struct timespec,	timestamp	)
 		__field_desc(	long,	timestamp,	tv_sec		)
 		__field_desc(	long,	timestamp,	tv_nsec		)
+		__field(	unsigned int,		nmi_count	)
 		__field(	unsigned int,		seqnum		)
 	),
 
-	F_printk("cnt:%u\tts:%010lu.%010lu\tinner:%llu\touter:%llu\n",
+	F_printk("cnt:%u\tts:%010lu.%010lu\tinner:%llu\touter:%llunmi-ts:%llu\tnmi-count:%u\n",
 		 __entry->seqnum,
 		 __entry->tv_sec,
 		 __entry->tv_nsec,
 		 __entry->duration,
-		 __entry->outer_duration),
+		 __entry->outer_duration,
+		 __entry->nmi_total_ts,
+		 __entry->nmi_count),
 
 	FILTER_OTHER
 );
diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c
index 65aab3914a56..b97286c48735 100644
--- a/kernel/trace/trace_hwlat.c
+++ b/kernel/trace/trace_hwlat.c
@@ -64,6 +64,15 @@ static struct dentry *hwlat_sample_window;	/* sample window us */
 /* Save the previous tracing_thresh value */
 static unsigned long save_tracing_thresh;
 
+/* NMI timestamp counters */
+static u64 nmi_ts_start;
+static u64 nmi_total_ts;
+static int nmi_count;
+static int nmi_cpu;
+
+/* Tells NMIs to call back to the hwlat tracer to record timestamps */
+bool trace_hwlat_callback_enabled;
+
 /* If the user changed threshold, remember it */
 static u64 last_tracing_thresh = DEFAULT_LAT_THRESHOLD * NSEC_PER_USEC;
 
@@ -72,7 +81,9 @@ struct hwlat_sample {
 	u64		seqnum;		/* unique sequence */
 	u64		duration;	/* delta */
 	u64		outer_duration;	/* delta (outer loop) */
+	u64		nmi_total_ts;	/* Total time spent in NMIs */
 	struct timespec	timestamp;	/* wall time */
+	int		nmi_count;	/* # NMIs during this sample */
 };
 
 /* keep the global state somewhere. */
@@ -112,6 +123,8 @@ static void trace_hwlat_sample(struct hwlat_sample *sample)
 	entry->duration			= sample->duration;
 	entry->outer_duration		= sample->outer_duration;
 	entry->timestamp		= sample->timestamp;
+	entry->nmi_total_ts		= sample->nmi_total_ts;
+	entry->nmi_count		= sample->nmi_count;
 
 	if (!call_filter_check_discard(call, entry, buffer, event))
 		__buffer_unlock_commit(buffer, event);
@@ -125,6 +138,26 @@ static void trace_hwlat_sample(struct hwlat_sample *sample)
 #define init_time(a, b)	(a = b)
 #define time_u64(a)	a
 
+void trace_hwlat_callback(bool enter)
+{
+	if (smp_processor_id() != nmi_cpu)
+		return;
+
+	/*
+	 * Currently trace_clock_local() calls sched_clock() and the
+	 * generic version is not NMI safe.
+	 */
+	if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK)) {
+		if (enter)
+			nmi_ts_start = time_get();
+		else
+			nmi_total_ts = time_get() - nmi_ts_start;
+	}
+
+	if (enter)
+		nmi_count++;
+}
+
 /**
  * get_sample - sample the CPU TSC and look for likely hardware latencies
  *
@@ -144,6 +177,14 @@ static int get_sample(void)
 
 	do_div(thresh, NSEC_PER_USEC); /* modifies interval value */
 
+	nmi_cpu = smp_processor_id();
+	nmi_total_ts = 0;
+	nmi_count = 0;
+	/* Make sure NMIs see this first */
+	barrier();
+
+	trace_hwlat_callback_enabled = true;
+
 	init_time(last_t2, 0);
 	start = time_get(); /* start timestamp */
 
@@ -188,6 +229,10 @@ static int get_sample(void)
 
 	} while (total <= hwlat_data.sample_width);
 
+	barrier(); /* finish the above in the view for NMIs */
+	trace_hwlat_callback_enabled = false;
+	barrier(); /* Make sure nmi_total_ts is no longer updated */
+
 	ret = 0;
 
 	/* If we exceed the threshold value, we have found a hardware latency */
@@ -196,11 +241,17 @@ static int get_sample(void)
 
 		ret = 1;
 
+		/* We read in microseconds */
+		if (nmi_total_ts)
+			do_div(nmi_total_ts, NSEC_PER_USEC);
+
 		hwlat_data.count++;
 		s.seqnum = hwlat_data.count;
 		s.duration = sample;
 		s.outer_duration = outer_sample;
 		s.timestamp = CURRENT_TIME;
+		s.nmi_total_ts = nmi_total_ts;
+		s.nmi_count = nmi_count;
 		trace_hwlat_sample(&s);
 
 		/* Keep a running maximum ever recorded hardware latency */
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index d67a562df259..3fc20422c166 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -1109,13 +1109,27 @@ trace_hwlat_print(struct trace_iterator *iter, int flags,
 
 	trace_assign_type(field, entry);
 
-	trace_seq_printf(s, "#%-5u inner/outer(us): %4llu/%-5llu ts:%ld.%09ld\n",
+	trace_seq_printf(s, "#%-5u inner/outer(us): %4llu/%-5llu ts:%ld.%09ld",
 			 field->seqnum,
 			 field->duration,
 			 field->outer_duration,
 			 field->timestamp.tv_sec,
 			 field->timestamp.tv_nsec);
 
+	if (field->nmi_count) {
+		/*
+		 * The generic sched_clock() is not NMI safe, thus
+		 * we only record the count and not the time.
+		 */
+		if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK))
+			trace_seq_printf(s, " nmi-total:%llu",
+					 field->nmi_total_ts);
+		trace_seq_printf(s, " nmi-count:%u",
+				 field->nmi_count);
+	}
+
+	trace_seq_putc(s, '\n');
+
 	return trace_handle_return(s);
 }
 
-- 
2.8.1

      parent reply	other threads:[~2016-09-06 13:40 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-09-06 13:39 [for-next][PATCH 0/7] tracing: Updates for 4.9 Steven Rostedt
2016-09-06 13:39 ` [for-next][PATCH 1/7] tracing/uprobe: Drop isdigit() check in create_trace_uprobe Steven Rostedt
2016-09-06 13:39 ` [for-next][PATCH 2/7] function_graph: Handle TRACE_BPUTS in print_graph_comment Steven Rostedt
2016-09-06 13:39 ` [for-next][PATCH 3/7] ftrace: Access ret_stack->subtime only in the function profiler Steven Rostedt
2016-09-06 13:39 ` [for-next][PATCH 4/7] tracing: Added hardware latency tracer Steven Rostedt
2016-09-06 13:39 ` [for-next][PATCH 5/7] tracing: Add documentation for hwlat_detector tracer Steven Rostedt
2016-09-06 13:39 ` [for-next][PATCH 6/7] tracing: Have hwlat trace migrate across tracing_cpumask CPUs Steven Rostedt
2016-09-06 13:39 ` Steven Rostedt [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160906134001.215778024@goodmis.org \
    --to=rostedt@goodmis.org \
    --cc=akpm@linux-foundation.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.