All of lore.kernel.org
 help / color / mirror / Atom feed
From: git@jeffhostetler.com
To: git@vger.kernel.org
Cc: gitster@pobox.com, peff@peff.net,
	Jeff Hostetler <jeffhost@microsoft.com>
Subject: [PATCH v1 03/25] structured-logging: add structured logging framework
Date: Fri, 13 Jul 2018 16:55:59 +0000	[thread overview]
Message-ID: <20180713165621.52017-4-git@jeffhostetler.com> (raw)
In-Reply-To: <20180713165621.52017-1-git@jeffhostetler.com>

From: Jeff Hostetler <jeffhost@microsoft.com>

Teach git to optionally generate structured logging data in JSON using
the json-writer API.  "cmd_start" and "cmd_end" events are generated.

Structured logging is only available when git is built with
STRUCTURED_LOGGING=1.

Structured logging is only enabled when the config setting "slog.path"
is set to an absolute pathname.

Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
---
 Documentation/config.txt |   8 ++
 compat/mingw.h           |   7 +
 config.c                 |   3 +
 git-compat-util.h        |   9 ++
 git.c                    |   8 +-
 structured-logging.c     | 366 +++++++++++++++++++++++++++++++++++++++++++++++
 structured-logging.h     |  82 +++++++++++
 usage.c                  |   4 +
 8 files changed, 486 insertions(+), 1 deletion(-)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index ab641bf..c79f2bf 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -3168,6 +3168,14 @@ showbranch.default::
 	The default set of branches for linkgit:git-show-branch[1].
 	See linkgit:git-show-branch[1].
 
+slog.path::
+	(EXPERIMENTAL) Enable structured logging to a file.  This must be
+	an absolute path.  (Git must be compiled with STRUCTURED_LOGGING=1.)
+
+slog.pretty::
+	(EXPERIMENTAL) Pretty-print structured log data when true.
+	(Git must be compiled with STRUCTURED_LOGGING=1.)
+
 splitIndex.maxPercentChange::
 	When the split index feature is used, this specifies the
 	percent of entries the split index can contain compared to the
diff --git a/compat/mingw.h b/compat/mingw.h
index 571019d..d8d8cd3 100644
--- a/compat/mingw.h
+++ b/compat/mingw.h
@@ -144,8 +144,15 @@ static inline int fcntl(int fd, int cmd, ...)
 	errno = EINVAL;
 	return -1;
 }
+
 /* bash cannot reliably detect negative return codes as failure */
+#if defined(STRUCTURED_LOGGING)
+#include "structured-logging.h"
+#define exit(code) exit(strlog_exit_code((code) & 0xff))
+#else
 #define exit(code) exit((code) & 0xff)
+#endif
+
 #define sigemptyset(x) (void)0
 static inline int sigaddset(sigset_t *set, int signum)
 { return 0; }
diff --git a/config.c b/config.c
index fbbf0f8..b27b024 100644
--- a/config.c
+++ b/config.c
@@ -1476,6 +1476,9 @@ int git_default_config(const char *var, const char *value, void *dummy)
 		return 0;
 	}
 
+	if (starts_with(var, "slog."))
+		return slog_default_config(var, value);
+
 	/* Add other config variables here and to Documentation/config.txt. */
 	return 0;
 }
diff --git a/git-compat-util.h b/git-compat-util.h
index 9a64998..f5352fd 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -1239,4 +1239,13 @@ extern void unleak_memory(const void *ptr, size_t len);
 #define UNLEAK(var) do {} while (0)
 #endif
 
+#include "structured-logging.h"
+#if defined(STRUCTURED_LOGGING) && !defined(exit)
+/*
+ * Intercept all calls to exit() so that exit-code can be included
+ * in the "cmd_exit" message written by the at-exit routine.
+ */
+#define exit(code) exit(slog_exit_code(code))
+#endif
+
 #endif
diff --git a/git.c b/git.c
index c2f48d5..024a40d 100644
--- a/git.c
+++ b/git.c
@@ -413,6 +413,7 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv)
 		setup_work_tree();
 
 	trace_argv_printf(argv, "trace: built-in: git");
+	slog_set_command_name(p->cmd);
 
 	status = p->fn(argc, argv, prefix);
 	if (status)
@@ -700,7 +701,7 @@ static int run_argv(int *argcp, const char ***argv)
 	return done_alias;
 }
 
-int cmd_main(int argc, const char **argv)
+static int real_cmd_main(int argc, const char **argv)
 {
 	const char *cmd;
 	int done_help = 0;
@@ -779,3 +780,8 @@ int cmd_main(int argc, const char **argv)
 
 	return 1;
 }
+
+int cmd_main(int argc, const char **argv)
+{
+	return slog_wrap_main(real_cmd_main, argc, argv);
+}
diff --git a/structured-logging.c b/structured-logging.c
index 702fd84..afa2224 100644
--- a/structured-logging.c
+++ b/structured-logging.c
@@ -1,3 +1,10 @@
+#include "cache.h"
+#include "config.h"
+#include "version.h"
+#include "json-writer.h"
+#include "sigchain.h"
+#include "argv-array.h"
+
 #if !defined(STRUCTURED_LOGGING)
 /*
  * Structured logging is not available.
@@ -6,4 +13,363 @@
 
 #else
 
+#define SLOG_VERSION 0
+
+static uint64_t my__start_time;
+static uint64_t my__exit_time;
+static int my__is_config_loaded;
+static int my__is_enabled;
+static int my__is_pretty;
+static int my__signal;
+static int my__exit_code;
+static int my__pid;
+static int my__wrote_start_event;
+static int my__log_fd = -1;
+
+static char *my__log_path;
+static char *my__command_name;
+static char *my__sub_command_name;
+
+static struct argv_array my__argv = ARGV_ARRAY_INIT;
+static struct json_writer my__errors = JSON_WRITER_INIT;
+
+/*
+ * Write a single event to the structured log file.
+ */
+static void emit_event(struct json_writer *jw, const char *event_name)
+{
+	if (my__log_fd == -1) {
+		my__log_fd = open(my__log_path,
+				  O_WRONLY | O_APPEND | O_CREAT,
+				  0644);
+		if (my__log_fd == -1) {
+			warning("slog: could not open '%s' for logging: %s",
+				my__log_path, strerror(errno));
+			my__is_enabled = 0;
+			return;
+		}
+	}
+
+	/*
+	 * A properly authored JSON string does not have a final NL
+	 * (even when pretty-printing is enabled).  Structured logging
+	 * output should look like a series of terminated forms one
+	 * per line.  Temporarily append a NL to the buffer so that
+	 * the disk write happens atomically.
+	 */
+	strbuf_addch(&jw->json, '\n');
+	if (write(my__log_fd, jw->json.buf, jw->json.len) != jw->json.len)
+		warning("slog: could not write event '%s': %s",
+			event_name, strerror(errno));
+
+	strbuf_setlen(&jw->json, jw->json.len - 1);
+}
+
+static void emit_start_event(void)
+{
+	struct json_writer jw = JSON_WRITER_INIT;
+
+	/* build "cmd_start" event message */
+	jw_object_begin(&jw, my__is_pretty);
+	{
+		jw_object_string(&jw, "event", "cmd_start");
+		jw_object_intmax(&jw, "clock_us", (intmax_t)my__start_time);
+		jw_object_intmax(&jw, "pid", (intmax_t)my__pid);
+
+		if (my__command_name && *my__command_name)
+			jw_object_string(&jw, "command", my__command_name);
+		if (my__sub_command_name && *my__sub_command_name)
+			jw_object_string(&jw, "sub_command", my__sub_command_name);
+
+		jw_object_inline_begin_array(&jw, "argv");
+		{
+			int k;
+			for (k = 0; k < my__argv.argc; k++)
+				jw_array_string(&jw, my__argv.argv[k]);
+		}
+		jw_end(&jw);
+	}
+	jw_end(&jw);
+
+	emit_event(&jw, "cmd_start");
+	jw_release(&jw);
+
+	my__wrote_start_event = 1;
+}
+
+static void emit_exit_event(void)
+{
+	struct json_writer jw = JSON_WRITER_INIT;
+	uint64_t atexit_time = getnanotime() / 1000;
+
+	/* close unterminated forms */
+	if (my__errors.json.len)
+		jw_end(&my__errors);
+
+	/* build "cmd_exit" event message */
+	jw_object_begin(&jw, my__is_pretty);
+	{
+		jw_object_string(&jw, "event", "cmd_exit");
+		jw_object_intmax(&jw, "clock_us", (intmax_t)atexit_time);
+		jw_object_intmax(&jw, "pid", (intmax_t)my__pid);
+
+		if (my__command_name && *my__command_name)
+			jw_object_string(&jw, "command", my__command_name);
+		if (my__sub_command_name && *my__sub_command_name)
+			jw_object_string(&jw, "sub_command", my__sub_command_name);
+
+		jw_object_inline_begin_array(&jw, "argv");
+		{
+			int k;
+			for (k = 0; k < my__argv.argc; k++)
+				jw_array_string(&jw, my__argv.argv[k]);
+		}
+		jw_end(&jw);
+
+		jw_object_inline_begin_object(&jw, "result");
+		{
+			jw_object_intmax(&jw, "exit_code", my__exit_code);
+			if (my__errors.json.len)
+				jw_object_sub_jw(&jw, "errors", &my__errors);
+
+			if (my__signal)
+				jw_object_intmax(&jw, "signal", my__signal);
+
+			if (my__exit_time > 0)
+				jw_object_intmax(&jw, "elapsed_core_us",
+						 my__exit_time - my__start_time);
+
+			jw_object_intmax(&jw, "elapsed_total_us",
+					 atexit_time - my__start_time);
+		}
+		jw_end(&jw);
+
+		jw_object_inline_begin_object(&jw, "version");
+		{
+			jw_object_string(&jw, "git", git_version_string);
+			jw_object_intmax(&jw, "slog", SLOG_VERSION);
+		}
+		jw_end(&jw);
+	}
+	jw_end(&jw);
+
+	emit_event(&jw, "cmd_exit");
+	jw_release(&jw);
+}
+
+static int cfg_path(const char *key, const char *value)
+{
+	if (is_absolute_path(value)) {
+		my__log_path = xstrdup(value);
+		my__is_enabled = 1;
+	} else {
+		warning("'%s' must be an absolute path: '%s'",
+			key, value);
+	}
+
+	return 0;
+}
+
+static int cfg_pretty(const char *key, const char *value)
+{
+	my__is_pretty = git_config_bool(key, value);
+	return 0;
+}
+
+int slog_default_config(const char *key, const char *value)
+{
+	const char *sub;
+
+	/*
+	 * git_default_config() calls slog_default_config() with "slog.*"
+	 * k/v pairs.  git_default_config() MAY or MAY NOT be called when
+	 * cmd_<command>() calls git_config().
+	 *
+	 * Remember if we've ever been called.
+	 */
+	my__is_config_loaded = 1;
+
+	if (skip_prefix(key, "slog.", &sub)) {
+		if (!strcmp(sub, "path"))
+			return cfg_path(key, value);
+		if (!strcmp(sub, "pretty"))
+			return cfg_pretty(key, value);
+	}
+
+	return 0;
+}
+
+static int lazy_load_config_cb(const char *key, const char * value, void *data)
+{
+	return slog_default_config(key, value);
+}
+
+/*
+ * If cmd_<command>() did not cause slog_default_config() to be called
+ * during git_config(), we try to lookup our config settings the first
+ * time we actually need them.
+ *
+ * (We do this rather than using read_early_config() at initialization
+ * because we want any "-c key=value" arguments to be included.)
+ */
+static inline void lazy_load_config(void)
+{
+	if (my__is_config_loaded)
+		return;
+	my__is_config_loaded = 1;
+
+	read_early_config(lazy_load_config_cb, NULL);
+}
+
+int slog_is_enabled(void)
+{
+	lazy_load_config();
+
+	return my__is_enabled;
+}
+
+static void do_final_steps(int in_signal)
+{
+	static int completed = 0;
+
+	if (completed)
+		return;
+	completed = 1;
+
+	if (slog_is_enabled()) {
+		if (!my__wrote_start_event)
+			emit_start_event();
+		emit_exit_event();
+		my__is_enabled = 0;
+	}
+
+	if (my__log_fd != -1)
+		close(my__log_fd);
+	free(my__log_path);
+	free(my__command_name);
+	free(my__sub_command_name);
+	argv_array_clear(&my__argv);
+	jw_release(&my__errors);
+}
+
+static void slog_atexit(void)
+{
+	do_final_steps(0);
+}
+
+static void slog_signal(int signo)
+{
+	my__signal = signo;
+
+	do_final_steps(1);
+
+	sigchain_pop(signo);
+	raise(signo);
+}
+
+static void intern_argv(int argc, const char **argv)
+{
+	int k;
+
+	for (k = 0; k < argc; k++)
+		argv_array_push(&my__argv, argv[k]);
+}
+
+/*
+ * Collect basic startup information before cmd_main() has a chance
+ * to alter the command line and before we have seen the config (to
+ * know if logging is enabled).  And since the config isn't loaded
+ * until cmd_main() dispatches to cmd_<command>(), we have to wait
+ * and lazy-write the "cmd_start" event.
+ *
+ * This also implies that commands such as "help" and "version" that
+ * don't need load the config won't generate any log data.
+ */
+static void initialize(int argc, const char **argv)
+{
+	my__start_time = getnanotime() / 1000;
+	my__pid = getpid();
+
+	intern_argv(argc, argv);
+
+	atexit(slog_atexit);
+
+	/*
+	 * Put up backstop signal handler to ensure we get the "cmd_exit"
+	 * event.  This is primarily for when the pager throws SIGPIPE
+	 * when the user quits.
+	 */
+	sigchain_push(SIGPIPE, slog_signal);
+}
+
+int slog_wrap_main(slog_fn_main_t fn_main, int argc, const char **argv)
+{
+	int result;
+
+	initialize(argc, argv);
+	result = fn_main(argc, argv);
+	slog_exit_code(result);
+
+	return result;
+}
+
+void slog_set_command_name(const char *command_name)
+{
+	/*
+	 * Capture the command name even if logging is not enabled
+	 * because we don't know if the config has been loaded yet by
+	 * the cmd_<command>() and/or it may be too early to force a
+	 * lazy load.
+	 */
+	if (my__command_name)
+		free(my__command_name);
+	my__command_name = xstrdup(command_name);
+}
+
+void slog_set_sub_command_name(const char *sub_command_name)
+{
+	/*
+	 * Capture the sub-command name even if logging is not enabled
+	 * because we don't know if the config has been loaded yet by
+	 * the cmd_<command>() and/or it may be too early to force a
+	 * lazy load.
+	 */
+	if (my__sub_command_name)
+		free(my__sub_command_name);
+	my__sub_command_name = xstrdup(sub_command_name);
+}
+
+int slog_is_pretty(void)
+{
+	return my__is_pretty;
+}
+
+int slog_exit_code(int exit_code)
+{
+	my__exit_time = getnanotime() / 1000;
+	my__exit_code = exit_code;
+
+	return exit_code;
+}
+
+void slog_error_message(const char *prefix, const char *fmt, va_list params)
+{
+	struct strbuf em = STRBUF_INIT;
+	va_list copy_params;
+
+	if (prefix && *prefix)
+		strbuf_addstr(&em, prefix);
+
+	va_copy(copy_params, params);
+	strbuf_vaddf(&em, fmt, copy_params);
+	va_end(copy_params);
+
+	if (!my__errors.json.len)
+		jw_array_begin(&my__errors, my__is_pretty);
+	jw_array_string(&my__errors, em.buf);
+	/* leave my__errors array unterminated for now */
+
+	strbuf_release(&em);
+}
+
 #endif
diff --git a/structured-logging.h b/structured-logging.h
index c9e8c1d..61e98e6 100644
--- a/structured-logging.h
+++ b/structured-logging.h
@@ -1,13 +1,95 @@
 #ifndef STRUCTURED_LOGGING_H
 #define STRUCTURED_LOGGING_H
 
+typedef int (*slog_fn_main_t)(int, const char **);
+
 #if !defined(STRUCTURED_LOGGING)
 /*
  * Structured logging is not available.
  * Stub out all API routines.
  */
+#define slog_is_available() (0)
+#define slog_default_config(k, v) (0)
+#define slog_wrap_main(real_cmd_main, argc, argv) ((real_cmd_main)((argc), (argv)))
+#define slog_set_command_name(n) do { } while (0)
+#define slog_set_sub_command_name(n) do { } while (0)
+#define slog_is_enabled() (0)
+#define slog_is_pretty() (0)
+#define slog_exit_code(exit_code) (exit_code)
+#define slog_error_message(prefix, fmt, params) do { } while (0)
 
 #else
 
+/*
+ * Is structured logging available (compiled-in)?
+ */
+#define slog_is_available() (1)
+
+/*
+ * Process "slog.*" config settings.
+ */
+int slog_default_config(const char *key, const char *value);
+
+/*
+ * Wrapper for the "real" cmd_main().  Initialize structured logging if
+ * enabled, run the given real_cmd_main(), and capture the return value.
+ *
+ * Note:  common-main.c is shared by many top-level commands.
+ * common-main.c:main() does common process setup before calling
+ * the version of cmd_main() found in the executable.  Some commands
+ * SHOULD NOT do logging (such as t/helper/test-tool).  Ones that do
+ * need some common initialization/teardown.
+ *
+ * Use this function for any top-level command that should do logging.
+ *
+ * Usage:
+ *
+ * static int real_cmd_main(int argc, const char **argv)
+ * {
+ *     ....the actual code for the command....
+ * }
+ *
+ * int cmd_main(int argc, const char **argv)
+ * {
+ *     return slog_wrap_main(real_cmd_main, argc, argv);
+ * }
+ * 
+ *
+ * See git.c for an example.
+ */ 
+int slog_wrap_main(slog_fn_main_t real_cmd_main, int argc, const char **argv);
+
+/*
+ * Record a canonical command name and optional sub-command name for the
+ * current process.  For example, "checkout" and "switch-branch".
+ */
+void slog_set_command_name(const char *name);
+void slog_set_sub_command_name(const char *name);
+
+/*
+ * Is structured logging enabled?
+ */
+int slog_is_enabled(void);
+
+/*
+ * Is JSON pretty-printing enabled?
+ */
+int slog_is_pretty(void);
+
+/*
+ * Register the process exit code with the structured logging layer
+ * and return it.  This value will appear in the final "cmd_exit" event.
+ *
+ * Use this to wrap all calls to exit().
+ * Use this before returning in main().
+ */
+int slog_exit_code(int exit_code);
+
+/*
+ * Append formatted error message to the structured log result.
+ * Messages from this will appear in the final "cmd_exit" event.
+ */
+void slog_error_message(const char *prefix, const char *fmt, va_list params);
+
 #endif /* STRUCTURED_LOGGING */
 #endif /* STRUCTURED_LOGGING_H */
diff --git a/usage.c b/usage.c
index cc80333..5d48f6b 100644
--- a/usage.c
+++ b/usage.c
@@ -27,12 +27,16 @@ static NORETURN void usage_builtin(const char *err, va_list params)
 
 static NORETURN void die_builtin(const char *err, va_list params)
 {
+	slog_error_message("fatal: ", err, params);
+
 	vreportf("fatal: ", err, params);
 	exit(128);
 }
 
 static void error_builtin(const char *err, va_list params)
 {
+	slog_error_message("error: ", err, params);
+
 	vreportf("error: ", err, params);
 }
 
-- 
2.9.3


  parent reply	other threads:[~2018-07-13 16:57 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-07-13 16:55 [PATCH v1 00/25] RFC: structured logging git
2018-07-13 16:55 ` [PATCH v1 01/25] structured-logging: design document git
2018-07-14  8:34   ` Simon Ruderich
2018-08-03 15:26   ` Ben Peart
2018-08-09 14:30     ` Jeff Hostetler
2018-08-21  4:47   ` Jonathan Nieder
2018-07-13 16:55 ` [PATCH v1 02/25] structured-logging: add STRUCTURED_LOGGING=1 to Makefile git
2018-08-21  4:49   ` Jonathan Nieder
2018-07-13 16:55 ` git [this message]
2018-07-26  9:09   ` [PATCH v1 03/25] structured-logging: add structured logging framework SZEDER Gábor
2018-07-27 12:45     ` Jeff Hostetler
2018-08-21  5:05   ` Jonathan Nieder
2018-07-13 16:56 ` [PATCH v1 04/25] structured-logging: add session-id to log events git
2018-07-13 16:56 ` [PATCH v1 05/25] structured-logging: set sub_command field for branch command git
2018-07-13 16:56 ` [PATCH v1 06/25] structured-logging: set sub_command field for checkout command git
2018-07-13 16:56 ` [PATCH v1 07/25] structured-logging: t0420 basic tests git
2018-07-13 16:56 ` [PATCH v1 08/25] structured-logging: add detail-event facility git
2018-07-13 16:56 ` [PATCH v1 09/25] structured-logging: add detail-event for lazy_init_name_hash git
2018-07-13 16:56 ` [PATCH v1 10/25] structured-logging: add timer facility git
2018-07-13 16:56 ` [PATCH v1 11/25] structured-logging: add timer around do_read_index git
2018-07-13 16:56 ` [PATCH v1 12/25] structured-logging: add timer around do_write_index git
2018-07-13 16:56 ` [PATCH v1 13/25] structured-logging: add timer around wt-status functions git
2018-07-13 16:56 ` [PATCH v1 14/25] structured-logging: add timer around preload_index git
2018-07-13 16:56 ` [PATCH v1 15/25] structured-logging: t0420 tests for timers git
2018-07-13 16:56 ` [PATCH v1 16/25] structured-logging: add aux-data facility git
2018-07-13 16:56 ` [PATCH v1 17/25] structured-logging: add aux-data for index size git
2018-07-13 16:56 ` [PATCH v1 18/25] structured-logging: add aux-data for size of sparse-checkout file git
2018-07-13 16:56 ` [PATCH v1 19/25] structured-logging: t0420 tests for aux-data git
2018-07-13 16:56 ` [PATCH v1 20/25] structured-logging: add structured logging to remote-curl git
2018-07-13 16:56 ` [PATCH v1 21/25] structured-logging: add detail-events for child processes git
2018-07-13 16:56 ` [PATCH v1 22/25] structured-logging: add child process classification git
2018-07-13 16:56 ` [PATCH v1 23/25] structured-logging: t0420 tests for child process detail events git
2018-07-13 16:56 ` [PATCH v1 24/25] structured-logging: t0420 tests for interacitve child_summary git
2018-07-13 16:56 ` [PATCH v1 25/25] structured-logging: add config data facility git
2018-07-13 18:51 ` [PATCH v1 00/25] RFC: structured logging David Lang
2018-07-16 13:29   ` Jeff Hostetler
2018-08-28 17:38 ` Junio C Hamano
2018-08-28 18:47   ` Jeff Hostetler

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180713165621.52017-4-git@jeffhostetler.com \
    --to=git@jeffhostetler.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=jeffhost@microsoft.com \
    --cc=peff@peff.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.