All of lore.kernel.org
 help / color / mirror / Atom feed
From: Yoshihiro YUNOMAE <yoshihiro.yunomae.ez@hitachi.com>
To: Steven Rostedt <rostedt@goodmis.org>
Cc: Hidehiro Kawai <hidehiro.kawai.ez@hitachi.com>,
	Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>,
	linux-kernel@vger.kernel.org, yrl.pp-manager.tt@hitachi.com
Subject: [PATCH V2 5/5] trace-cmd: Add --virt option for record mode
Date: Fri, 13 Sep 2013 11:06:40 +0900	[thread overview]
Message-ID: <20130913020640.28927.12667.stgit@yunodevel> (raw)
In-Reply-To: <20130913020627.28927.69090.stgit@yunodevel>

Add --virt option for record mode for a virtualization environment.
If we use this option on a guest, we can send trace data with low overhead.
This is because guests can send trace data to a host without copying the data
by using splice(2).

The format is:

   trace-cmd record --virt -e sched*

<Note>
The client using virtio-serial does not wait for the connection message
"tracecmd" from the server. The client sends the connection message
MSG_TCONNECT first.

<Restriction>
This feature can be used from kernel 3.6 onward, which supports splice_read for
ftrace and splice_write for virtio-serial.

Signed-off-by: Yoshihiro YUNOMAE <yoshihiro.yunomae.ez@hitachi.com>
---
 Documentation/trace-cmd-record.1.txt |   11 ++++-
 trace-cmd.h                          |    3 +
 trace-msg.c                          |   80 ++++++++++++++++++++++++++++++++--
 trace-msg.h                          |    4 ++
 trace-record.c                       |   70 ++++++++++++++++++++++++++++--
 5 files changed, 156 insertions(+), 12 deletions(-)

diff --git a/Documentation/trace-cmd-record.1.txt b/Documentation/trace-cmd-record.1.txt
index 832a257..7eb8ac9 100644
--- a/Documentation/trace-cmd-record.1.txt
+++ b/Documentation/trace-cmd-record.1.txt
@@ -240,6 +240,15 @@ OPTIONS
     timestamp to gettimeofday which will allow wall time output from the
     timestamps reading the created 'trace.dat' file.
 
+*--virt*::
+    This option is used on a guest in a virtualization environment. If a host
+    is running "trace-cmd virt-server", this option is used to have the data
+    sent to the host with virtio-serial like *-N* option. (see also
+    trace-cmd-virt-server(1))
+
+    Note: This option is not supported with latency tracer plugins:
+      wakeup, wakeup_rt, irqsoff, preemptoff and preemptirqsoff
+
 EXAMPLES
 --------
 
@@ -302,7 +311,7 @@ SEE ALSO
 --------
 trace-cmd(1), trace-cmd-report(1), trace-cmd-start(1), trace-cmd-stop(1),
 trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1),
-trace-cmd-list(1), trace-cmd-listen(1)
+trace-cmd-list(1), trace-cmd-listen(1), trace-cmd-virt-server(1)
 
 AUTHOR
 ------
diff --git a/trace-cmd.h b/trace-cmd.h
index ce3df2c..d69ea2e 100644
--- a/trace-cmd.h
+++ b/trace-cmd.h
@@ -250,7 +250,8 @@ void tracecmd_stat_cpu(struct trace_seq *s, int cpu);
 long tracecmd_flush_recording(struct tracecmd_recorder *recorder);
 
 /* for clients */
-int tracecmd_msg_send_init_data(int fd);
+int tracecmd_msg_connect_to_server(int fd);
+int tracecmd_msg_send_init_data_nw(int fd);
 int tracecmd_msg_metadata_send(int fd, char *buf, int size);
 int tracecmd_msg_finish_sending_metadata(int fd);
 void tracecmd_msg_send_close_msg();
diff --git a/trace-msg.c b/trace-msg.c
index 0b3b356..4de1cf3 100644
--- a/trace-msg.c
+++ b/trace-msg.c
@@ -30,6 +30,7 @@
 #include <stdio.h>
 #include <unistd.h>
 #include <arpa/inet.h>
+#include <sys/stat.h>
 #include <sys/types.h>
 #include <linux/types.h>
 
@@ -72,6 +73,7 @@ int cpu_count;
 static int psfd;
 unsigned int page_size;
 int *client_ports;
+int *virt_sfds;
 bool send_metadata;
 
 /* for server */
@@ -268,12 +270,20 @@ static int make_rinit(struct tracecmd_msg *msg)
 	return 0;
 }
 
+static int make_error_msg(u32 len, struct tracecmd_msg *msg)
+{
+	bufcpy(msg, TRACECMD_MSG_HDR_LEN, errmsg, len);
+	return 0;
+}
+
 static u32 tracecmd_msg_get_body_length(u32 cmd)
 {
 	struct tracecmd_msg *msg;
 	u32 len = 0;
 
 	switch (cmd) {
+	case MSG_ERROR:
+		return ntohl(errmsg->size);
 	case MSG_RCONNECT:
 		return sizeof(msg->data.rconnect.str.size) + CONNECTION_MSGSIZE;
 	case MSG_TINIT:
@@ -302,6 +312,7 @@ static u32 tracecmd_msg_get_body_length(u32 cmd)
 		       + sizeof(msg->data.rinit.port_array);
 	case MSG_SENDMETA:
 		return TRACECMD_MSG_MAX_LEN - TRACECMD_MSG_HDR_LEN;
+	case MSG_TCONNECT:
 	case MSG_CLOSE:
 	case MSG_FINMETA:
 		break;
@@ -313,12 +324,15 @@ static u32 tracecmd_msg_get_body_length(u32 cmd)
 static int tracecmd_msg_make_body(u32 cmd, u32 len, struct tracecmd_msg *msg)
 {
 	switch (cmd) {
+	case MSG_ERROR:
+		return make_error_msg(len, msg);
 	case MSG_RCONNECT:
 		return make_rconnect(CONNECTION_MSG, CONNECTION_MSGSIZE, msg);
 	case MSG_TINIT:
 		return make_tinit(len, msg);
 	case MSG_RINIT:
 		return make_rinit(msg);
+	case MSG_TCONNECT:
 	case MSG_CLOSE:
 	case MSG_SENDMETA: /* meta data is not stored here. */
 	case MSG_FINMETA:
@@ -374,6 +388,12 @@ static int tracecmd_msg_send(int fd, u32 cmd)
 	return 0;
 }
 
+static void tracecmd_msg_send_error(int fd, struct tracecmd_msg *msg)
+{
+	errmsg = msg;
+ 	tracecmd_msg_send(fd, MSG_ERROR);
+}
+
 static int tracecmd_msg_read_extra(int fd, char *buf, u32 size, int *n)
 {
 	int r = 0;
@@ -502,20 +522,36 @@ static int tracecmd_msg_send_and_wait_for_msg(int fd, u32 cmd, struct tracecmd_m
 	return 0;
 }
 
-int tracecmd_msg_send_init_data(int fd)
+static int tracecmd_msg_send_init_data(int fd, bool nw)
 {
-	struct tracecmd_msg *msg;
+	struct tracecmd_msg *msg = NULL;
 	int i, cpus;
 	int ret;
+	char buf[PATH_MAX];
 
 	ret = tracecmd_msg_send_and_wait_for_msg(fd, MSG_TINIT, &msg);
 	if (ret < 0)
 		return ret;
 
 	cpus = ntohl(msg->data.rinit.cpus);
-	client_ports = malloc_or_die(sizeof(int) * cpus);
-	for (i = 0; i < cpus; i++)
-		client_ports[i] = ntohl(msg->data.rinit.port_array[i]);
+	if (nw) {
+		client_ports = malloc_or_die(sizeof(int) * cpus);
+		for (i = 0; i < cpus; i++)
+			client_ports[i] =
+					ntohl(msg->data.rinit.port_array[i]);
+	} else {
+		virt_sfds = malloc_or_die(sizeof(int) * cpus);
+
+		/* Open data paths of virtio-serial */
+		for (i = 0; i < cpus; i++) {
+			snprintf(buf, PATH_MAX, TRACE_PATH_CPU, i);
+			virt_sfds[i] = open(buf, O_WRONLY);
+			if (virt_sfds[i] < 0) {
+				warning("Cannot open %s", TRACE_PATH_CPU, i);
+				return -errno;
+			}
+		}
+	}
 
 	/* Next, send meta data */
 	send_metadata = true;
@@ -523,6 +559,40 @@ int tracecmd_msg_send_init_data(int fd)
 	return 0;
 }
 
+int tracecmd_msg_send_init_data_nw(int fd)
+{
+	return tracecmd_msg_send_init_data(fd, true);
+}
+
+static int tracecmd_msg_send_init_data_virt(int fd)
+{
+	return tracecmd_msg_send_init_data(fd, false);
+}
+
+int tracecmd_msg_connect_to_server(int fd)
+{
+	struct tracecmd_msg *msg = NULL;
+	int ret;
+
+	/* connect to a server */
+	ret = tracecmd_msg_send_and_wait_for_msg(fd, MSG_TCONNECT, &msg);
+	if (ret < 0) {
+		if (ret == -EPROTONOSUPPORT)
+			goto error;
+		return ret;
+	}
+
+	ret = tracecmd_msg_send_init_data_virt(fd);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+
+error:
+	tracecmd_msg_send_error(fd, msg);
+	return ret;
+}
+
 static bool process_option(struct tracecmd_msg_opt *opt)
 {
 	/* currently the only option we have is to us TCP */
diff --git a/trace-msg.h b/trace-msg.h
index b23e72b..502c1bf 100644
--- a/trace-msg.h
+++ b/trace-msg.h
@@ -2,6 +2,9 @@
 #define _TRACE_MSG_H_
 
 #include <stdbool.h>
+#define VIRTIO_PORTS	"/dev/virtio-ports/"
+#define AGENT_CTL_PATH	VIRTIO_PORTS "agent-ctl-path"
+#define TRACE_PATH_CPU	VIRTIO_PORTS "trace-path-cpu%d"
 
 #define UDP_MAX_PACKET	(65536 - 20)
 #define V2_MAGIC	"677768\0"
@@ -17,6 +20,7 @@ extern int cpu_count;
 extern unsigned int page_size;
 extern int *client_ports;
 extern bool send_metadata;
+extern int *virt_sfds;
 
 /* for server */
 extern bool done;
diff --git a/trace-record.c b/trace-record.c
index ebfe6c0..1b1d293 100644
--- a/trace-record.c
+++ b/trace-record.c
@@ -80,6 +80,9 @@ static int sfd;
 /* Max size to let a per cpu file get */
 static int max_kb;
 
+struct tracecmd_output *virt_handle;
+static bool virt;
+
 static int do_ptrace;
 
 static int filter_task;
@@ -1578,6 +1581,9 @@ static int create_recorder(struct buffer_instance *instance, int cpu, int extrac
 	if (client_ports) {
 		connect_port(cpu);
 		recorder = tracecmd_create_recorder_fd(client_ports[cpu], cpu, recorder_flags);
+	} else if (virt_sfds) {
+		recorder = tracecmd_create_recorder_fd(virt_sfds[cpu], cpu,
+						       recorder_flags);
 	} else {
 		file = get_temp_file(instance, cpu);
 		recorder = create_recorder_instance(instance, file, cpu);
@@ -1613,7 +1619,7 @@ static void check_first_msg_from_server(int fd)
 		die("server not tracecmd server");
 }
 
-static void communicate_with_listener_v1(int fd)
+static void communicate_with_listener_v1_nw(int fd)
 {
 	char buf[BUFSIZ];
 	ssize_t n;
@@ -1676,9 +1682,9 @@ static void communicate_with_listener_v1(int fd)
 	}
 }
 
-static void communicate_with_listener_v2(int fd)
+static void communicate_with_listener_v2_nw(int fd)
 {
-	if (tracecmd_msg_send_init_data(fd) < 0)
+	if (tracecmd_msg_send_init_data_nw(fd) < 0)
 		die("Cannot communicate with server");
 }
 
@@ -1716,6 +1722,12 @@ static void check_protocol_version(int fd)
 	}
 }
 
+static void communicate_with_listener_virt(int fd)
+{
+	if (tracecmd_msg_connect_to_server(fd) < 0)
+		die("Cannot communicate with server");
+}
+
 static void setup_network(void)
 {
 	struct tracecmd_output *handle;
@@ -1772,11 +1784,11 @@ again:
 			close(sfd);
 			goto again;
 		}
-		communicate_with_listener_v2(sfd);
+		communicate_with_listener_v2_nw(sfd);
 	}
 
 	if (proto_ver == V1_PROTOCOL)
-		communicate_with_listener_v1(sfd);
+		communicate_with_listener_v1_nw(sfd);
 
 	/* Now create the handle through this socket */
 	handle = tracecmd_create_init_fd_glob(sfd, listed_events);
@@ -1787,6 +1799,21 @@ again:
 	/* OK, we are all set, let'r rip! */
 }
 
+static void setup_virtio(void)
+{
+	int fd;
+
+	fd = open(AGENT_CTL_PATH, O_RDWR);
+	if (fd < 0)
+		die("Cannot open %s", AGENT_CTL_PATH);
+
+	communicate_with_listener_virt(fd);
+
+	/* Now create the handle through this socket */
+	virt_handle = tracecmd_create_init_fd_glob(fd, listed_events);
+	tracecmd_msg_finish_sending_metadata(fd);
+}
+
 static void finish_network(void)
 {
 	if (proto_ver == V2_PROTOCOL)
@@ -1795,6 +1822,13 @@ static void finish_network(void)
 	free(host);
 }
 
+static void finish_virt(void)
+{
+	tracecmd_msg_send_close_msg();
+	free(virt_handle);
+	free(virt_sfds);
+}
+
 static void start_threads(void)
 {
 	struct buffer_instance *instance;
@@ -1802,6 +1836,8 @@ static void start_threads(void)
 
 	if (host)
 		setup_network();
+	else if (virt)
+		setup_virtio();
 
 	/* make a thread for every CPU we have */
 	pids = malloc_or_die(sizeof(*pids) * cpu_count * (buffers + 1));
@@ -1846,6 +1882,9 @@ static void record_data(char *date2ts, struct trace_seq *s)
 	if (host) {
 		finish_network();
 		return;
+	} else if (virt) {
+		finish_virt();
+		return;
 	}
 
 	if (latency)
@@ -2337,6 +2376,7 @@ static void record_all_events(void)
 }
 
 enum {
+	OPT_virt	= 252,
 	OPT_nosplice	= 253,
 	OPT_funcstack	= 254,
 	OPT_date	= 255,
@@ -2408,6 +2448,7 @@ void trace_record (int argc, char **argv)
 			{"date", no_argument, NULL, OPT_date},
 			{"func-stack", no_argument, NULL, OPT_funcstack},
 			{"nosplice", no_argument, NULL, OPT_nosplice},
+			{"virt", no_argument, NULL, OPT_virt},
 			{"help", no_argument, NULL, '?'},
 			{NULL, 0, NULL, 0}
 		};
@@ -2519,6 +2560,8 @@ void trace_record (int argc, char **argv)
 		case 'o':
 			if (host)
 				die("-o incompatible with -N");
+			if (virt)
+				die("-o incompatible with --virt");
 			if (!record && !extract)
 				die("start does not take output\n"
 				    "Did you mean 'record'?");
@@ -2550,6 +2593,8 @@ void trace_record (int argc, char **argv)
 		case 'N':
 			if (!record)
 				die("-N only available with record");
+			if (virt)
+				die("-N incompatible with --virt");
 			if (output)
 				die("-N incompatible with -o");
 			host = optarg;
@@ -2562,6 +2607,8 @@ void trace_record (int argc, char **argv)
 			max_kb = atoi(optarg);
 			break;
 		case 't':
+			if (virt)
+				die("-t incompatible with --virt");
 			use_tcp = 1;
 			break;
 		case 'b':
@@ -2588,6 +2635,17 @@ void trace_record (int argc, char **argv)
 		case OPT_nosplice:
 			recorder_flags |= TRACECMD_RECORD_NOSPLICE;
 			break;
+		case OPT_virt:
+			if (!record)
+				die("--virt only available with record");
+			if (host)
+				die("--virt incompatible with -N");
+			if (output)
+				die("--virt incompatible with -o");
+			if (use_tcp)
+				die("--virt incompatible with -t");
+			virt = true;
+			break;
 		default:
 			usage(argv);
 		}
@@ -2663,6 +2721,8 @@ void trace_record (int argc, char **argv)
 			latency = 1;
 			if (host)
 				die("Network tracing not available with latency tracer plugins");
+			if (virt)
+				die("Virtio-trace not available with latency tracer plugins");
 		}
 		if (fset < 0 && (strcmp(plugin, "function") == 0 ||
 				 strcmp(plugin, "function_graph") == 0))


  parent reply	other threads:[~2013-09-13  2:02 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-09-13  2:06 [PATCH V2 0/5] trace-cmd: Support the feature recording trace data of guests on the host Yoshihiro YUNOMAE
2013-09-13  2:06 ` [PATCH V2 1/5] [CLEANUP] trace-cmd: Split out binding a port and fork reader from open_udp() Yoshihiro YUNOMAE
2013-09-13  2:06 ` [PATCH V2 2/5] trace-cmd: Apply the trace-msg protocol for communication between a server and clients Yoshihiro YUNOMAE
2013-10-15  2:21   ` Steven Rostedt
2013-10-17  6:34     ` Yoshihiro YUNOMAE
2013-10-17 21:21       ` Steven Rostedt
2013-10-18  2:19   ` Steven Rostedt
2013-10-22  8:53     ` Yoshihiro YUNOMAE
2013-09-13  2:06 ` [PATCH V2 3/5] trace-cmd: Use poll(2) to wait for a message Yoshihiro YUNOMAE
2013-09-13  2:06 ` [PATCH V2 4/5] trace-cmd: Add virt-server mode for a virtualization environment Yoshihiro YUNOMAE
2013-10-18  2:32   ` Steven Rostedt
2013-10-22  8:55     ` Yoshihiro YUNOMAE
2013-09-13  2:06 ` Yoshihiro YUNOMAE [this message]
2013-10-11  1:39 ` [PATCH V2 0/5] trace-cmd: Support the feature recording trace data of guests on the host Yoshihiro YUNOMAE
2013-10-11  1:46   ` Steven Rostedt
2013-10-14 21:26 ` Steven Rostedt
2013-10-17  6:32   ` Yoshihiro YUNOMAE
2013-10-17 21:11     ` Steven Rostedt
2013-10-18 15:06 ` Steven Rostedt
2013-10-22  8:53   ` Yoshihiro YUNOMAE

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20130913020640.28927.12667.stgit@yunodevel \
    --to=yoshihiro.yunomae.ez@hitachi.com \
    --cc=hidehiro.kawai.ez@hitachi.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=masami.hiramatsu.pt@hitachi.com \
    --cc=rostedt@goodmis.org \
    --cc=yrl.pp-manager.tt@hitachi.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.