xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
From: Ronald Rojas <ronladred@gmail.com>
To: xen-devel <xen-devel@lists.xen.org>,
	Ian Jackson <ian.jackson@eu.citrix.com>,
	Wei Liu <wei.liu2@citrix.com>,
	George Dunlap <george.dunlap@citrix.com>,
	George Dunlap <dunlapg@umich.edu>
Subject: [PATCH RFC 28/59] controller: Handle worker early death
Date: Wed, 28 Dec 2016 20:14:21 -0500	[thread overview]
Message-ID: <1482974092-15891-28-git-send-email-ronladred@gmail.com> (raw)
In-Reply-To: <1482974092-15891-1-git-send-email-ronladred@gmail.com>

From: George Dunlap <george.dunlap@citrix.com>

Log raw worker output.  In the event of an unexpected worker death,
dump the output and stop further processing.

Also fix an assert that caused workers to die if the timer was too
exact.

Signed-off-by: George Dunlap <george.dunlap@citrix.com>
---
 benchmark.go     |  7 -------
 processworker.go | 18 ++++++++++++++++--
 run.go           | 24 ++++++++++++++++++------
 xenworker.go     | 21 +++++++++++++++++++--
 4 files changed, 53 insertions(+), 17 deletions(-)

diff --git a/benchmark.go b/benchmark.go
index 5e35997..de1f650 100644
--- a/benchmark.go
+++ b/benchmark.go
@@ -74,13 +74,6 @@ type WorkerSet struct {
 	Count int
 }
 
-type Worker interface {
-	SetId(WorkerId)
-	Init(WorkerParams, WorkerConfig) error
-	Shutdown()
-	Process(chan WorkerReport, chan bool)
-}
-
 const (
 	USEC = 1000
 	MSEC = USEC * 1000
diff --git a/processworker.go b/processworker.go
index f517321..999e76a 100644
--- a/processworker.go
+++ b/processworker.go
@@ -32,6 +32,7 @@ type ProcessWorker struct {
 	c *exec.Cmd
 	stdout io.ReadCloser
 	jsonStarted bool
+	Log []string
 }
 
 func (w *ProcessWorker) SetId(i WorkerId) {
@@ -54,7 +55,19 @@ func (w *ProcessWorker) Shutdown() {
 	w.c.Process.Kill()
 }
 
-func (w *ProcessWorker) Process(report chan WorkerReport, done chan bool) {
+func (w *ProcessWorker) DumpLog(f io.Writer) (err error) { // write every entry of w.Log to f, one per line
+	b := bufio.NewWriter(f)
+	defer b.Flush() // runs on both return paths; the error Flush itself returns is discarded
+	for _, line := range w.Log {
+		_, err = fmt.Fprintln(b, line) // Fprintln targets b; Println would print b and line to stdout instead
+		if err != nil {
+			return // stop at the first write error; err is the named result
+		}
+	}
+	return
+}
+
+func (w *ProcessWorker) Process(report chan WorkerReport, done chan WorkerId) {
 	w.c.Start()
 
 	scanner := bufio.NewScanner(w.stdout)
@@ -63,6 +76,7 @@ func (w *ProcessWorker) Process(report chan WorkerReport, done chan bool) {
 		s := scanner.Text()
 		
 		//fmt.Println("Got these bytes: ", s);
+		w.Log = append(w.Log, s)
 
 		if w.jsonStarted {
 			var r WorkerReport
@@ -77,7 +91,7 @@ func (w *ProcessWorker) Process(report chan WorkerReport, done chan bool) {
 		}
 	}
 
-	done <- true
+	done <- w.id
 
 	w.c.Wait()
 }
diff --git a/run.go b/run.go
index ed1957b..1b39730 100644
--- a/run.go
+++ b/run.go
@@ -26,6 +26,7 @@ import (
 	"regexp"
 	"strconv"
 	"bufio"
+	"io"
 )
 
 type WorkerState struct {
@@ -33,6 +34,14 @@ type WorkerState struct {
 	LastReport WorkerReport
 }
 
+type Worker interface { // method set shared by ProcessWorker and XenWorker in this patch
+	SetId(WorkerId)
+	Init(WorkerParams, WorkerConfig) error
+	Shutdown()
+	Process(chan WorkerReport, chan WorkerId) // both implementations send their own id on the second channel once their output loop ends
+	DumpLog(io.Writer) error // implementations are expected to write their captured output lines to the writer
+}
+
 func Report(ws *WorkerState, r WorkerReport) {
 	//fmt.Println(r)
 
@@ -57,7 +66,7 @@ func Report(ws *WorkerState, r WorkerReport) {
 
 type WorkerList map[WorkerId]*WorkerState
 
-func (ws *WorkerList) Start(report chan WorkerReport, done chan bool) (i int) {
+func (ws *WorkerList) Start(report chan WorkerReport, done chan WorkerId) (i int) {
 	i = 0
 	for j := range *ws {
 		go (*ws)[j].w.Process(report, done)
@@ -160,7 +169,7 @@ func (run *BenchmarkRun) Run() (err error) {
 	}
 	
 	report := make(chan WorkerReport)
-	done := make(chan bool)
+	done := make(chan WorkerId)
 	signals := make(chan os.Signal, 1)
 
 	signal.Notify(signals, os.Interrupt)
@@ -179,12 +188,16 @@ func (run *BenchmarkRun) Run() (err error) {
 				run.Results.Raw = append(run.Results.Raw, r)
 				Report(Workers[r.Id], r)
 			}
-		case <-done:
+		case did := <-done:
 			if ! stopped {
-				fmt.Println("WARNING: Worker left early")
+				fmt.Println("WARNING: Worker", did, "left early, shutting down workers")
+				Workers.Stop()
+				stopped = true
+				err = fmt.Errorf("Worker %v exited early", did)
+				Workers[did].w.DumpLog(os.Stdout)
 			}
 			i--;
-			fmt.Println(i, "workers left");
+			fmt.Printf("Worker %v exited; %d workers left\n", did, i);
 		case <-timeout:
 			if ! stopped {
 				Workers.Stop()
@@ -201,7 +214,6 @@ func (run *BenchmarkRun) Run() (err error) {
 				}
 				err = fmt.Errorf("Interrupted")
 			} else {
-				err = fmt.Errorf("Interrupted")
 				fmt.Println("SIGINT received after stop, exiting without cleaning up")
 				return
 			}
diff --git a/xenworker.go b/xenworker.go
index e98c970..45e0876 100644
--- a/xenworker.go
+++ b/xenworker.go
@@ -42,6 +42,7 @@ type XenWorker struct {
 	consoleCmd *exec.Cmd
 	console io.ReadCloser
 	jsonStarted bool
+	Log []string
 }
 
 // We have to capitalize the element names so that the json class can
@@ -231,8 +232,22 @@ func (w *XenWorker) Shutdown() {
 	}
 }
 
+func (w *XenWorker) DumpLog(f io.Writer) (err error) { // write every entry of w.Log to f, one per line
+	b := bufio.NewWriter(f)
+	defer b.Flush() // runs on both return paths; the error Flush itself returns is discarded
+	for _, line := range w.Log {
+		_, err = fmt.Fprintln(b, line)
+		if err != nil {
+			return // stop at the first write error; err is the named result
+		}
+	}
+	return
+}
+
+
+
 // FIXME: Return an error
-func (w *XenWorker) Process(report chan WorkerReport, done chan bool) {
+func (w *XenWorker) Process(report chan WorkerReport, done chan WorkerId) {
 	// // xl unpause [vmname]
 	err := xg.Ctx.DomainUnpause(Domid(w.domid))
 	if err != nil {
@@ -244,6 +259,8 @@ func (w *XenWorker) Process(report chan WorkerReport, done chan bool) {
 
 	for scanner.Scan() {
 		s := scanner.Text()
+
+		w.Log = append(w.Log, s)
 		
 		//fmt.Println("Got these bytes: ", s);
 
@@ -265,7 +282,7 @@ func (w *XenWorker) Process(report chan WorkerReport, done chan bool) {
 		}
 	}
 
-	done <- true
+	done <- w.id
 
 	w.consoleCmd.Wait()
 }
-- 
2.7.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

  parent reply	other threads:[~2016-12-29  1:14 UTC|newest]

Thread overview: 67+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-12-29  1:13 [PATCH RFC 01/59] Initial controller framework Ronald Rojas
2016-12-29  1:13 ` [PATCH RFC 02/59] controller: Revamp communication structure Ronald Rojas
2016-12-29  1:13 ` [PATCH RFC 03/59] controller: Initial attempt to generalize process / vm creation Ronald Rojas
2016-12-29  1:13 ` [PATCH RFC 04/59] Controller: Move process worker into its own file Ronald Rojas
2016-12-29  1:13 ` [PATCH RFC 05/59] controller: Add WorkerParams argument to Init in Worker interface Ronald Rojas
2016-12-29  1:13 ` [PATCH RFC 06/59] Reorganize to enable "Dist" directory Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 07/59] controller: Introduce basic Xen functionality Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 08/59] controller: Exit after second SIGINT Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 09/59] controller: Refactor creation and stopping of workers into WorkerList methods Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 10/59] controller: First cut at BenchmarkParams Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 11/59] Refactor to move towards benchmark "plans" and data analysis Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 12/59] Basic 'report' functionality Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 13/59] Add GPL headers / COPYING file (v2 only) Ronald Rojas
2016-12-29 10:51   ` Wei Liu
2016-12-29  1:14 ` [PATCH RFC 14/59] benchmark: Store data in terms of worker sets and worker ids Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 15/59] controller: Move "running" code to a separate file Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 16/59] controller: Rename an element in BenchmarkRun to be more accurate Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 17/59] controller: Collect and display statistics on WorkerSets Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 18/59] controller: Add cpupool global config Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 19/59] Add basic libxl framework, get domain cpu_time Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 20/59] xenworker: Use libxl_domain_unpause rather than forking xl Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 21/59] Report utilization statistics Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 22/59] Use tsc for time rather than rumpkernel clock_gettime() Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 23/59] run: Don't collect results reported after command to stop guests is issued Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 24/59] report: Lots of changes Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 25/59] main: Change default workload to something a bit more extreme Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 26/59] Use kops rather than mops Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 27/59] report: Allow report verbosity to be specified Ronald Rojas
2016-12-29  1:14 ` Ronald Rojas [this message]
2016-12-29  1:14 ` [PATCH RFC 29/59] report: Add basic html report Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 30/59] htmlreport: Include utilization scatterplots Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 31/59] Make a binary that can run reports on a system without libxl Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 32/59] controller: Allow specification of an input file Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 33/59] controller: Add verbosity argument and update README with new instructions Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 34/59] controller: Make a useful config file Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 35/59] libxl: Add ListCpupool Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 36/59] controller: Make 'dummy' at the level of 'run' rather than xenworker Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 37/59] libxl.go: Provide a single global context by default Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 38/59] controller: Allow multiple schedulers in the same benchmark file Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 39/59] libxl.go: Put common link flags in libxl.go Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 40/59] controller: Add / update GPL text Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 41/59] libxl.go: Link statically rather than dynamically Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 42/59] plan: Allow "templating" from other runs Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 43/59] libxl: Add bitmap support Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 44/59] libxl: Implement CpupoolCreate Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 45/59] libxl: Implement Destroy, Add/Remove operations Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 46/59] libxl: Reorganize bitmapGotoC Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 47/59] libxl: Reorganize code Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 48/59] libxl: Add Ctx.CheckOpen Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 49/59] libxl: Implement libxl_cpupool_info and Scheduler.FromString() Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 50/59] libxl: Fix Bitmap.Max(), make Test() / Clear() more robust Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 51/59] controller: Make and/or modify cpupools when possible Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 52/59] libxl: Implement Bitmap.String() Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 53/59] controller: Add WorkerConfig.SoftAffinity Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 54/59] controller/run: Add RunConfig.NumaDisable Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 55/59] plan: Make the matrix generation more programmatic Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 56/59] controller/plan: Add NumaDisable to SimpleMatrix Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 57/59] tools/blktap2: remove unused inclusion of sys/sysctl.l Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 58/59] remove irrelevant files from old repository Ronald Rojas
2016-12-29  1:14 ` [PATCH RFC 59/59] tools/xenlight: Create interface for xenlight info Ronald Rojas
2016-12-29 10:34   ` George Dunlap
2016-12-29 10:52     ` Wei Liu
2016-12-29 13:49       ` Ronald Rojas
2016-12-29 13:45   ` George Dunlap
2016-12-29 20:20     ` George Dunlap
2017-01-03 17:45     ` Ronald Rojas
2017-01-04 16:44       ` George Dunlap

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1482974092-15891-28-git-send-email-ronladred@gmail.com \
    --to=ronladred@gmail.com \
    --cc=dunlapg@umich.edu \
    --cc=george.dunlap@citrix.com \
    --cc=ian.jackson@eu.citrix.com \
    --cc=wei.liu2@citrix.com \
    --cc=xen-devel@lists.xen.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).