[PATCH 0/4] run-command: new check

git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH 0/4] run-command: new check_command helper
@ 2013-04-01 21:46 Felipe Contreras
  2013-04-01 21:46 ` [PATCH 1/4] run-command: add " Felipe Contreras
                   ` (3 more replies)
  0 siblings, 4 replies; 21+ messages in thread
From: Felipe Contreras @ 2013-04-01 21:46 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Jeff King, Johannes Sixt, Aaron Schrab,
	Clemens Buchacher, David Michael Barr, Florian Achleitner,
	Felipe Contreras

Hi,

The first patch does all the work, the second patch uses it; basically, this is
needed so the transport-helper code is able to check if the remote-helper child
is stilll running. Without this support, the status of the remote-helper files
and configuration can end up very badly when errors occur, to the point where
the user is unable to use it any more.

The rest of the patches are for testing purposes only. I ran all the tests with
these, and I didn't see any problems.

Cheers.

Felipe Contreras (4):
  run-command: add new check_command helper
  transport-helper: check if remote helper is alive
  tmp: remote-helper: add timers to catch errors
  tmp: run-command: code to exercise check_command

 git-remote-testgit        | 12 +++++++++++
 run-command.c             | 52 +++++++++++++++++++++++++++++++++++++++++------
 run-command.h             |  6 ++++++
 t/t5801-remote-helpers.sh | 19 +++++++++++++++++
 transport-helper.c        | 11 ++++++++++
 5 files changed, 94 insertions(+), 6 deletions(-)

-- 
1.8.2

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 1/4] run-command: add new check_command helper
  2013-04-01 21:46 [PATCH 0/4] run-command: new check_command helper Felipe Contreras
@ 2013-04-01 21:46 ` Felipe Contreras
  2013-04-01 23:23   ` Jeff King
  2013-04-01 21:46 ` [PATCH 2/4] transport-helper: check if remote helper is alive Felipe Contreras
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 21+ messages in thread
From: Felipe Contreras @ 2013-04-01 21:46 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Jeff King, Johannes Sixt, Aaron Schrab,
	Clemens Buchacher, David Michael Barr, Florian Achleitner,
	Felipe Contreras

And persistent_waitpid() to recover the information from the last run.

Signed-off-by: Felipe Contreras <felipe.contreras@gmail.com>
---
 run-command.c | 46 ++++++++++++++++++++++++++++++++++++++++------
 run-command.h |  6 ++++++
 2 files changed, 46 insertions(+), 6 deletions(-)

diff --git a/run-command.c b/run-command.c
index 07e27ff..16833df 100644
--- a/run-command.c
+++ b/run-command.c
@@ -226,14 +226,27 @@ static inline void set_cloexec(int fd)
 		fcntl(fd, F_SETFD, flags | FD_CLOEXEC);
 }
 
-static int wait_or_whine(pid_t pid, const char *argv0)
+static pid_t persistent_waitpid(struct child_process *cmd, pid_t pid, int *stat_loc)
+{
+	if (cmd->last_wait.code) {
+		errno = cmd->last_wait.failed_errno;
+		*stat_loc = cmd->last_wait.status;
+		return errno ? -1 : pid;
+	} else {
+		pid_t waiting;
+		while ((waiting = waitpid(pid, stat_loc, 0)) < 0 && errno == EINTR)
+			;	/* nothing */
+		return waiting;
+	}
+}
+
+static int wait_or_whine(struct child_process *cmd, pid_t pid, const char *argv0)
 {
 	int status, code = -1;
 	pid_t waiting;
 	int failed_errno = 0;
 
-	while ((waiting = waitpid(pid, &status, 0)) < 0 && errno == EINTR)
-		;	/* nothing */
+	waiting = persistent_waitpid(cmd, pid, &status);
 
 	if (waiting < 0) {
 		failed_errno = errno;
@@ -437,7 +450,7 @@ fail_pipe:
 		 * At this point we know that fork() succeeded, but execvp()
 		 * failed. Errors have been reported to our stderr.
 		 */
-		wait_or_whine(cmd->pid, cmd->argv[0]);
+		wait_or_whine(cmd, cmd->pid, cmd->argv[0]);
 		failed_errno = errno;
 		cmd->pid = -1;
 	}
@@ -542,7 +555,7 @@ fail_pipe:
 
 int finish_command(struct child_process *cmd)
 {
-	return wait_or_whine(cmd->pid, cmd->argv[0]);
+	return wait_or_whine(cmd, cmd->pid, cmd->argv[0]);
 }
 
 int run_command(struct child_process *cmd)
@@ -553,6 +566,27 @@ int run_command(struct child_process *cmd)
 	return finish_command(cmd);
 }
 
+int check_command(struct child_process *cmd)
+{
+	int status;
+	pid_t waiting;
+	int failed_errno = 0;
+
+	waiting = waitpid(cmd->pid, &status, WNOHANG);
+
+	if (waiting != cmd->pid)
+		return 1;
+
+	if (waiting < 0)
+		failed_errno = errno;
+
+	cmd->last_wait.code = -1;
+	cmd->last_wait.failed_errno = failed_errno;
+	cmd->last_wait.status = status;
+
+	return 0;
+}
+
 static void prepare_run_command_v_opt(struct child_process *cmd,
 				      const char **argv,
 				      int opt)
@@ -729,7 +763,7 @@ error:
 int finish_async(struct async *async)
 {
 #ifdef NO_PTHREADS
-	return wait_or_whine(async->pid, "child process");
+	return wait_or_whine(cmd, async->pid, "child process");
 #else
 	void *ret = (void *)(intptr_t)(-1);
 
diff --git a/run-command.h b/run-command.h
index 221ce33..66aaac7 100644
--- a/run-command.h
+++ b/run-command.h
@@ -39,11 +39,17 @@ struct child_process {
 	unsigned stdout_to_stderr:1;
 	unsigned use_shell:1;
 	unsigned clean_on_exit:1;
+	struct last_wait {
+		int code;
+		int failed_errno;
+		int status;
+	} last_wait;
 };
 
 int start_command(struct child_process *);
 int finish_command(struct child_process *);
 int run_command(struct child_process *);
+int check_command(struct child_process *);
 
 extern char *find_hook(const char *name);
 extern int run_hook(const char *index_file, const char *name, ...);
-- 
1.8.2

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/4] run-command: add new check_command helper
  2013-04-01 21:46 ` [PATCH 1/4] run-command: add " Felipe Contreras
@ 2013-04-01 23:23   ` Jeff King
  2013-04-01 23:58     ` Felipe Contreras
  0 siblings, 1 reply; 21+ messages in thread
From: Jeff King @ 2013-04-01 23:23 UTC (permalink / raw)
  To: Felipe Contreras
  Cc: git, Junio C Hamano, Johannes Sixt, Aaron Schrab,
	Clemens Buchacher, David Michael Barr, Florian Achleitner

On Mon, Apr 01, 2013 at 03:46:41PM -0600, Felipe Contreras wrote:

> And persistent_waitpid() to recover the information from the last run.
> 
> Signed-off-by: Felipe Contreras <felipe.contreras@gmail.com>

A little background would be nice here...what problem are we solving?

> -static int wait_or_whine(pid_t pid, const char *argv0)
> +static pid_t persistent_waitpid(struct child_process *cmd, pid_t pid, int *stat_loc)
> +{
> +	if (cmd->last_wait.code) {
> +		errno = cmd->last_wait.failed_errno;
> +		*stat_loc = cmd->last_wait.status;
> +		return errno ? -1 : pid;
> +	} else {
> +		pid_t waiting;
> +		while ((waiting = waitpid(pid, stat_loc, 0)) < 0 && errno == EINTR)
> +			;	/* nothing */
> +		return waiting;
> +	}
> +}

So it looks we are trying to save the waitpid state from a previous run
and use the saved value. Otherwise, waitpid as normal.

We loop on EINTR when we actually call waitpid(). But we don't check
whether the saved errno is waitpid. What happens if we EINTR during the
saved call to waitpid?

> +static int wait_or_whine(struct child_process *cmd, pid_t pid, const char *argv0)
>  {
>  	int status, code = -1;
>  	pid_t waiting;
>  	int failed_errno = 0;
>  
> -	while ((waiting = waitpid(pid, &status, 0)) < 0 && errno == EINTR)
> -		;	/* nothing */
> +	waiting = persistent_waitpid(cmd, pid, &status);
>  
>  	if (waiting < 0) {
>  		failed_errno = errno;

We now take argv0 into wait_or_whine. But I don't see it being used.
What's it for?

> +int check_command(struct child_process *cmd)
> +{
> +	int status;
> +	pid_t waiting;
> +	int failed_errno = 0;
> +
> +	waiting = waitpid(cmd->pid, &status, WNOHANG);

This might return the pid if it has died, -1 if there was an error, or 0
if the process still exists but hasn't died. So...

> +	if (waiting != cmd->pid)
> +		return 1;
> +
> +	if (waiting < 0)
> +		failed_errno = errno;

How would we ever trigger this second conditional? It makes sense to
return 1 when "waiting == 0", as that is saying "yes, your process is
still running" (though documenting the return either at the top of the
function or in the commit message would be helpful)

But if we get an error from waitpid, we would also return 1, which
doesn't make sense (especially if it is something like EINTR -- I don't
know offhand if we can get EINTR during WNOHANG. It should not block,
but I don't know if it can race with a signal).

> +	cmd->last_wait.code = -1;
> +	cmd->last_wait.failed_errno = failed_errno;
> +	cmd->last_wait.status = status;

Since we can only get here when waiting == cmd->pid, failed_errno is
always 0. We do correctly record the status. Why is code set to -1?  It
seems to be used as a flag to say "this structure is valid". Should it
be defined as "unsigned valid:1;" instead?

-Peff

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/4] run-command: add new check_command helper
  2013-04-01 23:23   ` Jeff King
@ 2013-04-01 23:58     ` Felipe Contreras
  2013-04-02  2:22       ` Jeff King
  0 siblings, 1 reply; 21+ messages in thread
From: Felipe Contreras @ 2013-04-01 23:58 UTC (permalink / raw)
  To: Jeff King
  Cc: git, Junio C Hamano, Johannes Sixt, Aaron Schrab,
	Clemens Buchacher, David Michael Barr, Florian Achleitner

On Mon, Apr 1, 2013 at 5:23 PM, Jeff King <peff@peff.net> wrote:
> On Mon, Apr 01, 2013 at 03:46:41PM -0600, Felipe Contreras wrote:

>> -static int wait_or_whine(pid_t pid, const char *argv0)
>> +static pid_t persistent_waitpid(struct child_process *cmd, pid_t pid, int *stat_loc)
>> +{
>> +     if (cmd->last_wait.code) {
>> +             errno = cmd->last_wait.failed_errno;
>> +             *stat_loc = cmd->last_wait.status;
>> +             return errno ? -1 : pid;
>> +     } else {
>> +             pid_t waiting;
>> +             while ((waiting = waitpid(pid, stat_loc, 0)) < 0 && errno == EINTR)
>> +                     ;       /* nothing */
>> +             return waiting;
>> +     }
>> +}
>
> So it looks we are trying to save the waitpid state from a previous run
> and use the saved value. Otherwise, waitpid as normal.
>
> We loop on EINTR when we actually call waitpid(). But we don't check
> whether the saved errno is waitpid. What happens if we EINTR during the
> saved call to waitpid?

Are you saying that even if we have stored the result of a waitpid
command, if errno is EINTR, then we should still loop waitpid()? If
so, I guess this would do the trick:

static pid_t persistent_waitpid(struct child_process *cmd, pid_t pid,
int *stat_loc)
{
	pid_t waiting;

	if (cmd->last_wait.code) {
		errno = cmd->last_wait.failed_errno;
		*stat_loc = cmd->last_wait.status;
		if (errno != EINTR)
			return errno ? -1 : pid;
	}

	while ((waiting = waitpid(pid, stat_loc, 0)) < 0 && errno == EINTR)
		;	/* nothing */
	return waiting;
}

>> +static int wait_or_whine(struct child_process *cmd, pid_t pid, const char *argv0)
>>  {
>>       int status, code = -1;
>>       pid_t waiting;
>>       int failed_errno = 0;
>>
>> -     while ((waiting = waitpid(pid, &status, 0)) < 0 && errno == EINTR)
>> -             ;       /* nothing */
>> +     waiting = persistent_waitpid(cmd, pid, &status);
>>
>>       if (waiting < 0) {
>>               failed_errno = errno;
>
> We now take argv0 into wait_or_whine. But I don't see it being used.
> What's it for?

It was there before:
-static int wait_or_whine(pid_t pid, const char *argv0)
+static int wait_or_whine(struct child_process *cmd, pid_t pid, const
char *argv0)

>> +int check_command(struct child_process *cmd)
>> +{
>> +     int status;
>> +     pid_t waiting;
>> +     int failed_errno = 0;
>> +
>> +     waiting = waitpid(cmd->pid, &status, WNOHANG);
>
> This might return the pid if it has died, -1 if there was an error, or 0
> if the process still exists but hasn't died. So...
>
>> +     if (waiting != cmd->pid)
>> +             return 1;
>> +
>> +     if (waiting < 0)
>> +             failed_errno = errno;
>
> How would we ever trigger this second conditional? It makes sense to
> return 1 when "waiting == 0", as that is saying "yes, your process is
> still running" (though documenting the return either at the top of the
> function or in the commit message would be helpful)
>
> But if we get an error from waitpid, we would also return 1, which
> doesn't make sense (especially if it is something like EINTR -- I don't
> know offhand if we can get EINTR during WNOHANG. It should not block,
> but I don't know if it can race with a signal).

How about this?

if (waiting >= 0 && waiting != cmd->pid)
		return 1;

>> +     cmd->last_wait.code = -1;
>> +     cmd->last_wait.failed_errno = failed_errno;
>> +     cmd->last_wait.status = status;
>
> Since we can only get here when waiting == cmd->pid,

No, also when waiting < 0.

> Why is code set to -1?  It
> seems to be used as a flag to say "this structure is valid". Should it
> be defined as "unsigned valid:1;" instead?

Yeap. I was using it for other purposes before, 'valid' would be better now.

Cheers.

-- 
Felipe Contreras

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/4] run-command: add new check_command helper
  2013-04-01 23:58     ` Felipe Contreras
@ 2013-04-02  2:22       ` Jeff King
  2013-04-02  5:11         ` Felipe Contreras
  0 siblings, 1 reply; 21+ messages in thread
From: Jeff King @ 2013-04-02  2:22 UTC (permalink / raw)
  To: Felipe Contreras
  Cc: git, Junio C Hamano, Johannes Sixt, Aaron Schrab,
	Clemens Buchacher, David Michael Barr, Florian Achleitner

On Mon, Apr 01, 2013 at 05:58:55PM -0600, Felipe Contreras wrote:

> On Mon, Apr 1, 2013 at 5:23 PM, Jeff King <peff@peff.net> wrote:
> > On Mon, Apr 01, 2013 at 03:46:41PM -0600, Felipe Contreras wrote:
> 
> >> -static int wait_or_whine(pid_t pid, const char *argv0)
> >> +static pid_t persistent_waitpid(struct child_process *cmd, pid_t pid, int *stat_loc)
> >> +{
> >> +     if (cmd->last_wait.code) {
> >> +             errno = cmd->last_wait.failed_errno;
> >> +             *stat_loc = cmd->last_wait.status;
> >> +             return errno ? -1 : pid;
> >> +     } else {
> >> +             pid_t waiting;
> >> +             while ((waiting = waitpid(pid, stat_loc, 0)) < 0 && errno == EINTR)
> >> +                     ;       /* nothing */
> >> +             return waiting;
> >> +     }
> >> +}
> >
> > So it looks we are trying to save the waitpid state from a previous run
> > and use the saved value. Otherwise, waitpid as normal.
> >
> > We loop on EINTR when we actually call waitpid(). But we don't check
> > whether the saved errno is waitpid. What happens if we EINTR during the
> > saved call to waitpid?
> 
> Are you saying that even if we have stored the result of a waitpid
> command, if errno is EINTR, then we should still loop waitpid()? If
> so, I guess this would do the trick:

Yes, I think that would work. Though I wonder if it is even worth
storing EINTR at all in the first place. It tells us nothing. In fact,
does storing any error condition really tell us anything? The two states
we are interested in at this point are:

  1. We have reaped the child via waitpid; here is its status.

  2. We have not (either we did not try, it was not dead yet, or we were
     not able to due to an error). We should now try it again.

If we got EINTR the first time around, we would likely get the "real"
answer this time. If we get anything else (like EINVAL or ECHILD), then
we would get the same thing again calling waitpid() later.

> > We now take argv0 into wait_or_whine. But I don't see it being used.
> > What's it for?
> 
> It was there before:
> -static int wait_or_whine(pid_t pid, const char *argv0)
> +static int wait_or_whine(struct child_process *cmd, pid_t pid, const
> char *argv0)

Ah, sorry, I misread the diff. We are adding "cmd", not "argv0".

> >> +     if (waiting != cmd->pid)
> >> +             return 1;
> >> +
> >> +     if (waiting < 0)
> >> +             failed_errno = errno;
> >
> > How would we ever trigger this second conditional?
> [...]
> How about this?
> 
> if (waiting >= 0 && waiting != cmd->pid)
> 		return 1;

That would trigger the rest of your code in the error case, which I
think was your original intent. But then we return "0" from
check_command. Is that right?

There are three states we can be in from calling waitpid:

  1. The process is dead.

  2. The process is not dead.

  3. We could not determine which because waitpid returned an error.

It is clear that check_command is trying to tell its caller (1) or (2);
but what should it say in case of (3)?

Naively, given how patch 2 uses it, I think it would actually make sense
for it to return 1. That is, the semantics are "return 0 if and only if
the pid is verified to be dead; otherwise return 1".

But if we know from reading waitpid(3) that waitpid should only fail due
to EINTR, or due to bogus arguments (e.g., a pid that does not exist or
has already been reaped), then maybe something like this makes sense:

  while ((waiting = waitpid(pid, &status, 0)) < 0 && errno == EINTR)
          ; /* nothing */

  /* pid definitely still going */
  if (!waiting)
          return 1;

  /* pid definitely died */
  if (waiting == cmd->pid) {
          cmd->last_status.valid = 1;
          cmd->last_status.status = status;
          return 0;
  }

  /*
   * this should never happen, since we handed waitpid() a single
   * pid, so it should either return that pid, 0, or an error.
   */
  if (waiting > 0)
          die("BUG: waitpid reported a random pid?");

  /*
   * otherwise, we have an error. Assume the pid is gone, since that
   * is the only reason for waitpid to report a problem besides EINTR.
   * We don't bother recording errno, since we can just repeat
   * the waitpid again later.
   */
   return 0;

> >> +     cmd->last_wait.code = -1;
> >> +     cmd->last_wait.failed_errno = failed_errno;
> >> +     cmd->last_wait.status = status;
> >
> > Since we can only get here when waiting == cmd->pid,
> 
> No, also when waiting < 0.

After the fix above, yes; in the original we would always have exited
already.

As an aside, should check_command be able to be called twice? That is,
should it first check for cmd->last_status.valid and return early if
somebody has already reaped the child? It doesn't matter for the code
you add in patch 2, but it seems like it would give the least surprise
to somebody trying to use it later.

-Peff

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/4] run-command: add new check_command helper
  2013-04-02  2:22       ` Jeff King
@ 2013-04-02  5:11         ` Felipe Contreras
  2013-04-02  5:14           ` Jeff King
  0 siblings, 1 reply; 21+ messages in thread
From: Felipe Contreras @ 2013-04-02  5:11 UTC (permalink / raw)
  To: Jeff King
  Cc: git, Junio C Hamano, Johannes Sixt, Aaron Schrab,
	Clemens Buchacher, David Michael Barr, Florian Achleitner

On Mon, Apr 1, 2013 at 8:22 PM, Jeff King <peff@peff.net> wrote:
> On Mon, Apr 01, 2013 at 05:58:55PM -0600, Felipe Contreras wrote:

>> Are you saying that even if we have stored the result of a waitpid
>> command, if errno is EINTR, then we should still loop waitpid()? If
>> so, I guess this would do the trick:
>
> Yes, I think that would work. Though I wonder if it is even worth
> storing EINTR at all in the first place. It tells us nothing. In fact,
> does storing any error condition really tell us anything?

Probably not, I just tried to minimize the potential behavior changes.

> The two states
> we are interested in at this point are:
>
>   1. We have reaped the child via waitpid; here is its status.
>
>   2. We have not (either we did not try, it was not dead yet, or we were
>      not able to due to an error). We should now try it again.
>
> If we got EINTR the first time around, we would likely get the "real"
> answer this time. If we get anything else (like EINVAL or ECHILD), then
> we would get the same thing again calling waitpid() later.
>
>> > We now take argv0 into wait_or_whine. But I don't see it being used.
>> > What's it for?
>>
>> It was there before:
>> -static int wait_or_whine(pid_t pid, const char *argv0)
>> +static int wait_or_whine(struct child_process *cmd, pid_t pid, const
>> char *argv0)
>
> Ah, sorry, I misread the diff. We are adding "cmd", not "argv0".

Yeah, which in fact was already there before.

> That would trigger the rest of your code in the error case, which I
> think was your original intent. But then we return "0" from
> check_command. Is that right?
>
> There are three states we can be in from calling waitpid:
>
>   1. The process is dead.
>
>   2. The process is not dead.
>
>   3. We could not determine which because waitpid returned an error.
>
> It is clear that check_command is trying to tell its caller (1) or (2);
> but what should it say in case of (3)?
>
> Naively, given how patch 2 uses it, I think it would actually make sense
> for it to return 1. That is, the semantics are "return 0 if and only if
> the pid is verified to be dead; otherwise return 1".

I thought if there was an error that constituted a failure.

> But if we know from reading waitpid(3) that waitpid should only fail due
> to EINTR, or due to bogus arguments (e.g., a pid that does not exist or
> has already been reaped), then maybe something like this makes sense:
>
>   while ((waiting = waitpid(pid, &status, 0)) < 0 && errno == EINTR)
>           ; /* nothing */

But we don't want to wait synchronously here, we just want to ping.

>   /* pid definitely still going */
>   if (!waiting)
>           return 1;
>
>   /* pid definitely died */
>   if (waiting == cmd->pid) {
>           cmd->last_status.valid = 1;
>           cmd->last_status.status = status;
>           return 0;
>   }
>
>   /*
>    * this should never happen, since we handed waitpid() a single
>    * pid, so it should either return that pid, 0, or an error.
>    */
>   if (waiting > 0)
>           die("BUG: waitpid reported a random pid?");
>
>   /*
>    * otherwise, we have an error. Assume the pid is gone, since that
>    * is the only reason for waitpid to report a problem besides EINTR.
>    * We don't bother recording errno, since we can just repeat
>    * the waitpid again later.
>    */
>    return 0;

The rest makes sense.

>> >> +     cmd->last_wait.code = -1;
>> >> +     cmd->last_wait.failed_errno = failed_errno;
>> >> +     cmd->last_wait.status = status;
>> >
>> > Since we can only get here when waiting == cmd->pid,
>>
>> No, also when waiting < 0.
>
> After the fix above, yes; in the original we would always have exited
> already.

No:

+       if (waiting != cmd->pid)
+               return 1;

If waiting < 0, waiting != cmd->pid, and therefore this return is not
triggered, and there's only one more return at the end of the
function.

Cheers.

-- 
Felipe Contreras

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/4] run-command: add new check_command helper
  2013-04-02  5:11         ` Felipe Contreras
@ 2013-04-02  5:14           ` Jeff King
  2013-04-02  5:22             ` Felipe Contreras
  0 siblings, 1 reply; 21+ messages in thread
From: Jeff King @ 2013-04-02  5:14 UTC (permalink / raw)
  To: Felipe Contreras
  Cc: git, Junio C Hamano, Johannes Sixt, Aaron Schrab,
	Clemens Buchacher, David Michael Barr, Florian Achleitner

On Mon, Apr 01, 2013 at 11:11:20PM -0600, Felipe Contreras wrote:

> > But if we know from reading waitpid(3) that waitpid should only fail due
> > to EINTR, or due to bogus arguments (e.g., a pid that does not exist or
> > has already been reaped), then maybe something like this makes sense:
> >
> >   while ((waiting = waitpid(pid, &status, 0)) < 0 && errno == EINTR)
> >           ; /* nothing */
> 
> But we don't want to wait synchronously here, we just want to ping.

Yeah, sorry, I forgot the WNOHANG there.

> > After the fix above, yes; in the original we would always have exited
> > already.
> 
> No:
> 
> +       if (waiting != cmd->pid)
> +               return 1;
> 
> If waiting < 0, waiting != cmd->pid, and therefore this return is not
> triggered, and there's only one more return at the end of the
> function.

Are my eyes not working? If waiting < 0, then waiting != cmd->pid, and
therefore this return _is_ triggered.

-Peff

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/4] run-command: add new check_command helper
  2013-04-02  5:14           ` Jeff King
@ 2013-04-02  5:22             ` Felipe Contreras
  2013-04-02  5:26               ` Jeff King
  0 siblings, 1 reply; 21+ messages in thread
From: Felipe Contreras @ 2013-04-02  5:22 UTC (permalink / raw)
  To: Jeff King
  Cc: git, Junio C Hamano, Johannes Sixt, Aaron Schrab,
	Clemens Buchacher, David Michael Barr, Florian Achleitner

On Mon, Apr 1, 2013 at 11:14 PM, Jeff King <peff@peff.net> wrote:
> On Mon, Apr 01, 2013 at 11:11:20PM -0600, Felipe Contreras wrote:
>
>> > But if we know from reading waitpid(3) that waitpid should only fail due
>> > to EINTR, or due to bogus arguments (e.g., a pid that does not exist or
>> > has already been reaped), then maybe something like this makes sense:
>> >
>> >   while ((waiting = waitpid(pid, &status, 0)) < 0 && errno == EINTR)
>> >           ; /* nothing */
>>
>> But we don't want to wait synchronously here, we just want to ping.
>
> Yeah, sorry, I forgot the WNOHANG there.

It still can potentially stay in a loop for some cycles.

>> > After the fix above, yes; in the original we would always have exited
>> > already.
>>
>> No:
>>
>> +       if (waiting != cmd->pid)
>> +               return 1;
>>
>> If waiting < 0, waiting != cmd->pid, and therefore this return is not
>> triggered, and there's only one more return at the end of the
>> function.
>
> Are my eyes not working? If waiting < 0, then waiting != cmd->pid, and
> therefore this return _is_ triggered.

Oh, right, it's only after the modification that the code works.

-- 
Felipe Contreras

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/4] run-command: add new check_command helper
  2013-04-02  5:22             ` Felipe Contreras
@ 2013-04-02  5:26               ` Jeff King
  0 siblings, 0 replies; 21+ messages in thread
From: Jeff King @ 2013-04-02  5:26 UTC (permalink / raw)
  To: Felipe Contreras
  Cc: git, Junio C Hamano, Johannes Sixt, Aaron Schrab,
	Clemens Buchacher, David Michael Barr, Florian Achleitner

On Mon, Apr 01, 2013 at 11:22:36PM -0600, Felipe Contreras wrote:

> On Mon, Apr 1, 2013 at 11:14 PM, Jeff King <peff@peff.net> wrote:
> > On Mon, Apr 01, 2013 at 11:11:20PM -0600, Felipe Contreras wrote:
> >
> >> > But if we know from reading waitpid(3) that waitpid should only fail due
> >> > to EINTR, or due to bogus arguments (e.g., a pid that does not exist or
> >> > has already been reaped), then maybe something like this makes sense:
> >> >
> >> >   while ((waiting = waitpid(pid, &status, 0)) < 0 && errno == EINTR)
> >> >           ; /* nothing */
> >>
> >> But we don't want to wait synchronously here, we just want to ping.
> >
> > Yeah, sorry, I forgot the WNOHANG there.
> 
> It still can potentially stay in a loop for some cycles.

That should be OK; it's the same loop we use in wait_or_whine (and that
is in fact how I managed to get the WNOHANG wrong, as I copied the loop
from there but forgot to update the flag variable).  A few cycles is OK,
as it is really about handling a simultaneous signal; it should be rare
that we loop at all, and even rarer to loop more than a single time. On
Linux, I don't think we will ever get EINTR at all, according to the
manpage; however, POSIX seems to allow EINTR even with WNOHANG.

-Peff

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 2/4] transport-helper: check if remote helper is alive
  2013-04-01 21:46 [PATCH 0/4] run-command: new check_command helper Felipe Contreras
  2013-04-01 21:46 ` [PATCH 1/4] run-command: add " Felipe Contreras
@ 2013-04-01 21:46 ` Felipe Contreras
  2013-04-01 23:33   ` Jeff King
  2013-04-02  0:26   ` Junio C Hamano
  2013-04-01 21:46 ` [PATCH 3/4] tmp: remote-helper: add timers to catch errors Felipe Contreras
  2013-04-01 21:46 ` [PATCH 4/4] tmp: run-command: code to exercise check_command Felipe Contreras
  3 siblings, 2 replies; 21+ messages in thread
From: Felipe Contreras @ 2013-04-01 21:46 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Jeff King, Johannes Sixt, Aaron Schrab,
	Clemens Buchacher, David Michael Barr, Florian Achleitner,
	Felipe Contreras

Otherwise transport-helper will continue checking for refs and other
things what will confuse the user more.
---
 git-remote-testgit        | 11 +++++++++++
 t/t5801-remote-helpers.sh | 19 +++++++++++++++++++
 transport-helper.c        |  8 ++++++++
 3 files changed, 38 insertions(+)

diff --git a/git-remote-testgit b/git-remote-testgit
index b395c8d..ca0cf09 100755
--- a/git-remote-testgit
+++ b/git-remote-testgit
@@ -61,12 +61,23 @@ do
 			echo "feature import-marks=$gitmarks"
 			echo "feature export-marks=$gitmarks"
 		fi
+
+		if test -n "$GIT_REMOTE_TESTGIT_FAILURE"
+		then
+			exit -1
+		fi
+
 		echo "feature done"
 		git fast-export "${testgitmarks_args[@]}" $refs |
 		sed -e "s#refs/heads/#${prefix}/heads/#g"
 		echo "done"
 		;;
 	export)
+		if test -n "$GIT_REMOTE_TESTGIT_FAILURE"
+		then
+			exit -1
+		fi
+
 		before=$(git for-each-ref --format='%(refname) %(objectname)')
 
 		git fast-import "${testgitmarks_args[@]}" --quiet
diff --git a/t/t5801-remote-helpers.sh b/t/t5801-remote-helpers.sh
index f387027..26e9a5b 100755
--- a/t/t5801-remote-helpers.sh
+++ b/t/t5801-remote-helpers.sh
@@ -166,4 +166,23 @@ test_expect_success 'push ref with existing object' '
 	compare_refs local dup server dup
 '
 
+test_expect_success 'proper failure checks for fetching' '
+	(GIT_REMOTE_TESTGIT_FAILURE=1 &&
+	export GIT_REMOTE_TESTGIT_FAILURE &&
+	cd local &&
+	test_must_fail git fetch 2>&1 | \
+		grep "Error while running helper"
+	)
+'
+
+# We sleep to give fast-export a chance to catch the SIGPIPE
+test_expect_failure 'proper failure checks for pushing' '
+	(GIT_REMOTE_TESTGIT_FAILURE=1 &&
+	export GIT_REMOTE_TESTGIT_FAILURE &&
+	cd local &&
+	test_must_fail git push --all 2>&1 | \
+		grep "Error while running helper"
+	)
+'
+
 test_done
diff --git a/transport-helper.c b/transport-helper.c
index cb3ef7d..dfdfa7a 100644
--- a/transport-helper.c
+++ b/transport-helper.c
@@ -460,6 +460,10 @@ static int fetch_with_import(struct transport *transport,
 
 	if (finish_command(&fastimport))
 		die("Error while running fast-import");
+
+	if (!check_command(data->helper))
+		die("Error while running helper");
+
 	argv_array_free_detached(fastimport.argv);
 
 	/*
@@ -818,6 +822,10 @@ static int push_refs_with_export(struct transport *transport,
 
 	if (finish_command(&exporter))
 		die("Error while running fast-export");
+
+	if (!check_command(data->helper))
+		die("Error while running helper");
+
 	push_update_refs_status(data, remote_refs);
 	return 0;
 }
-- 
1.8.2

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH 2/4] transport-helper: check if remote helper is alive
  2013-04-01 21:46 ` [PATCH 2/4] transport-helper: check if remote helper is alive Felipe Contreras
@ 2013-04-01 23:33   ` Jeff King
  2013-04-02  0:12     ` Felipe Contreras
  2013-04-02  0:26   ` Junio C Hamano
  1 sibling, 1 reply; 21+ messages in thread
From: Jeff King @ 2013-04-01 23:33 UTC (permalink / raw)
  To: Felipe Contreras
  Cc: git, Junio C Hamano, Johannes Sixt, Aaron Schrab,
	Clemens Buchacher, David Michael Barr, Florian Achleitner

On Mon, Apr 01, 2013 at 03:46:42PM -0600, Felipe Contreras wrote:

> Otherwise transport-helper will continue checking for refs and other
> things what will confuse the user more.
> [...]
> diff --git a/transport-helper.c b/transport-helper.c
> index cb3ef7d..dfdfa7a 100644
> --- a/transport-helper.c
> +++ b/transport-helper.c
> @@ -460,6 +460,10 @@ static int fetch_with_import(struct transport *transport,
>  
>  	if (finish_command(&fastimport))
>  		die("Error while running fast-import");
> +
> +	if (!check_command(data->helper))
> +		die("Error while running helper");
> +
>  	argv_array_free_detached(fastimport.argv);

Can you be more specific about what happens when we miss the death here,
what happens next, etc?

Checking asynchronously for death like this is subject to a race
condition; the helper may be about to die but not have died yet. In
practice we may catch some cases, but this seems like an indication that
the protocol is not well thought-out. Usually we would wait for a
confirmation over the read pipe from a child, and know that the child
failed when either:

  1. It tells us so on the pipe.

  2. The pipe closes (at which point we know it is time to reap the
     child).

Why doesn't that scheme work here? I am not doubting you that it does
not; the import helper protocol is a bit of a mess, and I can easily
believe it has such a problem. But I'm wondering if it's possible to
improve it in a more robust way.

-Peff

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 2/4] transport-helper: check if remote helper is alive
  2013-04-01 23:33   ` Jeff King
@ 2013-04-02  0:12     ` Felipe Contreras
  2013-04-02  2:30       ` Jeff King
  0 siblings, 1 reply; 21+ messages in thread
From: Felipe Contreras @ 2013-04-02  0:12 UTC (permalink / raw)
  To: Jeff King
  Cc: git, Junio C Hamano, Johannes Sixt, Aaron Schrab,
	Clemens Buchacher, David Michael Barr, Florian Achleitner

On Mon, Apr 1, 2013 at 5:33 PM, Jeff King <peff@peff.net> wrote:
> On Mon, Apr 01, 2013 at 03:46:42PM -0600, Felipe Contreras wrote:
>
>> Otherwise transport-helper will continue checking for refs and other
>> things what will confuse the user more.
>> [...]
>> diff --git a/transport-helper.c b/transport-helper.c
>> index cb3ef7d..dfdfa7a 100644
>> --- a/transport-helper.c
>> +++ b/transport-helper.c
>> @@ -460,6 +460,10 @@ static int fetch_with_import(struct transport *transport,
>>
>>       if (finish_command(&fastimport))
>>               die("Error while running fast-import");
>> +
>> +     if (!check_command(data->helper))
>> +             die("Error while running helper");
>> +
>>       argv_array_free_detached(fastimport.argv);
>
> Can you be more specific about what happens when we miss the death here,
> what happens next, etc?

I have seen problems sporadically, like git trying to update refs to
object that don't exist. I also remember seeing mismatches between the
marks and the remote branches refs.

> Checking asynchronously for death like this is subject to a rac
> condition; the helper may be about to die but not have died yet. In
> practice we may catch some cases, but this seems like an indication that
> the protocol is not well thought-out. Usually we would wait for a
> confirmation over the read pipe from a child, and know that the child
> failed when either:
>
>   1. It tells us so on the pipe.
>
>   2. The pipe closes (at which point we know it is time to reap the
>      child).
>
> Why doesn't that scheme work here? I am not doubting you that it does
> not; the import helper protocol is a bit of a mess, and I can easily
> believe it has such a problem. But I'm wondering if it's possible to
> improve it in a more robust way.

The pipe is between fast-export and the remote-helper, "we"
(transport-helper) are not part of the pipe any more. That's the
problem.

Cheers.

-- 
Felipe Contreras

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 2/4] transport-helper: check if remote helper is alive
  2013-04-02  0:12     ` Felipe Contreras
@ 2013-04-02  2:30       ` Jeff King
  2013-04-02  4:51         ` Felipe Contreras
  0 siblings, 1 reply; 21+ messages in thread
From: Jeff King @ 2013-04-02  2:30 UTC (permalink / raw)
  To: Felipe Contreras
  Cc: git, Junio C Hamano, Johannes Sixt, Aaron Schrab,
	Clemens Buchacher, David Michael Barr, Florian Achleitner

On Mon, Apr 01, 2013 at 06:12:45PM -0600, Felipe Contreras wrote:

> > Checking asynchronously for death like this is subject to a rac
> > condition; the helper may be about to die but not have died yet. In
> > practice we may catch some cases, but this seems like an indication that
> > the protocol is not well thought-out. Usually we would wait for a
> > confirmation over the read pipe from a child, and know that the child
> > failed when either:
> >
> >   1. It tells us so on the pipe.
> >
> >   2. The pipe closes (at which point we know it is time to reap the
> >      child).
> >
> > Why doesn't that scheme work here? I am not doubting you that it does
> > not; the import helper protocol is a bit of a mess, and I can easily
> > believe it has such a problem. But I'm wondering if it's possible to
> > improve it in a more robust way.
> 
> The pipe is between fast-export and the remote-helper, "we"
> (transport-helper) are not part of the pipe any more. That's the
> problem.

So in fetch_with_import, we have a remote-helper, and we have a
bidirectional pipe to it. We then call get_importer, which starts
fast-import, whose stdin is connected to the stdout of the remote
helper. We tell the remote-helper to run the import, then we wait for
fast-import to finish (and complain if it fails).

Then what? We seem to do some more work, which I think is what causes
the errors you see; but should we instead be reaping the helper at this
point unconditionally? Its stdout has presumably been flushed out to
fast-import; is there anything else for us to get from it besides its
exit code?

-Peff

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 2/4] transport-helper: check if remote helper is alive
  2013-04-02  2:30       ` Jeff King
@ 2013-04-02  4:51         ` Felipe Contreras
  2013-04-02  5:01           ` Jeff King
  0 siblings, 1 reply; 21+ messages in thread
From: Felipe Contreras @ 2013-04-02  4:51 UTC (permalink / raw)
  To: Jeff King
  Cc: git, Junio C Hamano, Johannes Sixt, Aaron Schrab,
	Clemens Buchacher, David Michael Barr, Florian Achleitner

On Mon, Apr 1, 2013 at 8:30 PM, Jeff King <peff@peff.net> wrote:
> On Mon, Apr 01, 2013 at 06:12:45PM -0600, Felipe Contreras wrote:
>
>> > Checking asynchronously for death like this is subject to a rac
>> > condition; the helper may be about to die but not have died yet. In
>> > practice we may catch some cases, but this seems like an indication that
>> > the protocol is not well thought-out. Usually we would wait for a
>> > confirmation over the read pipe from a child, and know that the child
>> > failed when either:
>> >
>> >   1. It tells us so on the pipe.
>> >
>> >   2. The pipe closes (at which point we know it is time to reap the
>> >      child).
>> >
>> > Why doesn't that scheme work here? I am not doubting you that it does
>> > not; the import helper protocol is a bit of a mess, and I can easily
>> > believe it has such a problem. But I'm wondering if it's possible to
>> > improve it in a more robust way.
>>
>> The pipe is between fast-export and the remote-helper, "we"
>> (transport-helper) are not part of the pipe any more. That's the
>> problem.
>
> So in fetch_with_import, we have a remote-helper, and we have a
> bidirectional pipe to it. We then call get_importer, which starts
> fast-import, whose stdin is connected to the stdout of the remote
> helper. We tell the remote-helper to run the import, then we wait for
> fast-import to finish (and complain if it fails).
>
> Then what? We seem to do some more work, which I think is what causes
> the errors you see; but should we instead be reaping the helper at this
> point unconditionally? Its stdout has presumably been flushed out to
> fast-import; is there anything else for us to get from it besides its
> exit code?

The problem is not with import, since fast-import would generally wait
properly for a 'done' status, the problem is with export. Also, the
design is such that the remote-helper stays alive, even after
fast-export has finished.

-- 
Felipe Contreras

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 2/4] transport-helper: check if remote helper is alive
  2013-04-02  4:51         ` Felipe Contreras
@ 2013-04-02  5:01           ` Jeff King
  2013-04-02  5:19             ` Felipe Contreras
  0 siblings, 1 reply; 21+ messages in thread
From: Jeff King @ 2013-04-02  5:01 UTC (permalink / raw)
  To: Felipe Contreras
  Cc: git, Junio C Hamano, Johannes Sixt, Aaron Schrab,
	Clemens Buchacher, David Michael Barr, Florian Achleitner

On Mon, Apr 01, 2013 at 10:51:20PM -0600, Felipe Contreras wrote:

> > So in fetch_with_import, we have a remote-helper, and we have a
> > bidirectional pipe to it. We then call get_importer, which starts
> > fast-import, whose stdin is connected to the stdout of the remote
> > helper. We tell the remote-helper to run the import, then we wait for
> > fast-import to finish (and complain if it fails).
> >
> > Then what? We seem to do some more work, which I think is what causes
> > the errors you see; but should we instead be reaping the helper at this
> > point unconditionally? Its stdout has presumably been flushed out to
> > fast-import; is there anything else for us to get from it besides its
> > exit code?
> 
> The problem is not with import, since fast-import would generally wait
> properly for a 'done' status, the problem is with export.

Your patch modified fetch_with_import. Are you saying that it isn't
necessary to do so?

> Also, the design is such that the remote-helper stays alive, even
> after fast-export has finished.

So if we expect to be able to communicate with the remote-helper after
fast-export has exited, is it a protocol failure that the helper does
not say "yes, I finished the export" or similar? If so, can we fix that?

I am not too familiar with this protocol, but it looks like we read from
helper->out right after closing the exporter, to get the ref statuses.
Shouldn't we be detecting the error if the helper hangs up there?

-Peff

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 2/4] transport-helper: check if remote helper is alive
  2013-04-02  5:01           ` Jeff King
@ 2013-04-02  5:19             ` Felipe Contreras
  2013-04-02  9:36               ` Felipe Contreras
  0 siblings, 1 reply; 21+ messages in thread
From: Felipe Contreras @ 2013-04-02  5:19 UTC (permalink / raw)
  To: Jeff King
  Cc: git, Junio C Hamano, Johannes Sixt, Aaron Schrab,
	Clemens Buchacher, David Michael Barr, Florian Achleitner

On Mon, Apr 1, 2013 at 11:01 PM, Jeff King <peff@peff.net> wrote:
> On Mon, Apr 01, 2013 at 10:51:20PM -0600, Felipe Contreras wrote:
>
>> > So in fetch_with_import, we have a remote-helper, and we have a
>> > bidirectional pipe to it. We then call get_importer, which starts
>> > fast-import, whose stdin is connected to the stdout of the remote
>> > helper. We tell the remote-helper to run the import, then we wait for
>> > fast-import to finish (and complain if it fails).
>> >
>> > Then what? We seem to do some more work, which I think is what causes
>> > the errors you see; but should we instead be reaping the helper at this
>> > point unconditionally? Its stdout has presumably been flushed out to
>> > fast-import; is there anything else for us to get from it besides its
>> > exit code?
>>
>> The problem is not with import, since fast-import would generally wait
>> properly for a 'done' status, the problem is with export.
>
> Your patch modified fetch_with_import. Are you saying that it isn't
> necessary to do so?

It's not, I added it for symmetry. But that's the case *if* the
remote-helper is properly using the "done" feature.

>> Also, the design is such that the remote-helper stays alive, even
>> after fast-export has finished.
>
> So if we expect to be able to communicate with the remote-helper after
> fast-export has exited, is it a protocol failure that the helper does
> not say "yes, I finished the export" or similar? If so, can we fix that?
>
> I am not too familiar with this protocol, but it looks like we read from
> helper->out right after closing the exporter, to get the ref statuses.
> Shouldn't we be detecting the error if the helper hangs up there?

I guess that should be possible, I'll give that a try.

Cheers.

-- 
Felipe Contreras

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 2/4] transport-helper: check if remote helper is alive
  2013-04-02  5:19             ` Felipe Contreras
@ 2013-04-02  9:36               ` Felipe Contreras
  0 siblings, 0 replies; 21+ messages in thread
From: Felipe Contreras @ 2013-04-02  9:36 UTC (permalink / raw)
  To: Jeff King
  Cc: git, Junio C Hamano, Johannes Sixt, Aaron Schrab,
	Clemens Buchacher, David Michael Barr, Florian Achleitner

On Mon, Apr 1, 2013 at 11:19 PM, Felipe Contreras
<felipe.contreras@gmail.com> wrote:
> On Mon, Apr 1, 2013 at 11:01 PM, Jeff King <peff@peff.net> wrote:
>> On Mon, Apr 01, 2013 at 10:51:20PM -0600, Felipe Contreras wrote:
>>
>>> > So in fetch_with_import, we have a remote-helper, and we have a
>>> > bidirectional pipe to it. We then call get_importer, which starts
>>> > fast-import, whose stdin is connected to the stdout of the remote
>>> > helper. We tell the remote-helper to run the import, then we wait for
>>> > fast-import to finish (and complain if it fails).
>>> >
>>> > Then what? We seem to do some more work, which I think is what causes
>>> > the errors you see; but should we instead be reaping the helper at this
>>> > point unconditionally? Its stdout has presumably been flushed out to
>>> > fast-import; is there anything else for us to get from it besides its
>>> > exit code?
>>>
>>> The problem is not with import, since fast-import would generally wait
>>> properly for a 'done' status, the problem is with export.
>>
>> Your patch modified fetch_with_import. Are you saying that it isn't
>> necessary to do so?
>
> It's not, I added it for symmetry. But that's the case *if* the
> remote-helper is properly using the "done" feature.

Actually, it is a problem, because without this check the
transport-helper just goes on without realizing the whole thing has
failed and doesn't produce a proper error message:

fatal: bad object 0000000000000000000000000000000000000000
error: testgit::/home/felipec/dev/git/t/trash
directory.t5801-remote-helpers/server did not send all necessary
objects

It's possible to send a ping command to the remote-helper, but doing
so triggers a SIGPIPE. I would rather show a proper error message as
my patch suggests by just checking if the command is running.

>>> Also, the design is such that the remote-helper stays alive, even
>>> after fast-export has finished.
>>
>> So if we expect to be able to communicate with the remote-helper after
>> fast-export has exited, is it a protocol failure that the helper does
>> not say "yes, I finished the export" or similar? If so, can we fix that?
>>
>> I am not too familiar with this protocol, but it looks like we read from
>> helper->out right after closing the exporter, to get the ref statuses.
>> Shouldn't we be detecting the error if the helper hangs up there?
>
> I guess that should be possible, I'll give that a try.

I gave this a try and it does work, but it seems rather convoluted to me:

diff --git a/transport-helper.c b/transport-helper.c
index f0d28aa..10b7842 100644
--- a/transport-helper.c
+++ b/transport-helper.c
@@ -25,7 +25,8 @@ struct helper_data {
 		option : 1,
 		push : 1,
 		connect : 1,
-		no_disconnect_req : 1;
+		no_disconnect_req : 1,
+		done_export : 1;
 	char *export_marks;
 	char *import_marks;
 	/* These go from remote name (as in "list") to private name */
@@ -46,7 +47,7 @@ static void sendline(struct helper_data *helper,
struct strbuf *buffer)
 		die_errno("Full write to remote helper failed");
 }

-static int recvline_fh(FILE *helper, struct strbuf *buffer)
+static int recvline_fh(FILE *helper, struct strbuf *buffer, int safe)
 {
 	strbuf_reset(buffer);
 	if (debug)
@@ -54,7 +55,10 @@ static int recvline_fh(FILE *helper, struct strbuf *buffer)
 	if (strbuf_getline(buffer, helper, '\n') == EOF) {
 		if (debug)
 			fprintf(stderr, "Debug: Remote helper quit.\n");
-		exit(128);
+		if (safe)
+			return -1;
+		else
+			exit(128);
 	}

 	if (debug)
@@ -64,7 +68,12 @@ static int recvline_fh(FILE *helper, struct strbuf *buffer)

 static int recvline(struct helper_data *helper, struct strbuf *buffer)
 {
-	return recvline_fh(helper->out, buffer);
+	return recvline_fh(helper->out, buffer, 0);
+}
+
+static int recvline_safe(struct helper_data *helper, struct strbuf *buffer)
+{
+	return recvline_fh(helper->out, buffer, 1);
 }

 static void xchgline(struct helper_data *helper, struct strbuf *buffer)
@@ -201,6 +210,8 @@ static struct child_process *get_helper(struct
transport *transport)
 			strbuf_addstr(&arg, "--import-marks=");
 			strbuf_addstr(&arg, capname + strlen("import-marks "));
 			data->import_marks = strbuf_detach(&arg, NULL);
+		} else if (!strcmp(capname, "done-export")) {
+			data->done_export = 1;
 		} else if (mandatory) {
 			die("Unknown mandatory capability %s. This remote "
 			    "helper probably needs newer version of Git.",
@@ -540,7 +551,7 @@ static int process_connect_service(struct
transport *transport,
 		goto exit;

 	sendline(data, &cmdbuf);
-	recvline_fh(input, &cmdbuf);
+	recvline_fh(input, &cmdbuf, 0);
 	if (!strcmp(cmdbuf.buf, "")) {
 		data->no_disconnect_req = 1;
 		if (debug)
@@ -704,19 +715,30 @@ static void push_update_ref_status(struct strbuf *buf,
 	(*ref)->remote_status = msg;
 }

-static void push_update_refs_status(struct helper_data *data,
+static int push_update_refs_status(struct helper_data *data,
 				    struct ref *remote_refs)
 {
 	struct strbuf buf = STRBUF_INIT;
 	struct ref *ref = remote_refs;
+	int done = 0;
+
 	for (;;) {
-		recvline(data, &buf);
-		if (!buf.len)
+		if (recvline_safe(data, &buf) || !buf.len)
+			break;
+
+		if (!strncmp(buf.buf, "done", 4)) {
+			done = 1;
 			break;
+		}

 		push_update_ref_status(&buf, &ref, remote_refs);
 	}
 	strbuf_release(&buf);
+
+	if (!data->done_export)
+		return 0;
+
+	return !done;
 }

 static int push_refs_with_push(struct transport *transport,
@@ -776,8 +798,7 @@ static int push_refs_with_push(struct transport *transport,
 	sendline(data, &buf);
 	strbuf_release(&buf);

-	push_update_refs_status(data, remote_refs);
-	return 0;
+	return push_update_refs_status(data, remote_refs);
 }

 static int push_refs_with_export(struct transport *transport,
@@ -829,8 +850,7 @@ static int push_refs_with_export(struct transport
*transport,
 	if (!check_command(data->helper))
 		die("Error while running helper");

-	push_update_refs_status(data, remote_refs);
-	return 0;
+	return push_update_refs_status(data, remote_refs);
 }

 static int push_refs(struct transport *transport,

-- 
Felipe Contreras

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH 2/4] transport-helper: check if remote helper is alive
  2013-04-01 21:46 ` [PATCH 2/4] transport-helper: check if remote helper is alive Felipe Contreras
  2013-04-01 23:33   ` Jeff King
@ 2013-04-02  0:26   ` Junio C Hamano
  2013-04-02  4:58     ` Felipe Contreras
  1 sibling, 1 reply; 21+ messages in thread
From: Junio C Hamano @ 2013-04-02  0:26 UTC (permalink / raw)
  To: Felipe Contreras
  Cc: git, Jeff King, Johannes Sixt, Aaron Schrab, Clemens Buchacher,
	David Michael Barr, Florian Achleitner

Felipe Contreras <felipe.contreras@gmail.com> writes:

> Otherwise transport-helper will continue checking for refs and other
> things what will confuse the user more.
> ---

Sign-off?

>  git-remote-testgit        | 11 +++++++++++
>  t/t5801-remote-helpers.sh | 19 +++++++++++++++++++
>  transport-helper.c        |  8 ++++++++
>  3 files changed, 38 insertions(+)
>
> diff --git a/git-remote-testgit b/git-remote-testgit
> index b395c8d..ca0cf09 100755
> --- a/git-remote-testgit
> +++ b/git-remote-testgit
> @@ -61,12 +61,23 @@ do
>  			echo "feature import-marks=$gitmarks"
>  			echo "feature export-marks=$gitmarks"
>  		fi
> +
> +		if test -n "$GIT_REMOTE_TESTGIT_FAILURE"
> +		then
> +			exit -1
> +		fi
> +
>  		echo "feature done"
>  		git fast-export "${testgitmarks_args[@]}" $refs |
>  		sed -e "s#refs/heads/#${prefix}/heads/#g"
>  		echo "done"
>  		;;
>  	export)
> +		if test -n "$GIT_REMOTE_TESTGIT_FAILURE"
> +		then
> +			exit -1
> +		fi
> +
>  		before=$(git for-each-ref --format='%(refname) %(objectname)')
>  
>  		git fast-import "${testgitmarks_args[@]}" --quiet
> diff --git a/t/t5801-remote-helpers.sh b/t/t5801-remote-helpers.sh
> index f387027..26e9a5b 100755
> --- a/t/t5801-remote-helpers.sh
> +++ b/t/t5801-remote-helpers.sh
> @@ -166,4 +166,23 @@ test_expect_success 'push ref with existing object' '
>  	compare_refs local dup server dup
>  '
>  
> +test_expect_success 'proper failure checks for fetching' '
> +	(GIT_REMOTE_TESTGIT_FAILURE=1 &&
> +	export GIT_REMOTE_TESTGIT_FAILURE &&
> +	cd local &&
> +	test_must_fail git fetch 2>&1 | \
> +		grep "Error while running helper"

This will not care if "git fetch" succeeds or fails and returns the
exit code from grep.  Perhaps something like this instead?

	(
		GIT_REMOTE_TESTGIT_FAILURE=1 &&
		export GIT_REMOTE_TESTGIT_FAILURE &&
		cd local &&
		test_must_fail git fetch 2>error &&
		grep "Error while running helper" error
	)


> +# We sleep to give fast-export a chance to catch the SIGPIPE
> +test_expect_failure 'proper failure checks for pushing' '
> +	(GIT_REMOTE_TESTGIT_FAILURE=1 &&
> +	export GIT_REMOTE_TESTGIT_FAILURE &&
> +	cd local &&
> +	test_must_fail git push --all 2>&1 | \
> +		grep "Error while running helper"

Ditto.

> +	)
> +'
> +
>  test_done
> diff --git a/transport-helper.c b/transport-helper.c
> index cb3ef7d..dfdfa7a 100644
> --- a/transport-helper.c
> +++ b/transport-helper.c
> @@ -460,6 +460,10 @@ static int fetch_with_import(struct transport *transport,
>  
>  	if (finish_command(&fastimport))
>  		die("Error while running fast-import");
> +
> +	if (!check_command(data->helper))
> +		die("Error while running helper");
> +
>  	argv_array_free_detached(fastimport.argv);
>  
>  	/*
> @@ -818,6 +822,10 @@ static int push_refs_with_export(struct transport *transport,
>  
>  	if (finish_command(&exporter))
>  		die("Error while running fast-export");
> +
> +	if (!check_command(data->helper))
> +		die("Error while running helper");
> +
>  	push_update_refs_status(data, remote_refs);
>  	return 0;
>  }

OK, so the idea is that fetch_with_import() does

 - get_helper(transport), which spawns a helper process;

 - get_importer(transport, &fastimport), which spawns a fast-import
   and make it read from the output of the helper process;

 - we did finish_command() to wait for the fast-import to finish,
   expecting that the fast-import would finish when the helper stops
   feeding it, which in turn would mean the helper would have died.

The same for the pushing side.

Shouldn't transport_disconnect() have called release_helper() which
in turn calls disconnect_helper() to call finish_command() on the
helper to wait for that procesanyway?  Is somebody discarding return
value from transport_disconnect() or the current calling site of
transport_disconnect() is too late to notice the error?

Puzzled...

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 2/4] transport-helper: check if remote helper is alive
  2013-04-02  0:26   ` Junio C Hamano
@ 2013-04-02  4:58     ` Felipe Contreras
  0 siblings, 0 replies; 21+ messages in thread
From: Felipe Contreras @ 2013-04-02  4:58 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: git, Jeff King, Johannes Sixt, Aaron Schrab, Clemens Buchacher,
	David Michael Barr, Florian Achleitner

On Mon, Apr 1, 2013 at 6:26 PM, Junio C Hamano <gitster@pobox.com> wrote:

> OK, so the idea is that fetch_with_import() does
>
>  - get_helper(transport), which spawns a helper process;
>
>  - get_importer(transport, &fastimport), which spawns a fast-import
>    and make it read from the output of the helper process;
>
>  - we did finish_command() to wait for the fast-import to finish,
>    expecting that the fast-import would finish when the helper stops
>    feeding it, which in turn would mean the helper would have died.
>
> The same for the pushing side.

The difference with the pushing side is that it's the helper the one
waiting for fast-export and it can easily die.

> Shouldn't transport_disconnect() have called release_helper() which
> in turn calls disconnect_helper() to call finish_command() on the
> helper to wait for that procesanyway?  Is somebody discarding return
> value from transport_disconnect() or the current calling site of
> transport_disconnect() is too late to notice the error?

It's too late to notice the error. However, only in the case of pushing.

-- 
Felipe Contreras

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 3/4] tmp: remote-helper: add timers to catch errors
  2013-04-01 21:46 [PATCH 0/4] run-command: new check_command helper Felipe Contreras
  2013-04-01 21:46 ` [PATCH 1/4] run-command: add " Felipe Contreras
  2013-04-01 21:46 ` [PATCH 2/4] transport-helper: check if remote helper is alive Felipe Contreras
@ 2013-04-01 21:46 ` Felipe Contreras
  2013-04-01 21:46 ` [PATCH 4/4] tmp: run-command: code to exercise check_command Felipe Contreras
  3 siblings, 0 replies; 21+ messages in thread
From: Felipe Contreras @ 2013-04-01 21:46 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Jeff King, Johannes Sixt, Aaron Schrab,
	Clemens Buchacher, David Michael Barr, Florian Achleitner,
	Felipe Contreras

This way the test reliably succeeds (in catching the failure).

Not sure what's the proper way to do this, but here it is for the
record.

Signed-off-by: Felipe Contreras <felipe.contreras@gmail.com>
---
 git-remote-testgit        | 1 +
 t/t5801-remote-helpers.sh | 2 +-
 transport-helper.c        | 3 +++
 3 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/git-remote-testgit b/git-remote-testgit
index ca0cf09..6ae1b7f 100755
--- a/git-remote-testgit
+++ b/git-remote-testgit
@@ -75,6 +75,7 @@ do
 	export)
 		if test -n "$GIT_REMOTE_TESTGIT_FAILURE"
 		then
+			sleep 1 # don't let fast-export get SIGPIPE
 			exit -1
 		fi
 
diff --git a/t/t5801-remote-helpers.sh b/t/t5801-remote-helpers.sh
index 26e9a5b..5bb2ca4 100755
--- a/t/t5801-remote-helpers.sh
+++ b/t/t5801-remote-helpers.sh
@@ -176,7 +176,7 @@ test_expect_success 'proper failure checks for fetching' '
 '
 
 # We sleep to give fast-export a chance to catch the SIGPIPE
-test_expect_failure 'proper failure checks for pushing' '
+test_expect_success 'proper failure checks for pushing' '
 	(GIT_REMOTE_TESTGIT_FAILURE=1 &&
 	export GIT_REMOTE_TESTGIT_FAILURE &&
 	cd local &&
diff --git a/transport-helper.c b/transport-helper.c
index dfdfa7a..f0d28aa 100644
--- a/transport-helper.c
+++ b/transport-helper.c
@@ -823,6 +823,9 @@ static int push_refs_with_export(struct transport *transport,
 	if (finish_command(&exporter))
 		die("Error while running fast-export");
 
+	if (getenv("GIT_REMOTE_TESTGIT_FAILURE"))
+		sleep(2);
+
 	if (!check_command(data->helper))
 		die("Error while running helper");
 
-- 
1.8.2

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 4/4] tmp: run-command: code to exercise check_command
  2013-04-01 21:46 [PATCH 0/4] run-command: new check_command helper Felipe Contreras
                   ` (2 preceding siblings ...)
  2013-04-01 21:46 ` [PATCH 3/4] tmp: remote-helper: add timers to catch errors Felipe Contreras
@ 2013-04-01 21:46 ` Felipe Contreras
  3 siblings, 0 replies; 21+ messages in thread
From: Felipe Contreras @ 2013-04-01 21:46 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Jeff King, Johannes Sixt, Aaron Schrab,
	Clemens Buchacher, David Michael Barr, Florian Achleitner,
	Felipe Contreras

Do not merge!

Signed-off-by: Felipe Contreras <felipe.contreras@gmail.com>
---
 run-command.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/run-command.c b/run-command.c
index 16833df..6f20d5f 100644
--- a/run-command.c
+++ b/run-command.c
@@ -234,6 +234,7 @@ static pid_t persistent_waitpid(struct child_process *cmd, pid_t pid, int *stat_
 		return errno ? -1 : pid;
 	} else {
 		pid_t waiting;
+		die("not supposed to happen");
 		while ((waiting = waitpid(pid, stat_loc, 0)) < 0 && errno == EINTR)
 			;	/* nothing */
 		return waiting;
@@ -246,6 +247,11 @@ static int wait_or_whine(struct child_process *cmd, pid_t pid, const char *argv0
 	pid_t waiting;
 	int failed_errno = 0;
 
+	do {
+		if (!check_command(cmd))
+			break;
+	} while(1);
+
 	waiting = persistent_waitpid(cmd, pid, &status);
 
 	if (waiting < 0) {
-- 
1.8.2

^ permalink raw reply related	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2013-04-02  9:37 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-04-01 21:46 [PATCH 0/4] run-command: new check_command helper Felipe Contreras
2013-04-01 21:46 ` [PATCH 1/4] run-command: add " Felipe Contreras
2013-04-01 23:23   ` Jeff King
2013-04-01 23:58     ` Felipe Contreras
2013-04-02  2:22       ` Jeff King
2013-04-02  5:11         ` Felipe Contreras
2013-04-02  5:14           ` Jeff King
2013-04-02  5:22             ` Felipe Contreras
2013-04-02  5:26               ` Jeff King
2013-04-01 21:46 ` [PATCH 2/4] transport-helper: check if remote helper is alive Felipe Contreras
2013-04-01 23:33   ` Jeff King
2013-04-02  0:12     ` Felipe Contreras
2013-04-02  2:30       ` Jeff King
2013-04-02  4:51         ` Felipe Contreras
2013-04-02  5:01           ` Jeff King
2013-04-02  5:19             ` Felipe Contreras
2013-04-02  9:36               ` Felipe Contreras
2013-04-02  0:26   ` Junio C Hamano
2013-04-02  4:58     ` Felipe Contreras
2013-04-01 21:46 ` [PATCH 3/4] tmp: remote-helper: add timers to catch errors Felipe Contreras
2013-04-01 21:46 ` [PATCH 4/4] tmp: run-command: code to exercise check_command Felipe Contreras

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).