* [PATCH 1/3] git-daemon: logging done right
@ 2008-08-13 8:43 Stephen R. van den Berg
2008-08-13 8:43 ` [PATCH 2/3] git-daemon: make the signal handler almost a no-op Stephen R. van den Berg
` (2 more replies)
0 siblings, 3 replies; 16+ messages in thread
From: Stephen R. van den Berg @ 2008-08-13 8:43 UTC (permalink / raw)
To: git
Make git-daemon a proper syslogging citizen with PID-info.
Simplify the overzealous double buffering in the logroutine.
Call logerror() instead of error().
Signed-off-by: Stephen R. van den Berg <srb@cuci.nl>
---
daemon.c | 49 +++++++++++++++++--------------------------------
1 files changed, 17 insertions(+), 32 deletions(-)
diff --git a/daemon.c b/daemon.c
index 1c00305..774b2ce 100644
--- a/daemon.c
+++ b/daemon.c
@@ -78,38 +78,19 @@ static struct interp interp_table[] = {
static void logreport(int priority, const char *err, va_list params)
{
- /* We should do a single write so that it is atomic and output
- * of several processes do not get intermingled. */
- char buf[1024];
- int buflen;
- int maxlen, msglen;
-
- /* sizeof(buf) should be big enough for "[pid] \n" */
- buflen = snprintf(buf, sizeof(buf), "[%ld] ", (long) getpid());
-
- maxlen = sizeof(buf) - buflen - 1; /* -1 for our own LF */
- msglen = vsnprintf(buf + buflen, maxlen, err, params);
-
if (log_syslog) {
+ char buf[1024];
+ vsnprintf(buf, sizeof(buf), err, params);
syslog(priority, "%s", buf);
- return;
}
-
- /* maxlen counted our own LF but also counts space given to
- * vsnprintf for the terminating NUL. We want to make sure that
- * we have space for our own LF and NUL after the "meat" of the
- * message, so truncate it at maxlen - 1.
- */
- if (msglen > maxlen - 1)
- msglen = maxlen - 1;
- else if (msglen < 0)
- msglen = 0; /* Protect against weird return values. */
- buflen += msglen;
-
- buf[buflen++] = '\n';
- buf[buflen] = '\0';
-
- write_in_full(2, buf, buflen);
+ else {
+ /* Since stderr is set to linebuffered mode, the
+ * logging of different processes will not overlap
+ */
+ fprintf(stderr, "[%d] ", (int)getpid());
+ vfprintf(stderr, err, params);
+ fputc('\n', stderr);
+ }
}
static void logerror(const char *err, ...)
@@ -836,7 +817,7 @@ static int socksetup(char *listen_addr, int listen_port, int **socklist_p)
if (sockfd < 0)
continue;
if (sockfd >= FD_SETSIZE) {
- error("too large socket descriptor.");
+ logerror("too large socket descriptor.");
close(sockfd);
continue;
}
@@ -1178,9 +1159,11 @@ int main(int argc, char **argv)
}
if (log_syslog) {
- openlog("git-daemon", 0, LOG_DAEMON);
+ openlog("git-daemon", LOG_PID, LOG_DAEMON);
set_die_routine(daemon_die);
}
+ else /* so that logging into a file is atomic */
+ setlinebuf(stderr);
if (inetd_mode && (group_name || user_name))
die("--user and --group are incompatible with --inetd");
@@ -1233,8 +1216,10 @@ int main(int argc, char **argv)
return execute(peer);
}
- if (detach)
+ if (detach) {
daemonize();
+ loginfo("Ready to rumble");
+ }
else
sanitize_stdfds();
^ permalink raw reply related [flat|nested] 16+ messages in thread* [PATCH 2/3] git-daemon: make the signal handler almost a no-op 2008-08-13 8:43 [PATCH 1/3] git-daemon: logging done right Stephen R. van den Berg @ 2008-08-13 8:43 ` Stephen R. van den Berg 2008-08-14 0:09 ` Junio C Hamano 2008-08-14 13:41 ` Johannes Schindelin 2008-08-13 8:43 ` [PATCH 3/3] git-daemon: rewrite kindergarden Stephen R. van den Berg 2008-08-13 23:13 ` [PATCH 1/3] git-daemon: logging done right Junio C Hamano 2 siblings, 2 replies; 16+ messages in thread From: Stephen R. van den Berg @ 2008-08-13 8:43 UTC (permalink / raw) To: git by exploiting the fact that systemcalls get interrupted by signals; and even they aren't, all zombies will be collected before the next accept(). Fix another error() -> logerror() call. Signed-off-by: Stephen R. van den Berg <srb@cuci.nl> --- daemon.c | 67 +++++++++++++++++++++++++++++--------------------------------- 1 files changed, 31 insertions(+), 36 deletions(-) diff --git a/daemon.c b/daemon.c index 774b2ce..19d620c 100644 --- a/daemon.c +++ b/daemon.c @@ -16,7 +16,6 @@ static int log_syslog; static int verbose; static int reuseaddr; -static int child_handler_pipe[2]; static const char daemon_usage[] = "git daemon [--verbose] [--syslog] [--export-all]\n" @@ -680,6 +679,21 @@ static void check_dead_children(void) { unsigned spawned, reaped, deleted; + for (;;) { + int status; + pid_t pid = waitpid(-1, &status, WNOHANG); + + if (pid > 0) { + unsigned reaped = children_reaped; + if (!WIFEXITED(status) || WEXITSTATUS(status) > 0) + pid = -pid; + dead_child[reaped % MAX_CHILDREN] = pid; + children_reaped = reaped + 1; + continue; + } + break; + } + spawned = children_spawned; reaped = children_reaped; deleted = children_deleted; @@ -760,21 +774,10 @@ static void handle(int incoming, struct sockaddr *addr, int addrlen) static void child_handler(int signo) { - for (;;) { - int status; - pid_t pid = waitpid(-1, &status, WNOHANG); - - if (pid > 0) { - unsigned reaped = children_reaped; - if (!WIFEXITED(status) || WEXITSTATUS(status) > 0) - pid = -pid; - dead_child[reaped % MAX_CHILDREN] = pid; - children_reaped = reaped + 1; - write(child_handler_pipe[1], &status, 1); - continue; - } - break; - } + /* Otherwise empty handler because systemcalls will get interrupted + * upon signal receipt + * SysV needs the handler to be reinstated + */ signal(SIGCHLD, child_handler); } @@ -917,35 +920,30 @@ static int service_loop(int socknum, int *socklist) struct pollfd *pfd; int i; - if (pipe(child_handler_pipe) < 0) - die ("Could not set up pipe for child handler"); - - pfd = xcalloc(socknum + 1, sizeof(struct pollfd)); + pfd = xcalloc(socknum, sizeof(struct pollfd)); for (i = 0; i < socknum; i++) { pfd[i].fd = socklist[i]; pfd[i].events = POLLIN; } - pfd[socknum].fd = child_handler_pipe[0]; - pfd[socknum].events = POLLIN; - - child_handler(0); for (;;) { int i; + int olderrno; - if (poll(pfd, socknum + 1, -1) < 0) { - if (errno != EINTR) { - error("poll failed, resuming: %s", - strerror(errno)); + i = poll(pfd, socknum, -1); + olderrno = errno; + + check_dead_children(); + + if (i < 0) { + if (olderrno != EINTR) { + logerror("poll failed, resuming: %s", + strerror(olderrno)); sleep(1); } continue; } - if (pfd[socknum].revents & POLLIN) { - read(child_handler_pipe[0], &i, 1); - check_dead_children(); - } for (i = 0; i < socknum; i++) { if (pfd[i].revents & POLLIN) { @@ -1036,10 +1034,7 @@ int main(int argc, char **argv) gid_t gid = 0; int i; - /* Without this we cannot rely on waitpid() to tell - * what happened to our children. - */ - signal(SIGCHLD, SIG_DFL); + child_handler(0); for (i = 1; i < argc; i++) { char *arg = argv[i]; ^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [PATCH 2/3] git-daemon: make the signal handler almost a no-op 2008-08-13 8:43 ` [PATCH 2/3] git-daemon: make the signal handler almost a no-op Stephen R. van den Berg @ 2008-08-14 0:09 ` Junio C Hamano 2008-08-14 0:18 ` Stephen R. van den Berg 2008-08-14 13:41 ` Johannes Schindelin 1 sibling, 1 reply; 16+ messages in thread From: Junio C Hamano @ 2008-08-14 0:09 UTC (permalink / raw) To: Stephen R. van den Berg; +Cc: git, Johannes Schindelin "Stephen R. van den Berg" <srb@cuci.nl> writes: > by exploiting the fact that systemcalls get interrupted by signals; > and even they aren't, all zombies will be collected before the next > accept(). Dscho may want to say something about "even they aren't..." part, after he comes back to the keyboard. > Fix another error() -> logerror() call. which should have been in 1/3, I suppose. > @@ -1036,10 +1034,7 @@ int main(int argc, char **argv) > gid_t gid = 0; > int i; > > - /* Without this we cannot rely on waitpid() to tell > - * what happened to our children. > - */ > - signal(SIGCHLD, SIG_DFL); > + child_handler(0); Why? child_handler() logically is a two step process: * We are just informed that somebody died; let's do something about the corpse; * On some systems we need to rearm signals once they fired, so let's do that if necessary. With your change, the first part happens to be almost no-op, but I do not think it justifies this hunk. After all, we might even want to do something like: static void child_handler(int signo) { if (USE_SYSV_SIGNAL_SEMANTICS) signal(SIGCHLD, child_handler); } and have the compiler optimize out the signal rearming with cc CFLAGS=-DUSE_SYSV_SIGNAL_SEMANTICS=0 on suitable platforms in the future. But you still want the initial signal set-up to happen unconditionally. At this point, we aren't informed by the system that somebody died, and we would want to arm the signal regardless of the platform's signal semantics. The rest of the patch looked sane, although I did not read it very carefully. Thanks. ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/3] git-daemon: make the signal handler almost a no-op 2008-08-14 0:09 ` Junio C Hamano @ 2008-08-14 0:18 ` Stephen R. van den Berg 2008-08-14 1:09 ` Junio C Hamano 0 siblings, 1 reply; 16+ messages in thread From: Stephen R. van den Berg @ 2008-08-14 0:18 UTC (permalink / raw) To: Junio C Hamano; +Cc: git, Johannes Schindelin Junio C Hamano wrote: >"Stephen R. van den Berg" <srb@cuci.nl> writes: >> by exploiting the fact that systemcalls get interrupted by signals; >> and even they aren't, all zombies will be collected before the next >> accept(). >Dscho may want to say something about "even they aren't..." part, after he >comes back to the keyboard. That should have read "even if they aren't". Nonetheless, I don't know systems where it doesn't work this way, but even if a system resisted, the problem is mitigated by the fact that we reap the children before every accept. >> Fix another error() -> logerror() call. >which should have been in 1/3, I suppose. Sort of, yes, it was a bit messy to get it out in one piece. >> @@ -1036,10 +1034,7 @@ int main(int argc, char **argv) >> gid_t gid = 0; >> int i; >> - /* Without this we cannot rely on waitpid() to tell >> - * what happened to our children. >> - */ >> - signal(SIGCHLD, SIG_DFL); >> + child_handler(0); >Why? child_handler() now does barely more than setup the signal handler, which is exactly what we want to do here. >With your change, the first part happens to be almost no-op, but I do not >think it justifies this hunk. >After all, we might even want to do something like: > static void child_handler(int signo) > { > if (USE_SYSV_SIGNAL_SEMANTICS) > signal(SIGCHLD, child_handler); >and have the compiler optimize out the signal rearming with > cc CFLAGS=-DUSE_SYSV_SIGNAL_SEMANTICS=0 In return I ask: why? There is no particular performance reason to optimise this. So in order to keep the code simpler, it might make an extra unneeded systemcall on some systems when the signal is processed. I don't think it's worth our while to optimise this further. -- Sincerely, Stephen R. van den Berg. "And now for something *completely* different!" ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/3] git-daemon: make the signal handler almost a no-op 2008-08-14 0:18 ` Stephen R. van den Berg @ 2008-08-14 1:09 ` Junio C Hamano 2008-08-14 7:47 ` Stephen R. van den Berg 0 siblings, 1 reply; 16+ messages in thread From: Junio C Hamano @ 2008-08-14 1:09 UTC (permalink / raw) To: Stephen R. van den Berg; +Cc: git, Johannes Schindelin "Stephen R. van den Berg" <srb@cuci.nl> writes: >>> @@ -1036,10 +1034,7 @@ int main(int argc, char **argv) >>> gid_t gid = 0; >>> int i; > >>> - /* Without this we cannot rely on waitpid() to tell >>> - * what happened to our children. >>> - */ >>> - signal(SIGCHLD, SIG_DFL); >>> + child_handler(0); > >>Why? > > child_handler() now does barely more than setup the signal handler, > which is exactly what we want to do here. > >>With your change, the first part happens to be almost no-op, but I do not >>think it justifies this hunk. > >>After all, we might even want to do something like: > >> static void child_handler(int signo) >> { >> if (USE_SYSV_SIGNAL_SEMANTICS) >> signal(SIGCHLD, child_handler); > >>and have the compiler optimize out the signal rearming with > >> cc CFLAGS=-DUSE_SYSV_SIGNAL_SEMANTICS=0 > > In return I ask: why? Please read the part you omitted from your quote again. I agree it would be very meaningless change as an optimization, but I am concerned more about robustness and what makes sense. Do you agree that "child_handler()" is a signal handler to handle SIGCHLD, and such a signal handler conceptually consists of two parts? i.e. static void child_handler() { reap_dead_children(); rearm_signal_as_needed(); } Your argument is it is Ok to call this function when you are arming the signal for the first time, because reap_dead_children() happens to be empty, and your rearm_signal_as_needed() happens to be the same as arm_signal_always(). Yes, it happens to be _Ok_ now. But is it an improvement in the longer term? I do not think so. I do not see why you think it is better to rely on these two assumptions than being explicit and say "we set up the signal for the first time always on any platform", especially when the latter is much more direct way to say what your intention is. Or are you gaining something by not explicitly calling signal() for the first time? I may be missing some benefit for doing so. It is a trade-off between that some benefit I am not seeing, and downside that your version can be broken more easily by future changes to child_handler(), because you are assuming more about what it happens to do currently. That's the kind of thing maintainers worry more about. ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/3] git-daemon: make the signal handler almost a no-op 2008-08-14 1:09 ` Junio C Hamano @ 2008-08-14 7:47 ` Stephen R. van den Berg 2008-08-14 8:26 ` Junio C Hamano 0 siblings, 1 reply; 16+ messages in thread From: Stephen R. van den Berg @ 2008-08-14 7:47 UTC (permalink / raw) To: Junio C Hamano; +Cc: git, Johannes Schindelin Junio C Hamano wrote: >"Stephen R. van den Berg" <srb@cuci.nl> writes: >>>> - signal(SIGCHLD, SIG_DFL); >>>> + child_handler(0); >>>After all, we might even want to do something like: >>> static void child_handler(int signo) >>> { >>> if (USE_SYSV_SIGNAL_SEMANTICS) >>> signal(SIGCHLD, child_handler); >> In return I ask: why? >I agree it would be very meaningless change as an optimization, but I am >concerned more about robustness and what makes sense. Well, even if robustness and "the principle of least surprise" are the prime concerns, my change could still be considered worthwhile, but it depends on your viewpoint. >Do you agree that "child_handler()" is a signal handler to handle SIGCHLD, >and such a signal handler conceptually consists of two parts? i.e. Yes. > static void child_handler() > { > reap_dead_children(); > rearm_signal_as_needed(); >Your argument is it is Ok to call this function when you are arming the >signal for the first time, because reap_dead_children() happens to be >empty, and your rearm_signal_as_needed() happens to be the same as >arm_signal_always(). Well, not quite, that is part of the argument, the other parts are implicit. >Yes, it happens to be _Ok_ now. But is it an improvement in the longer >term? I do not think so. >I do not see why you think it is better to rely on these two assumptions >than being explicit and say "we set up the signal for the first time >always on any platform", especially when the latter is much more direct >way to say what your intention is. Renaming the function could do it. > Or are you gaining something by not >explicitly calling signal() for the first time? I may be missing some >benefit for doing so. Well, strictly speaking the benefits you're overlooking is: It centralises the spot where the systemcall is made to arm the handler. This means that if the setup needed to arm the handler ever becomes more complicated in future OSes, it only needs to be updated in one place. This is a direct maintainability benefit. >It is a trade-off between that some benefit I am not seeing, and downside >that your version can be broken more easily by future changes to >child_handler(), because you are assuming more about what it happens to do >currently. >That's the kind of thing maintainers worry more about. Well, I see two solutions which increase maintainability: Solution A: ================================== void child_handler(int signo) { signal(SIGCHLD, child_handler); /* rearm always for portability */ } main() { ... signal(SIGCHLD, child_handler); ... } ================================== Solution B: ================================== void setup_child_handler(int signo) { signal(SIGCHLD, setup_child_handler); /* rearm always for portability */ } main() { ... setup_child_handler(0); ... } ================================== Solution C: ================================== void setup_child_handler(void); void child_handler(int signo) { setup_child_handler(); /* rearm always for portability */ } void setup_child_handler(void) { signal(SIGCHLD, child_handler); } main() { ... setup_child_handler(); ... } ================================== Solution A is what you propose, but which I find less appealing because any future magic to actually setup the handler needs to be maintained and updated in two places. Solution C is what follows your train of thought better, because it future-proofs the setup as well as the handler. Solution B is what I consider most elegant and maintainable, because at this point in time I cannot imagine what extra handling would be required inside the handler which would require a setup as complicated as solution C; so in order to keep it as simple as possible and eliminate forward declarations and minimise systemcalls I suggest we pick solution B until the need for solution C ever arises (I don't think it ever will). But, in any case, you're the maintainer here, not I, so it's your call. I vote for B, but just tell me which solution you prefer and I'll adapt the code? -- Sincerely, Stephen R. van den Berg. "Hold still, while I inject you with SQL." ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/3] git-daemon: make the signal handler almost a no-op 2008-08-14 7:47 ` Stephen R. van den Berg @ 2008-08-14 8:26 ` Junio C Hamano 2008-08-14 10:13 ` Stephen R. van den Berg 0 siblings, 1 reply; 16+ messages in thread From: Junio C Hamano @ 2008-08-14 8:26 UTC (permalink / raw) To: Stephen R. van den Berg; +Cc: git, Johannes Schindelin "Stephen R. van den Berg" <srb@cuci.nl> writes: > Solution A is what you propose, but which I find less appealing because > any future magic to actually setup the handler needs to be maintained > and updated in two places. Well, A is attractive because it leaves the window open for us to later not rearming it unconditionally from child_handler(). I happen to think that the interface we will use to call "signal()" is much less likely to change than what we would do in child_handler(). > Solution C is what follows your train of thought better, because it > future-proofs the setup as well as the handler. Surely, we could take this route as the logical conclusion from my maintainability concerns, except that, if we are to make things as fine grained as you suggest, we would definitely will not have a single "setup", but "setup" and "rearm" as separate functions. Perhaps "setup" may start using sigaction() with SA_RESETHAND cleared, and we can make "rearm" truly a no-op everywhere. ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/3] git-daemon: make the signal handler almost a no-op 2008-08-14 8:26 ` Junio C Hamano @ 2008-08-14 10:13 ` Stephen R. van den Berg 0 siblings, 0 replies; 16+ messages in thread From: Stephen R. van den Berg @ 2008-08-14 10:13 UTC (permalink / raw) To: Junio C Hamano; +Cc: git, Johannes Schindelin Junio C Hamano wrote: >"Stephen R. van den Berg" <srb@cuci.nl> writes: >> Solution A is what you propose, but which I find less appealing because >> any future magic to actually setup the handler needs to be maintained >> and updated in two places. >Well, A is attractive because it leaves the window open for us to later >not rearming it unconditionally from child_handler(). I happen to think >that the interface we will use to call "signal()" is much less likely to >change than what we would do in child_handler(). Then that is the fundamental difference in maintainability-views. I find it more likely that we change the interface to setup the signal handler (in a future OS or by changing the way you invoke it), than that the actual work done by the signal handler is changed. But since your view carries more weight here, I'll adapt the patch to solution A. >> Solution C is what follows your train of thought better, because it >> future-proofs the setup as well as the handler. >"setup", but "setup" and "rearm" as separate functions. Perhaps "setup" >may start using sigaction() with SA_RESETHAND cleared, and we can make >"rearm" truly a no-op everywhere. >From a portability standpoint using sigaction is worse than signal, and rearming the handler even when it is not required is not a big deal. -- Sincerely, Stephen R. van den Berg. "Hold still, while I inject you with SQL." ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/3] git-daemon: make the signal handler almost a no-op 2008-08-13 8:43 ` [PATCH 2/3] git-daemon: make the signal handler almost a no-op Stephen R. van den Berg 2008-08-14 0:09 ` Junio C Hamano @ 2008-08-14 13:41 ` Johannes Schindelin 1 sibling, 0 replies; 16+ messages in thread From: Johannes Schindelin @ 2008-08-14 13:41 UTC (permalink / raw) To: Stephen R. van den Berg; +Cc: git Hi, On Wed, 13 Aug 2008, Stephen R. van den Berg wrote: > by exploiting the fact that systemcalls get interrupted by signals; and > even they aren't, all zombies will be collected before the next > accept(). Fix another error() -> logerror() call. I find this commit message more than cryptic. So much so that I would have to study the patch in detail to _understand_ what it does. Not having the time, I refuse, Dscho P.S.: Oh, and I might have been a little appalled by the language of your commit messages. ^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH 3/3] git-daemon: rewrite kindergarden 2008-08-13 8:43 [PATCH 1/3] git-daemon: logging done right Stephen R. van den Berg 2008-08-13 8:43 ` [PATCH 2/3] git-daemon: make the signal handler almost a no-op Stephen R. van den Berg @ 2008-08-13 8:43 ` Stephen R. van den Berg 2008-08-13 8:58 ` Stephen R. van den Berg ` (2 more replies) 2008-08-13 23:13 ` [PATCH 1/3] git-daemon: logging done right Junio C Hamano 2 siblings, 3 replies; 16+ messages in thread From: Stephen R. van den Berg @ 2008-08-13 8:43 UTC (permalink / raw) To: git Get rid of the silly fixed array of children and make max-connections dynamic and configurable in the process. Fix the killing code to actually be smart instead of the pseudo-random mess. Avoid forking if too busy already. Signed-off-by: Stephen R. van den Berg <srb@cuci.nl> --- Documentation/git-daemon.txt | 9 +- daemon.c | 215 +++++++++++++++--------------------------- 2 files changed, 84 insertions(+), 140 deletions(-) diff --git a/Documentation/git-daemon.txt b/Documentation/git-daemon.txt index 4ba4b75..b08a08c 100644 --- a/Documentation/git-daemon.txt +++ b/Documentation/git-daemon.txt @@ -9,8 +9,9 @@ SYNOPSIS -------- [verse] 'git daemon' [--verbose] [--syslog] [--export-all] - [--timeout=n] [--init-timeout=n] [--strict-paths] - [--base-path=path] [--user-path | --user-path=path] + [--timeout=n] [--init-timeout=n] [--max-connections=n] + [--strict-paths] [--base-path=path] [--base-path-relaxed] + [--user-path | --user-path=path] [--interpolated-path=pathtemplate] [--reuseaddr] [--detach] [--pid-file=file] [--enable=service] [--disable=service] @@ -99,6 +100,10 @@ OPTIONS it takes for the server to process the sub-request and time spent waiting for next client's request. +--max-connections:: + Maximum number of concurrent clients, defaults to 32. Set it to + zero for no limit. + --syslog:: Log to syslog instead of stderr. Note that this option does not imply --verbose, thus by default only error conditions will be logged. diff --git a/daemon.c b/daemon.c index 19d620c..a7a735c 100644 --- a/daemon.c +++ b/daemon.c @@ -19,8 +19,8 @@ static int reuseaddr; static const char daemon_usage[] = "git daemon [--verbose] [--syslog] [--export-all]\n" -" [--timeout=n] [--init-timeout=n] [--strict-paths]\n" -" [--base-path=path] [--base-path-relaxed]\n" +" [--timeout=n] [--init-timeout=n] [--max-connections=n]\n" +" [--strict-paths] [--base-path=path] [--base-path-relaxed]\n" " [--user-path | --user-path=path]\n" " [--interpolated-path=path]\n" " [--reuseaddr] [--detach] [--pid-file=file]\n" @@ -584,40 +584,37 @@ static int execute(struct sockaddr *addr) return -1; } +static int max_connections = 32; -/* - * We count spawned/reaped separately, just to avoid any - * races when updating them from signals. The SIGCHLD handler - * will only update children_reaped, and the fork logic will - * only update children_spawned. - * - * MAX_CHILDREN should be a power-of-two to make the modulus - * operation cheap. It should also be at least twice - * the maximum number of connections we will ever allow. - */ -#define MAX_CHILDREN 128 - -static int max_connections = 25; - -/* These are updated by the signal handler */ -static volatile unsigned int children_reaped; -static pid_t dead_child[MAX_CHILDREN]; - -/* These are updated by the main loop */ -static unsigned int children_spawned; -static unsigned int children_deleted; +static unsigned int live_children; static struct child { + struct child*next; pid_t pid; - int addrlen; struct sockaddr_storage address; -} live_child[MAX_CHILDREN]; +} *firstborn; -static void add_child(int idx, pid_t pid, struct sockaddr *addr, int addrlen) +static void add_child(pid_t pid, struct sockaddr *addr, int addrlen) { - live_child[idx].pid = pid; - live_child[idx].addrlen = addrlen; - memcpy(&live_child[idx].address, addr, addrlen); + struct child*newborn; + newborn = xcalloc(1, sizeof *newborn); + if (newborn) { + struct child **cradle, *blanket; + + live_children++; + newborn->pid = pid; + memcpy(&newborn->address, addr, addrlen); + for (cradle = &firstborn; + (blanket = *cradle); + cradle = &blanket->next) + if (!memcmp(&blanket->address, &newborn->address, + sizeof newborn->address)) + break; + newborn->next = blanket; + *cradle = newborn; + } + else + logerror("Out of memory spawning new child"); } /* @@ -626,142 +623,78 @@ static void add_child(int idx, pid_t pid, struct sockaddr *addr, int addrlen) * We move everything up by one, since the new "deleted" will * be one higher. */ -static void remove_child(pid_t pid, unsigned deleted, unsigned spawned) +static void remove_child(pid_t pid) { - struct child n; + struct child **cradle, *blanket; - deleted %= MAX_CHILDREN; - spawned %= MAX_CHILDREN; - if (live_child[deleted].pid == pid) { - live_child[deleted].pid = -1; - return; - } - n = live_child[deleted]; - for (;;) { - struct child m; - deleted = (deleted + 1) % MAX_CHILDREN; - if (deleted == spawned) - die("could not find dead child %d\n", pid); - m = live_child[deleted]; - live_child[deleted] = n; - if (m.pid == pid) - return; - n = m; - } + for (cradle = &firstborn; (blanket = *cradle); cradle = &blanket->next) + if (blanket->pid == pid) { + *cradle = blanket->next; + live_children--; + free(blanket); + break; + } } /* * This gets called if the number of connections grows * past "max_connections". * - * We _should_ start off by searching for connections - * from the same IP, and if there is some address wth - * multiple connections, we should kill that first. - * - * As it is, we just "randomly" kill 25% of the connections, - * and our pseudo-random generator sucks too. I have no - * shame. - * - * Really, this is just a place-holder for a _real_ algorithm. + * We kill the newest connection from a duplicate IP. */ -static void kill_some_children(int signo, unsigned start, unsigned stop) +static void kill_some_child() { - start %= MAX_CHILDREN; - stop %= MAX_CHILDREN; - while (start != stop) { - if (!(start & 3)) - kill(live_child[start].pid, signo); - start = (start + 1) % MAX_CHILDREN; - } -} - -static void check_dead_children(void) -{ - unsigned spawned, reaped, deleted; - - for (;;) { - int status; - pid_t pid = waitpid(-1, &status, WNOHANG); - - if (pid > 0) { - unsigned reaped = children_reaped; - if (!WIFEXITED(status) || WEXITSTATUS(status) > 0) - pid = -pid; - dead_child[reaped % MAX_CHILDREN] = pid; - children_reaped = reaped + 1; - continue; - } - break; - } + const struct child *blanket; - spawned = children_spawned; - reaped = children_reaped; - deleted = children_deleted; + if ((blanket = firstborn)) { + const struct child *next; - while (deleted < reaped) { - pid_t pid = dead_child[deleted % MAX_CHILDREN]; - const char *dead = pid < 0 ? " (with error)" : ""; - - if (pid < 0) - pid = -pid; - - /* XXX: Custom logging, since we don't wanna getpid() */ - if (verbose) { - if (log_syslog) - syslog(LOG_INFO, "[%d] Disconnected%s", - pid, dead); - else - fprintf(stderr, "[%d] Disconnected%s\n", - pid, dead); - } - remove_child(pid, deleted, spawned); - deleted++; + for (; (next = blanket->next); blanket = next) + if (!memcmp(&blanket->address, &next->address, + sizeof next->address)) { + kill(blanket->pid, SIGTERM); + break; + } } - children_deleted = deleted; } -static void check_max_connections(void) +static void check_dead_children(void) { - for (;;) { - int active; - unsigned spawned, deleted; - - check_dead_children(); - - spawned = children_spawned; - deleted = children_deleted; - - active = spawned - deleted; - if (active <= max_connections) - break; - - /* Kill some unstarted connections with SIGTERM */ - kill_some_children(SIGTERM, deleted, spawned); - if (active <= max_connections << 1) - break; + int status; + pid_t pid; - /* If the SIGTERM thing isn't helping use SIGKILL */ - kill_some_children(SIGKILL, deleted, spawned); - sleep(1); + while ((pid = waitpid(-1, &status, WNOHANG))>0) { + const char *dead = ""; + remove_child(pid); + if (!WIFEXITED(status) || WEXITSTATUS(status) > 0) + dead = " (with error)"; + loginfo("[%d] Disconnected%s", (int)pid, dead); } } static void handle(int incoming, struct sockaddr *addr, int addrlen) { - pid_t pid = fork(); + pid_t pid; - if (pid) { - unsigned idx; + if (max_connections && live_children >= max_connections) { + kill_some_child(); + sleep(1); /* give it some time to die */ + check_dead_children(); + if (live_children >= max_connections) { + close(incoming); + logerror("Too many children, dropping connection"); + return; + } + } + if ((pid = fork())) { close(incoming); - if (pid < 0) + if (pid < 0) { + logerror("Couldn't fork %s", strerror(errno)); return; + } - idx = children_spawned % MAX_CHILDREN; - children_spawned++; - add_child(idx, pid, addr, addrlen); - - check_max_connections(); + add_child(pid, addr, addrlen); return; } @@ -1081,6 +1014,12 @@ int main(int argc, char **argv) init_timeout = atoi(arg+15); continue; } + if (!prefixcmp(arg, "--max-connections=")) { + max_connections = atoi(arg+18); + if (max_connections<=0) + max_connections=0; /* unlimited */ + continue; + } if (!strcmp(arg, "--strict-paths")) { strict_paths = 1; continue; ^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [PATCH 3/3] git-daemon: rewrite kindergarden 2008-08-13 8:43 ` [PATCH 3/3] git-daemon: rewrite kindergarden Stephen R. van den Berg @ 2008-08-13 8:58 ` Stephen R. van den Berg 2008-08-13 9:00 ` [PATCH] " Stephen R. van den Berg 2008-08-13 9:05 ` [PATCH 3/3] git-daemon: rewrite kindergarden Petr Baudis 2 siblings, 0 replies; 16+ messages in thread From: Stephen R. van den Berg @ 2008-08-13 8:58 UTC (permalink / raw) To: git >+ if (max_connections<=0) >+ max_connections=0; /* unlimited */ Hmmm, slight formatting error. Sorry about that. I'll resend this last patch. -- Sincerely, Stephen R. van den Berg. "And now for something *completely* different!" ^ permalink raw reply [flat|nested] 16+ messages in thread
* [PATCH] git-daemon: rewrite kindergarden 2008-08-13 8:43 ` [PATCH 3/3] git-daemon: rewrite kindergarden Stephen R. van den Berg 2008-08-13 8:58 ` Stephen R. van den Berg @ 2008-08-13 9:00 ` Stephen R. van den Berg 2008-08-13 10:40 ` [PATCH] git-daemon: rewrite kindergarden, new option --max-connections Stephen R. van den Berg 2008-08-13 9:05 ` [PATCH 3/3] git-daemon: rewrite kindergarden Petr Baudis 2 siblings, 1 reply; 16+ messages in thread From: Stephen R. van den Berg @ 2008-08-13 9:00 UTC (permalink / raw) To: git Get rid of the silly fixed array of children and make max-connections dynamic and configurable in the process. Fix the killing code to actually be smart instead of the pseudo-random mess. Avoid forking if too busy already. Signed-off-by: Stephen R. van den Berg <srb@cuci.nl> --- Documentation/git-daemon.txt | 9 +- daemon.c | 215 +++++++++++++++--------------------------- 2 files changed, 84 insertions(+), 140 deletions(-) diff --git a/Documentation/git-daemon.txt b/Documentation/git-daemon.txt index 4ba4b75..b08a08c 100644 --- a/Documentation/git-daemon.txt +++ b/Documentation/git-daemon.txt @@ -9,8 +9,9 @@ SYNOPSIS -------- [verse] 'git daemon' [--verbose] [--syslog] [--export-all] - [--timeout=n] [--init-timeout=n] [--strict-paths] - [--base-path=path] [--user-path | --user-path=path] + [--timeout=n] [--init-timeout=n] [--max-connections=n] + [--strict-paths] [--base-path=path] [--base-path-relaxed] + [--user-path | --user-path=path] [--interpolated-path=pathtemplate] [--reuseaddr] [--detach] [--pid-file=file] [--enable=service] [--disable=service] @@ -99,6 +100,10 @@ OPTIONS it takes for the server to process the sub-request and time spent waiting for next client's request. +--max-connections:: + Maximum number of concurrent clients, defaults to 32. Set it to + zero for no limit. + --syslog:: Log to syslog instead of stderr. Note that this option does not imply --verbose, thus by default only error conditions will be logged. diff --git a/daemon.c b/daemon.c index 19d620c..a04ad1c 100644 --- a/daemon.c +++ b/daemon.c @@ -19,8 +19,8 @@ static int reuseaddr; static const char daemon_usage[] = "git daemon [--verbose] [--syslog] [--export-all]\n" -" [--timeout=n] [--init-timeout=n] [--strict-paths]\n" -" [--base-path=path] [--base-path-relaxed]\n" +" [--timeout=n] [--init-timeout=n] [--max-connections=n]\n" +" [--strict-paths] [--base-path=path] [--base-path-relaxed]\n" " [--user-path | --user-path=path]\n" " [--interpolated-path=path]\n" " [--reuseaddr] [--detach] [--pid-file=file]\n" @@ -584,40 +584,37 @@ static int execute(struct sockaddr *addr) return -1; } +static int max_connections = 32; -/* - * We count spawned/reaped separately, just to avoid any - * races when updating them from signals. The SIGCHLD handler - * will only update children_reaped, and the fork logic will - * only update children_spawned. - * - * MAX_CHILDREN should be a power-of-two to make the modulus - * operation cheap. It should also be at least twice - * the maximum number of connections we will ever allow. - */ -#define MAX_CHILDREN 128 - -static int max_connections = 25; - -/* These are updated by the signal handler */ -static volatile unsigned int children_reaped; -static pid_t dead_child[MAX_CHILDREN]; - -/* These are updated by the main loop */ -static unsigned int children_spawned; -static unsigned int children_deleted; +static unsigned int live_children; static struct child { + struct child*next; pid_t pid; - int addrlen; struct sockaddr_storage address; -} live_child[MAX_CHILDREN]; +} *firstborn; -static void add_child(int idx, pid_t pid, struct sockaddr *addr, int addrlen) +static void add_child(pid_t pid, struct sockaddr *addr, int addrlen) { - live_child[idx].pid = pid; - live_child[idx].addrlen = addrlen; - memcpy(&live_child[idx].address, addr, addrlen); + struct child*newborn; + newborn = xcalloc(1, sizeof *newborn); + if (newborn) { + struct child **cradle, *blanket; + + live_children++; + newborn->pid = pid; + memcpy(&newborn->address, addr, addrlen); + for (cradle = &firstborn; + (blanket = *cradle); + cradle = &blanket->next) + if (!memcmp(&blanket->address, &newborn->address, + sizeof newborn->address)) + break; + newborn->next = blanket; + *cradle = newborn; + } + else + logerror("Out of memory spawning new child"); } /* @@ -626,142 +623,78 @@ static void add_child(int idx, pid_t pid, struct sockaddr *addr, int addrlen) * We move everything up by one, since the new "deleted" will * be one higher. */ -static void remove_child(pid_t pid, unsigned deleted, unsigned spawned) +static void remove_child(pid_t pid) { - struct child n; + struct child **cradle, *blanket; - deleted %= MAX_CHILDREN; - spawned %= MAX_CHILDREN; - if (live_child[deleted].pid == pid) { - live_child[deleted].pid = -1; - return; - } - n = live_child[deleted]; - for (;;) { - struct child m; - deleted = (deleted + 1) % MAX_CHILDREN; - if (deleted == spawned) - die("could not find dead child %d\n", pid); - m = live_child[deleted]; - live_child[deleted] = n; - if (m.pid == pid) - return; - n = m; - } + for (cradle = &firstborn; (blanket = *cradle); cradle = &blanket->next) + if (blanket->pid == pid) { + *cradle = blanket->next; + live_children--; + free(blanket); + break; + } } /* * This gets called if the number of connections grows * past "max_connections". * - * We _should_ start off by searching for connections - * from the same IP, and if there is some address wth - * multiple connections, we should kill that first. - * - * As it is, we just "randomly" kill 25% of the connections, - * and our pseudo-random generator sucks too. I have no - * shame. - * - * Really, this is just a place-holder for a _real_ algorithm. + * We kill the newest connection from a duplicate IP. */ -static void kill_some_children(int signo, unsigned start, unsigned stop) +static void kill_some_child() { - start %= MAX_CHILDREN; - stop %= MAX_CHILDREN; - while (start != stop) { - if (!(start & 3)) - kill(live_child[start].pid, signo); - start = (start + 1) % MAX_CHILDREN; - } -} - -static void check_dead_children(void) -{ - unsigned spawned, reaped, deleted; - - for (;;) { - int status; - pid_t pid = waitpid(-1, &status, WNOHANG); - - if (pid > 0) { - unsigned reaped = children_reaped; - if (!WIFEXITED(status) || WEXITSTATUS(status) > 0) - pid = -pid; - dead_child[reaped % MAX_CHILDREN] = pid; - children_reaped = reaped + 1; - continue; - } - break; - } + const struct child *blanket; - spawned = children_spawned; - reaped = children_reaped; - deleted = children_deleted; + if ((blanket = firstborn)) { + const struct child *next; - while (deleted < reaped) { - pid_t pid = dead_child[deleted % MAX_CHILDREN]; - const char *dead = pid < 0 ? " (with error)" : ""; - - if (pid < 0) - pid = -pid; - - /* XXX: Custom logging, since we don't wanna getpid() */ - if (verbose) { - if (log_syslog) - syslog(LOG_INFO, "[%d] Disconnected%s", - pid, dead); - else - fprintf(stderr, "[%d] Disconnected%s\n", - pid, dead); - } - remove_child(pid, deleted, spawned); - deleted++; + for (; (next = blanket->next); blanket = next) + if (!memcmp(&blanket->address, &next->address, + sizeof next->address)) { + kill(blanket->pid, SIGTERM); + break; + } } - children_deleted = deleted; } -static void check_max_connections(void) +static void check_dead_children(void) { - for (;;) { - int active; - unsigned spawned, deleted; - - check_dead_children(); - - spawned = children_spawned; - deleted = children_deleted; - - active = spawned - deleted; - if (active <= max_connections) - break; - - /* Kill some unstarted connections with SIGTERM */ - kill_some_children(SIGTERM, deleted, spawned); - if (active <= max_connections << 1) - break; + int status; + pid_t pid; - /* If the SIGTERM thing isn't helping use SIGKILL */ - kill_some_children(SIGKILL, deleted, spawned); - sleep(1); + while ((pid = waitpid(-1, &status, WNOHANG))>0) { + const char *dead = ""; + remove_child(pid); + if (!WIFEXITED(status) || WEXITSTATUS(status) > 0) + dead = " (with error)"; + loginfo("[%d] Disconnected%s", (int)pid, dead); } } static void handle(int incoming, struct sockaddr *addr, int addrlen) { - pid_t pid = fork(); + pid_t pid; - if (pid) { - unsigned idx; + if (max_connections && live_children >= max_connections) { + kill_some_child(); + sleep(1); /* give it some time to die */ + check_dead_children(); + if (live_children >= max_connections) { + close(incoming); + logerror("Too many children, dropping connection"); + return; + } + } + if ((pid = fork())) { close(incoming); - if (pid < 0) + if (pid < 0) { + logerror("Couldn't fork %s", strerror(errno)); return; + } - idx = children_spawned % MAX_CHILDREN; - children_spawned++; - add_child(idx, pid, addr, addrlen); - - check_max_connections(); + add_child(pid, addr, addrlen); return; } @@ -1081,6 +1014,12 @@ int main(int argc, char **argv) init_timeout = atoi(arg+15); continue; } + if (!prefixcmp(arg, "--max-connections=")) { + max_connections = atoi(arg+18); + if (max_connections < 0) + max_connections = 0; /* unlimited */ + continue; + } if (!strcmp(arg, "--strict-paths")) { strict_paths = 1; continue; ^ permalink raw reply related [flat|nested] 16+ messages in thread
* [PATCH] git-daemon: rewrite kindergarden, new option --max-connections 2008-08-13 9:00 ` [PATCH] " Stephen R. van den Berg @ 2008-08-13 10:40 ` Stephen R. van den Berg 0 siblings, 0 replies; 16+ messages in thread From: Stephen R. van den Berg @ 2008-08-13 10:40 UTC (permalink / raw) To: git Get rid of the silly fixed array of children and make max-connections dynamic and configurable in the process. Fix the killing code to actually be smart instead of the pseudo-random mess. Avoid forking if too busy already. Signed-off-by: Stephen R. van den Berg <srb@cuci.nl> --- Documentation/git-daemon.txt | 9 +- daemon.c | 213 +++++++++++++++--------------------------- 2 files changed, 82 insertions(+), 140 deletions(-) diff --git a/Documentation/git-daemon.txt b/Documentation/git-daemon.txt index 4ba4b75..b08a08c 100644 --- a/Documentation/git-daemon.txt +++ b/Documentation/git-daemon.txt @@ -9,8 +9,9 @@ SYNOPSIS -------- [verse] 'git daemon' [--verbose] [--syslog] [--export-all] - [--timeout=n] [--init-timeout=n] [--strict-paths] - [--base-path=path] [--user-path | --user-path=path] + [--timeout=n] [--init-timeout=n] [--max-connections=n] + [--strict-paths] [--base-path=path] [--base-path-relaxed] + [--user-path | --user-path=path] [--interpolated-path=pathtemplate] [--reuseaddr] [--detach] [--pid-file=file] [--enable=service] [--disable=service] @@ -99,6 +100,10 @@ OPTIONS it takes for the server to process the sub-request and time spent waiting for next client's request. +--max-connections:: + Maximum number of concurrent clients, defaults to 32. Set it to + zero for no limit. + --syslog:: Log to syslog instead of stderr. Note that this option does not imply --verbose, thus by default only error conditions will be logged. diff --git a/daemon.c b/daemon.c index 19d620c..05a4328 100644 --- a/daemon.c +++ b/daemon.c @@ -19,8 +19,8 @@ static int reuseaddr; static const char daemon_usage[] = "git daemon [--verbose] [--syslog] [--export-all]\n" -" [--timeout=n] [--init-timeout=n] [--strict-paths]\n" -" [--base-path=path] [--base-path-relaxed]\n" +" [--timeout=n] [--init-timeout=n] [--max-connections=n]\n" +" [--strict-paths] [--base-path=path] [--base-path-relaxed]\n" " [--user-path | --user-path=path]\n" " [--interpolated-path=path]\n" " [--reuseaddr] [--detach] [--pid-file=file]\n" @@ -584,40 +584,35 @@ static int execute(struct sockaddr *addr) return -1; } +static int max_connections = 32; -/* - * We count spawned/reaped separately, just to avoid any - * races when updating them from signals. The SIGCHLD handler - * will only update children_reaped, and the fork logic will - * only update children_spawned. - * - * MAX_CHILDREN should be a power-of-two to make the modulus - * operation cheap. It should also be at least twice - * the maximum number of connections we will ever allow. - */ -#define MAX_CHILDREN 128 - -static int max_connections = 25; - -/* These are updated by the signal handler */ -static volatile unsigned int children_reaped; -static pid_t dead_child[MAX_CHILDREN]; - -/* These are updated by the main loop */ -static unsigned int children_spawned; -static unsigned int children_deleted; +static unsigned int live_children; static struct child { + struct child *next; pid_t pid; - int addrlen; struct sockaddr_storage address; -} live_child[MAX_CHILDREN]; +} *firstborn; -static void add_child(int idx, pid_t pid, struct sockaddr *addr, int addrlen) +static void add_child(pid_t pid, struct sockaddr *addr, int addrlen) { - live_child[idx].pid = pid; - live_child[idx].addrlen = addrlen; - memcpy(&live_child[idx].address, addr, addrlen); + struct child *newborn; + newborn = xcalloc(1, sizeof *newborn); + if (newborn) { + struct child **cradle; + + live_children++; + newborn->pid = pid; + memcpy(&newborn->address, addr, addrlen); + for (cradle = &firstborn; *cradle; cradle = &(*cradle)->next) + if (!memcmp(&(*cradle)->address, &newborn->address, + sizeof newborn->address)) + break; + newborn->next = *cradle; + *cradle = newborn; + } + else + logerror("Out of memory spawning new child"); } /* @@ -626,142 +621,78 @@ static void add_child(int idx, pid_t pid, struct sockaddr *addr, int addrlen) * We move everything up by one, since the new "deleted" will * be one higher. */ -static void remove_child(pid_t pid, unsigned deleted, unsigned spawned) +static void remove_child(pid_t pid) { - struct child n; + struct child **cradle, *blanket; - deleted %= MAX_CHILDREN; - spawned %= MAX_CHILDREN; - if (live_child[deleted].pid == pid) { - live_child[deleted].pid = -1; - return; - } - n = live_child[deleted]; - for (;;) { - struct child m; - deleted = (deleted + 1) % MAX_CHILDREN; - if (deleted == spawned) - die("could not find dead child %d\n", pid); - m = live_child[deleted]; - live_child[deleted] = n; - if (m.pid == pid) - return; - n = m; - } + for (cradle = &firstborn; (blanket = *cradle); cradle = &blanket->next) + if (blanket->pid == pid) { + *cradle = blanket->next; + live_children--; + free(blanket); + break; + } } /* * This gets called if the number of connections grows * past "max_connections". * - * We _should_ start off by searching for connections - * from the same IP, and if there is some address wth - * multiple connections, we should kill that first. - * - * As it is, we just "randomly" kill 25% of the connections, - * and our pseudo-random generator sucks too. I have no - * shame. - * - * Really, this is just a place-holder for a _real_ algorithm. + * We kill the newest connection from a duplicate IP. */ -static void kill_some_children(int signo, unsigned start, unsigned stop) +static void kill_some_child() { - start %= MAX_CHILDREN; - stop %= MAX_CHILDREN; - while (start != stop) { - if (!(start & 3)) - kill(live_child[start].pid, signo); - start = (start + 1) % MAX_CHILDREN; - } -} - -static void check_dead_children(void) -{ - unsigned spawned, reaped, deleted; - - for (;;) { - int status; - pid_t pid = waitpid(-1, &status, WNOHANG); - - if (pid > 0) { - unsigned reaped = children_reaped; - if (!WIFEXITED(status) || WEXITSTATUS(status) > 0) - pid = -pid; - dead_child[reaped % MAX_CHILDREN] = pid; - children_reaped = reaped + 1; - continue; - } - break; - } + const struct child *blanket; - spawned = children_spawned; - reaped = children_reaped; - deleted = children_deleted; + if ((blanket = firstborn)) { + const struct child *next; - while (deleted < reaped) { - pid_t pid = dead_child[deleted % MAX_CHILDREN]; - const char *dead = pid < 0 ? " (with error)" : ""; - - if (pid < 0) - pid = -pid; - - /* XXX: Custom logging, since we don't wanna getpid() */ - if (verbose) { - if (log_syslog) - syslog(LOG_INFO, "[%d] Disconnected%s", - pid, dead); - else - fprintf(stderr, "[%d] Disconnected%s\n", - pid, dead); - } - remove_child(pid, deleted, spawned); - deleted++; + for (; (next = blanket->next); blanket = next) + if (!memcmp(&blanket->address, &next->address, + sizeof next->address)) { + kill(blanket->pid, SIGTERM); + break; + } } - children_deleted = deleted; } -static void check_max_connections(void) +static void check_dead_children(void) { - for (;;) { - int active; - unsigned spawned, deleted; - - check_dead_children(); - - spawned = children_spawned; - deleted = children_deleted; - - active = spawned - deleted; - if (active <= max_connections) - break; - - /* Kill some unstarted connections with SIGTERM */ - kill_some_children(SIGTERM, deleted, spawned); - if (active <= max_connections << 1) - break; + int status; + pid_t pid; - /* If the SIGTERM thing isn't helping use SIGKILL */ - kill_some_children(SIGKILL, deleted, spawned); - sleep(1); + while ((pid = waitpid(-1, &status, WNOHANG))>0) { + const char *dead = ""; + remove_child(pid); + if (!WIFEXITED(status) || WEXITSTATUS(status) > 0) + dead = " (with error)"; + loginfo("[%d] Disconnected%s", (int)pid, dead); } } static void handle(int incoming, struct sockaddr *addr, int addrlen) { - pid_t pid = fork(); + pid_t pid; - if (pid) { - unsigned idx; + if (max_connections && live_children >= max_connections) { + kill_some_child(); + sleep(1); /* give it some time to die */ + check_dead_children(); + if (live_children >= max_connections) { + close(incoming); + logerror("Too many children, dropping connection"); + return; + } + } + if ((pid = fork())) { close(incoming); - if (pid < 0) + if (pid < 0) { + logerror("Couldn't fork %s", strerror(errno)); return; + } - idx = children_spawned % MAX_CHILDREN; - children_spawned++; - add_child(idx, pid, addr, addrlen); - - check_max_connections(); + add_child(pid, addr, addrlen); return; } @@ -1081,6 +1012,12 @@ int main(int argc, char **argv) init_timeout = atoi(arg+15); continue; } + if (!prefixcmp(arg, "--max-connections=")) { + max_connections = atoi(arg+18); + if (max_connections < 0) + max_connections = 0; /* unlimited */ + continue; + } if (!strcmp(arg, "--strict-paths")) { strict_paths = 1; continue; ^ permalink raw reply related [flat|nested] 16+ messages in thread
* Re: [PATCH 3/3] git-daemon: rewrite kindergarden 2008-08-13 8:43 ` [PATCH 3/3] git-daemon: rewrite kindergarden Stephen R. van den Berg 2008-08-13 8:58 ` Stephen R. van den Berg 2008-08-13 9:00 ` [PATCH] " Stephen R. van den Berg @ 2008-08-13 9:05 ` Petr Baudis 2008-08-13 10:37 ` Stephen R. van den Berg 2 siblings, 1 reply; 16+ messages in thread From: Petr Baudis @ 2008-08-13 9:05 UTC (permalink / raw) To: Stephen R. van den Berg; +Cc: git On Wed, Aug 13, 2008 at 10:43:31AM +0200, Stephen R. van den Berg wrote: > Get rid of the silly fixed array of children and make > max-connections dynamic and configurable in the process. > Fix the killing code to actually be smart instead of the > pseudo-random mess. > Avoid forking if too busy already. > > Signed-off-by: Stephen R. van den Berg <srb@cuci.nl> I would somehow mention the string '--max-connections' in the log message; that is really useful when looking when some option was introduced. > diff --git a/daemon.c b/daemon.c > index 19d620c..a7a735c 100644 > --- a/daemon.c > +++ b/daemon.c > @@ -584,40 +584,37 @@ static int execute(struct sockaddr *addr) > return -1; > } > > +static int max_connections = 32; > > -/* > - * We count spawned/reaped separately, just to avoid any > - * races when updating them from signals. The SIGCHLD handler > - * will only update children_reaped, and the fork logic will > - * only update children_spawned. > - * > - * MAX_CHILDREN should be a power-of-two to make the modulus > - * operation cheap. It should also be at least twice > - * the maximum number of connections we will ever allow. > - */ > -#define MAX_CHILDREN 128 > - > -static int max_connections = 25; > - > -/* These are updated by the signal handler */ > -static volatile unsigned int children_reaped; > -static pid_t dead_child[MAX_CHILDREN]; > - > -/* These are updated by the main loop */ > -static unsigned int children_spawned; > -static unsigned int children_deleted; > +static unsigned int live_children; > > static struct child { > + struct child*next; struct child *next; > pid_t pid; > - int addrlen; > struct sockaddr_storage address; > -} live_child[MAX_CHILDREN]; > +} *firstborn; > > -static void add_child(int idx, pid_t pid, struct sockaddr *addr, int addrlen) > +static void add_child(pid_t pid, struct sockaddr *addr, int addrlen) > { > - live_child[idx].pid = pid; > - live_child[idx].addrlen = addrlen; > - memcpy(&live_child[idx].address, addr, addrlen); > + struct child*newborn; struct child *newborn; > + newborn = xcalloc(1, sizeof *newborn); > + if (newborn) { > + struct child **cradle, *blanket; > + > + live_children++; > + newborn->pid = pid; > + memcpy(&newborn->address, addr, addrlen); > + for (cradle = &firstborn; > + (blanket = *cradle); > + cradle = &blanket->next) > + if (!memcmp(&blanket->address, &newborn->address, > + sizeof newborn->address)) > + break; > + newborn->next = blanket; > + *cradle = newborn; So, I can always excuse myself by mentioning that it's early in the morning (for me ;-) but what do you actually need the blanket for, in this warm digital world? The current for statement is *really* cryptic... What about: + struct child **cradle; + + live_children++; + newborn->pid = pid; + memcpy(&newborn->address, addr, addrlen); + for (cradle = &firstborn; *cradle; cradle = &(*cradle)->next) + if (!memcmp(&(*cradle)->address, &newborn->address, + sizeof newborn->address)) + break; + newborn->next = *cradle; + *cradle = newborn; > + } > + else > + logerror("Out of memory spawning new child"); > } > > /* > @@ -626,142 +623,78 @@ static void add_child(int idx, pid_t pid, struct sockaddr *addr, int addrlen) > * We move everything up by one, since the new "deleted" will > * be one higher. > */ > -static void remove_child(pid_t pid, unsigned deleted, unsigned spawned) > +static void remove_child(pid_t pid) > { > - struct child n; > + struct child **cradle, *blanket; > > + for (cradle = &firstborn; (blanket = *cradle); cradle = &blanket->next) > + if (blanket->pid == pid) { > + *cradle = blanket->next; > + live_children--; > + free(blanket); > + break; > + } > } Same here. You just need a temporary variable in the innermost block. > +static void kill_some_child() > { > + const struct child *blanket; > > + if ((blanket = firstborn)) { > + const struct child *next; > > + for (; (next = blanket->next); blanket = next) > + if (!memcmp(&blanket->address, &next->address, > + sizeof next->address)) { > + kill(blanket->pid, SIGTERM); > + break; > + } I think using cradle instead of blanket in this for loop would be more consistent, if perhaps somewhat more morbid. -- Petr "Pasky" Baudis The next generation of interesting software will be done on the Macintosh, not the IBM PC. -- Bill Gates ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 3/3] git-daemon: rewrite kindergarden 2008-08-13 9:05 ` [PATCH 3/3] git-daemon: rewrite kindergarden Petr Baudis @ 2008-08-13 10:37 ` Stephen R. van den Berg 0 siblings, 0 replies; 16+ messages in thread From: Stephen R. van den Berg @ 2008-08-13 10:37 UTC (permalink / raw) To: Petr Baudis; +Cc: git Petr Baudis wrote: >On Wed, Aug 13, 2008 at 10:43:31AM +0200, Stephen R. van den Berg wrote: >I would somehow mention the string '--max-connections' in the log >message; that is really useful when looking when some option was >introduced. Fixed. >> + struct child*next; >struct child *next; >> + struct child*newborn; >struct child *newborn; Fixed. >So, I can always excuse myself by mentioning that it's early in the >morning (for me ;-) but what do you actually need the blanket for, in >this warm digital world? This seems to be a case of code restructuring and forgetting to optimise. There was a reason for the blanket before, but I am warming up to the idea to go without blanket in this case. >The current for statement is *really* cryptic... What about: Probably ok, I'll refactor it accordingly. >> -static void remove_child(pid_t pid, unsigned deleted, unsigned spawned) >> +static void remove_child(pid_t pid) >> { >> - struct child n; >> + struct child **cradle, *blanket; >> + for (cradle = &firstborn; (blanket = *cradle); cradle = &blanket->next) >> + if (blanket->pid == pid) { >> + *cradle = blanket->next; >> + live_children--; >> + free(blanket); >> + break; >> + } >Same here. You just need a temporary variable in the innermost block. Yes, but here using the blanket eliminates some * operators. So I'd prefer to keep it. >> +static void kill_some_child() >> { >> + const struct child *blanket; >> + if ((blanket = firstborn)) { >> + const struct child *next; >> + for (; (next = blanket->next); blanket = next) >> + if (!memcmp(&blanket->address, &next->address, >> + sizeof next->address)) { >> + kill(blanket->pid, SIGTERM); >> + break; >> + } >I think using cradle instead of blanket in this for loop would be more >consistent, if perhaps somewhat more morbid. I kept the cradle for the double indirections (you have to actually reach deeper into a cradle), a blanket can be touched without reaching deep, so I'd prefer to keep a blanket here. -- Sincerely, Stephen R. van den Berg. "And now for something *completely* different!" ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 1/3] git-daemon: logging done right 2008-08-13 8:43 [PATCH 1/3] git-daemon: logging done right Stephen R. van den Berg 2008-08-13 8:43 ` [PATCH 2/3] git-daemon: make the signal handler almost a no-op Stephen R. van den Berg 2008-08-13 8:43 ` [PATCH 3/3] git-daemon: rewrite kindergarden Stephen R. van den Berg @ 2008-08-13 23:13 ` Junio C Hamano 2 siblings, 0 replies; 16+ messages in thread From: Junio C Hamano @ 2008-08-13 23:13 UTC (permalink / raw) To: Stephen R. van den Berg; +Cc: git "Stephen R. van den Berg" <srb@cuci.nl> writes: > Make git-daemon a proper syslogging citizen with PID-info. In general, please try to avoid using the wording like that in the commit log message, _without_ defining what you think is the proper way and why you think the current behaviour is improper. Do you mean "if we use syslog(), we can ask it to add pid information to the output and we do not have to prepend pid information ourselves"? For the same reason, "logging done right" is somewhat a suboptimal title for the patch. If the title describes concisely the reason why the new way was chosen by you over the old way, and the readers can judge for themselves if they agree with your reasoning and if the new way is better. For example, you could have said "git-daemon: let syslog() to add our pid to the messages". Yes, it is _not_ the only change you are making with this patch, and the example message won't describe what you did fully. It may be an indication that you are doing too many things in one patch, unless other changes are "well, they are not a big deal but while we are at it why not fix them" kind of changes. > Simplify the overzealous double buffering in the logroutine. Is there any overzealous double buffering involved? I thought it just does s*printf() twice into the single buf[]? Are you referring to the trick of setting stderr to line-buffered output? It does remove the need for these two s*printf(), and is an improvement. I am however not as sure as you seem to be that these two changes make the difference between "done right" and "done wrong" --- at most I'd say that these fall into "improve the way the log is done" category. > Call logerror() instead of error(). Calls to die() are covered by setting die-routine to daemon_die(), but the error() calls are lost to bitbucket. This is a real fix to keep otherwise lost error message to the log stream. Don't you want to also send the "poll failed" error to the log stream as well? Thanks. ^ permalink raw reply [flat|nested] 16+ messages in thread
end of thread, other threads:[~2008-08-14 13:37 UTC | newest] Thread overview: 16+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2008-08-13 8:43 [PATCH 1/3] git-daemon: logging done right Stephen R. van den Berg 2008-08-13 8:43 ` [PATCH 2/3] git-daemon: make the signal handler almost a no-op Stephen R. van den Berg 2008-08-14 0:09 ` Junio C Hamano 2008-08-14 0:18 ` Stephen R. van den Berg 2008-08-14 1:09 ` Junio C Hamano 2008-08-14 7:47 ` Stephen R. van den Berg 2008-08-14 8:26 ` Junio C Hamano 2008-08-14 10:13 ` Stephen R. van den Berg 2008-08-14 13:41 ` Johannes Schindelin 2008-08-13 8:43 ` [PATCH 3/3] git-daemon: rewrite kindergarden Stephen R. van den Berg 2008-08-13 8:58 ` Stephen R. van den Berg 2008-08-13 9:00 ` [PATCH] " Stephen R. van den Berg 2008-08-13 10:40 ` [PATCH] git-daemon: rewrite kindergarden, new option --max-connections Stephen R. van den Berg 2008-08-13 9:05 ` [PATCH 3/3] git-daemon: rewrite kindergarden Petr Baudis 2008-08-13 10:37 ` Stephen R. van den Berg 2008-08-13 23:13 ` [PATCH 1/3] git-daemon: logging done right Junio C Hamano
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).