* [PATCH] Limit file descriptors used by packs
@ 2011-02-28 20:27 Shawn O. Pearce
2011-02-28 20:35 ` Bernhard R. Link
2011-02-28 20:38 ` Junio C Hamano
0 siblings, 2 replies; 10+ messages in thread
From: Shawn O. Pearce @ 2011-02-28 20:27 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git
Rather than using 'errno == EMFILE' after a failed open() call
to indicate the process is out of file descriptors and an LRU
pack window should be closed, place a hard upper limit on the
number of open packs based on the actual rlimit of the process.
By using a hard upper limit that is below the rlimit of the current
process, it is not necessary to check for EMFILE on every single
fd-allocating system call. Instead reserving 8 file descriptors
makes it safe to assume the system call won't fail due to being
over limit in the filedescriptor limit.
This fixes a case where running `git gc --auto` in a repository
with more than 1024 packs (but an rlimit of 1024 open fds) fails
due to the temporary output file not being able to allocate a
file descriptor. The output file is opened by pack-objects after
object enumeration and delete compression are done, both of which
have already opened all of the packs and fully populated the file
descriptor table.
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
---
sha1_file.c | 49 ++++++++++++++++++++++++++++++++++++-------------
1 files changed, 36 insertions(+), 13 deletions(-)
diff --git a/sha1_file.c b/sha1_file.c
index d949b35..8863ff6 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -418,6 +418,8 @@ static unsigned int pack_used_ctr;
static unsigned int pack_mmap_calls;
static unsigned int peak_pack_open_windows;
static unsigned int pack_open_windows;
+static unsigned int pack_open_fds;
+static unsigned int pack_max_fds;
static size_t peak_pack_mapped;
static size_t pack_mapped;
struct packed_git *packed_git;
@@ -597,6 +599,7 @@ static int unuse_one_window(struct packed_git *current, int keep_fd)
lru_p->windows = lru_w->next;
if (!lru_p->windows && lru_p->pack_fd != keep_fd) {
close(lru_p->pack_fd);
+ pack_open_fds--;
lru_p->pack_fd = -1;
}
}
@@ -681,8 +684,10 @@ void free_pack_by_name(const char *pack_name)
if (strcmp(pack_name, p->pack_name) == 0) {
clear_delta_base_cache();
close_pack_windows(p);
- if (p->pack_fd != -1)
+ if (p->pack_fd != -1) {
close(p->pack_fd);
+ pack_open_fds--;
+ }
close_pack_index(p);
free(p->bad_object_sha1);
*pp = p->next;
@@ -708,9 +713,35 @@ static int open_packed_git_1(struct packed_git *p)
if (!p->index_data && open_pack_index(p))
return error("packfile %s index unavailable", p->pack_name);
+ if (!pack_max_fds) {
+ struct rlimit lim;
+ unsigned int max_fds;
+
+ if (getrlimit(RLIMIT_NOFILE, &lim))
+ die_errno("cannot get RLIMIT_NOFILE");
+
+ max_fds = lim.rlim_cur;
+
+ if (lim.rlim_cur < lim.rlim_max) {
+ lim.rlim_cur = lim.rlim_max;
+ if (!setrlimit(RLIMIT_NOFILE, &lim))
+ max_fds = lim.rlim_max;
+ }
+
+ /* Save 3 for stdin/stdout/stderr, 5 for work */
+ if (8 < max_fds)
+ pack_max_fds = max_fds - 8;
+ else
+ pack_max_fds = 1;
+ }
+
+ while (pack_max_fds <= pack_open_fds && unuse_one_window(NULL, -1))
+ /* nothing */;
+
p->pack_fd = git_open_noatime(p->pack_name, p);
if (p->pack_fd < 0 || fstat(p->pack_fd, &st))
return -1;
+ pack_open_fds++;
/* If we created the struct before we had the pack we lack size. */
if (!p->pack_size) {
@@ -762,6 +793,7 @@ static int open_packed_git(struct packed_git *p)
return 0;
if (p->pack_fd != -1) {
close(p->pack_fd);
+ pack_open_fds--;
p->pack_fd = -1;
}
return -1;
@@ -919,6 +951,9 @@ struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path)
void install_packed_git(struct packed_git *pack)
{
+ if (pack->pack_fd != -1)
+ pack_open_fds++;
+
pack->next = packed_git;
packed_git = pack;
}
@@ -936,8 +971,6 @@ static void prepare_packed_git_one(char *objdir, int local)
sprintf(path, "%s/pack", objdir);
len = strlen(path);
dir = opendir(path);
- while (!dir && errno == EMFILE && unuse_one_window(NULL, -1))
- dir = opendir(path);
if (!dir) {
if (errno != ENOENT)
error("unable to open object pack directory: %s: %s",
@@ -1093,14 +1126,6 @@ static int git_open_noatime(const char *name, struct packed_git *p)
if (fd >= 0)
return fd;
- /* Might the failure be insufficient file descriptors? */
- if (errno == EMFILE) {
- if (unuse_one_window(p, -1))
- continue;
- else
- return -1;
- }
-
/* Might the failure be due to O_NOATIME? */
if (errno != ENOENT && sha1_file_open_flag) {
sha1_file_open_flag = 0;
@@ -2360,8 +2385,6 @@ static int write_loose_object(const unsigned char *sha1, char *hdr, int hdrlen,
filename = sha1_file_name(sha1);
fd = create_tmpfile(tmpfile, sizeof(tmpfile), filename);
- while (fd < 0 && errno == EMFILE && unuse_one_window(NULL, -1))
- fd = create_tmpfile(tmpfile, sizeof(tmpfile), filename);
if (fd < 0) {
if (errno == EACCES)
return error("insufficient permission for adding an object to repository database %s\n", get_object_directory());
--
1.7.4.1.249.g4aa7
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH] Limit file descriptors used by packs
2011-02-28 20:27 [PATCH] Limit file descriptors used by packs Shawn O. Pearce
@ 2011-02-28 20:35 ` Bernhard R. Link
2011-02-28 20:44 ` Shawn O. Pearce
2011-02-28 20:38 ` Junio C Hamano
1 sibling, 1 reply; 10+ messages in thread
From: Bernhard R. Link @ 2011-02-28 20:35 UTC (permalink / raw)
To: Shawn O. Pearce; +Cc: Junio C Hamano, git
* Shawn O. Pearce <spearce@spearce.org> [110228 21:27]:
> By using a hard upper limit that is below the rlimit of the current
> process, it is not necessary to check for EMFILE on every single
> fd-allocating system call. Instead reserving 8 file descriptors
> makes it safe to assume the system call won't fail due to being
> over limit in the filedescriptor limit.
Isn't 8 quite a bit low for a reserve? Couldn't some libc stuff
(especially nss modules perhaps activated by something) easily surpass
that?
Bernhard R. Link
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] Limit file descriptors used by packs
2011-02-28 20:27 [PATCH] Limit file descriptors used by packs Shawn O. Pearce
2011-02-28 20:35 ` Bernhard R. Link
@ 2011-02-28 20:38 ` Junio C Hamano
2011-02-28 20:47 ` Shawn O. Pearce
1 sibling, 1 reply; 10+ messages in thread
From: Junio C Hamano @ 2011-02-28 20:38 UTC (permalink / raw)
To: Shawn O. Pearce; +Cc: git
"Shawn O. Pearce" <spearce@spearce.org> writes:
> ... The output file is opened by pack-objects after
> object enumeration and delete compression are done, ...
s/delete/deflate/, I guess.
> diff --git a/sha1_file.c b/sha1_file.c
> index d949b35..8863ff6 100644
> --- a/sha1_file.c
> +++ b/sha1_file.c
> @@ -708,9 +713,35 @@ static int open_packed_git_1(struct packed_git *p)
> if (!p->index_data && open_pack_index(p))
> return error("packfile %s index unavailable", p->pack_name);
>
> + if (!pack_max_fds) {
> + ...
> + if (lim.rlim_cur < lim.rlim_max) {
> + lim.rlim_cur = lim.rlim_max;
> + if (!setrlimit(RLIMIT_NOFILE, &lim))
> + max_fds = lim.rlim_max;
> + }
This is somewhat questionable, isn't it? We don't know why the user chose
to ulimit the process yet forcibly bust that limit without telling him?
Other than that it looks sensible. Thanks.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] Limit file descriptors used by packs
2011-02-28 20:35 ` Bernhard R. Link
@ 2011-02-28 20:44 ` Shawn O. Pearce
0 siblings, 0 replies; 10+ messages in thread
From: Shawn O. Pearce @ 2011-02-28 20:44 UTC (permalink / raw)
To: Bernhard R. Link; +Cc: Junio C Hamano, git
"Bernhard R. Link" <brl+ccmadness@pcpool00.mathematik.uni-freiburg.de> wrote:
> * Shawn O. Pearce <spearce@spearce.org> [110228 21:27]:
> > By using a hard upper limit that is below the rlimit of the current
> > process, it is not necessary to check for EMFILE on every single
> > fd-allocating system call. Instead reserving 8 file descriptors
> > makes it safe to assume the system call won't fail due to being
> > over limit in the filedescriptor limit.
>
> Isn't 8 quite a bit low for a reserve? Couldn't some libc stuff
> (especially nss modules perhaps activated by something) easily surpass
> that?
Originally I proposed 25 to Junio, but he scoffed and said that
was quite high. So I went with 8, 3 for std{in,out,err} and 5 as
a WAG for everything else.
Its arbitrary, 25 might be a better WAG than 8...
--
Shawn.
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] Limit file descriptors used by packs
2011-02-28 20:38 ` Junio C Hamano
@ 2011-02-28 20:47 ` Shawn O. Pearce
2011-02-28 20:52 ` [PATCH v2] " Shawn O. Pearce
2011-03-01 14:24 ` [PATCH] " Junio C Hamano
0 siblings, 2 replies; 10+ messages in thread
From: Shawn O. Pearce @ 2011-02-28 20:47 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git
Junio C Hamano <gitster@pobox.com> wrote:
> "Shawn O. Pearce" <spearce@spearce.org> writes:
>
> > ... The output file is opened by pack-objects after
> > object enumeration and delete compression are done, ...
>
> s/delete/deflate/, I guess.
s/delete/delta/ is what I meant. "Compressing objects" is the
delta compression phase. The fact that we save some small deltas in
deflated format during this phase is an uninteresting implementation
detail not worth mentioning in the comments.
> > diff --git a/sha1_file.c b/sha1_file.c
> > index d949b35..8863ff6 100644
> > --- a/sha1_file.c
> > +++ b/sha1_file.c
> > @@ -708,9 +713,35 @@ static int open_packed_git_1(struct packed_git *p)
> > if (!p->index_data && open_pack_index(p))
> > return error("packfile %s index unavailable", p->pack_name);
> >
> > + if (!pack_max_fds) {
> > + ...
> > + if (lim.rlim_cur < lim.rlim_max) {
> > + lim.rlim_cur = lim.rlim_max;
> > + if (!setrlimit(RLIMIT_NOFILE, &lim))
> > + max_fds = lim.rlim_max;
> > + }
>
> This is somewhat questionable, isn't it? We don't know why the user chose
> to ulimit the process yet forcibly bust that limit without telling him?
Maybe you are right.
In network server code is somewhat common to push the rlim_cur to
rlim_max if its not already there, since you might need to use a
lot of fds to handle a lot of concurrent clients. So habit sort of
caused me to just do this out of instinct.
In this particular part of C Git, if we are bumping up against the
hard pack_max_fds limit we're already into some pretty difficult
computation. Trying to push the rlimit higher in order to avoid
close/open calls as we cycle through fds isn't really going to make
a huge difference on end-user latency for the command to finish
its task. So maybe we are better off honoring the rlim_cur that we
inherited from the user/environment.
I'll respin a v2 for you.
--
Shawn.
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH v2] Limit file descriptors used by packs
2011-02-28 20:47 ` Shawn O. Pearce
@ 2011-02-28 20:52 ` Shawn O. Pearce
2011-02-28 21:13 ` Erik Faye-Lund
2011-03-01 14:24 ` [PATCH] " Junio C Hamano
1 sibling, 1 reply; 10+ messages in thread
From: Shawn O. Pearce @ 2011-02-28 20:52 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git
Rather than using 'errno == EMFILE' after a failed open() call
to indicate the process is out of file descriptors and an LRU
pack window should be closed, place a hard upper limit on the
number of open packs based on the actual rlimit of the process.
By using a hard upper limit that is below the rlimit of the current
process it is not necessary to check for EMFILE on every single
fd-allocating system call. Instead reserving 25 file descriptors
makes it safe to assume the system call won't fail due to being over
the filedescriptor limit. Here 25 is chosen as a WAG, but considers
3 for stdin/stdout/stderr, and at least a few for other Git code
to operate on temporary files. An additional 20 is reserved as it
is not known what the C library needs to perform other services on
Git's behalf, such as nsswitch or name resolution.
This fixes a case where running `git gc --auto` in a repository
with more than 1024 packs (but an rlimit of 1024 open fds) fails
due to the temporary output file not being able to allocate a
file descriptor. The output file is opened by pack-objects after
object enumeration and delta compression are done, both of which
have already opened all of the packs and fully populated the file
descriptor table.
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
---
sha1_file.c | 43 ++++++++++++++++++++++++++++++-------------
1 files changed, 30 insertions(+), 13 deletions(-)
diff --git a/sha1_file.c b/sha1_file.c
index d949b35..7850c18 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -418,6 +418,8 @@ static unsigned int pack_used_ctr;
static unsigned int pack_mmap_calls;
static unsigned int peak_pack_open_windows;
static unsigned int pack_open_windows;
+static unsigned int pack_open_fds;
+static unsigned int pack_max_fds;
static size_t peak_pack_mapped;
static size_t pack_mapped;
struct packed_git *packed_git;
@@ -597,6 +599,7 @@ static int unuse_one_window(struct packed_git *current, int keep_fd)
lru_p->windows = lru_w->next;
if (!lru_p->windows && lru_p->pack_fd != keep_fd) {
close(lru_p->pack_fd);
+ pack_open_fds--;
lru_p->pack_fd = -1;
}
}
@@ -681,8 +684,10 @@ void free_pack_by_name(const char *pack_name)
if (strcmp(pack_name, p->pack_name) == 0) {
clear_delta_base_cache();
close_pack_windows(p);
- if (p->pack_fd != -1)
+ if (p->pack_fd != -1) {
close(p->pack_fd);
+ pack_open_fds--;
+ }
close_pack_index(p);
free(p->bad_object_sha1);
*pp = p->next;
@@ -708,9 +713,29 @@ static int open_packed_git_1(struct packed_git *p)
if (!p->index_data && open_pack_index(p))
return error("packfile %s index unavailable", p->pack_name);
+ if (!pack_max_fds) {
+ struct rlimit lim;
+ unsigned int max_fds;
+
+ if (getrlimit(RLIMIT_NOFILE, &lim))
+ die_errno("cannot get RLIMIT_NOFILE");
+
+ max_fds = lim.rlim_cur;
+
+ /* Save 3 for stdin/stdout/stderr, 22 for work */
+ if (25 < max_fds)
+ pack_max_fds = max_fds - 25;
+ else
+ pack_max_fds = 1;
+ }
+
+ while (pack_max_fds <= pack_open_fds && unuse_one_window(NULL, -1))
+ /* nothing */;
+
p->pack_fd = git_open_noatime(p->pack_name, p);
if (p->pack_fd < 0 || fstat(p->pack_fd, &st))
return -1;
+ pack_open_fds++;
/* If we created the struct before we had the pack we lack size. */
if (!p->pack_size) {
@@ -762,6 +787,7 @@ static int open_packed_git(struct packed_git *p)
return 0;
if (p->pack_fd != -1) {
close(p->pack_fd);
+ pack_open_fds--;
p->pack_fd = -1;
}
return -1;
@@ -919,6 +945,9 @@ struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path)
void install_packed_git(struct packed_git *pack)
{
+ if (pack->pack_fd != -1)
+ pack_open_fds++;
+
pack->next = packed_git;
packed_git = pack;
}
@@ -936,8 +965,6 @@ static void prepare_packed_git_one(char *objdir, int local)
sprintf(path, "%s/pack", objdir);
len = strlen(path);
dir = opendir(path);
- while (!dir && errno == EMFILE && unuse_one_window(NULL, -1))
- dir = opendir(path);
if (!dir) {
if (errno != ENOENT)
error("unable to open object pack directory: %s: %s",
@@ -1093,14 +1120,6 @@ static int git_open_noatime(const char *name, struct packed_git *p)
if (fd >= 0)
return fd;
- /* Might the failure be insufficient file descriptors? */
- if (errno == EMFILE) {
- if (unuse_one_window(p, -1))
- continue;
- else
- return -1;
- }
-
/* Might the failure be due to O_NOATIME? */
if (errno != ENOENT && sha1_file_open_flag) {
sha1_file_open_flag = 0;
@@ -2360,8 +2379,6 @@ static int write_loose_object(const unsigned char *sha1, char *hdr, int hdrlen,
filename = sha1_file_name(sha1);
fd = create_tmpfile(tmpfile, sizeof(tmpfile), filename);
- while (fd < 0 && errno == EMFILE && unuse_one_window(NULL, -1))
- fd = create_tmpfile(tmpfile, sizeof(tmpfile), filename);
if (fd < 0) {
if (errno == EACCES)
return error("insufficient permission for adding an object to repository database %s\n", get_object_directory());
--
1.7.4.1.249.g4aa7
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH v2] Limit file descriptors used by packs
2011-02-28 20:52 ` [PATCH v2] " Shawn O. Pearce
@ 2011-02-28 21:13 ` Erik Faye-Lund
0 siblings, 0 replies; 10+ messages in thread
From: Erik Faye-Lund @ 2011-02-28 21:13 UTC (permalink / raw)
To: Shawn O. Pearce; +Cc: Junio C Hamano, git
On Mon, Feb 28, 2011 at 9:52 PM, Shawn O. Pearce <spearce@spearce.org> wrote:
> Rather than using 'errno == EMFILE' after a failed open() call
> to indicate the process is out of file descriptors and an LRU
> pack window should be closed, place a hard upper limit on the
> number of open packs based on the actual rlimit of the process.
>
> By using a hard upper limit that is below the rlimit of the current
> process it is not necessary to check for EMFILE on every single
> fd-allocating system call. Instead reserving 25 file descriptors
> makes it safe to assume the system call won't fail due to being over
> the filedescriptor limit. Here 25 is chosen as a WAG, but considers
> 3 for stdin/stdout/stderr, and at least a few for other Git code
> to operate on temporary files. An additional 20 is reserved as it
> is not known what the C library needs to perform other services on
> Git's behalf, such as nsswitch or name resolution.
>
> This fixes a case where running `git gc --auto` in a repository
> with more than 1024 packs (but an rlimit of 1024 open fds) fails
> due to the temporary output file not being able to allocate a
> file descriptor. The output file is opened by pack-objects after
> object enumeration and delta compression are done, both of which
> have already opened all of the packs and fully populated the file
> descriptor table.
>
> Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
> ---
> sha1_file.c | 43 ++++++++++++++++++++++++++++++-------------
> 1 files changed, 30 insertions(+), 13 deletions(-)
>
> diff --git a/sha1_file.c b/sha1_file.c
> index d949b35..7850c18 100644
> --- a/sha1_file.c
> +++ b/sha1_file.c
> @@ -418,6 +418,8 @@ static unsigned int pack_used_ctr;
> static unsigned int pack_mmap_calls;
> static unsigned int peak_pack_open_windows;
> static unsigned int pack_open_windows;
> +static unsigned int pack_open_fds;
> +static unsigned int pack_max_fds;
> static size_t peak_pack_mapped;
> static size_t pack_mapped;
> struct packed_git *packed_git;
> @@ -597,6 +599,7 @@ static int unuse_one_window(struct packed_git *current, int keep_fd)
> lru_p->windows = lru_w->next;
> if (!lru_p->windows && lru_p->pack_fd != keep_fd) {
> close(lru_p->pack_fd);
> + pack_open_fds--;
> lru_p->pack_fd = -1;
> }
> }
> @@ -681,8 +684,10 @@ void free_pack_by_name(const char *pack_name)
> if (strcmp(pack_name, p->pack_name) == 0) {
> clear_delta_base_cache();
> close_pack_windows(p);
> - if (p->pack_fd != -1)
> + if (p->pack_fd != -1) {
> close(p->pack_fd);
> + pack_open_fds--;
> + }
> close_pack_index(p);
> free(p->bad_object_sha1);
> *pp = p->next;
> @@ -708,9 +713,29 @@ static int open_packed_git_1(struct packed_git *p)
> if (!p->index_data && open_pack_index(p))
> return error("packfile %s index unavailable", p->pack_name);
>
> + if (!pack_max_fds) {
> + struct rlimit lim;
> + unsigned int max_fds;
> +
> + if (getrlimit(RLIMIT_NOFILE, &lim))
> + die_errno("cannot get RLIMIT_NOFILE");
> +
We don't have getrlimit on Windows :(
I guess something like should work, but untested. Limit of 2048 taken from MSDN:
http://msdn.microsoft.com/en-us/library/6e3b887c(v=vs.71).aspx
---8<---
diff --git a/compat/mingw.h b/compat/mingw.h
index 9c00e75..9155ce3 100644
--- a/compat/mingw.h
+++ b/compat/mingw.h
@@ -234,6 +234,22 @@ int mingw_getpagesize(void);
#define getpagesize mingw_getpagesize
#endif
+struct rlimit {
+ unsigned int rlim_cur;
+};
+#define RLIMIT_NOFILE 0
+
+static inline int getrlimit(int resource, struct rlimit *rlp)
+{
+ if (resource != RLIMIT_NOFILE) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ rlp->rlim_cur = 2048;
+ return 0;
+}
+
/* Use mingw_lstat() instead of lstat()/stat() and
* mingw_fstat() instead of fstat() on Windows.
*/
---8<---
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH] Limit file descriptors used by packs
2011-02-28 20:47 ` Shawn O. Pearce
2011-02-28 20:52 ` [PATCH v2] " Shawn O. Pearce
@ 2011-03-01 14:24 ` Junio C Hamano
2011-03-01 14:58 ` Shawn Pearce
1 sibling, 1 reply; 10+ messages in thread
From: Junio C Hamano @ 2011-03-01 14:24 UTC (permalink / raw)
To: Shawn O. Pearce; +Cc: git
"Shawn O. Pearce" <spearce@spearce.org> writes:
> In this particular part of C Git, if we are bumping up against the
> hard pack_max_fds limit we're already into some pretty difficult
> computation. Trying to push the rlimit higher in order to avoid
> close/open calls as we cycle through fds isn't really going to make
> a huge difference on end-user latency for the command to finish
> its task. So maybe we are better off honoring the rlim_cur that we
> inherited from the user/environment.
Let's step back a bit.
You are holding too many file descriptors open because you have too many
pack-files in your repository.
I am not going to question why they aren't repacked before their number
gets too large---that is not a valid question to ask in the context of
this discussion. But it is a valid question to ask why we need an open
file descriptor for each of them to begin with, and if we really need to,
isn't it?
We keep one file descriptor open for each .pack in which we have pack
window(s). The reason we keep one file descriptor open is because we
might want to mmap() different portions of a .pack file that is already in
use through the file descriptor to open new window(s) on demand.
For a .pack that fits inside a single pack window, however, can't we close
the file descriptor immediately after mmap() it to obtain a sole window
into it? For such a .pack, we would either have one window into it, or
the .pack is not in use and have no window into it. When the number of
windows drops to zero, the current code closes the file descriptor, and
upon next use, we already let the caller access the .pack correctly, so
we already should know when to re-open a file descriptor to it as needed.
I am wondering if it is a viable approach to, inside open_packed_git_1(),
- mark a packfile that is small enough (i.e. st.st_size aka p->pack_size
is smaller than packed_git_window_size) as "persistently mapped";
- keep a window that covers the entire thing in p->windows as a single
and sole window into it for such a pack; and
- close the file descriptor when we did the above
and have use_pack() notice that single window and use it.
This is not an alternate proposal to your patch, but the approach may
alleviate the resource pressure in the first place, no?
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH] Limit file descriptors used by packs
2011-03-01 14:24 ` [PATCH] " Junio C Hamano
@ 2011-03-01 14:58 ` Shawn Pearce
2011-03-02 18:01 ` [PATCH 2/1] sha1_file.c: Don't retain open fds on small packs Shawn O. Pearce
0 siblings, 1 reply; 10+ messages in thread
From: Shawn Pearce @ 2011-03-01 14:58 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git
On Tue, Mar 1, 2011 at 06:24, Junio C Hamano <gitster@pobox.com> wrote:
> "Shawn O. Pearce" <spearce@spearce.org> writes:
>
>> In this particular part of C Git, if we are bumping up against the
>> hard pack_max_fds limit we're already into some pretty difficult
>> computation. Trying to push the rlimit higher in order to avoid
>> close/open calls as we cycle through fds isn't really going to make
>> a huge difference on end-user latency for the command to finish
>> its task. So maybe we are better off honoring the rlim_cur that we
>> inherited from the user/environment.
>
> Let's step back a bit.
...
> For a .pack that fits inside a single pack window, however, can't we close
> the file descriptor immediately after mmap() it to obtain a sole window
> into it?
Yes. And its unrelated to this patch. You can still run out of file
descriptors because you have a lot of large packs. :-)
I've considered this in the past, but avoided it because I thought the
unuse_one_window() function might become more complex. But its not, we
can just keep popping windows until the condition is met, which for a
file descriptor is that we are below the limit.
I'll send a follow-up patch that builds on top of this one to close
the pack fd if the entire thing fits into one window.
--
Shawn.
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH 2/1] sha1_file.c: Don't retain open fds on small packs
2011-03-01 14:58 ` Shawn Pearce
@ 2011-03-02 18:01 ` Shawn O. Pearce
0 siblings, 0 replies; 10+ messages in thread
From: Shawn O. Pearce @ 2011-03-02 18:01 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git
If a pack file is small enough that its entire contents fits within
one mmap window, mmap the file and then immediately close its file
descriptor. This reduces the number of file descriptors that are
needed to read from repositories with many tiny pack files, such
as one that has received 1000 pushes (and created 1000 small pack
files) since its last repack.
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
---
cache.h | 3 ++-
fast-import.c | 1 +
sha1_file.c | 41 ++++++++++++++++++++++++++++++++++++-----
3 files changed, 39 insertions(+), 6 deletions(-)
diff --git a/cache.h b/cache.h
index 08a9022..1d362c4 100644
--- a/cache.h
+++ b/cache.h
@@ -914,7 +914,8 @@ extern struct packed_git {
time_t mtime;
int pack_fd;
unsigned pack_local:1,
- pack_keep:1;
+ pack_keep:1,
+ do_not_close:1;
unsigned char sha1[20];
/* something like ".git/objects/pack/xxxxx.pack" */
char pack_name[FLEX_ARRAY]; /* more */
diff --git a/fast-import.c b/fast-import.c
index 3886a1b..4916a9d 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -872,6 +872,7 @@ static void start_packfile(void)
p = xcalloc(1, sizeof(*p) + strlen(tmpfile) + 2);
strcpy(p->pack_name, tmpfile);
p->pack_fd = pack_fd;
+ p->do_not_close = 1;
pack_file = sha1fd(pack_fd, p->pack_name);
hdr.hdr_signature = htonl(PACK_SIGNATURE);
diff --git a/sha1_file.c b/sha1_file.c
index 7850c18..fbb178a 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -597,7 +597,8 @@ static int unuse_one_window(struct packed_git *current, int keep_fd)
lru_l->next = lru_w->next;
else {
lru_p->windows = lru_w->next;
- if (!lru_p->windows && lru_p->pack_fd != keep_fd) {
+ if (!lru_p->windows && lru_p->pack_fd != -1
+ && lru_p->pack_fd != keep_fd) {
close(lru_p->pack_fd);
pack_open_fds--;
lru_p->pack_fd = -1;
@@ -813,14 +814,13 @@ unsigned char *use_pack(struct packed_git *p,
{
struct pack_window *win = *w_cursor;
- if (p->pack_fd == -1 && open_packed_git(p))
- die("packfile %s cannot be accessed", p->pack_name);
-
/* Since packfiles end in a hash of their content and it's
* pointless to ask for an offset into the middle of that
* hash, and the in_window function above wouldn't match
* don't allow an offset too close to the end of the file.
*/
+ if (!p->pack_size && p->pack_fd == -1 && open_packed_git(p))
+ die("packfile %s cannot be accessed", p->pack_name);
if (offset > (p->pack_size - 20))
die("offset beyond end of packfile (truncated pack?)");
@@ -834,6 +834,10 @@ unsigned char *use_pack(struct packed_git *p,
if (!win) {
size_t window_align = packed_git_window_size / 2;
off_t len;
+
+ if (p->pack_fd == -1 && open_packed_git(p))
+ die("packfile %s cannot be accessed", p->pack_name);
+
win = xcalloc(1, sizeof(*win));
win->offset = (offset / window_align) * window_align;
len = p->pack_size - win->offset;
@@ -851,6 +855,12 @@ unsigned char *use_pack(struct packed_git *p,
die("packfile %s cannot be mapped: %s",
p->pack_name,
strerror(errno));
+ if (!win->offset && win->len == p->pack_size
+ && !p->do_not_close) {
+ close(p->pack_fd);
+ pack_open_fds--;
+ p->pack_fd = -1;
+ }
pack_mmap_calls++;
pack_open_windows++;
if (pack_mapped > peak_pack_mapped)
@@ -1951,6 +1961,27 @@ off_t find_pack_entry_one(const unsigned char *sha1,
return 0;
}
+static int is_pack_valid(struct packed_git *p)
+{
+ /* An already open pack is known to be valid. */
+ if (p->pack_fd != -1)
+ return 1;
+
+ /* If the pack has one window completely covering the
+ * file size, the pack is known to be valid even if
+ * the descriptor is not currently open.
+ */
+ if (p->windows) {
+ struct pack_window *w = p->windows;
+
+ if (!w->offset && w->len == p->pack_size)
+ return 1;
+ }
+
+ /* Force the pack to open to prove its valid. */
+ return !open_packed_git(p);
+}
+
static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e)
{
static struct packed_git *last_found = (void *)1;
@@ -1980,7 +2011,7 @@ static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e)
* it may have been deleted since the index
* was loaded!
*/
- if (p->pack_fd == -1 && open_packed_git(p)) {
+ if (!is_pack_valid(p)) {
error("packfile %s cannot be accessed", p->pack_name);
goto next;
}
--
1.7.4.1.249.g4aa7
^ permalink raw reply related [flat|nested] 10+ messages in thread
end of thread, other threads:[~2011-03-02 18:02 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-02-28 20:27 [PATCH] Limit file descriptors used by packs Shawn O. Pearce
2011-02-28 20:35 ` Bernhard R. Link
2011-02-28 20:44 ` Shawn O. Pearce
2011-02-28 20:38 ` Junio C Hamano
2011-02-28 20:47 ` Shawn O. Pearce
2011-02-28 20:52 ` [PATCH v2] " Shawn O. Pearce
2011-02-28 21:13 ` Erik Faye-Lund
2011-03-01 14:24 ` [PATCH] " Junio C Hamano
2011-03-01 14:58 ` Shawn Pearce
2011-03-02 18:01 ` [PATCH 2/1] sha1_file.c: Don't retain open fds on small packs Shawn O. Pearce
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).