git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH, maint] setup: make sure git_dir path is in a permanent buffer, getenv(3) case
@ 2010-11-11 18:08 Kirill Smelkov
  2010-11-11 18:17 ` Jonathan Nieder
  0 siblings, 1 reply; 6+ messages in thread
From: Kirill Smelkov @ 2010-11-11 18:08 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, msysgit, Kirill Smelkov, Jonathan Nieder

getenv(3) returns not-permanent buffer which may be changed by e.g.
putenv(3) call (*).

In practice I've noticed this when trying to do `git commit -m abc`
inside msysgit under wine, getting

    $ git commit -m abc
    fatal: could not open 'DIR=.git/COMMIT_EDITMSG': No such file or directory
                           ^^^^
    (notice introduced 'DIR=' artifact.)

The problem was showing itself only with -m option, and actually, as
debugging showed, originally

    git_dir = getenv("GIT_DIR")

returned pointer to

        "GIT_DIR=.git\0"
                 ^
               git_dir

, we stored it in git_dir, than, after processing -m git-commit option,
we did setenv("GIT_EDITOR", ":") which as (*) says changed environment
variables memory layout - something like this

       "...\0GIT_DIR=.git\0"
                 ^
               git_dir

and oops - we got wrong git_dir.

Avoid that by strdupping getenv("GIT_DIR") result like we did in 06f354
(setup: make sure git dir path is in a permanent buffer). Unfortunately
this also shows that other getenv usage inside git needs auditing...

(*) from man 3 getenv:

       The implementation of getenv() is not required to  be  reentrant.   The
       string  pointed  to  by  the return value of getenv() may be statically
       allocated, and can be  modified  by  a  subsequent  call  to  getenv(),
       putenv(3), setenv(3), or unsetenv(3).

Cc: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Kirill Smelkov <kirr@mns.spb.ru>
---
 environment.c |    1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/environment.c b/environment.c
index eaf908b..d5021e8 100644
--- a/environment.c
+++ b/environment.c
@@ -88,6 +88,7 @@ const char * const local_repo_env[LOCAL_REPO_ENV_SIZE + 1] = {
 static void setup_git_env(void)
 {
 	git_dir = getenv(GIT_DIR_ENVIRONMENT);
+	git_dir = git_dir ? xstrdup(git_dir) : NULL;
 	if (!git_dir) {
 		git_dir = read_gitfile_gently(DEFAULT_GIT_DIR_ENVIRONMENT);
 		git_dir = git_dir ? xstrdup(git_dir) : NULL;
-- 
1.7.3.2.161.g3089c

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH, maint] setup: make sure git_dir path is in a permanent buffer, getenv(3) case
  2010-11-11 18:08 [PATCH, maint] setup: make sure git_dir path is in a permanent buffer, getenv(3) case Kirill Smelkov
@ 2010-11-11 18:17 ` Jonathan Nieder
  2010-11-12 14:03   ` Kirill Smelkov
  0 siblings, 1 reply; 6+ messages in thread
From: Jonathan Nieder @ 2010-11-11 18:17 UTC (permalink / raw)
  To: Kirill Smelkov; +Cc: Junio C Hamano, git, msysgit

Kirill Smelkov wrote:

> getenv(3) returns not-permanent buffer which may be changed by e.g.
> putenv(3) call (*).

Yikes.  Thanks for the example.

> --- a/environment.c
> +++ b/environment.c
> @@ -88,6 +88,7 @@ const char * const local_repo_env[LOCAL_REPO_ENV_SIZE + 1] = {
>  static void setup_git_env(void)
>  {
>  	git_dir = getenv(GIT_DIR_ENVIRONMENT);
> +	git_dir = git_dir ? xstrdup(git_dir) : NULL;
>  	if (!git_dir) {
>  		git_dir = read_gitfile_gently(DEFAULT_GIT_DIR_ENVIRONMENT);
>  		git_dir = git_dir ? xstrdup(git_dir) : NULL;

Maybe we can avoid (some) repetition like this?

diff --git a/environment.c b/environment.c
index de5581f..942f1e4 100644
--- a/environment.c
+++ b/environment.c
@@ -87,25 +87,31 @@ const char * const local_repo_env[LOCAL_REPO_ENV_SIZE + 1] = {
 static void setup_git_env(void)
 {
 	git_dir = getenv(GIT_DIR_ENVIRONMENT);
-	if (!git_dir) {
-		git_dir = read_gitfile_gently(DEFAULT_GIT_DIR_ENVIRONMENT);
-		git_dir = git_dir ? xstrdup(git_dir) : NULL;
-	}
 	if (!git_dir)
+		git_dir = read_gitfile_gently(DEFAULT_GIT_DIR_ENVIRONMENT);
+	if (git_dir)
+		git_dir = xstrdup(git_dir);
+	else
 		git_dir = DEFAULT_GIT_DIR_ENVIRONMENT;
+
 	git_object_dir = getenv(DB_ENVIRONMENT);
-	if (!git_object_dir) {
-		git_object_dir = xmalloc(strlen(git_dir) + 9);
-		sprintf(git_object_dir, "%s/objects", git_dir);
-	}
+	if (git_object_dir)
+		git_object_dir = xstrdup(git_object_dir);
+	else
+		git_object_dir = git_pathdup("objects");
+
 	git_index_file = getenv(INDEX_ENVIRONMENT);
-	if (!git_index_file) {
-		git_index_file = xmalloc(strlen(git_dir) + 7);
-		sprintf(git_index_file, "%s/index", git_dir);
-	}
+	if (git_index_file)
+		git_index_file = xstrdup(git_index_file);
+	else
+		git_index_file = git_pathdup("index");
+
 	git_graft_file = getenv(GRAFT_ENVIRONMENT);
-	if (!git_graft_file)
+	if (git_graft_file)
+		git_graft_file = xstrdup(git_graft_file);
+	else
 		git_graft_file = git_pathdup("info/grafts");
+
 	if (getenv(NO_REPLACE_OBJECTS_ENVIRONMENT))
 		read_replace_refs = 0;
 }

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH, maint] setup: make sure git_dir path is in a permanent buffer, getenv(3) case
  2010-11-11 18:17 ` Jonathan Nieder
@ 2010-11-12 14:03   ` Kirill Smelkov
  2010-11-12 16:03     ` Jonathan Nieder
  0 siblings, 1 reply; 6+ messages in thread
From: Kirill Smelkov @ 2010-11-12 14:03 UTC (permalink / raw)
  To: Jonathan Nieder; +Cc: Junio C Hamano, git, msysgit

On Thu, Nov 11, 2010 at 12:17:28PM -0600, Jonathan Nieder wrote:
> Kirill Smelkov wrote:
> 
> > getenv(3) returns not-permanent buffer which may be changed by e.g.
> > putenv(3) call (*).
> 
> Yikes.  Thanks for the example.

Nevermind. However it was not so fun to debug :)

> > --- a/environment.c
> > +++ b/environment.c
> > @@ -88,6 +88,7 @@ const char * const local_repo_env[LOCAL_REPO_ENV_SIZE + 1] = {
> >  static void setup_git_env(void)
> >  {
> >  	git_dir = getenv(GIT_DIR_ENVIRONMENT);
> > +	git_dir = git_dir ? xstrdup(git_dir) : NULL;
> >  	if (!git_dir) {
> >  		git_dir = read_gitfile_gently(DEFAULT_GIT_DIR_ENVIRONMENT);
> >  		git_dir = git_dir ? xstrdup(git_dir) : NULL;
> 
> Maybe we can avoid (some) repetition like this?
> 
> diff --git a/environment.c b/environment.c
> index de5581f..942f1e4 100644
> --- a/environment.c
> +++ b/environment.c
> @@ -87,25 +87,31 @@ const char * const local_repo_env[LOCAL_REPO_ENV_SIZE + 1] = {
>  static void setup_git_env(void)
>  {
>  	git_dir = getenv(GIT_DIR_ENVIRONMENT);
> -	if (!git_dir) {
> -		git_dir = read_gitfile_gently(DEFAULT_GIT_DIR_ENVIRONMENT);
> -		git_dir = git_dir ? xstrdup(git_dir) : NULL;
> -	}
>  	if (!git_dir)
> +		git_dir = read_gitfile_gently(DEFAULT_GIT_DIR_ENVIRONMENT);
> +	if (git_dir)
> +		git_dir = xstrdup(git_dir);
> +	else
>  		git_dir = DEFAULT_GIT_DIR_ENVIRONMENT;
> +
>  	git_object_dir = getenv(DB_ENVIRONMENT);
> -	if (!git_object_dir) {
> -		git_object_dir = xmalloc(strlen(git_dir) + 9);
> -		sprintf(git_object_dir, "%s/objects", git_dir);
> -	}
> +	if (git_object_dir)
> +		git_object_dir = xstrdup(git_object_dir);
> +	else
> +		git_object_dir = git_pathdup("objects");
> +
>  	git_index_file = getenv(INDEX_ENVIRONMENT);
> -	if (!git_index_file) {
> -		git_index_file = xmalloc(strlen(git_dir) + 7);
> -		sprintf(git_index_file, "%s/index", git_dir);
> -	}
> +	if (git_index_file)
> +		git_index_file = xstrdup(git_index_file);
> +	else
> +		git_index_file = git_pathdup("index");
> +
>  	git_graft_file = getenv(GRAFT_ENVIRONMENT);
> -	if (!git_graft_file)
> +	if (git_graft_file)
> +		git_graft_file = xstrdup(git_graft_file);
> +	else
>  		git_graft_file = git_pathdup("info/grafts");
> +

To me it gets hairy and we don't cover all and even most getenv cases.
Look e.g. in commit.c:

    static void determine_author_info(void)
    {
            char *name, *email, *date;
    
            name = getenv("GIT_AUTHOR_NAME");
            email = getenv("GIT_AUTHOR_EMAIL");
            date = getenv("GIT_AUTHOR_DATE");

            /* ... */

            if (signoff) {
                    struct strbuf sob = STRBUF_INIT;
                    int i;
    
                    strbuf_addstr(&sob, sign_off_header);
                    strbuf_addstr(&sob, fmt_name(getenv("GIT_COMMITTER_NAME"),
                                                 getenv("GIT_COMMITTER_EMAIL")));

            /* ... */

notes.c:

    struct notes_rewrite_cfg *init_copy_notes_for_rewrite(const char *cmd)
    {
            struct notes_rewrite_cfg *c = xmalloc(sizeof(struct notes_rewrite_cfg));
            const char *rewrite_mode_env = getenv(GIT_NOTES_REWRITE_MODE_ENVIRONMENT);
            const char *rewrite_refs_env = getenv(GIT_NOTES_REWRITE_REF_ENVIRONMENT);


editor.c:

    const char *git_editor(void)
    {
            const char *editor = getenv("GIT_EDITOR");
            const char *terminal = getenv("TERM");

http-backend.c:

    static void run_service(const char **argv)
    {
            const char *encoding = getenv("HTTP_CONTENT_ENCODING");
            const char *user = getenv("REMOTE_USER");
            const char *host = getenv("REMOTE_ADDR");


etc...


To me, it's very unfortunate that subsequent getenv() could overwrite
previous getenv() result, but according to `man 3 getenv` all these
places are buggy.

Maybe we'll need something like our own xgetenv() which will keep vars
in some kind of hash tab so that get/put on other vars do not interfere
with what was originally returned by xgetenv().

I don't know.

Unfortunately I can't afford myself to dive into all this, so please
choose what you like more.


Thanks,
Kirill

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH, maint] setup: make sure git_dir path is in a permanent buffer, getenv(3) case
  2010-11-12 14:03   ` Kirill Smelkov
@ 2010-11-12 16:03     ` Jonathan Nieder
  2010-11-12 17:20       ` Kirill Smelkov
  0 siblings, 1 reply; 6+ messages in thread
From: Jonathan Nieder @ 2010-11-12 16:03 UTC (permalink / raw)
  To: Kirill Smelkov; +Cc: Junio C Hamano, git, msysgit

Kirill Smelkov wrote:

>     static void run_service(const char **argv)
>     {
>             const char *encoding = getenv("HTTP_CONTENT_ENCODING");
>             const char *user = getenv("REMOTE_USER");
>             const char *host = getenv("REMOTE_ADDR");
> 
> 
> etc...
> 
> 
> To me, it's very unfortunate that subsequent getenv() could overwrite
> previous getenv() result, but according to `man 3 getenv` all these
> places are buggy.

Right, but do we know of any platforms that work that way currently?
We could make getenv() rotate between a few buffers on such platforms
(probably 10 or so would take care of the longest runs).

> Maybe we'll need something like our own xgetenv() which will keep vars
> in some kind of hash tab so that get/put on other vars do not interfere
> with what was originally returned by xgetenv().

For examples that store the result like you pointed out (which store the
result from getenv), something like that would be needed if we want
them to work on platforms where putenv shifts everything.

> Unfortunately I can't afford myself to dive into all this, so please
> choose what you like more.

I think we ought to fix this properly in the end.  But if you want a
quick workaround, maybe the vcs-svn/string_pool lib could help you.

Hope that helps,
Jonathan

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH, maint] setup: make sure git_dir path is in a permanent buffer, getenv(3) case
  2010-11-12 16:03     ` Jonathan Nieder
@ 2010-11-12 17:20       ` Kirill Smelkov
  2010-11-12 18:59         ` [PATCH] tests: add GETENV_POISON option to simulate unfriendly getenv() Jonathan Nieder
  0 siblings, 1 reply; 6+ messages in thread
From: Kirill Smelkov @ 2010-11-12 17:20 UTC (permalink / raw)
  To: Jonathan Nieder; +Cc: Junio C Hamano, git, msysgit

On Fri, Nov 12, 2010 at 10:03:32AM -0600, Jonathan Nieder wrote:
> Kirill Smelkov wrote:
> 
> >     static void run_service(const char **argv)
> >     {
> >             const char *encoding = getenv("HTTP_CONTENT_ENCODING");
> >             const char *user = getenv("REMOTE_USER");
> >             const char *host = getenv("REMOTE_ADDR");
> > 
> > 
> > etc...
> > 
> > 
> > To me, it's very unfortunate that subsequent getenv() could overwrite
> > previous getenv() result, but according to `man 3 getenv` all these
> > places are buggy.
> 
> Right, but do we know of any platforms that work that way currently?

I don't. Actually I was really surprised after reading getenv manual
about that.

> We could make getenv() rotate between a few buffers on such platforms
> (probably 10 or so would take care of the longest runs).

I think it would be hard to get right (is 10 enough? on which platform?
this rarely happens after all...), and also why introduce special case?

> > Maybe we'll need something like our own xgetenv() which will keep vars
> > in some kind of hash tab so that get/put on other vars do not interfere
> > with what was originally returned by xgetenv().
> 
> For examples that store the result like you pointed out (which store the
> result from getenv), something like that would be needed if we want
> them to work on platforms where putenv shifts everything.
> 
> > Unfortunately I can't afford myself to dive into all this, so please
> > choose what you like more.
> 
> I think we ought to fix this properly in the end.  But if you want a
> quick workaround, maybe the vcs-svn/string_pool lib could help you.

No, I'm not in a hurry - better to fix this properly. Though personally,
I've already scratched my itch here.


Thanks,
Kirill

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH] tests: add GETENV_POISON option to simulate unfriendly getenv()
  2010-11-12 17:20       ` Kirill Smelkov
@ 2010-11-12 18:59         ` Jonathan Nieder
  0 siblings, 0 replies; 6+ messages in thread
From: Jonathan Nieder @ 2010-11-12 18:59 UTC (permalink / raw)
  To: Kirill Smelkov; +Cc: Junio C Hamano, git, msysgit

Kirill Smelkov wrote:
> On Fri, Nov 12, 2010 at 10:03:32AM -0600, Jonathan Nieder wrote:

>> Right, but do we know of any platforms that work that way currently?
>
> I don't. Actually I was really surprised after reading getenv manual
> about that.

Here's an artificial one.  If someone shows up interested in cleaning
the getenv() usage, something like this could make it easier to
maintain the result.

Before then, it provides a chance to see how invasive the changes
would need to be to support such a theoretical unfriendly platform.
The results don't look so good.

> No, I'm not in a hurry - better to fix this properly. Though personally,
> I've already scratched my itch here.

Thanks for reporting.

-- 8< --
Subject: add GETENV_POISON option to simulate unfriendly getenv()

Traditionally, getenv() returns a pointer into the environment structure,
and on typical platforms the pointed-to value remains valid until that
environment variable gets a new value.

On some platforms (e.g., wine), unfortunately, it does not remain valid
even after an unrelated setenv() call.

The standard even allows getenv to return its result in a static buffer
(meaning it would not remain valid after another getenv() call).  So if
we want to be maximally portable, we should always copy the return
value from getenv() before fetching another value from the environment.

This patch adds a GETENV_POISON option to demonstrate how hard that
would be.  When GETENV_POISON is set, getenv is replaced with a wrapper
that clobbers its old return value after each call, in the hope that
broken callers might notice.

Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
---
 Makefile               |    4 ++++
 cache.h                |   20 --------------------
 compat/getenv-poison.c |   44 ++++++++++++++++++++++++++++++++++++++++++++
 git-compat-util.h      |   25 +++++++++++++++++++++++++
 4 files changed, 73 insertions(+), 20 deletions(-)
 create mode 100644 compat/getenv-poison.c

diff --git a/Makefile b/Makefile
index 1f1ce04..e16d10e 100644
--- a/Makefile
+++ b/Makefile
@@ -1443,6 +1443,10 @@ ifdef INTERNAL_QSORT
 	COMPAT_CFLAGS += -DINTERNAL_QSORT
 	COMPAT_OBJS += compat/qsort.o
 endif
+ifdef GETENV_POISON
+	COMPAT_CFLAGS += -DGETENV_POISON
+	COMPAT_OBJS += compat/getenv-poison.o
+endif
 ifdef RUNTIME_PREFIX
 	COMPAT_CFLAGS += -DRUNTIME_PREFIX
 endif
diff --git a/cache.h b/cache.h
index 33decd9..574dc8f 100644
--- a/cache.h
+++ b/cache.h
@@ -438,26 +438,6 @@ extern void verify_non_filename(const char *prefix, const char *name);
 
 extern int init_db(const char *template_dir, unsigned int flags);
 
-#define alloc_nr(x) (((x)+16)*3/2)
-
-/*
- * Realloc the buffer pointed at by variable 'x' so that it can hold
- * at least 'nr' entries; the number of entries currently allocated
- * is 'alloc', using the standard growing factor alloc_nr() macro.
- *
- * DO NOT USE any expression with side-effect for 'x', 'nr', or 'alloc'.
- */
-#define ALLOC_GROW(x, nr, alloc) \
-	do { \
-		if ((nr) > alloc) { \
-			if (alloc_nr(alloc) < (nr)) \
-				alloc = (nr); \
-			else \
-				alloc = alloc_nr(alloc); \
-			x = xrealloc((x), alloc * sizeof(*(x))); \
-		} \
-	} while (0)
-
 /* Initialize and use the cache information */
 extern int read_index(struct index_state *);
 extern int read_index_preload(struct index_state *, const char **pathspec);
diff --git a/compat/getenv-poison.c b/compat/getenv-poison.c
new file mode 100644
index 0000000..a88ec85
--- /dev/null
+++ b/compat/getenv-poison.c
@@ -0,0 +1,44 @@
+/*
+ * getenv(3) says:
+ *	The implementation of getenv() is not required to be reentrant.
+ *	The string pointed to by the return value of getenv() may be
+ *	statically allocated, and can be modified by a subsequent call
+ *	to getenv(), putenv(3), setenv(3), or unsetenv(3).
+ *
+ * This file provides an unpleasant but conformant getenv()
+ * implementation, for tests.
+ */
+#include "../git-compat-util.h"
+#undef getenv
+
+static void poison_buffer(char *buf, size_t buflen)
+{
+	if (!buflen)
+		return;
+	memset(buf, '\xa5', buflen - 1);
+	buf[buflen - 1] = '\0';
+}
+
+static void fill_buffer(char **buf, size_t *alloc, const char *str)
+{
+	size_t len = strlen(str) + 1;
+	ALLOC_GROW(*buf, len, *alloc);
+	memcpy(*buf, str, len);
+}
+
+char *gitgetenv(const char *name)
+{
+	static char *envvar_array[2];
+	static size_t envvar_len[2];
+	static unsigned int index;
+	const char *value;
+
+	poison_buffer(envvar_array[index], envvar_len[index]);
+	index = (index + 1) % 2;
+
+	value = getenv(name);
+	if (!value)
+		return NULL;
+	fill_buffer(&envvar_array[index], &envvar_len[index], value);
+	return envvar_array[index];
+}
diff --git a/git-compat-util.h b/git-compat-util.h
index 2af8d3e..1f6a2ce 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -298,6 +298,11 @@ extern ssize_t read_in_full(int fd, void *buf, size_t count);
 extern int gitsetenv(const char *, const char *, int);
 #endif
 
+#ifdef GETENV_POISON
+#define getenv gitgetenv
+extern char *gitgetenv(const char *name);
+#endif
+
 #ifdef NO_MKDTEMP
 #define mkdtemp gitmkdtemp
 extern char *gitmkdtemp(char *);
@@ -421,6 +426,26 @@ static inline int has_extension(const char *filename, const char *ext)
 	return len > extlen && !memcmp(filename + len - extlen, ext, extlen);
 }
 
+#define alloc_nr(x) (((x)+16)*3/2)
+
+/*
+ * Realloc the buffer pointed at by variable 'x' so that it can hold
+ * at least 'nr' entries; the number of entries currently allocated
+ * is 'alloc', using the standard growing factor alloc_nr() macro.
+ *
+ * DO NOT USE any expression with side-effect for 'x', 'nr', or 'alloc'.
+ */
+#define ALLOC_GROW(x, nr, alloc) \
+	do { \
+		if ((nr) > alloc) { \
+			if (alloc_nr(alloc) < (nr)) \
+				alloc = (nr); \
+			else \
+				alloc = alloc_nr(alloc); \
+			x = xrealloc((x), alloc * sizeof(*(x))); \
+		} \
+	} while (0)
+
 /* Sane ctype - no locale, and works with signed chars */
 #undef isascii
 #undef isspace
-- 
1.7.2.3.557.gab647.dirty

^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2010-11-12 19:00 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-11-11 18:08 [PATCH, maint] setup: make sure git_dir path is in a permanent buffer, getenv(3) case Kirill Smelkov
2010-11-11 18:17 ` Jonathan Nieder
2010-11-12 14:03   ` Kirill Smelkov
2010-11-12 16:03     ` Jonathan Nieder
2010-11-12 17:20       ` Kirill Smelkov
2010-11-12 18:59         ` [PATCH] tests: add GETENV_POISON option to simulate unfriendly getenv() Jonathan Nieder

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).