[PATCH v3] http: Add Accept-Language header if possible

git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH v3] http: Add Accept-Language header if possible
@ 2014-07-11 16:52 Yi EungJun
  2014-07-11 17:35 ` Jeff King
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Yi EungJun @ 2014-07-11 16:52 UTC (permalink / raw)
  To: git; +Cc: Yi EungJun, Jeff King, Eric Sunshine, Peter Krefting

Add an Accept-Language header which indicates the user's preferred
languages defined by $LANGUAGE, $LC_ALL, $LC_MESSAGES and $LANG.

Examples:
  LANGUAGE= -> ""
  LANGUAGE=ko:en -> "Accept-Language: ko, en; q=0.9, *; q=0.1"
  LANGUAGE=ko LANG=en_US.UTF-8 -> "Accept-Language: ko, *; q=0.1"
  LANGUAGE= LANG=en_US.UTF-8 -> "Accept-Language: en-US, *; q=0.1"

This gives git servers a chance to display remote error messages in
the user's preferred language.

Signed-off-by: Yi EungJun <eungjun.yi@navercorp.com>
---
 http.c                     | 125 +++++++++++++++++++++++++++++++++++++++++++++
 remote-curl.c              |   2 +
 t/t5550-http-fetch-dumb.sh |  21 ++++++++
 3 files changed, 148 insertions(+)

diff --git a/http.c b/http.c
index 3a28b21..a20f3e2 100644
--- a/http.c
+++ b/http.c
@@ -983,6 +983,129 @@ static void extract_content_type(struct strbuf *raw, struct strbuf *type,
 		strbuf_addstr(charset, "ISO-8859-1");
 }
 
+/*
+ * Guess the user's preferred languages from the value in LANGUAGE environment
+ * variable and LC_MESSAGES locale category.
+ *
+ * The result can be a colon-separated list like "ko:ja:en".
+ */
+static const char* get_preferred_languages() {
+    const char* retval;
+
+	retval = getenv("LANGUAGE");
+	if (retval != NULL && retval[0] != '\0')
+		return retval;
+
+	retval = setlocale(LC_MESSAGES, NULL);
+	if (retval != NULL && retval[0] != '\0'
+		&& strcmp(retval, "C") != 0
+		&& strcmp(retval, "POSIX") != 0)
+		return retval;
+
+	return NULL;
+}
+
+/*
+ * Add an Accept-Language header which indicates user's preferred languages.
+ *
+ * Examples:
+ *   LANGUAGE= -> ""
+ *   LANGUAGE=ko:en -> "Accept-Language: ko, en; q=0.9, *; q=0.1"
+ *   LANGUAGE=ko_KR.UTF-8:sr@latin -> "Accept-Language: ko-KR, sr; q=0.9, *; q=0.1"
+ *   LANGUAGE=ko LANG=en_US.UTF-8 -> "Accept-Language: ko, *; q=0.1"
+ *   LANGUAGE= LANG=en_US.UTF-8 -> "Accept-Language: en-US, *; q=0.1"
+ *   LANGUAGE= LANG=C -> ""
+ */
+static struct curl_slist* add_accept_language(struct curl_slist *headers)
+{
+	const char *p1, *p2, *p3;
+	struct strbuf buf = STRBUF_INIT;
+	float q = 1.0;
+	float q_precision = 0.1;
+	int num_langs = 1;
+	char* q_format = "; q=%.1f";
+
+	p1 = get_preferred_languages();
+
+	/* Don't add Accept-Language header if no language is preferred. */
+	if (p1 == NULL || p1[0] == '\0') {
+		strbuf_release(&buf);
+		return headers;
+	}
+
+	/* Count number of preferred languages to decide precision of q-factor */
+	for (p3 = p1; *p3 != '\0'; p3++) {
+		if (*p3 == ':') {
+			num_langs++;
+		}
+	}
+
+	/* Decide the precision for q-factor on number of preferred languages. */
+	if (num_langs + 1 > 100) { /* +1 is for '*' */
+		q_precision = 0.001;
+		q_format = "; q=%.3f";
+	} else if (num_langs + 1 > 10) { /* +1 is for '*' */
+		q_precision = 0.01;
+		q_format = "; q=%.2f";
+	}
+
+	strbuf_addstr(&buf, "Accept-Language: ");
+
+	for (p2 = p1; q > q_precision; p2++) {
+		if ((*p2 == ':' || *p2 == '\0') && p1 != p2) {
+			if (q < 1.0) {
+				strbuf_addstr(&buf, ", ");
+			}
+
+			for (p3 = p1; p3 < p2; p3++) {
+				/* Replace '_' with '-'. */
+				if (*p3 == '_') {
+					strbuf_add(&buf, p1, p3 - p1);
+					strbuf_addstr(&buf, "-");
+					p1 = p3 + 1;
+				}
+
+				/* Chop off anything after '.' or '@'. */
+				if ((*p3 == '.' || *p3 == '@')) {
+					break;
+				}
+			}
+
+			if (p3 > p1) {
+				strbuf_add(&buf, p1, p3 - p1);
+			}
+
+			/* Put the q factor if only it is less than 1.0. */
+			if (q < 1.0) {
+				strbuf_addf(&buf, q_format, q);
+			}
+
+			q -= q_precision;
+			p1 = p2 + 1;
+
+			if (*p2 == '\0') {
+				break;
+			}
+		}
+	}
+
+	/* Don't add Accept-Language header if no language is preferred. */
+	if (q >= 1.0) {
+		strbuf_release(&buf);
+		return headers;
+	}
+
+	/* Add '*' with minimum q-factor greater than 0.0. */
+	strbuf_addstr(&buf, ", *");
+	strbuf_addf(&buf, q_format, q_precision);
+
+	headers = curl_slist_append(headers, buf.buf);
+
+	strbuf_release(&buf);
+
+	return headers;
+}
+
 /* http_request() targets */
 #define HTTP_REQUEST_STRBUF	0
 #define HTTP_REQUEST_FILE	1
@@ -1020,6 +1143,8 @@ static int http_request(const char *url,
 					 fwrite_buffer);
 	}
 
+	headers = add_accept_language(headers);
+
 	strbuf_addstr(&buf, "Pragma:");
 	if (options && options->no_cache)
 		strbuf_addstr(&buf, " no-cache");
diff --git a/remote-curl.c b/remote-curl.c
index 4493b38..07f2a5d 100644
--- a/remote-curl.c
+++ b/remote-curl.c
@@ -946,6 +946,8 @@ int main(int argc, const char **argv)
 	struct strbuf buf = STRBUF_INIT;
 	int nongit;
 
+	git_setup_gettext();
+
 	git_extract_argv0_path(argv[0]);
 	setup_git_directory_gently(&nongit);
 	if (argc < 2) {
diff --git a/t/t5550-http-fetch-dumb.sh b/t/t5550-http-fetch-dumb.sh
index ac71418..2cc41d2 100755
--- a/t/t5550-http-fetch-dumb.sh
+++ b/t/t5550-http-fetch-dumb.sh
@@ -196,5 +196,26 @@ test_expect_success 'reencoding is robust to whitespace oddities' '
 	grep "this is the error message" stderr
 '
 
+test_expect_success 'git client sends Accept-Language based on LANGUAGE, LC_ALL, LC_MESSAGES and LANG' '
+	test_must_fail env GIT_CURL_VERBOSE=1 LANGUAGE=ko_KR.UTF-8 LC_ALL=de_DE.UTF-8 LC_MESSAGES=ja_JP.UTF-8 LANG=en_US.UTF-8 git clone "$HTTPD_URL/accept/language" 2>stderr &&
+	grep "^Accept-Language: ko-KR, \\*; q=0.1" stderr &&
+	test_must_fail env GIT_CURL_VERBOSE=1 LANGUAGE= LC_ALL=de_DE.UTF-8 LC_MESSAGES=ja_JP.UTF-8 LANG=en_US.UTF-8 git clone "$HTTPD_URL/accept/language" 2>stderr &&
+	grep "^Accept-Language: de-DE, \\*; q=0.1" stderr &&
+	test_must_fail env GIT_CURL_VERBOSE=1 LANGUAGE= LC_ALL= LC_MESSAGES=ja_JP.UTF-8 LANG=en_US.UTF-8 git clone "$HTTPD_URL/accept/language" 2>stderr &&
+	grep "^Accept-Language: ja-JP, \\*; q=0.1" stderr &&
+	test_must_fail env GIT_CURL_VERBOSE=1 LANGUAGE= LC_ALL= LC_MESSAGES= LANG=en_US.UTF-8 git clone "$HTTPD_URL/accept/language" 2>stderr &&
+	grep "^Accept-Language: en-US, \\*; q=0.1" stderr
+'
+
+test_expect_success 'git client sends Accept-Language with many preferred languages' '
+	test_must_fail env GIT_CURL_VERBOSE=1 LANGUAGE=ko_KR.EUC-KR:en_US.UTF-8:fr_CA:de.UTF-8@euro:sr@latin:ja:zh:sv:pt:nb git clone "$HTTPD_URL/accept/language" 2>stderr &&
+	grep "^Accept-Language: ko-KR, en-US; q=0.99, fr-CA; q=0.98, de; q=0.97, sr; q=0.96, ja; q=0.95, zh; q=0.94, sv; q=0.93, pt; q=0.92, nb; q=0.91, \\*; q=0.01" stderr
+'
+
+test_expect_success 'git client does not send Accept-Language' '
+	test_must_fail env GIT_CURL_VERBOSE=1 LANGUAGE= git clone "$HTTPD_URL/accept/language" 2>stderr &&
+	! grep "^Accept-Language:" stderr
+'
+
 stop_httpd
 test_done
-- 
2.0.1.473.g4b69a73.dirty

The only difference from the patch v2 is \-escape in the tests. Thanks to Eric Sunshine.

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH v3] http: Add Accept-Language header if possible
  2014-07-11 16:52 [PATCH v3] http: Add Accept-Language header if possible Yi EungJun
@ 2014-07-11 17:35 ` Jeff King
  2014-07-11 22:29   ` Eric Sunshine
  2014-07-12 18:52   ` Yi, EungJun
  2014-07-12 10:16 ` Peter Krefting
  2014-07-13  4:26 ` Eric Sunshine
  2 siblings, 2 replies; 9+ messages in thread
From: Jeff King @ 2014-07-11 17:35 UTC (permalink / raw)
  To: Yi EungJun; +Cc: git, Yi EungJun, Eric Sunshine, Peter Krefting

On Sat, Jul 12, 2014 at 01:52:53AM +0900, Yi EungJun wrote:

> Add an Accept-Language header which indicates the user's preferred
> languages defined by $LANGUAGE, $LC_ALL, $LC_MESSAGES and $LANG.
> 
> Examples:
>   LANGUAGE= -> ""
>   LANGUAGE=ko:en -> "Accept-Language: ko, en; q=0.9, *; q=0.1"
>   LANGUAGE=ko LANG=en_US.UTF-8 -> "Accept-Language: ko, *; q=0.1"
>   LANGUAGE= LANG=en_US.UTF-8 -> "Accept-Language: en-US, *; q=0.1"
> 
> This gives git servers a chance to display remote error messages in
> the user's preferred language.

Thanks, this is looking much nicer. Most of my comments are on style:

> +static const char* get_preferred_languages() {
> +    const char* retval;

A few style nits:

  1. We usually put a function's opening brace on a new line.

  2. We usually put the asterisk in a pointer declaration with the
     variable name ("const char *retval"). This one appears elsewhere in
     the patch.

  3. This line seems to be indented with spaces instead of tabs.

> +	retval = getenv("LANGUAGE");
> +	if (retval != NULL && retval[0] != '\0')
> +		return retval;
> +
> +	retval = setlocale(LC_MESSAGES, NULL);
> +	if (retval != NULL && retval[0] != '\0'
> +		&& strcmp(retval, "C") != 0
> +		&& strcmp(retval, "POSIX") != 0)
> +		return retval;

More style nits: we usually avoid writing out explicit comparisons with
NULL (and often with zero). So I'd write this as:

  if (retval && *retval &&
      strcmp(retval, "C") &&
      strcmp(retval, "POSIX"))

but I think the NULL one is the only one we usually enforce explicitly.

> +	const char *p1, *p2, *p3;

I had trouble following the logic with these variable names. Is it
possible to give them more descriptive names?

> +	/* Don't add Accept-Language header if no language is preferred. */
> +	if (p1 == NULL || p1[0] == '\0') {
> +		strbuf_release(&buf);
> +		return headers;
> +	}

No need to strbuf_release a buffer that hasn't been used (assigning
STRBUF_INIT does not count as use).

> +	/* Count number of preferred languages to decide precision of q-factor */
> +	for (p3 = p1; *p3 != '\0'; p3++) {
> +		if (*p3 == ':') {
> +			num_langs++;
> +		}
> +	}

Style: we usually omit braces for one-liners. So:

  for (p3 = p1; *p3; p3++)
	if (*p3 == ':')
		num_langs++;

(and elsewhere).

> +	/* Decide the precision for q-factor on number of preferred languages. */
> +	if (num_langs + 1 > 100) { /* +1 is for '*' */
> +		q_precision = 0.001;
> +		q_format = "; q=%.3f";
> +	} else if (num_langs + 1 > 10) { /* +1 is for '*' */
> +		q_precision = 0.01;
> +		q_format = "; q=%.2f";
> +	}

I don't mind this auto-precision too much, but I'm not sure it buys us
anything. We are still setting a hard-limit at 100, and it just means we
write "0.1" instead of "0.001" sometimes.

> +	headers = curl_slist_append(headers, buf.buf);
> +
> +	strbuf_release(&buf);

Do all versions of curl make a copy of buf.buf here?

I seem to recall that older versions want pointers passed to
curl_easy_setopt to stay around for the duration of the request (I do
not know about curl_slist, though).

> @@ -1020,6 +1143,8 @@ static int http_request(const char *url,
>  					 fwrite_buffer);
>  	}
>  
> +	headers = add_accept_language(headers);

This means we do the whole parsing routine for each request we make (for
dumb-http, this can be quite a lot, though I suppose the parsing is not
especially expensive). Should we perhaps just cache the result in a
static strbuf? That would also make the pointer-lifetime issue above go
away.

> --- a/remote-curl.c
> +++ b/remote-curl.c
> @@ -946,6 +946,8 @@ int main(int argc, const char **argv)
>  	struct strbuf buf = STRBUF_INIT;
>  	int nongit;
>  
> +	git_setup_gettext();

Oops. We probably should have been doing this all along to localize the
messages on our side.

> +test_expect_success 'git client sends Accept-Language based on LANGUAGE, LC_ALL, LC_MESSAGES and LANG' '
> +	test_must_fail env GIT_CURL_VERBOSE=1 LANGUAGE=ko_KR.UTF-8 LC_ALL=de_DE.UTF-8 LC_MESSAGES=ja_JP.UTF-8 LANG=en_US.UTF-8 git clone "$HTTPD_URL/accept/language" 2>stderr &&

We usually try to avoid long lines (you can break them up with "\").

> +	grep "^Accept-Language: ko-KR, \\*; q=0.1" stderr &&

I see you noticed the extra level of quoting necessary between v2 and
v3. :)

I wonder if these test cases might be more readable with a helper
function like:

  check_language () {
	echo "Accept-Language: $1" >expect &&
	test_must_fail env \
		GIT_CURL_VERBOSE=1 \
		LANGUAGE=$2 \
		LC_ALL=$3 \
		LC_MESSAGES=$4 \
		LANG=$5 \
		git clone "$HTTPD_URL/accept/language" 2>stderr &&
	grep -i ^Accept-Language: stderr >actual &&
	test_cmp expect actual
  }

which lets you write:

  check_language "de-DE, *; q=0.1" "" de_DE.UTF-8 ja_JP.UTF-8 en_US.UTF-8

and so on. I dunno if that is more readable or not.

-Peff

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v3] http: Add Accept-Language header if possible
  2014-07-11 17:35 ` Jeff King
@ 2014-07-11 22:29   ` Eric Sunshine
  2014-07-12 18:52   ` Yi, EungJun
  1 sibling, 0 replies; 9+ messages in thread
From: Eric Sunshine @ 2014-07-11 22:29 UTC (permalink / raw)
  To: Jeff King; +Cc: Yi EungJun, Git List, Yi EungJun, Peter Krefting

On Fri, Jul 11, 2014 at 1:35 PM, Jeff King <peff@peff.net> wrote:
> On Sat, Jul 12, 2014 at 01:52:53AM +0900, Yi EungJun wrote:
>> Add an Accept-Language header which indicates the user's preferred
>> languages defined by $LANGUAGE, $LC_ALL, $LC_MESSAGES and $LANG.
>>
>> Examples:
>>   LANGUAGE= -> ""
>>   LANGUAGE=ko:en -> "Accept-Language: ko, en; q=0.9, *; q=0.1"
>>   LANGUAGE=ko LANG=en_US.UTF-8 -> "Accept-Language: ko, *; q=0.1"
>>   LANGUAGE= LANG=en_US.UTF-8 -> "Accept-Language: en-US, *; q=0.1"
>>
>> This gives git servers a chance to display remote error messages in
>> the user's preferred language.
>
> Thanks, this is looking much nicer. Most of my comments are on style:
>
>> +static const char* get_preferred_languages() {
>> +    const char* retval;
>
> A few style nits:

Also, this is C, not C++, so don't forget void:

    static const char *get_preferred_languages(void)
    {

>   1. We usually put a function's opening brace on a new line.
>
>   2. We usually put the asterisk in a pointer declaration with the
>      variable name ("const char *retval"). This one appears elsewhere in
>      the patch.
>
>   3. This line seems to be indented with spaces instead of tabs.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v3] http: Add Accept-Language header if possible
  2014-07-11 16:52 [PATCH v3] http: Add Accept-Language header if possible Yi EungJun
  2014-07-11 17:35 ` Jeff King
@ 2014-07-12 10:16 ` Peter Krefting
  2014-07-13  4:26 ` Eric Sunshine
  2 siblings, 0 replies; 9+ messages in thread
From: Peter Krefting @ 2014-07-12 10:16 UTC (permalink / raw)
  To: Yi EungJun; +Cc: Git Mailing List, Jeff King, Eric Sunshine

Yi EungJun:

> Add an Accept-Language header which indicates the user's preferred
> languages defined by $LANGUAGE, $LC_ALL, $LC_MESSAGES and $LANG.

This one seems to fix all the issues I had with the first patch, 
thanks!

-- 
\\// Peter - http://www.softwolves.pp.se/

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v3] http: Add Accept-Language header if possible
  2014-07-11 17:35 ` Jeff King
  2014-07-11 22:29   ` Eric Sunshine
@ 2014-07-12 18:52   ` Yi, EungJun
  1 sibling, 0 replies; 9+ messages in thread
From: Yi, EungJun @ 2014-07-12 18:52 UTC (permalink / raw)
  To: Jeff King; +Cc: git, Yi EungJun, Eric Sunshine, Peter Krefting

Thanks for your detailed review and nice suggestions. I will accept
most of them.

2014-07-12 2:35 GMT+09:00 Jeff King <peff@peff.net>:
>> +     /* Decide the precision for q-factor on number of preferred languages. */
>> +     if (num_langs + 1 > 100) { /* +1 is for '*' */
>> +             q_precision = 0.001;
>> +             q_format = "; q=%.3f";
>> +     } else if (num_langs + 1 > 10) { /* +1 is for '*' */
>> +             q_precision = 0.01;
>> +             q_format = "; q=%.2f";
>> +     }
>
> I don't mind this auto-precision too much, but I'm not sure it buys us
> anything. We are still setting a hard-limit at 100, and it just means we
> write "0.1" instead of "0.001" sometimes.

It means we use "0.1" if possible.

>From my observation, many major web browsers doesn't or didn't send
q-factor of 2 or 3 decimal places. Google chrome doesn't currently and
Mozilla firefox also didn't before 2012 [1]. I think it means some old
and naive web servers may not support q-factor of 2 or 3 decimal
places because major web browsers don't send it. So I think we should
use "0.1" if possible for interoperability with the buggy servers.

But, quite frankly, it is just a possibility and I have no evidence
which proves that such kind of buggy servers really exist. Please let
me know if anybody know about it.

[1]: http://hg.mozilla.org/integration/mozilla-inbound/rev/b0b07ef904ea

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v3] http: Add Accept-Language header if possible
  2014-07-11 16:52 [PATCH v3] http: Add Accept-Language header if possible Yi EungJun
  2014-07-11 17:35 ` Jeff King
  2014-07-12 10:16 ` Peter Krefting
@ 2014-07-13  4:26 ` Eric Sunshine
  2014-07-13  9:51   ` Yi, EungJun
  2 siblings, 1 reply; 9+ messages in thread
From: Eric Sunshine @ 2014-07-13  4:26 UTC (permalink / raw)
  To: Yi EungJun; +Cc: Git List, Yi EungJun, Jeff King, Peter Krefting

On Fri, Jul 11, 2014 at 12:52 PM, Yi EungJun <semtlenori@gmail.com> wrote:
> Add an Accept-Language header which indicates the user's preferred
> languages defined by $LANGUAGE, $LC_ALL, $LC_MESSAGES and $LANG.
>
> Examples:
>   LANGUAGE= -> ""
>   LANGUAGE=ko:en -> "Accept-Language: ko, en; q=0.9, *; q=0.1"
>   LANGUAGE=ko LANG=en_US.UTF-8 -> "Accept-Language: ko, *; q=0.1"
>   LANGUAGE= LANG=en_US.UTF-8 -> "Accept-Language: en-US, *; q=0.1"
>
> This gives git servers a chance to display remote error messages in
> the user's preferred language.
>
> Signed-off-by: Yi EungJun <eungjun.yi@navercorp.com>
> ---
> diff --git a/http.c b/http.c
> index 3a28b21..a20f3e2 100644
> --- a/http.c
> +++ b/http.c
> @@ -983,6 +983,129 @@ static void extract_content_type(struct strbuf *raw, struct strbuf *type,
>                 strbuf_addstr(charset, "ISO-8859-1");
>  }
>
> +/*
> + * Guess the user's preferred languages from the value in LANGUAGE environment
> + * variable and LC_MESSAGES locale category.
> + *
> + * The result can be a colon-separated list like "ko:ja:en".
> + */
> +static const char* get_preferred_languages() {
> +    const char* retval;
> +
> +       retval = getenv("LANGUAGE");
> +       if (retval != NULL && retval[0] != '\0')
> +               return retval;
> +
> +       retval = setlocale(LC_MESSAGES, NULL);
> +       if (retval != NULL && retval[0] != '\0'
> +               && strcmp(retval, "C") != 0
> +               && strcmp(retval, "POSIX") != 0)
> +               return retval;
> +
> +       return NULL;
> +}
> +
> +/*
> + * Add an Accept-Language header which indicates user's preferred languages.
> + *
> + * Examples:
> + *   LANGUAGE= -> ""
> + *   LANGUAGE=ko:en -> "Accept-Language: ko, en; q=0.9, *; q=0.1"
> + *   LANGUAGE=ko_KR.UTF-8:sr@latin -> "Accept-Language: ko-KR, sr; q=0.9, *; q=0.1"
> + *   LANGUAGE=ko LANG=en_US.UTF-8 -> "Accept-Language: ko, *; q=0.1"
> + *   LANGUAGE= LANG=en_US.UTF-8 -> "Accept-Language: en-US, *; q=0.1"
> + *   LANGUAGE= LANG=C -> ""
> + */
> +static struct curl_slist* add_accept_language(struct curl_slist *headers)
> +{
> +       const char *p1, *p2, *p3;
> +       struct strbuf buf = STRBUF_INIT;
> +       float q = 1.0;
> +       float q_precision = 0.1;
> +       int num_langs = 1;
> +       char* q_format = "; q=%.1f";

This can be 'const char *'.

> +
> +       p1 = get_preferred_languages();
> +
> +       /* Don't add Accept-Language header if no language is preferred. */
> +       if (p1 == NULL || p1[0] == '\0') {
> +               strbuf_release(&buf);
> +               return headers;
> +       }
> +
> +       /* Count number of preferred languages to decide precision of q-factor */
> +       for (p3 = p1; *p3 != '\0'; p3++) {
> +               if (*p3 == ':') {
> +                       num_langs++;
> +               }
> +       }
> +
> +       /* Decide the precision for q-factor on number of preferred languages. */
> +       if (num_langs + 1 > 100) { /* +1 is for '*' */
> +               q_precision = 0.001;
> +               q_format = "; q=%.3f";
> +       } else if (num_langs + 1 > 10) { /* +1 is for '*' */
> +               q_precision = 0.01;
> +               q_format = "; q=%.2f";
> +       }

It might make sense to have a final 'else' here which sets these
variables for the 0.1 case so that the reader of the code doesn't have
to refer back to the top of the function to figure out what is going
on.

    } else {
        q_precision = 0.1;
        q_format = "; q=%.1f";
    }

Better yet, would it be possible to compute these values rather than
having to set them manually via a cascading if-chain?

> +
> +       strbuf_addstr(&buf, "Accept-Language: ");
> +
> +       for (p2 = p1; q > q_precision; p2++) {
> +               if ((*p2 == ':' || *p2 == '\0') && p1 != p2) {
> +                       if (q < 1.0) {
> +                               strbuf_addstr(&buf, ", ");
> +                       }
> +
> +                       for (p3 = p1; p3 < p2; p3++) {
> +                               /* Replace '_' with '-'. */
> +                               if (*p3 == '_') {
> +                                       strbuf_add(&buf, p1, p3 - p1);
> +                                       strbuf_addstr(&buf, "-");
> +                                       p1 = p3 + 1;
> +                               }
> +
> +                               /* Chop off anything after '.' or '@'. */
> +                               if ((*p3 == '.' || *p3 == '@')) {
> +                                       break;
> +                               }
> +                       }
> +
> +                       if (p3 > p1) {
> +                               strbuf_add(&buf, p1, p3 - p1);
> +                       }
> +
> +                       /* Put the q factor if only it is less than 1.0. */
> +                       if (q < 1.0) {
> +                               strbuf_addf(&buf, q_format, q);
> +                       }
> +
> +                       q -= q_precision;
> +                       p1 = p2 + 1;
> +
> +                       if (*p2 == '\0') {
> +                               break;
> +                       }
> +               }
> +       }
> +
> +       /* Don't add Accept-Language header if no language is preferred. */
> +       if (q >= 1.0) {
> +               strbuf_release(&buf);
> +               return headers;
> +       }
> +
> +       /* Add '*' with minimum q-factor greater than 0.0. */
> +       strbuf_addstr(&buf, ", *");
> +       strbuf_addf(&buf, q_format, q_precision);
> +
> +       headers = curl_slist_append(headers, buf.buf);
> +
> +       strbuf_release(&buf);
> +
> +       return headers;
> +}
> +

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v3] http: Add Accept-Language header if possible
  2014-07-13  4:26 ` Eric Sunshine
@ 2014-07-13  9:51   ` Yi, EungJun
  2014-07-13 16:57     ` Junio C Hamano
  0 siblings, 1 reply; 9+ messages in thread
From: Yi, EungJun @ 2014-07-13  9:51 UTC (permalink / raw)
  To: Eric Sunshine; +Cc: Git List, Yi EungJun, Jeff King, Peter Krefting

2014-07-13 13:26 GMT+09:00 Eric Sunshine <sunshine@sunshineco.com>:
>> +       /* Decide the precision for q-factor on number of preferred languages. */
>> +       if (num_langs + 1 > 100) { /* +1 is for '*' */
>> +               q_precision = 0.001;
>> +               q_format = "; q=%.3f";
>> +       } else if (num_langs + 1 > 10) { /* +1 is for '*' */
>> +               q_precision = 0.01;
>> +               q_format = "; q=%.2f";
>> +       }
>
> It might make sense to have a final 'else' here which sets these
> variables for the 0.1 case so that the reader of the code doesn't have
> to refer back to the top of the function to figure out what is going
> on.
>
>     } else {
>         q_precision = 0.1;
>         q_format = "; q=%.1f";
>     }
>
> Better yet, would it be possible to compute these values rather than
> having to set them manually via a cascading if-chain?

I think it is possible like this:

    num_langs += 1; /* for '*' */
    decimal_places = 1 + (num_langs > 10) + (num_langs > 100);
    snprintf(q_format, sizeof(q_format), "; q=%%.%df", decimal_places);
    for (q_precision = 1.0; decimal_places-- > 0;) q_precision /= 10;

Does this one look better than before? I'm not sure which one is better.

ps. The last line can be simpler by using pow() but I'm not sure it is
okay to include math.h.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v3] http: Add Accept-Language header if possible
  2014-07-13  9:51   ` Yi, EungJun
@ 2014-07-13 16:57     ` Junio C Hamano
  2014-07-14  0:42       ` Yi, EungJun
  0 siblings, 1 reply; 9+ messages in thread
From: Junio C Hamano @ 2014-07-13 16:57 UTC (permalink / raw)
  To: semtlenori; +Cc: Eric Sunshine, Git List, Yi EungJun, Jeff King, Peter Krefting

"Yi, EungJun" <semtlenori@gmail.com> writes:

> I think it is possible like this:
>
>     num_langs += 1; /* for '*' */
>     decimal_places = 1 + (num_langs > 10) + (num_langs > 100);
>     snprintf(q_format, sizeof(q_format), "; q=%%.%df", decimal_places);
>     for (q_precision = 1.0; decimal_places-- > 0;) q_precision /= 10;
>
> Does this one look better than before? I'm not sure which one is better.
>
> ps. The last line can be simpler by using pow() but I'm not sure it is
> okay to include math.h.

If you do not want floating point (and I think we tend to avoid it
when we do not need it), you can realize that in your use of "0.1"
and "0.01" and "0.001" there is nothing fundamentally floating-point;
you can measure how many digits below the two-byte string zero-dot
you would want upfront (by counting num_langs), and show an integer
counter zero-padded to the left to that width.

That would avoid having to even worry about a possible funny case
where subtracting 0.01 ten times from 0.1 may not yield zero (or the
result of subtracting nine times may not reach 0.01) due to rounding
errors accumulating, which was the first thing that came to my mind
when I saw your loop.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v3] http: Add Accept-Language header if possible
  2014-07-13 16:57     ` Junio C Hamano
@ 2014-07-14  0:42       ` Yi, EungJun
  0 siblings, 0 replies; 9+ messages in thread
From: Yi, EungJun @ 2014-07-14  0:42 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Eric Sunshine, Git List, Yi EungJun, Jeff King, Peter Krefting

2014-07-14 1:57 GMT+09:00 Junio C Hamano <gitster@pobox.com>:
>
> If you do not want floating point (and I think we tend to avoid it
> when we do not need it), you can realize that in your use of "0.1"
> and "0.01" and "0.001" there is nothing fundamentally floating-point;
> you can measure how many digits below the two-byte string zero-dot
> you would want upfront (by counting num_langs), and show an integer
> counter zero-padded to the left to that width.
>
> That would avoid having to even worry about a possible funny case
> where subtracting 0.01 ten times from 0.1 may not yield zero (or the
> result of subtracting nine times may not reach 0.01) due to rounding
> errors accumulating, which was the first thing that came to my mind
> when I saw your loop.

You're right; We don't need floating point numbers. I'll try to fix it.

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2014-07-14  0:42 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-07-11 16:52 [PATCH v3] http: Add Accept-Language header if possible Yi EungJun
2014-07-11 17:35 ` Jeff King
2014-07-11 22:29   ` Eric Sunshine
2014-07-12 18:52   ` Yi, EungJun
2014-07-12 10:16 ` Peter Krefting
2014-07-13  4:26 ` Eric Sunshine
2014-07-13  9:51   ` Yi, EungJun
2014-07-13 16:57     ` Junio C Hamano
2014-07-14  0:42       ` Yi, EungJun

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).