git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* Improve on 'approxidate'
@ 2009-08-22 22:10 Linus Torvalds
  2009-08-23  1:11 ` Further 'approxidate' improvements Linus Torvalds
  2009-08-30 22:35 ` Improve on 'approxidate' Jeff King
  0 siblings, 2 replies; 6+ messages in thread
From: Linus Torvalds @ 2009-08-22 22:10 UTC (permalink / raw)
  To: Git Mailing List, Junio C Hamano


This is not a new failure mode - approxidate has always been kind of 
random in the input it accepts, but some of the randomness is more 
irritating than others.

For example:

	Jun 6, 5AM -> Mon Jun 22 05:00:00 2009
	5AM Jun 6 -> Sat Jun  6 05:00:00 2009

Whaa? The reason for the above is that approxidate squirrells away the '6' 
from "Jun 6" to see if it's going to be a relative number, and then 
forgets about it when it sees a new number (the '5' in '5AM'). So the odd 
"June 22" date is because today is July 22nd, and if it doesn't have 
another day of the month, it will just pick todays mday - having ignored 
the '6' entirely due to getting all excited about seeing a new number (5).

There are other oddnesses. This does not fix them all, but I think it 
makes for fewer _really_ perplexing cases. At least now we have

	Jun 6, 5AM -> Sat Jun  6 05:00:00 2009
	5AM, Jun 6 -> Sat Jun  6 05:00:00 2009

which makes me happier. I can still point to cases that don't work as 
well, but those are separate issues.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 date.c |   93 +++++++++++++++++++++++++++++++++++++++++++--------------------
 1 files changed, 63 insertions(+), 30 deletions(-)

diff --git a/date.c b/date.c
index 409a17d..51c6461 100644
--- a/date.c
+++ b/date.c
@@ -525,11 +525,8 @@ static int match_digit(const char *date, struct tm *tm, int *offset, int *tm_gmt
 		}
 	}
 
-	if (num > 0 && num < 32) {
-		tm->tm_mday = num;
-	} else if (num > 0 && num < 13) {
+	if (num > 0 && num < 13 && tm->tm_mon < 0)
 		tm->tm_mon = num-1;
-	}
 
 	return n;
 }
@@ -657,42 +654,59 @@ void datestamp(char *buf, int bufsize)
 	date_string(now, offset, buf, bufsize);
 }
 
-static void update_tm(struct tm *tm, unsigned long sec)
+/*
+ * Relative time update (eg "2 days ago").  If we haven't set the time
+ * yet, we need to set it from current time.
+ */
+static unsigned long update_tm(struct tm *tm, struct tm *now, unsigned long sec)
 {
-	time_t n = mktime(tm) - sec;
+	time_t n;
+
+	if (tm->tm_mday < 0)
+		tm->tm_mday = now->tm_mday;
+	if (tm->tm_mon < 0)
+		tm->tm_mon = now->tm_mon;
+	if (tm->tm_year < 0) {
+		tm->tm_year = now->tm_year;
+		if (tm->tm_mon > now->tm_mon)
+			tm->tm_year--;
+	}
+
+	n = mktime(tm) - sec;
 	localtime_r(&n, tm);
+	return n;
 }
 
-static void date_yesterday(struct tm *tm, int *num)
+static void date_yesterday(struct tm *tm, struct tm *now, int *num)
 {
-	update_tm(tm, 24*60*60);
+	update_tm(tm, now, 24*60*60);
 }
 
-static void date_time(struct tm *tm, int hour)
+static void date_time(struct tm *tm, struct tm *now, int hour)
 {
 	if (tm->tm_hour < hour)
-		date_yesterday(tm, NULL);
+		date_yesterday(tm, now, NULL);
 	tm->tm_hour = hour;
 	tm->tm_min = 0;
 	tm->tm_sec = 0;
 }
 
-static void date_midnight(struct tm *tm, int *num)
+static void date_midnight(struct tm *tm, struct tm *now, int *num)
 {
-	date_time(tm, 0);
+	date_time(tm, now, 0);
 }
 
-static void date_noon(struct tm *tm, int *num)
+static void date_noon(struct tm *tm, struct tm *now, int *num)
 {
-	date_time(tm, 12);
+	date_time(tm, now, 12);
 }
 
-static void date_tea(struct tm *tm, int *num)
+static void date_tea(struct tm *tm, struct tm *now, int *num)
 {
-	date_time(tm, 17);
+	date_time(tm, now, 17);
 }
 
-static void date_pm(struct tm *tm, int *num)
+static void date_pm(struct tm *tm, struct tm *now, int *num)
 {
 	int hour, n = *num;
 	*num = 0;
@@ -706,7 +720,7 @@ static void date_pm(struct tm *tm, int *num)
 	tm->tm_hour = (hour % 12) + 12;
 }
 
-static void date_am(struct tm *tm, int *num)
+static void date_am(struct tm *tm, struct tm *now, int *num)
 {
 	int hour, n = *num;
 	*num = 0;
@@ -720,7 +734,7 @@ static void date_am(struct tm *tm, int *num)
 	tm->tm_hour = (hour % 12);
 }
 
-static void date_never(struct tm *tm, int *num)
+static void date_never(struct tm *tm, struct tm *now, int *num)
 {
 	time_t n = 0;
 	localtime_r(&n, tm);
@@ -728,7 +742,7 @@ static void date_never(struct tm *tm, int *num)
 
 static const struct special {
 	const char *name;
-	void (*fn)(struct tm *, int *);
+	void (*fn)(struct tm *, struct tm *, int *);
 } special[] = {
 	{ "yesterday", date_yesterday },
 	{ "noon", date_noon },
@@ -757,7 +771,7 @@ static const struct typelen {
 	{ NULL }
 };
 
-static const char *approxidate_alpha(const char *date, struct tm *tm, int *num)
+static const char *approxidate_alpha(const char *date, struct tm *tm, struct tm *now, int *num)
 {
 	const struct typelen *tl;
 	const struct special *s;
@@ -778,7 +792,7 @@ static const char *approxidate_alpha(const char *date, struct tm *tm, int *num)
 	for (s = special; s->name; s++) {
 		int len = strlen(s->name);
 		if (match_string(date, s->name) == len) {
-			s->fn(tm, num);
+			s->fn(tm, now, num);
 			return end;
 		}
 	}
@@ -800,7 +814,7 @@ static const char *approxidate_alpha(const char *date, struct tm *tm, int *num)
 	while (tl->type) {
 		int len = strlen(tl->type);
 		if (match_string(date, tl->type) >= len-1) {
-			update_tm(tm, tl->length * *num);
+			update_tm(tm, now, tl->length * *num);
 			*num = 0;
 			return end;
 		}
@@ -818,7 +832,7 @@ static const char *approxidate_alpha(const char *date, struct tm *tm, int *num)
 				n++;
 			diff += 7*n;
 
-			update_tm(tm, diff * 24 * 60 * 60);
+			update_tm(tm, now, diff * 24 * 60 * 60);
 			return end;
 		}
 	}
@@ -866,6 +880,22 @@ static const char *approxidate_digit(const char *date, struct tm *tm, int *num)
 	return end;
 }
 
+/*
+ * Do we have a pending number at the end, or when
+ * we see a new one? Let's assume it's a month day,
+ * as in "Dec 6, 1992"
+ */
+static void pending_number(struct tm *tm, int *num)
+{
+	int number = *num;
+
+	if (number) {
+		*num = 0;
+		if (tm->tm_mday < 0 && number < 32)
+			tm->tm_mday = number;
+	}
+}
+
 unsigned long approxidate(const char *date)
 {
 	int number = 0;
@@ -881,21 +911,24 @@ unsigned long approxidate(const char *date)
 	time_sec = tv.tv_sec;
 	localtime_r(&time_sec, &tm);
 	now = tm;
+
+	tm.tm_year = -1;
+	tm.tm_mon = -1;
+	tm.tm_mday = -1;
+
 	for (;;) {
 		unsigned char c = *date;
 		if (!c)
 			break;
 		date++;
 		if (isdigit(c)) {
+			pending_number(&tm, &number);
 			date = approxidate_digit(date-1, &tm, &number);
 			continue;
 		}
 		if (isalpha(c))
-			date = approxidate_alpha(date-1, &tm, &number);
+			date = approxidate_alpha(date-1, &tm, &now, &number);
 	}
-	if (number > 0 && number < 32)
-		tm.tm_mday = number;
-	if (tm.tm_mon > now.tm_mon && tm.tm_year == now.tm_year)
-		tm.tm_year--;
-	return mktime(&tm);
+	pending_number(&tm, &number);
+	return update_tm(&tm, &now, 0);
 }

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Further 'approxidate' improvements
  2009-08-22 22:10 Improve on 'approxidate' Linus Torvalds
@ 2009-08-23  1:11 ` Linus Torvalds
  2009-08-23  2:08   ` Nicolas Pitre
  2009-08-30 22:35 ` Improve on 'approxidate' Jeff King
  1 sibling, 1 reply; 6+ messages in thread
From: Linus Torvalds @ 2009-08-23  1:11 UTC (permalink / raw)
  To: Git Mailing List, Junio C Hamano


The previous patch to improve approxidate got us to the point that a lot 
of the remaining annoyances were due to the 'strict' date handling running 
first, and deciding that it got a good enough date that the approximate 
date routines were never even invoked.

For example, using a date string like

	6AM, June 7, 2009

the strict date logic would be perfectly happy with the "June 7, 2009" 
part, and ignore the 6AM part that it didn't understand - resulting in the 
information getting dropped on the floor:

	6AM, June 7, 2009 -> Sat Jun 6 00:00:00 2009

and the date being calculated as if it was midnight, and the '6AM' having 
confused the date routines into thinking about '6 June' rather than 'June 
7' at 6AM (ie notice how the _day_ was wrong due to this, not just the 
time).

So this makes the strict date routines a bit stricter, and requires that 
not just the date, but also the time, has actually been parsed. With that 
fix, and trivial extension of the approxidate routines, git now properly 
parses the date as

	6AM, June 7, 2009 -> Sun Jun  7 06:00:00 2009

without dropping the fuzzy time ("6AM" or "noon" or any of the other 
non-strict time formats) on the floor.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
On Sat, 22 Aug 2009, Linus Torvalds wrote:
> 
> There are other oddnesses. This does not fix them all, but I think it 
> makes for fewer _really_ perplexing cases. At least now we have
> 
> 	Jun 6, 5AM -> Sat Jun  6 05:00:00 2009
> 	5AM, Jun 6 -> Sat Jun  6 05:00:00 2009
> 
> which makes me happier. I can still point to cases that don't work as 
> well, but those are separate issues.

This gets rid of the remaining "obviously bogus" issues with parsing of 
fuzzy dates. I'm sure there are other issues still remaining, but now I 
can't come up with any trivial cases any more without having clear 
garbage in the string. 

So trying to date-parse nonsensical crud still gives odd results:

	I ate six hot-dogs in June -> Sat Jun  6 18:09:26 2009

because it parses "six" and "June" and then puts it together as a date, 
and then adds the current time (and year) and is happy.

But parsing random things amusingly is a _feature_. Misparsing something 
that makes sense as a date is a bug.

 date.c |   32 +++++++++++++++++++++++++++-----
 1 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/date.c b/date.c
index 51c6461..1de1845 100644
--- a/date.c
+++ b/date.c
@@ -24,6 +24,8 @@ time_t tm_to_time_t(const struct tm *tm)
 		return -1;
 	if (month < 2 || (year + 2) % 4)
 		day--;
+	if (tm->tm_hour < 0 || tm->tm_min < 0 || tm->tm_sec < 0)
+		return -1;
 	return (year * 365 + (year + 1) / 4 + mdays[month] + day) * 24*60*60UL +
 		tm->tm_hour * 60*60 + tm->tm_min * 60 + tm->tm_sec;
 }
@@ -425,13 +427,19 @@ static int match_multi_number(unsigned long num, char c, const char *date, char
 	return end - date;
 }
 
-/* Have we filled in any part of the time/date yet? */
+/*
+ * Have we filled in any part of the time/date yet?
+ * We just do a binary 'and' to see if the sign bit
+ * is set in all the values.
+ */
 static inline int nodate(struct tm *tm)
 {
-	return tm->tm_year < 0 &&
-		tm->tm_mon < 0 &&
-		tm->tm_mday < 0 &&
-		!(tm->tm_hour | tm->tm_min | tm->tm_sec);
+	return (tm->tm_year &
+		tm->tm_mon &
+		tm->tm_mday &
+		tm->tm_hour &
+		tm->tm_min &
+		tm->tm_sec) < 0;
 }
 
 /*
@@ -580,6 +588,9 @@ int parse_date(const char *date, char *result, int maxlen)
 	tm.tm_mon = -1;
 	tm.tm_mday = -1;
 	tm.tm_isdst = -1;
+	tm.tm_hour = -1;
+	tm.tm_min = -1;
+	tm.tm_sec = -1;
 	offset = -1;
 	tm_gmt = 0;
 
@@ -893,6 +904,17 @@ static void pending_number(struct tm *tm, int *num)
 		*num = 0;
 		if (tm->tm_mday < 0 && number < 32)
 			tm->tm_mday = number;
+		else if (tm->tm_mon < 0 && number < 13)
+			tm->tm_mon = number-1;
+		else if (tm->tm_year < 0) {
+			if (number > 1969 && number < 2100)
+				tm->tm_year = number - 1900;
+			else if (number > 69 && number < 100)
+				tm->tm_year = number;
+			else if (number < 38)
+				tm->tm_year = 100 + number;
+			/* We screw up for number = 00 ? */
+		}
 	}
 }
 

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: Further 'approxidate' improvements
  2009-08-23  1:11 ` Further 'approxidate' improvements Linus Torvalds
@ 2009-08-23  2:08   ` Nicolas Pitre
  0 siblings, 0 replies; 6+ messages in thread
From: Nicolas Pitre @ 2009-08-23  2:08 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Git Mailing List, Junio C Hamano

On Sat, 22 Aug 2009, Linus Torvalds wrote:

> So trying to date-parse nonsensical crud still gives odd results:
> 
> 	I ate six hot-dogs in June -> Sat Jun  6 18:09:26 2009
> 
> because it parses "six" and "June" and then puts it together as a date, 
> and then adds the current time (and year) and is happy.
> 
> But parsing random things amusingly is a _feature_. Misparsing something 
> that makes sense as a date is a bug.

Maybe that would be a good idea to write a test just for this, so known 
cases making sense aren't accidentally broken by eventual modifications 
to add more such cases.


Nicolas

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: Improve on 'approxidate'
  2009-08-22 22:10 Improve on 'approxidate' Linus Torvalds
  2009-08-23  1:11 ` Further 'approxidate' improvements Linus Torvalds
@ 2009-08-30 22:35 ` Jeff King
  2009-08-31  1:58   ` Jeff King
  2009-09-01  3:27   ` Linus Torvalds
  1 sibling, 2 replies; 6+ messages in thread
From: Jeff King @ 2009-08-30 22:35 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Git Mailing List, Junio C Hamano

On Sat, Aug 22, 2009 at 03:10:07PM -0700, Linus Torvalds wrote:

>  unsigned long approxidate(const char *date)
>  {
>  	int number = 0;
> @@ -881,21 +911,24 @@ unsigned long approxidate(const char *date)
>  	time_sec = tv.tv_sec;
>  	localtime_r(&time_sec, &tm);
>  	now = tm;
> +
> +	tm.tm_year = -1;
> +	tm.tm_mon = -1;
> +	tm.tm_mday = -1;

This breaks relative dates like "3.months.ago", because
approxidate_alpha needs to see the "current" date in tm (and now it sees
-1, subtracts from it, and assumes we are just crossing a year boundary
because of the negative).  3.years.ago is also broken, but I don't think
3.days.ago is.

Probably we just need to pass "now" to approxidate_alpha, and it needs
to call update_tm under the case for "months" and "years" (and I haven't
quite figured out why those are not part of the "tl" list).
Unfortunately, I'm out of time to look at it more right now, but I'll
take a look tonight or tomorrow if you don't beat me to it.

-Peff

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: Improve on 'approxidate'
  2009-08-30 22:35 ` Improve on 'approxidate' Jeff King
@ 2009-08-31  1:58   ` Jeff King
  2009-09-01  3:27   ` Linus Torvalds
  1 sibling, 0 replies; 6+ messages in thread
From: Jeff King @ 2009-08-31  1:58 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Git Mailing List, Junio C Hamano

On Sun, Aug 30, 2009 at 06:35:58PM -0400, Jeff King wrote:

> > +	tm.tm_year = -1;
> > +	tm.tm_mon = -1;
> > +	tm.tm_mday = -1;
> 
> This breaks relative dates like "3.months.ago", because
> approxidate_alpha needs to see the "current" date in tm (and now it sees
> -1, subtracts from it, and assumes we are just crossing a year boundary
> because of the negative).  3.years.ago is also broken, but I don't think
> 3.days.ago is.
> 
> Probably we just need to pass "now" to approxidate_alpha, and it needs
> to call update_tm under the case for "months" and "years" (and I haven't
> quite figured out why those are not part of the "tl" list).
> Unfortunately, I'm out of time to look at it more right now, but I'll
> take a look tonight or tomorrow if you don't beat me to it.

OK, I looked at it.

The fix is pretty straightforward. We _do_ already pass "now" to
approxidate_alpha, and it looks like you already fixed the "typelen"
array case (which handles seconds, minutes, hours, days, and weeks) by
calling update_tm. But all of those units are convertible to seconds,
and months and years are not, which explains why they are handled
separately.

So I think we can just "cheat" and call update_tm to fill in the fields
from "now" as we would for the other units, and then tweak the "struct
tm" as we did before. I.e.,:

diff --git a/date.c b/date.c
index 8e57e5e..e9ee4aa 100644
--- a/date.c
+++ b/date.c
@@ -857,7 +857,9 @@ static const char *approxidate_alpha(const char *date, struct tm *tm, struct tm
 	}
 
 	if (match_string(date, "months") >= 5) {
-		int n = tm->tm_mon - *num;
+		int n;
+		update_tm(tm, now, 0); /* fill in date fields if needed */
+		n = tm->tm_mon - *num;
 		*num = 0;
 		while (n < 0) {
 			n += 12;
@@ -868,6 +870,7 @@ static const char *approxidate_alpha(const char *date, struct tm *tm, struct tm
 	}
 
 	if (match_string(date, "years") >= 4) {
+		update_tm(tm, now, 0); /* fill in date fields if needed */
 		tm->tm_year -= *num;
 		*num = 0;
 		return end;

I'll wrap this fix up in a commit message with tests and add it to the
"test approxidate" series I'm brewing.

-Peff

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: Improve on 'approxidate'
  2009-08-30 22:35 ` Improve on 'approxidate' Jeff King
  2009-08-31  1:58   ` Jeff King
@ 2009-09-01  3:27   ` Linus Torvalds
  1 sibling, 0 replies; 6+ messages in thread
From: Linus Torvalds @ 2009-09-01  3:27 UTC (permalink / raw)
  To: Jeff King; +Cc: Git Mailing List, Junio C Hamano



On Sun, 30 Aug 2009, Jeff King wrote:
> 
> This breaks relative dates like "3.months.ago", because
> approxidate_alpha needs to see the "current" date in tm (and now it sees
> -1, subtracts from it, and assumes we are just crossing a year boundary
> because of the negative).  3.years.ago is also broken, but I don't think
> 3.days.ago is.

Gaah. Thanks for noticing and the fixes. I had tested the relative modes, 
but only the "fixed offset" ones (days, hours, minutes, seconds), not the 
months and years cases.

			Linus

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2009-09-01  3:27 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-08-22 22:10 Improve on 'approxidate' Linus Torvalds
2009-08-23  1:11 ` Further 'approxidate' improvements Linus Torvalds
2009-08-23  2:08   ` Nicolas Pitre
2009-08-30 22:35 ` Improve on 'approxidate' Jeff King
2009-08-31  1:58   ` Jeff King
2009-09-01  3:27   ` Linus Torvalds

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).