* Improve on 'approxidate'
@ 2009-08-22 22:10 Linus Torvalds
2009-08-23 1:11 ` Further 'approxidate' improvements Linus Torvalds
2009-08-30 22:35 ` Improve on 'approxidate' Jeff King
0 siblings, 2 replies; 6+ messages in thread
From: Linus Torvalds @ 2009-08-22 22:10 UTC (permalink / raw)
To: Git Mailing List, Junio C Hamano
This is not a new failure mode - approxidate has always been kind of
random in the input it accepts, but some of the randomness is more
irritating than others.
For example:
Jun 6, 5AM -> Mon Jun 22 05:00:00 2009
5AM Jun 6 -> Sat Jun 6 05:00:00 2009
Whaa? The reason for the above is that approxidate squirrells away the '6'
from "Jun 6" to see if it's going to be a relative number, and then
forgets about it when it sees a new number (the '5' in '5AM'). So the odd
"June 22" date is because today is July 22nd, and if it doesn't have
another day of the month, it will just pick todays mday - having ignored
the '6' entirely due to getting all excited about seeing a new number (5).
There are other oddnesses. This does not fix them all, but I think it
makes for fewer _really_ perplexing cases. At least now we have
Jun 6, 5AM -> Sat Jun 6 05:00:00 2009
5AM, Jun 6 -> Sat Jun 6 05:00:00 2009
which makes me happier. I can still point to cases that don't work as
well, but those are separate issues.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
date.c | 93 +++++++++++++++++++++++++++++++++++++++++++--------------------
1 files changed, 63 insertions(+), 30 deletions(-)
diff --git a/date.c b/date.c
index 409a17d..51c6461 100644
--- a/date.c
+++ b/date.c
@@ -525,11 +525,8 @@ static int match_digit(const char *date, struct tm *tm, int *offset, int *tm_gmt
}
}
- if (num > 0 && num < 32) {
- tm->tm_mday = num;
- } else if (num > 0 && num < 13) {
+ if (num > 0 && num < 13 && tm->tm_mon < 0)
tm->tm_mon = num-1;
- }
return n;
}
@@ -657,42 +654,59 @@ void datestamp(char *buf, int bufsize)
date_string(now, offset, buf, bufsize);
}
-static void update_tm(struct tm *tm, unsigned long sec)
+/*
+ * Relative time update (eg "2 days ago"). If we haven't set the time
+ * yet, we need to set it from current time.
+ */
+static unsigned long update_tm(struct tm *tm, struct tm *now, unsigned long sec)
{
- time_t n = mktime(tm) - sec;
+ time_t n;
+
+ if (tm->tm_mday < 0)
+ tm->tm_mday = now->tm_mday;
+ if (tm->tm_mon < 0)
+ tm->tm_mon = now->tm_mon;
+ if (tm->tm_year < 0) {
+ tm->tm_year = now->tm_year;
+ if (tm->tm_mon > now->tm_mon)
+ tm->tm_year--;
+ }
+
+ n = mktime(tm) - sec;
localtime_r(&n, tm);
+ return n;
}
-static void date_yesterday(struct tm *tm, int *num)
+static void date_yesterday(struct tm *tm, struct tm *now, int *num)
{
- update_tm(tm, 24*60*60);
+ update_tm(tm, now, 24*60*60);
}
-static void date_time(struct tm *tm, int hour)
+static void date_time(struct tm *tm, struct tm *now, int hour)
{
if (tm->tm_hour < hour)
- date_yesterday(tm, NULL);
+ date_yesterday(tm, now, NULL);
tm->tm_hour = hour;
tm->tm_min = 0;
tm->tm_sec = 0;
}
-static void date_midnight(struct tm *tm, int *num)
+static void date_midnight(struct tm *tm, struct tm *now, int *num)
{
- date_time(tm, 0);
+ date_time(tm, now, 0);
}
-static void date_noon(struct tm *tm, int *num)
+static void date_noon(struct tm *tm, struct tm *now, int *num)
{
- date_time(tm, 12);
+ date_time(tm, now, 12);
}
-static void date_tea(struct tm *tm, int *num)
+static void date_tea(struct tm *tm, struct tm *now, int *num)
{
- date_time(tm, 17);
+ date_time(tm, now, 17);
}
-static void date_pm(struct tm *tm, int *num)
+static void date_pm(struct tm *tm, struct tm *now, int *num)
{
int hour, n = *num;
*num = 0;
@@ -706,7 +720,7 @@ static void date_pm(struct tm *tm, int *num)
tm->tm_hour = (hour % 12) + 12;
}
-static void date_am(struct tm *tm, int *num)
+static void date_am(struct tm *tm, struct tm *now, int *num)
{
int hour, n = *num;
*num = 0;
@@ -720,7 +734,7 @@ static void date_am(struct tm *tm, int *num)
tm->tm_hour = (hour % 12);
}
-static void date_never(struct tm *tm, int *num)
+static void date_never(struct tm *tm, struct tm *now, int *num)
{
time_t n = 0;
localtime_r(&n, tm);
@@ -728,7 +742,7 @@ static void date_never(struct tm *tm, int *num)
static const struct special {
const char *name;
- void (*fn)(struct tm *, int *);
+ void (*fn)(struct tm *, struct tm *, int *);
} special[] = {
{ "yesterday", date_yesterday },
{ "noon", date_noon },
@@ -757,7 +771,7 @@ static const struct typelen {
{ NULL }
};
-static const char *approxidate_alpha(const char *date, struct tm *tm, int *num)
+static const char *approxidate_alpha(const char *date, struct tm *tm, struct tm *now, int *num)
{
const struct typelen *tl;
const struct special *s;
@@ -778,7 +792,7 @@ static const char *approxidate_alpha(const char *date, struct tm *tm, int *num)
for (s = special; s->name; s++) {
int len = strlen(s->name);
if (match_string(date, s->name) == len) {
- s->fn(tm, num);
+ s->fn(tm, now, num);
return end;
}
}
@@ -800,7 +814,7 @@ static const char *approxidate_alpha(const char *date, struct tm *tm, int *num)
while (tl->type) {
int len = strlen(tl->type);
if (match_string(date, tl->type) >= len-1) {
- update_tm(tm, tl->length * *num);
+ update_tm(tm, now, tl->length * *num);
*num = 0;
return end;
}
@@ -818,7 +832,7 @@ static const char *approxidate_alpha(const char *date, struct tm *tm, int *num)
n++;
diff += 7*n;
- update_tm(tm, diff * 24 * 60 * 60);
+ update_tm(tm, now, diff * 24 * 60 * 60);
return end;
}
}
@@ -866,6 +880,22 @@ static const char *approxidate_digit(const char *date, struct tm *tm, int *num)
return end;
}
+/*
+ * Do we have a pending number at the end, or when
+ * we see a new one? Let's assume it's a month day,
+ * as in "Dec 6, 1992"
+ */
+static void pending_number(struct tm *tm, int *num)
+{
+ int number = *num;
+
+ if (number) {
+ *num = 0;
+ if (tm->tm_mday < 0 && number < 32)
+ tm->tm_mday = number;
+ }
+}
+
unsigned long approxidate(const char *date)
{
int number = 0;
@@ -881,21 +911,24 @@ unsigned long approxidate(const char *date)
time_sec = tv.tv_sec;
localtime_r(&time_sec, &tm);
now = tm;
+
+ tm.tm_year = -1;
+ tm.tm_mon = -1;
+ tm.tm_mday = -1;
+
for (;;) {
unsigned char c = *date;
if (!c)
break;
date++;
if (isdigit(c)) {
+ pending_number(&tm, &number);
date = approxidate_digit(date-1, &tm, &number);
continue;
}
if (isalpha(c))
- date = approxidate_alpha(date-1, &tm, &number);
+ date = approxidate_alpha(date-1, &tm, &now, &number);
}
- if (number > 0 && number < 32)
- tm.tm_mday = number;
- if (tm.tm_mon > now.tm_mon && tm.tm_year == now.tm_year)
- tm.tm_year--;
- return mktime(&tm);
+ pending_number(&tm, &number);
+ return update_tm(&tm, &now, 0);
}
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Further 'approxidate' improvements
2009-08-22 22:10 Improve on 'approxidate' Linus Torvalds
@ 2009-08-23 1:11 ` Linus Torvalds
2009-08-23 2:08 ` Nicolas Pitre
2009-08-30 22:35 ` Improve on 'approxidate' Jeff King
1 sibling, 1 reply; 6+ messages in thread
From: Linus Torvalds @ 2009-08-23 1:11 UTC (permalink / raw)
To: Git Mailing List, Junio C Hamano
The previous patch to improve approxidate got us to the point that a lot
of the remaining annoyances were due to the 'strict' date handling running
first, and deciding that it got a good enough date that the approximate
date routines were never even invoked.
For example, using a date string like
6AM, June 7, 2009
the strict date logic would be perfectly happy with the "June 7, 2009"
part, and ignore the 6AM part that it didn't understand - resulting in the
information getting dropped on the floor:
6AM, June 7, 2009 -> Sat Jun 6 00:00:00 2009
and the date being calculated as if it was midnight, and the '6AM' having
confused the date routines into thinking about '6 June' rather than 'June
7' at 6AM (ie notice how the _day_ was wrong due to this, not just the
time).
So this makes the strict date routines a bit stricter, and requires that
not just the date, but also the time, has actually been parsed. With that
fix, and trivial extension of the approxidate routines, git now properly
parses the date as
6AM, June 7, 2009 -> Sun Jun 7 06:00:00 2009
without dropping the fuzzy time ("6AM" or "noon" or any of the other
non-strict time formats) on the floor.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
On Sat, 22 Aug 2009, Linus Torvalds wrote:
>
> There are other oddnesses. This does not fix them all, but I think it
> makes for fewer _really_ perplexing cases. At least now we have
>
> Jun 6, 5AM -> Sat Jun 6 05:00:00 2009
> 5AM, Jun 6 -> Sat Jun 6 05:00:00 2009
>
> which makes me happier. I can still point to cases that don't work as
> well, but those are separate issues.
This gets rid of the remaining "obviously bogus" issues with parsing of
fuzzy dates. I'm sure there are other issues still remaining, but now I
can't come up with any trivial cases any more without having clear
garbage in the string.
So trying to date-parse nonsensical crud still gives odd results:
I ate six hot-dogs in June -> Sat Jun 6 18:09:26 2009
because it parses "six" and "June" and then puts it together as a date,
and then adds the current time (and year) and is happy.
But parsing random things amusingly is a _feature_. Misparsing something
that makes sense as a date is a bug.
date.c | 32 +++++++++++++++++++++++++++-----
1 files changed, 27 insertions(+), 5 deletions(-)
diff --git a/date.c b/date.c
index 51c6461..1de1845 100644
--- a/date.c
+++ b/date.c
@@ -24,6 +24,8 @@ time_t tm_to_time_t(const struct tm *tm)
return -1;
if (month < 2 || (year + 2) % 4)
day--;
+ if (tm->tm_hour < 0 || tm->tm_min < 0 || tm->tm_sec < 0)
+ return -1;
return (year * 365 + (year + 1) / 4 + mdays[month] + day) * 24*60*60UL +
tm->tm_hour * 60*60 + tm->tm_min * 60 + tm->tm_sec;
}
@@ -425,13 +427,19 @@ static int match_multi_number(unsigned long num, char c, const char *date, char
return end - date;
}
-/* Have we filled in any part of the time/date yet? */
+/*
+ * Have we filled in any part of the time/date yet?
+ * We just do a binary 'and' to see if the sign bit
+ * is set in all the values.
+ */
static inline int nodate(struct tm *tm)
{
- return tm->tm_year < 0 &&
- tm->tm_mon < 0 &&
- tm->tm_mday < 0 &&
- !(tm->tm_hour | tm->tm_min | tm->tm_sec);
+ return (tm->tm_year &
+ tm->tm_mon &
+ tm->tm_mday &
+ tm->tm_hour &
+ tm->tm_min &
+ tm->tm_sec) < 0;
}
/*
@@ -580,6 +588,9 @@ int parse_date(const char *date, char *result, int maxlen)
tm.tm_mon = -1;
tm.tm_mday = -1;
tm.tm_isdst = -1;
+ tm.tm_hour = -1;
+ tm.tm_min = -1;
+ tm.tm_sec = -1;
offset = -1;
tm_gmt = 0;
@@ -893,6 +904,17 @@ static void pending_number(struct tm *tm, int *num)
*num = 0;
if (tm->tm_mday < 0 && number < 32)
tm->tm_mday = number;
+ else if (tm->tm_mon < 0 && number < 13)
+ tm->tm_mon = number-1;
+ else if (tm->tm_year < 0) {
+ if (number > 1969 && number < 2100)
+ tm->tm_year = number - 1900;
+ else if (number > 69 && number < 100)
+ tm->tm_year = number;
+ else if (number < 38)
+ tm->tm_year = 100 + number;
+ /* We screw up for number = 00 ? */
+ }
}
}
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: Further 'approxidate' improvements
2009-08-23 1:11 ` Further 'approxidate' improvements Linus Torvalds
@ 2009-08-23 2:08 ` Nicolas Pitre
0 siblings, 0 replies; 6+ messages in thread
From: Nicolas Pitre @ 2009-08-23 2:08 UTC (permalink / raw)
To: Linus Torvalds; +Cc: Git Mailing List, Junio C Hamano
On Sat, 22 Aug 2009, Linus Torvalds wrote:
> So trying to date-parse nonsensical crud still gives odd results:
>
> I ate six hot-dogs in June -> Sat Jun 6 18:09:26 2009
>
> because it parses "six" and "June" and then puts it together as a date,
> and then adds the current time (and year) and is happy.
>
> But parsing random things amusingly is a _feature_. Misparsing something
> that makes sense as a date is a bug.
Maybe that would be a good idea to write a test just for this, so known
cases making sense aren't accidentally broken by eventual modifications
to add more such cases.
Nicolas
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: Improve on 'approxidate'
2009-08-22 22:10 Improve on 'approxidate' Linus Torvalds
2009-08-23 1:11 ` Further 'approxidate' improvements Linus Torvalds
@ 2009-08-30 22:35 ` Jeff King
2009-08-31 1:58 ` Jeff King
2009-09-01 3:27 ` Linus Torvalds
1 sibling, 2 replies; 6+ messages in thread
From: Jeff King @ 2009-08-30 22:35 UTC (permalink / raw)
To: Linus Torvalds; +Cc: Git Mailing List, Junio C Hamano
On Sat, Aug 22, 2009 at 03:10:07PM -0700, Linus Torvalds wrote:
> unsigned long approxidate(const char *date)
> {
> int number = 0;
> @@ -881,21 +911,24 @@ unsigned long approxidate(const char *date)
> time_sec = tv.tv_sec;
> localtime_r(&time_sec, &tm);
> now = tm;
> +
> + tm.tm_year = -1;
> + tm.tm_mon = -1;
> + tm.tm_mday = -1;
This breaks relative dates like "3.months.ago", because
approxidate_alpha needs to see the "current" date in tm (and now it sees
-1, subtracts from it, and assumes we are just crossing a year boundary
because of the negative). 3.years.ago is also broken, but I don't think
3.days.ago is.
Probably we just need to pass "now" to approxidate_alpha, and it needs
to call update_tm under the case for "months" and "years" (and I haven't
quite figured out why those are not part of the "tl" list).
Unfortunately, I'm out of time to look at it more right now, but I'll
take a look tonight or tomorrow if you don't beat me to it.
-Peff
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: Improve on 'approxidate'
2009-08-30 22:35 ` Improve on 'approxidate' Jeff King
@ 2009-08-31 1:58 ` Jeff King
2009-09-01 3:27 ` Linus Torvalds
1 sibling, 0 replies; 6+ messages in thread
From: Jeff King @ 2009-08-31 1:58 UTC (permalink / raw)
To: Linus Torvalds; +Cc: Git Mailing List, Junio C Hamano
On Sun, Aug 30, 2009 at 06:35:58PM -0400, Jeff King wrote:
> > + tm.tm_year = -1;
> > + tm.tm_mon = -1;
> > + tm.tm_mday = -1;
>
> This breaks relative dates like "3.months.ago", because
> approxidate_alpha needs to see the "current" date in tm (and now it sees
> -1, subtracts from it, and assumes we are just crossing a year boundary
> because of the negative). 3.years.ago is also broken, but I don't think
> 3.days.ago is.
>
> Probably we just need to pass "now" to approxidate_alpha, and it needs
> to call update_tm under the case for "months" and "years" (and I haven't
> quite figured out why those are not part of the "tl" list).
> Unfortunately, I'm out of time to look at it more right now, but I'll
> take a look tonight or tomorrow if you don't beat me to it.
OK, I looked at it.
The fix is pretty straightforward. We _do_ already pass "now" to
approxidate_alpha, and it looks like you already fixed the "typelen"
array case (which handles seconds, minutes, hours, days, and weeks) by
calling update_tm. But all of those units are convertible to seconds,
and months and years are not, which explains why they are handled
separately.
So I think we can just "cheat" and call update_tm to fill in the fields
from "now" as we would for the other units, and then tweak the "struct
tm" as we did before. I.e.,:
diff --git a/date.c b/date.c
index 8e57e5e..e9ee4aa 100644
--- a/date.c
+++ b/date.c
@@ -857,7 +857,9 @@ static const char *approxidate_alpha(const char *date, struct tm *tm, struct tm
}
if (match_string(date, "months") >= 5) {
- int n = tm->tm_mon - *num;
+ int n;
+ update_tm(tm, now, 0); /* fill in date fields if needed */
+ n = tm->tm_mon - *num;
*num = 0;
while (n < 0) {
n += 12;
@@ -868,6 +870,7 @@ static const char *approxidate_alpha(const char *date, struct tm *tm, struct tm
}
if (match_string(date, "years") >= 4) {
+ update_tm(tm, now, 0); /* fill in date fields if needed */
tm->tm_year -= *num;
*num = 0;
return end;
I'll wrap this fix up in a commit message with tests and add it to the
"test approxidate" series I'm brewing.
-Peff
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: Improve on 'approxidate'
2009-08-30 22:35 ` Improve on 'approxidate' Jeff King
2009-08-31 1:58 ` Jeff King
@ 2009-09-01 3:27 ` Linus Torvalds
1 sibling, 0 replies; 6+ messages in thread
From: Linus Torvalds @ 2009-09-01 3:27 UTC (permalink / raw)
To: Jeff King; +Cc: Git Mailing List, Junio C Hamano
On Sun, 30 Aug 2009, Jeff King wrote:
>
> This breaks relative dates like "3.months.ago", because
> approxidate_alpha needs to see the "current" date in tm (and now it sees
> -1, subtracts from it, and assumes we are just crossing a year boundary
> because of the negative). 3.years.ago is also broken, but I don't think
> 3.days.ago is.
Gaah. Thanks for noticing and the fixes. I had tested the relative modes,
but only the "fixed offset" ones (days, hours, minutes, seconds), not the
months and years cases.
Linus
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2009-09-01 3:27 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-08-22 22:10 Improve on 'approxidate' Linus Torvalds
2009-08-23 1:11 ` Further 'approxidate' improvements Linus Torvalds
2009-08-23 2:08 ` Nicolas Pitre
2009-08-30 22:35 ` Improve on 'approxidate' Jeff King
2009-08-31 1:58 ` Jeff King
2009-09-01 3:27 ` Linus Torvalds
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).