* [RFC 5/5] Support 'master@2 hours ago' syntax
@ 2006-05-17 9:56 Shawn Pearce
2006-05-17 11:07 ` Junio C Hamano
0 siblings, 1 reply; 7+ messages in thread
From: Shawn Pearce @ 2006-05-17 9:56 UTC (permalink / raw)
To: git
Extended sha1 expressions may now include date specifications
which indicate a point in time within the local repository's
history. If the ref indicated to the left of '@' has a log in
$GIT_DIR/logs/<ref> then the value of the ref at the time indicated
by the specification is obtained from the ref's log.
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
---
Documentation/git-rev-parse.txt | 6 ++++
refs.c | 53 +++++++++++++++++++++++++++++++++++
refs.h | 3 ++
sha1_name.c | 59 ++++++++++++++++++++++++++++-----------
4 files changed, 104 insertions(+), 17 deletions(-)
efffa32e4896acd7978767b6a856fc7421060040
diff --git a/Documentation/git-rev-parse.txt b/Documentation/git-rev-parse.txt
index ab896fc..df308c3 100644
--- a/Documentation/git-rev-parse.txt
+++ b/Documentation/git-rev-parse.txt
@@ -124,6 +124,12 @@ syntax.
happen to have both heads/master and tags/master, you can
explicitly say 'heads/master' to tell git which one you mean.
+* A suffix '@' followed by a date specification such as 'yesterday'
+ (24 hours ago) or '1 month 2 weeks 3 days 1 hour 1 second ago'
+ to specify the value of the ref at a prior point in time.
+ This suffix may only be used immediately following a ref name
+ and the ref must have an existing log ($GIT_DIR/logs/<ref>).
+
* A suffix '{caret}' to a revision parameter means the first parent of
that commit object. '{caret}<n>' means the <n>th parent (i.e.
'rev{caret}'
diff --git a/refs.c b/refs.c
index 4be75a5..4c99e37 100644
--- a/refs.c
+++ b/refs.c
@@ -429,3 +429,56 @@ int write_ref_sha1(struct ref_lock *lock
unlock_ref(lock);
return 0;
}
+
+int read_ref_at(const char *ref, unsigned long at_time, unsigned char *sha1)
+{
+ const char *logfile, *logdata, *logend, *rec, *c;
+ char *tz_c;
+ int logfd, tz;
+ struct stat st;
+ unsigned long date;
+
+ logfile = git_path("logs/%s", ref);
+ logfd = open(logfile, O_RDONLY, 0);
+ if (logfd < 0)
+ die("Unable to read log %s: %s", logfile, strerror(errno));
+ fstat(logfd, &st);
+ if (!st.st_size)
+ die("Log %s is empty.", logfile);
+ logdata = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, logfd, 0);
+ close(logfd);
+
+ rec = logend = logdata + st.st_size;
+ while (logdata < rec) {
+ if (logdata < rec && *(rec-1) == '\n')
+ rec--;
+ while (logdata < rec && *(rec-1) != '\n')
+ rec--;
+ c = rec;
+ while (c < logend && *c != '>' && *c != '\n')
+ c++;
+ if (c == logend || *c == '\n')
+ die("Log %s is corrupt.", logfile);
+ date = strtoul(c, NULL, 10);
+ if (date <= at_time) {
+ if (get_sha1_hex(rec + 41, sha1))
+ die("Log %s is corrupt.", logfile);
+ munmap((void*)logdata, st.st_size);
+ return 0;
+ }
+ }
+
+ c = logdata;
+ while (c < logend && *c != '>' && *c != '\n')
+ c++;
+ if (c == logend || *c == '\n')
+ die("Log %s is corrupt.", logfile);
+ date = strtoul(c, &tz_c, 10);
+ tz = strtoul(tz_c, NULL, 10);
+ if (get_sha1_hex(logdata, sha1))
+ die("Log %s is corrupt.", logfile);
+ munmap((void*)logdata, st.st_size);
+ fprintf(stderr, "warning: Log %s only goes back to %s.\n",
+ logfile, show_rfc2822_date(date, tz));
+ return 0;
+}
diff --git a/refs.h b/refs.h
index 43831e9..2c854de 100644
--- a/refs.h
+++ b/refs.h
@@ -34,6 +34,9 @@ extern void unlock_ref (struct ref_lock
/** Writes sha1 into the ref specified by the lock. **/
extern int write_ref_sha1(struct ref_lock *lock, const unsigned char *sha1, const char *msg);
+/** Reads log for the value of ref during at_time. **/
+extern int read_ref_at(const char *ref, unsigned long at_time, unsigned char *sha1);
+
/** Returns 0 if target has the right format for a ref. **/
extern int check_ref_format(const char *target);
diff --git a/sha1_name.c b/sha1_name.c
index dc68355..3ac3ab4 100644
--- a/sha1_name.c
+++ b/sha1_name.c
@@ -4,6 +4,7 @@ #include "commit.h"
#include "tree.h"
#include "blob.h"
#include "tree-walk.h"
+#include "refs.h"
static int find_short_object_filename(int len, const char *name, unsigned char *sha1)
{
@@ -245,36 +246,60 @@ static int get_sha1_basic(const char *st
"refs/remotes/%.*s/HEAD",
NULL
};
- const char **p;
- const char *warning = "warning: refname '%.*s' is ambiguous.\n";
- char *pathname;
- int already_found = 0;
+ static const char *warning = "warning: refname '%.*s' is ambiguous.\n";
+ const char **p, *pathname;
+ char *real_path = NULL;
+ int refs_found = 0, at_mark;
+ unsigned long at_time = (unsigned long)-1;
unsigned char *this_result;
unsigned char sha1_from_ref[20];
if (len == 40 && !get_sha1_hex(str, sha1))
return 0;
+ /* At a given period of time? "@2 hours ago" */
+ for (at_mark = 1; at_mark < len; at_mark++) {
+ if (str[at_mark] == '@') {
+ int date_len = len - at_mark - 1;
+ char *date_spec = xmalloc(date_len + 1);
+ strncpy(date_spec, str + at_mark + 1, date_len);
+ date_spec[date_len] = 0;
+ at_time = approxidate(date_spec);
+ free(date_spec);
+ len = at_mark;
+ }
+ }
+
/* Accept only unambiguous ref paths. */
if (ambiguous_path(str, len))
return -1;
for (p = fmt; *p; p++) {
- this_result = already_found ? sha1_from_ref : sha1;
- pathname = git_path(*p, len, str);
- if (!read_ref(pathname, this_result)) {
- if (warn_ambiguous_refs) {
- if (already_found)
- fprintf(stderr, warning, len, str);
- already_found++;
- }
- else
- return 0;
+ this_result = refs_found ? sha1_from_ref : sha1;
+ pathname = resolve_ref(git_path(*p, len, str), this_result, 1);
+ if (pathname) {
+ if (!refs_found++)
+ real_path = strdup(pathname);
+ if (!warn_ambiguous_refs)
+ break;
}
}
- if (already_found)
- return 0;
- return -1;
+
+ if (!refs_found)
+ return -1;
+
+ if (warn_ambiguous_refs && refs_found > 1)
+ fprintf(stderr, warning, len, str);
+
+ if (at_time != (unsigned long)-1) {
+ read_ref_at(
+ real_path + strlen(git_path(".")) - 1,
+ at_time,
+ sha1);
+ }
+
+ free(real_path);
+ return 0;
}
static int get_sha1_1(const char *name, int len, unsigned char *sha1);
--
1.3.2.g7278
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [RFC 5/5] Support 'master@2 hours ago' syntax
2006-05-17 9:56 [RFC 5/5] Support 'master@2 hours ago' syntax Shawn Pearce
@ 2006-05-17 11:07 ` Junio C Hamano
2006-05-17 14:46 ` Linus Torvalds
2006-05-17 22:06 ` Shawn Pearce
0 siblings, 2 replies; 7+ messages in thread
From: Junio C Hamano @ 2006-05-17 11:07 UTC (permalink / raw)
To: Shawn Pearce; +Cc: git
Shawn Pearce <spearce@spearce.org> writes:
> Extended sha1 expressions may now include date specifications
> which indicate a point in time within the local repository's
> history. If the ref indicated to the left of '@' has a log in
> $GIT_DIR/logs/<ref> then the value of the ref at the time indicated
> by the specification is obtained from the ref's log.
This does not allow '2006-05-17 00:00:00' as the timespec, and
the documentation carefully avoids giving that example, but I
think it is better to spell that limitation out.
> +* A suffix '@' followed by a date specification such as 'yesterday'
> + (24 hours ago) or '1 month 2 weeks 3 days 1 hour 1 second ago'
> + to specify the value of the ref at a prior point in time.
> + This suffix may only be used immediately following a ref name
> + and the ref must have an existing log ($GIT_DIR/logs/<ref>).
+ fprintf(stderr, "warning: Log %s only goes back to %s.\n",
+ logfile, show_rfc2822_date(date, tz));
+ return 0;
I am not sure about this part. If the oldest log entry was 3
hours ago, the second oldest 2 hours ago, we can tell during
that one hour period the ref was at that point. If the user
asked "ref as of four hours ago", and if the oldest log entry
had old SHA1 that is not 0{40} (because the log was not enabled
before that record), it might make more sense to give that back.
Also I wonder how much complexity would we suffer and how much
efficiency would we gain if we binary search the logdata (the
committer info is variable length, so you would need to resync
in each step).
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [RFC 5/5] Support 'master@2 hours ago' syntax
2006-05-17 11:07 ` Junio C Hamano
@ 2006-05-17 14:46 ` Linus Torvalds
2006-05-17 18:32 ` Junio C Hamano
2006-05-17 22:06 ` Shawn Pearce
1 sibling, 1 reply; 7+ messages in thread
From: Linus Torvalds @ 2006-05-17 14:46 UTC (permalink / raw)
To: Junio C Hamano; +Cc: Shawn Pearce, git
On Wed, 17 May 2006, Junio C Hamano wrote:
>
> This does not allow '2006-05-17 00:00:00' as the timespec, and
> the documentation carefully avoids giving that example, but I
> think it is better to spell that limitation out.
It doesn't? The "approxidate()" function should handle any reasonable date
specifier, and the above is certainly more than reasonable.
Why doesn't approxidate handle it?
Linus
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [RFC 5/5] Support 'master@2 hours ago' syntax
2006-05-17 14:46 ` Linus Torvalds
@ 2006-05-17 18:32 ` Junio C Hamano
2006-05-17 21:39 ` Shawn Pearce
0 siblings, 1 reply; 7+ messages in thread
From: Junio C Hamano @ 2006-05-17 18:32 UTC (permalink / raw)
To: Linus Torvalds; +Cc: Shawn Pearce, git
Linus Torvalds <torvalds@osdl.org> writes:
> On Wed, 17 May 2006, Junio C Hamano wrote:
>>
>> This does not allow '2006-05-17 00:00:00' as the timespec, and
>> the documentation carefully avoids giving that example, but I
>> think it is better to spell that limitation out.
>
> It doesn't? The "approxidate()" function should handle any reasonable date
> specifier, and the above is certainly more than reasonable.
>
> Why doesn't approxidate handle it?
The way I read the code is that get_sha1() would first do its
magic at the first colon and feeds get_sha1_1() with prefix up
to the first colon. This gets passed down to get_sha1_basic()
and what approxidate() is fed is the suffix of that prefix. It
ends up seeing stuff between '@' and ':'. I.e.
"master@2006-05-17 00:00:00:cache.h"
would ask for "00:00:cache.h" file in the "master" branch as of
timestamp "2006-05-17 00".
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [RFC 5/5] Support 'master@2 hours ago' syntax
2006-05-17 18:32 ` Junio C Hamano
@ 2006-05-17 21:39 ` Shawn Pearce
0 siblings, 0 replies; 7+ messages in thread
From: Shawn Pearce @ 2006-05-17 21:39 UTC (permalink / raw)
To: Junio C Hamano; +Cc: Linus Torvalds, git
Junio C Hamano <junkio@cox.net> wrote:
> Linus Torvalds <torvalds@osdl.org> writes:
>
> > On Wed, 17 May 2006, Junio C Hamano wrote:
> >>
> >> This does not allow '2006-05-17 00:00:00' as the timespec, and
> >> the documentation carefully avoids giving that example, but I
> >> think it is better to spell that limitation out.
> >
> > It doesn't? The "approxidate()" function should handle any reasonable date
> > specifier, and the above is certainly more than reasonable.
> >
> > Why doesn't approxidate handle it?
>
> The way I read the code is that get_sha1() would first do its
> magic at the first colon and feeds get_sha1_1() with prefix up
> to the first colon. This gets passed down to get_sha1_basic()
> and what approxidate() is fed is the suffix of that prefix. It
> ends up seeing stuff between '@' and ':'. I.e.
>
> "master@2006-05-17 00:00:00:cache.h"
>
> would ask for "00:00:cache.h" file in the "master" branch as of
> timestamp "2006-05-17 00".
Good catch. I'll see if I can deal with it later; probably early
tomorrow morning before work. It may just come down to documenting
this particular case as ambiguous and sure to parse the way you
did not mean it to. :-)
I tested a bunch of other date formats but not the basic ISO. Argh.
I'll send a test case soon for the expression parsing here, to be
sure we pull stuff from the log as expected as well as parse the
expression in a consistent way between releases. :-)
--
Shawn.
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [RFC 5/5] Support 'master@2 hours ago' syntax
2006-05-17 11:07 ` Junio C Hamano
2006-05-17 14:46 ` Linus Torvalds
@ 2006-05-17 22:06 ` Shawn Pearce
2006-05-17 22:32 ` Shawn Pearce
1 sibling, 1 reply; 7+ messages in thread
From: Shawn Pearce @ 2006-05-17 22:06 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git
Junio C Hamano <junkio@cox.net> wrote:
> Shawn Pearce <spearce@spearce.org> writes:
>
> + fprintf(stderr, "warning: Log %s only goes back to %s.\n",
> + logfile, show_rfc2822_date(date, tz));
> + return 0;
>
> I am not sure about this part. If the oldest log entry was 3
> hours ago, the second oldest 2 hours ago, we can tell during
> that one hour period the ref was at that point. If the user
> asked "ref as of four hours ago", and if the oldest log entry
> had old SHA1 that is not 0{40} (because the log was not enabled
> before that record), it might make more sense to give that back.
If I understand my own code here what I'm doing is walking back
in the log file, realizing I fell off the first line of it, then
loading the old ref from the first line. This is the oldest ref
I can find so I return it as a valid ref to the caller but I'm
printing out this warning to tell the user that the oldest point
in time I found in the log is effectively the update date as I have
no idea when that old sha1 first became the value of the ref.
So I think I'm doing what you are expecting here. The log will start
with the value in the ref at the time the log started, not 0{40}
and that value is what gets returned when we have this warning
come out.. That's the best anyone can expect...
> Also I wonder how much complexity would we suffer and how much
> efficiency would we gain if we binary search the logdata (the
> committer info is variable length, so you would need to resync
> in each step).
I thought about doing this but did not think it would be worth the
effort (either developer to code or CPU to execute) at this point
in time. I don't think users will be pulling refs from the log very
often and if they are they will probably be pulling from recent time,
not very far back. Thus starting at the end and walking back is
probably "good enough".
But if it proves to be too slow in practice I'm sure I can come up
with a faster way to walk through the log. :-)
--
Shawn.
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [RFC 5/5] Support 'master@2 hours ago' syntax
2006-05-17 22:06 ` Shawn Pearce
@ 2006-05-17 22:32 ` Shawn Pearce
0 siblings, 0 replies; 7+ messages in thread
From: Shawn Pearce @ 2006-05-17 22:32 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git
Shawn Pearce <spearce@spearce.org> wrote:
> Junio C Hamano <junkio@cox.net> wrote:
> >
> > Also I wonder how much complexity would we suffer and how much
> > efficiency would we gain if we binary search the logdata (the
> > committer info is variable length, so you would need to resync
> > in each step).
>
> I thought about doing this but did not think it would be worth the
> effort (either developer to code or CPU to execute) at this point
> in time. I don't think users will be pulling refs from the log very
> often and if they are they will probably be pulling from recent time,
> not very far back. Thus starting at the end and walking back is
> probably "good enough".
>
> But if it proves to be too slow in practice I'm sure I can come up
> with a faster way to walk through the log. :-)
I just ran a test on my PowerBook: walking a 10,000 line log file and
extracting the very oldest commit along. Each hit on git-rev-parse
seems to took about 100 ms. Hardly worth worrying about for casual
use. Further git-rev-parse is taking 73 ms just to run '--verify
HEAD' so an extra 30 ms to read the 10k log is pretty much nothing.
[spearce@pb15 trash]$ wc -l .git/logs/refs/heads/master
10000 .git/logs/refs/heads/master
[spearce@pb15 trash]$ head -n 1 .git/logs/refs/heads/master
b943559a305bdd6bdee2cef6e5df2413c3d30a00 0000000000000000000000000000000000000000 A U Thor <example@example.com> 1136091600 -0500
[spearce@pb15 trash]$ perl -e 'print scalar(localtime shift),"\n"' 1136091600
Sun Jan 1 00:00:00 2006
[spearce@pb15 trash]$ time ../../git-rev-parse --verify HEAD@'300 days'
warning: Log .git/logs/refs/heads/master only goes back to Thu, 1 Jan 1970 00:00:00 +0000.
b943559a305bdd6bdee2cef6e5df2413c3d30a00
real 0m0.112s
user 0m0.029s
sys 0m0.023s
[spearce@pb15 trash]$ time ../../git-rev-parse --verify HEAD@'300 days'
warning: Log .git/logs/refs/heads/master only goes back to Thu, 1 Jan 1970 00:00:00 +0000.
b943559a305bdd6bdee2cef6e5df2413c3d30a00
real 0m0.105s
user 0m0.029s
sys 0m0.023s
--
Shawn.
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2006-05-17 22:32 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-05-17 9:56 [RFC 5/5] Support 'master@2 hours ago' syntax Shawn Pearce
2006-05-17 11:07 ` Junio C Hamano
2006-05-17 14:46 ` Linus Torvalds
2006-05-17 18:32 ` Junio C Hamano
2006-05-17 21:39 ` Shawn Pearce
2006-05-17 22:06 ` Shawn Pearce
2006-05-17 22:32 ` Shawn Pearce
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).