* [PATCH 1/2] Delete ref $frotz by moving ref file to "deleted-$frotz~ref".
@ 2006-10-14 13:39 Christian Couder
2006-10-14 18:47 ` Junio C Hamano
0 siblings, 1 reply; 6+ messages in thread
From: Christian Couder @ 2006-10-14 13:39 UTC (permalink / raw)
To: Junio Hamano; +Cc: git
The idea is that moving:
$GIT_DIR/refs/<refpath>/frotz
to:
$GIT_DIR/deleted-refs/<refpath>/frotz~ref
maybe cheaper and safer than repacking the refs without the
deleted one.
On the other hand now when resolving a ref we have to check
if a related deleted ref file exists.
The new "delete_ref" function is similar to "write_ref_sha1".
Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
---
refs.c | 160 ++++++++++++++++++++++++++++++++++++++++------------------------
refs.h | 2 +
2 files changed, 102 insertions(+), 60 deletions(-)
diff --git a/refs.c b/refs.c
index 89cffaf..6a8da58 100644
--- a/refs.c
+++ b/refs.c
@@ -176,6 +176,38 @@ static struct ref_list *get_loose_refs(v
return cached_refs.loose;
}
+static char *get_del_ref_name(const char *ref_name)
+{
+ size_t ref_name_len = strlen(ref_name);
+ char *delref_name = xmalloc(ref_name_len + 13);
+
+ strncpy(delref_name, "deleted-", 8);
+ strncpy(delref_name + 8, ref_name, ref_name_len);
+ strcpy(delref_name + 8 + ref_name_len, "~ref");
+
+ return delref_name;
+}
+
+static int lstat_del_ref_file(const char *ref_name, struct stat *st)
+{
+ char *delref_name = get_del_ref_name(ref_name);
+ char *delref_file = git_path("%s", delref_name);
+
+ int ret = lstat(delref_file, st);
+
+ free(delref_name);
+ return ret;
+}
+
+static char *get_del_ref_file(const char *ref_name)
+{
+ char *delref_name = get_del_ref_name(ref_name);
+ char *delref_file = xstrdup(git_path("%s", delref_name));
+
+ free(delref_name);
+ return delref_file;
+}
+
/* We allow "recursive" symbolic refs. Only within reason, though */
#define MAXDEPTH 5
@@ -204,6 +236,17 @@ const char *resolve_ref(const char *ref,
* born. It is NOT OK if we are resolving for
* reading.
*/
+
+ if (!lstat_del_ref_file(ref, &st)) {
+ /* The ref has been deleted. */
+ if (flag)
+ *flag |= REF_ISDELETED;
+ if (reading)
+ return NULL;
+ hashclr(sha1);
+ return ref;
+ }
+
if (lstat(path, &st) < 0) {
struct ref_list *list = get_packed_refs();
while (list) {
@@ -541,12 +584,14 @@ static struct ref_lock *lock_ref_sha1_ba
struct ref_lock *lock;
struct stat st;
int last_errno = 0;
+ int default_flag;
+ int *ref_flag = flag ? flag : &default_flag;
int mustexist = (old_sha1 && !is_null_sha1(old_sha1));
lock = xcalloc(1, sizeof(struct ref_lock));
lock->lock_fd = -1;
- ref = resolve_ref(ref, lock->old_sha1, mustexist, flag);
+ ref = resolve_ref(ref, lock->old_sha1, mustexist, ref_flag);
if (!ref && errno == EISDIR) {
/* we are trying to lock foo but we used to
* have foo/bar which now does not exist;
@@ -559,7 +604,7 @@ static struct ref_lock *lock_ref_sha1_ba
error("there are still refs under '%s'", orig_ref);
goto error_return;
}
- ref = resolve_ref(orig_ref, lock->old_sha1, mustexist, flag);
+ ref = resolve_ref(orig_ref, lock->old_sha1, mustexist, ref_flag);
}
if (!ref) {
last_errno = errno;
@@ -568,7 +613,8 @@ static struct ref_lock *lock_ref_sha1_ba
goto error_return;
}
if (is_null_sha1(lock->old_sha1)) {
- /* The ref did not exist and we are creating it.
+ /* The ref did not exist (or has been deleted)
+ * and we are (re)creating it.
* Make sure there is no existing ref that is packed
* whose name begins with our refname, nor a ref whose
* name is a proper prefix of our refname.
@@ -580,15 +626,19 @@ static struct ref_lock *lock_ref_sha1_ba
int len = strlen(list->name);
int cmplen = (namlen < len) ? namlen : len;
const char *lead = (namlen < len) ? list->name : ref;
+ struct stat st;
if (!strncmp(ref, list->name, cmplen) &&
- lead[cmplen] == '/') {
+ lead[cmplen] == '/' &&
+ lstat_del_ref_file(list->name, &st)) {
error("'%s' exists; cannot create '%s'",
list->name, ref);
goto error_return;
}
list = list->next;
}
+ if (*ref_flag & REF_ISDELETED)
+ lock->del_file = get_del_ref_file(ref);
}
lock->lk = xcalloc(1, sizeof(struct lock_file));
@@ -627,77 +677,60 @@ struct ref_lock *lock_any_ref_for_update
return lock_ref_sha1_basic(ref, old_sha1, NULL);
}
-static struct lock_file packlock;
-
-static int repack_without_ref(const char *refname)
+/* Remove the ref "refs/$frotz" by creating "deleted-refs/$frotz~ref". */
+int delete_ref(const char *ref_name, unsigned char *sha1)
{
- struct ref_list *list, *packed_ref_list;
- int fd;
- int found = 0;
+ int flag = 0;
+ static char term = '\n';
+ struct ref_lock *lock = lock_ref_sha1_basic(ref_name, sha1, &flag);
- packed_ref_list = get_packed_refs();
- for (list = packed_ref_list; list; list = list->next) {
- if (!strcmp(refname, list->name)) {
- found = 1;
- break;
- }
+ if (!lock)
+ return -1;
+ if (flag & REF_ISDELETED) {
+ unlock_ref(lock);
+ return 0; /* Already deleted is ok. */
}
- if (!found)
- return 0;
- memset(&packlock, 0, sizeof(packlock));
- fd = hold_lock_file_for_update(&packlock, git_path("packed-refs"), 0);
- if (fd < 0)
- return error("cannot delete '%s' from packed refs", refname);
-
- for (list = packed_ref_list; list; list = list->next) {
- char line[PATH_MAX + 100];
- int len;
-
- if (!strcmp(refname, list->name))
- continue;
- len = snprintf(line, sizeof(line), "%s %s\n",
- sha1_to_hex(list->sha1), list->name);
- /* this should not happen but just being defensive */
- if (len > sizeof(line))
- die("too long a refname '%s'", list->name);
- write_or_die(fd, line, len);
+ if (write(lock->lock_fd, sha1_to_hex(sha1), 40) != 40 ||
+ write(lock->lock_fd, &term, 1) != 1
+ || close(lock->lock_fd) < 0) {
+ error("Couldn't write %s", lock->lk->filename);
+ unlock_ref(lock);
+ return -1;
}
- return commit_lock_file(&packlock);
-}
-
-int delete_ref(const char *refname, unsigned char *sha1)
-{
- struct ref_lock *lock;
- int err, i, ret = 0, flag = 0;
-
- lock = lock_ref_sha1_basic(refname, sha1, &flag);
- if (!lock)
- return 1;
+ invalidate_cached_refs();
if (!(flag & REF_ISPACKED)) {
/* loose */
- i = strlen(lock->lk->filename) - 5; /* .lock */
+ int i = strlen(lock->lk->filename) - 5; /* .lock */
lock->lk->filename[i] = 0;
- err = unlink(lock->lk->filename);
- if (err) {
- ret = 1;
+ if (unlink(lock->lk->filename)) {
error("unlink(%s) failed: %s",
lock->lk->filename, strerror(errno));
+ unlock_ref(lock);
+ return -1;
}
lock->lk->filename[i] = '.';
}
- /* removing the loose one could have resurrected an earlier
- * packed one. Also, if it was not loose we need to repack
- * without it.
- */
- ret |= repack_without_ref(refname);
-
- err = unlink(lock->log_file);
- if (err && errno != ENOENT)
+ if (unlink(lock->log_file) && errno != ENOENT)
fprintf(stderr, "warning: unlink(%s) failed: %s",
lock->log_file, strerror(errno));
- invalidate_cached_refs();
+ if (!lock->del_file)
+ lock->del_file = get_del_ref_file(ref_name);
+ if (safe_create_leading_directories(lock->del_file)) {
+ error("unable to create directory for %s: %s",
+ lock->del_file, strerror(errno));
+ unlock_ref(lock);
+ return -1;
+ }
+ if (rename(lock->lk->filename, lock->del_file)) {
+ error("rename(%s -> %s) failed: %s",
+ lock->lk->filename, lock->del_file,
+ strerror(errno));
+ unlock_ref(lock);
+ return -1;
+ }
+ lock->lock_fd = -1;
unlock_ref(lock);
- return ret;
+ return 0;
}
void unlock_ref(struct ref_lock *lock)
@@ -710,6 +743,7 @@ void unlock_ref(struct ref_lock *lock)
}
free(lock->ref_name);
free(lock->log_file);
+ free(lock->del_file);
free(lock);
}
@@ -786,6 +820,12 @@ int write_ref_sha1(struct ref_lock *lock
unlock_ref(lock);
return -1;
}
+ if (lock->del_file && unlink(lock->del_file) && errno != ENOENT) {
+ error("unlink(%s) failed: %s",
+ lock->del_file, strerror(errno));
+ unlock_ref(lock);
+ return -1;
+ }
if (commit_lock_file(lock->lk)) {
error("Couldn't set %s", lock->ref_name);
unlock_ref(lock);
diff --git a/refs.h b/refs.h
index a57d437..5aa7755 100644
--- a/refs.h
+++ b/refs.h
@@ -4,6 +4,7 @@ #define REFS_H
struct ref_lock {
char *ref_name;
char *log_file;
+ char *del_file;
struct lock_file *lk;
unsigned char old_sha1[20];
int lock_fd;
@@ -16,6 +17,7 @@ struct ref_lock {
*/
#define REF_ISSYMREF 01
#define REF_ISPACKED 02
+#define REF_ISDELETED 04
typedef int each_ref_fn(const char *refname, const unsigned char *sha1, int flags, void *cb_data);
extern int head_ref(each_ref_fn, void *);
extern int for_each_ref(each_ref_fn, void *);
--
1.4.3.rc2.gbcf275-dirty
^ permalink raw reply related [flat|nested] 6+ messages in thread* Re: [PATCH 1/2] Delete ref $frotz by moving ref file to "deleted-$frotz~ref".
2006-10-14 13:39 [PATCH 1/2] Delete ref $frotz by moving ref file to "deleted-$frotz~ref" Christian Couder
@ 2006-10-14 18:47 ` Junio C Hamano
2006-10-17 4:26 ` Christian Couder
0 siblings, 1 reply; 6+ messages in thread
From: Junio C Hamano @ 2006-10-14 18:47 UTC (permalink / raw)
To: Christian Couder; +Cc: git
Christian Couder <chriscool@tuxfamily.org> writes:
> The idea is that moving:
>
> $GIT_DIR/refs/<refpath>/frotz
>
> to:
>
> $GIT_DIR/deleted-refs/<refpath>/frotz~ref
>
> maybe cheaper and safer than repacking the refs without the
> deleted one.
Before actually implementing delete_ref(), we discussed this
"deleted-refs/" idea. but I do not think it is a direction we
would want to go.
Ref deletion is an operation that happens far far rarer than
updates and lookups, and I deliberately chose to optimize for
the latter.
There are valid reasons to delete refs, and one most frequent
one would be to discard throw-away temporary branches you may
have needed to switch to when your work was interrupted. But
even counting that kind of deletion, I imagine that you would
not be creating and removing more than one branch per every 10
commits you would create, and I also imagine you would be
invoking not less than 5 operations that inspect project
history, such as git-log and git-diff, between commits you make.
An operation to build a new commit itself needs at least two
lookups (one to see what's current upfront, and another to see
nobody touched it upon lockless update). Most history-related
operations at least need to look at one (typically, HEAD), and
any refname you use to spell the name of an object or revision
range (e.g. "v2.6.17..v2.6.18~10" needs to look at tags/v2.6.17
and tags/v2.6.18). Optimizing for deletion path at the expense
of giving even a tiny penalty to lookup path is optimizing for a
wrong case, and that is why I rejected deleted-refs/ idea when I
originally did the delete_ref() implementation.
Having said that, I would definitely think there still are rooms
for optimization in the current implementation. For example, I
do not recall offhand if I made the code to unconditionally
repack without the deleted one, or only repack when we know the
ref being deleted exists in the packed refs file. The latter
obviously would be more efficient and if we currently do not do
that, making it do so is a very welcomed change. Especially,
given that the latest code does not pack branch heads by
default, when a temporary throw-away branch is discarded, it is
far more likely that it is not packed and we do not need to
repack.
Independent from this topic of "removing deleted from packed" vs
"using deleted-refs/", I think we still do not handle .git/logs/
hierarchy correctly in the current code when ref deletion is
involved. We already made it to correctly unlink/rmdir/mkdir
on-demand for .git/refs/ hierarchy and I think we need to have
code that parallels that for the .git/logs/ side.
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH 1/2] Delete ref $frotz by moving ref file to "deleted-$frotz~ref".
2006-10-14 18:47 ` Junio C Hamano
@ 2006-10-17 4:26 ` Christian Couder
2006-10-17 4:41 ` Junio C Hamano
2006-10-17 9:24 ` Jakub Narebski
0 siblings, 2 replies; 6+ messages in thread
From: Christian Couder @ 2006-10-17 4:26 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git
Junio C Hamano wrote:
> Christian Couder <chriscool@tuxfamily.org> writes:
> > The idea is that moving:
> >
> > $GIT_DIR/refs/<refpath>/frotz
> >
> > to:
> >
> > $GIT_DIR/deleted-refs/<refpath>/frotz~ref
> >
> > maybe cheaper and safer than repacking the refs without the
> > deleted one.
>
> Before actually implementing delete_ref(), we discussed this
> "deleted-refs/" idea. but I do not think it is a direction we
> would want to go.
>
> Ref deletion is an operation that happens far far rarer than
> updates and lookups, and I deliberately chose to optimize for
> the latter.
I think that the ref deletion usage depends on the policy of people using
git, and there may be people that delete a ref very often.
For example, when git becomes a major SCM, there may be people working on
big projects that want to create a new branch for each new bug and then
delete the branch when the code on the bug branch has been integrated into
a new release and the bug is closed.
> There are valid reasons to delete refs, and one most frequent
> one would be to discard throw-away temporary branches you may
> have needed to switch to when your work was interrupted. But
> even counting that kind of deletion, I imagine that you would
> not be creating and removing more than one branch per every 10
> commits you would create, and I also imagine you would be
> invoking not less than 5 operations that inspect project
> history, such as git-log and git-diff, between commits you make.
The operations that inspect project history may use a ref cache or something
so that a lookup on the disk may not be needed. So only the ref creation or
update rate versus delete rate may matter.
> An operation to build a new commit itself needs at least two
> lookups (one to see what's current upfront, and another to see
> nobody touched it upon lockless update). Most history-related
> operations at least need to look at one (typically, HEAD), and
> any refname you use to spell the name of an object or revision
> range (e.g. "v2.6.17..v2.6.18~10" needs to look at tags/v2.6.17
> and tags/v2.6.18). Optimizing for deletion path at the expense
> of giving even a tiny penalty to lookup path is optimizing for a
> wrong case, and that is why I rejected deleted-refs/ idea when I
> originally did the delete_ref() implementation.
The lookup code is already using cached packed refs. It could also use
cached loose and deleted refs, so the lookup penalty would be very very
small. By the way, the OS may already cache loose and deleted ref file stat
information, so that may right now be a very small penalty.
And at least, algorythmically speaking, with my patch the deletion path is
now independent of the number of existing refs, so it's much better (while
the lookup path stay the same).
If there are thousand of refs and a heavy I/O load, rewritting the packed
ref file for each deletion means writing on disk something that may not fit
in the disk cache. It may be very bad.
My patch also has a few added benefits like making it possible to have a
read only packed ref file, while still letting people delete refs. It also
make it possible to resurect a deleted ref, or to control branch deletion
rights on a per ref directory basis (though that may not be very usefull).
And the fact that the packed ref file (which may be read only for added
safety) is not rewritten each time a ref is deleted make things much safer
if there are many users working on the same git repository.
Christian.
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH 1/2] Delete ref $frotz by moving ref file to "deleted-$frotz~ref".
2006-10-17 4:26 ` Christian Couder
@ 2006-10-17 4:41 ` Junio C Hamano
2006-10-17 5:07 ` Shawn Pearce
2006-10-17 9:24 ` Jakub Narebski
1 sibling, 1 reply; 6+ messages in thread
From: Junio C Hamano @ 2006-10-17 4:41 UTC (permalink / raw)
To: Christian Couder; +Cc: git
Christian Couder <chriscool@tuxfamily.org> writes:
> For example, when git becomes a major SCM, there may be people working on
> big projects that want to create a new branch for each new bug and then
> delete the branch when the code on the bug branch has been integrated into
> a new release and the bug is closed.
I would say that is a very valid way to work with git,
regardless of the size of project. Now, how often would you
create such a per-bug branch and delete one, compared to the
number of operations that would require ref lookups? Your
example actually supports what I've said -- optimizing for
deletion at the cost of more expensive lookups is wrong.
> The operations that inspect project history may use a ref cache or something
> so that a lookup on the disk may not be needed. So only the ref creation or
> update rate versus delete rate may matter.
Stop and think about what you are saying. What's a ref cache?
We do not have such a beast today (unless you equate it with
packed-refs file), and you would need to design and implement
it, but think about how you make that operate. You would need
to invalidate it when you delete a ref using the deleted-ref/
approach; that's not much different from repacking packed-refs
file without the ref you just deleted, no?
Of course you can argue that instead of repacking you always
stat deleted-ref/ hierarchy; in other words, you can argue that
you can make deletion path faster by penalizing the lookup path.
So I do not think using "ref cache" (whatever it is, and however
it operates) does not change the situation a bit.
> If there are thousand of refs and a heavy I/O load, rewritting the packed
> ref file for each deletion means writing on disk something that may not fit
> in the disk cache. It may be very bad.
If the goal is to optimize for deletion path, then that is
true. My point is that we do not want to optimize for deletion
path at the expense of more costly lookup path.
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH 1/2] Delete ref $frotz by moving ref file to "deleted-$frotz~ref".
2006-10-17 4:41 ` Junio C Hamano
@ 2006-10-17 5:07 ` Shawn Pearce
0 siblings, 0 replies; 6+ messages in thread
From: Shawn Pearce @ 2006-10-17 5:07 UTC (permalink / raw)
To: Junio C Hamano; +Cc: Christian Couder, git
Junio C Hamano <junkio@cox.net> wrote:
> Christian Couder <chriscool@tuxfamily.org> writes:
>
> > For example, when git becomes a major SCM, there may be people working on
> > big projects that want to create a new branch for each new bug and then
> > delete the branch when the code on the bug branch has been integrated into
> > a new release and the bug is closed.
>
> I would say that is a very valid way to work with git,
> regardless of the size of project. Now, how often would you
> create such a per-bug branch and delete one, compared to the
> number of operations that would require ref lookups? Your
> example actually supports what I've said -- optimizing for
> deletion at the cost of more expensive lookups is wrong.
I agree completely with Junio. I make a lot of temporary "throw
away" branches in Git; often they live on disk for 5/10 minutes at
most before getting deleted again. I also make a smaller number
(but still significant) of longer lived branches that hang around
for days or weeks before getting deleted.
In the former case (throw away) I wouldn't want those refs added
to the packed refs file. They just don't live around long enough
to make it worth it. And when I delete them I want them gone.
So moving them off to a 'deleted-refs' directory to indicate they
are gone is just delaying the removal. Not something I want.
In the latter case (longer lived) I don't mind if I have to sit
though an extra 500 ms to rewrite the entire packed refs file
during a ref delete operation. I lived with the branch for weeks;
I can probably spare a second to finally get rid of it once its
gone upstream. Heck, the push to move that branch upstream might
actually take longer to unpack the loose objects contained on that
branch than the packed ref delete, even on 1000s of refs.
> If the goal is to optimize for deletion path, then that is
> true. My point is that we do not want to optimize for deletion
> path at the expense of more costly lookup path.
Absolutely. I figure I do ref lookups at least 3x the number of ref
deletes I perform. And that's just thinking about the sequence of
commands I commonly perform against my "throw away" branches which
live for at most 10 minutes, let alone my longer lived branches
that hang around for weeks.
--
Shawn.
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH 1/2] Delete ref $frotz by moving ref file to "deleted-$frotz~ref".
2006-10-17 4:26 ` Christian Couder
2006-10-17 4:41 ` Junio C Hamano
@ 2006-10-17 9:24 ` Jakub Narebski
1 sibling, 0 replies; 6+ messages in thread
From: Jakub Narebski @ 2006-10-17 9:24 UTC (permalink / raw)
To: git
Christian Couder wrote:
>> Ref deletion is an operation that happens far far rarer than
>> updates and lookups, and I deliberately chose to optimize for
>> the latter.
>
> I think that the ref deletion usage depends on the policy of people using
> git, and there may be people that delete a ref very often.
>
> For example, when git becomes a major SCM, there may be people working on
> big projects that want to create a new branch for each new bug and then
> delete the branch when the code on the bug branch has been integrated into
> a new release and the bug is closed.
So do not pack branches then, pack only tags.
--
Jakub Narebski
Warsaw, Poland
ShadeHawk on #git
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2006-10-17 9:24 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-10-14 13:39 [PATCH 1/2] Delete ref $frotz by moving ref file to "deleted-$frotz~ref" Christian Couder
2006-10-14 18:47 ` Junio C Hamano
2006-10-17 4:26 ` Christian Couder
2006-10-17 4:41 ` Junio C Hamano
2006-10-17 5:07 ` Shawn Pearce
2006-10-17 9:24 ` Jakub Narebski
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).