From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Junio C Hamano" <gitster@pobox.com>,
"Jens Lehmann" <Jens.Lehmann@web.de>,
"Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH v2 1/8] sha1_file: allow to select pack origin when looking up an object
Date: Tue, 30 Apr 2013 10:42:45 +0700 [thread overview]
Message-ID: <1367293372-1958-2-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1367293372-1958-1-git-send-email-pclouds@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
cache.h | 29 +++++++++--
fast-import.c | 2 +-
pack-check.c | 2 +-
sha1_file.c | 165 +++++++++++++++++++++++++++++++++++++++-------------------
streaming.c | 4 +-
5 files changed, 141 insertions(+), 61 deletions(-)
diff --git a/cache.h b/cache.h
index 94ca1ac..bed403a 100644
--- a/cache.h
+++ b/cache.h
@@ -744,12 +744,33 @@ char *strip_path_suffix(const char *path, const char *suffix);
int daemon_avoid_alias(const char *path);
int offset_1st_component(const char *path);
+/*
+ * odb origin bit mask:
+ * - 8 low bits are for requested objects
+ * - 8 high bits are for their base objects
+ */
+#define ODB_LOCAL 1
+#define ODB_ALT 2
+#define ODB_CACHED 4
+/*
+ * This flag is used to track if the origin selection is from
+ * "odb_default" below. Not a real source, not to be passed to any
+ * function calls explicitly.
+ */
+#define ODB_DEFAULT 8
+
+extern unsigned int odb_default;
+
/* object replacement */
#define READ_SHA1_FILE_REPLACE 1
-extern void *read_sha1_file_extended(const unsigned char *sha1, enum object_type *type, unsigned long *size, unsigned flag);
+extern void *read_sha1_file_extended(const unsigned char *sha1,
+ unsigned int origin,
+ enum object_type *type,
+ unsigned long *size,
+ unsigned flag);
static inline void *read_sha1_file(const unsigned char *sha1, enum object_type *type, unsigned long *size)
{
- return read_sha1_file_extended(sha1, type, size, READ_SHA1_FILE_REPLACE);
+ return read_sha1_file_extended(sha1, odb_default, type, size, READ_SHA1_FILE_REPLACE);
}
extern const unsigned char *do_lookup_replace_object(const unsigned char *sha1);
static inline const unsigned char *lookup_replace_object(const unsigned char *sha1)
@@ -765,7 +786,7 @@ extern int hash_sha1_file(const void *buf, unsigned long len, const char *type,
extern int write_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *return_sha1);
extern int pretend_sha1_file(void *, unsigned long, enum object_type, unsigned char *);
extern int force_object_loose(const unsigned char *sha1, time_t mtime);
-extern void *map_sha1_file(const unsigned char *sha1, unsigned long *size);
+extern void *map_sha1_file(const unsigned char *sha1, unsigned int, unsigned long *size);
extern int unpack_sha1_header(git_zstream *stream, unsigned char *map, unsigned long mapsize, void *buffer, unsigned long bufsiz);
extern int parse_sha1_header(const char *hdr, unsigned long *sizep);
@@ -1091,7 +1112,7 @@ extern const unsigned char *nth_packed_object_sha1(struct packed_git *, uint32_t
extern off_t nth_packed_object_offset(const struct packed_git *, uint32_t);
extern off_t find_pack_entry_one(const unsigned char *, struct packed_git *);
extern int is_pack_valid(struct packed_git *);
-extern void *unpack_entry(struct packed_git *, off_t, enum object_type *, unsigned long *);
+extern void *unpack_entry(struct packed_git *, unsigned int, off_t, enum object_type *, unsigned long *);
extern unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep);
extern unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t);
extern int unpack_object_header(struct packed_git *, struct pack_window **, off_t *, unsigned long *);
diff --git a/fast-import.c b/fast-import.c
index 5f539d7..8542786 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -1303,7 +1303,7 @@ static void *gfi_unpack_entry(
*/
p->pack_size = pack_size + 20;
}
- return unpack_entry(p, oe->idx.offset, &type, sizep);
+ return unpack_entry(p, odb_default, oe->idx.offset, &type, sizep);
}
static const char *get_mode(const char *str, uint16_t *modep)
diff --git a/pack-check.c b/pack-check.c
index 63a595c..981dc98 100644
--- a/pack-check.c
+++ b/pack-check.c
@@ -116,7 +116,7 @@ static int verify_packfile(struct packed_git *p,
sha1_to_hex(entries[i].sha1),
p->pack_name, (uintmax_t)offset);
}
- data = unpack_entry(p, entries[i].offset, &type, &size);
+ data = unpack_entry(p, odb_default, entries[i].offset, &type, &size);
if (!data)
err = error("cannot unpack %s from %s at offset %"PRIuMAX"",
sha1_to_hex(entries[i].sha1), p->pack_name,
diff --git a/sha1_file.c b/sha1_file.c
index 99ead7c..d1f44c9 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -37,6 +37,12 @@ static inline uintmax_t sz_fmt(size_t s) { return s; }
const unsigned char null_sha1[20];
/*
+ * clear_delta_base_cache() may be needed if odb_default is changed to
+ * a narrower origin set.
+ */
+unsigned int odb_default = ODB_DEFAULT | ODB_LOCAL | ODB_ALT | ODB_CACHED;
+
+/*
* This is meant to hold a *small* number of objects that you would
* want read_sha1_file() to be able to return, but yet you do not want
* to write them into the object store (e.g. a browse-only
@@ -59,11 +65,15 @@ static struct cached_object empty_tree = {
static struct packed_git *last_found_pack;
-static struct cached_object *find_cached_object(const unsigned char *sha1)
+static struct cached_object *find_cached_object(const unsigned char *sha1,
+ unsigned int origin)
{
int i;
struct cached_object *co = cached_objects;
+ if (!(origin & ODB_CACHED))
+ return NULL;
+
for (i = 0; i < cached_object_nr; i++, co++) {
if (!hashcmp(co->sha1, sha1))
return co;
@@ -404,6 +414,12 @@ void foreach_alt_odb(alt_odb_fn fn, void *cb)
return;
}
+static inline int match_origin(unsigned int origin, int pack_local)
+{
+ return (pack_local && (origin & ODB_LOCAL)) ||
+ (!pack_local && (origin & ODB_ALT));
+}
+
void prepare_alt_odb(void)
{
const char *alt;
@@ -420,28 +436,31 @@ void prepare_alt_odb(void)
read_info_alternates(get_object_directory(), 0);
}
-static int has_loose_object_local(const unsigned char *sha1)
+static int has_loose_object_extended(const unsigned char *sha1,
+ unsigned int origin)
{
- char *name = sha1_file_name(sha1);
- return !access(name, F_OK);
-}
-
-int has_loose_object_nonlocal(const unsigned char *sha1)
-{
- struct alternate_object_database *alt;
- prepare_alt_odb();
- for (alt = alt_odb_list; alt; alt = alt->next) {
- fill_sha1_path(alt->name, sha1);
- if (!access(alt->base, F_OK))
+ if (origin & ODB_LOCAL) {
+ char *name = sha1_file_name(sha1);
+ if (!access(name, F_OK))
return 1;
}
+
+ if (origin & ODB_ALT) {
+ struct alternate_object_database *alt;
+ prepare_alt_odb();
+ for (alt = alt_odb_list; alt; alt = alt->next) {
+ fill_sha1_path(alt->name, sha1);
+ if (!access(alt->base, F_OK))
+ return 1;
+ }
+ }
return 0;
}
-static int has_loose_object(const unsigned char *sha1)
+int has_loose_object_nonlocal(const unsigned char *sha1)
{
- return has_loose_object_local(sha1) ||
- has_loose_object_nonlocal(sha1);
+ unsigned int origin = ODB_ALT;
+ return has_loose_object_extended(sha1, origin);
}
static unsigned int pack_used_ctr;
@@ -1303,16 +1322,21 @@ static int git_open_noatime(const char *name)
}
}
-static int open_sha1_file(const unsigned char *sha1)
+static int open_sha1_file(const unsigned char *sha1, unsigned int origin)
{
int fd;
char *name = sha1_file_name(sha1);
struct alternate_object_database *alt;
- fd = git_open_noatime(name);
- if (fd >= 0)
+ if ((origin & ODB_LOCAL) &&
+ (fd = git_open_noatime(name)) >= 0)
return fd;
+ if (!(origin & ODB_ALT)) {
+ errno = ENOENT;
+ return -1;
+ }
+
prepare_alt_odb();
errno = ENOENT;
for (alt = alt_odb_list; alt; alt = alt->next) {
@@ -1325,12 +1349,14 @@ static int open_sha1_file(const unsigned char *sha1)
return -1;
}
-void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
+void *map_sha1_file(const unsigned char *sha1,
+ unsigned int origin,
+ unsigned long *size)
{
void *map;
int fd;
- fd = open_sha1_file(sha1);
+ fd = open_sha1_file(sha1, origin);
map = NULL;
if (fd >= 0) {
struct stat st;
@@ -1872,7 +1898,8 @@ static void clear_delta_base_cache_entry(struct delta_base_cache_entry *ent)
delta_base_cached -= ent->size;
}
-static void *cache_or_unpack_entry(struct packed_git *p, off_t base_offset,
+static void *cache_or_unpack_entry(struct packed_git *p,
+ unsigned int origin, off_t base_offset,
unsigned long *base_size, enum object_type *type, int keep_cache)
{
struct delta_base_cache_entry *ent;
@@ -1880,8 +1907,9 @@ static void *cache_or_unpack_entry(struct packed_git *p, off_t base_offset,
ent = get_delta_base_cache_entry(p, base_offset);
- if (!eq_delta_base_cache_entry(ent, p, base_offset))
- return unpack_entry(p, base_offset, type, base_size);
+ if (!(origin & ODB_DEFAULT) || /* only cache the default case */
+ !eq_delta_base_cache_entry(ent, p, base_offset))
+ return unpack_entry(p, origin, base_offset, type, base_size);
ret = ent->data;
@@ -1951,7 +1979,9 @@ add_delta_base_cache(struct packed_git *p, off_t base_offset,
return ent;
}
-static void *read_object(const unsigned char *sha1, enum object_type *type,
+static void *read_object(const unsigned char *sha1,
+ unsigned int origin,
+ enum object_type *type,
unsigned long *size);
static void write_pack_access_log(struct packed_git *p, off_t obj_offset)
@@ -1981,7 +2011,8 @@ struct unpack_entry_stack_ent {
unsigned long size;
};
-void *unpack_entry(struct packed_git *p, off_t obj_offset,
+void *unpack_entry(struct packed_git *p,
+ unsigned int origin, off_t obj_offset,
enum object_type *final_type, unsigned long *final_size)
{
struct pack_window *w_curs = NULL;
@@ -2093,7 +2124,7 @@ void *unpack_entry(struct packed_git *p, off_t obj_offset,
data = NULL;
- if (base)
+ if (base && (origin & ODB_DEFAULT))
ent = add_delta_base_cache(p, obj_offset, base, base_size, type);
if (!base) {
@@ -2113,7 +2144,7 @@ void *unpack_entry(struct packed_git *p, off_t obj_offset,
sha1_to_hex(base_sha1), (uintmax_t)obj_offset,
p->pack_name);
mark_bad_packed_object(p, base_sha1);
- base = read_object(base_sha1, &type, &base_size);
+ base = read_object(base_sha1, origin, &type, &base_size);
}
}
@@ -2277,11 +2308,15 @@ int is_pack_valid(struct packed_git *p)
}
static int fill_pack_entry(const unsigned char *sha1,
+ unsigned int origin,
struct pack_entry *e,
struct packed_git *p)
{
off_t offset;
+ if (!match_origin(origin, p->pack_local))
+ return 0;
+
if (p->num_bad_objects) {
unsigned i;
for (i = 0; i < p->num_bad_objects; i++)
@@ -2310,7 +2345,9 @@ static int fill_pack_entry(const unsigned char *sha1,
return 1;
}
-static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e)
+static int find_pack_entry(const unsigned char *sha1,
+ unsigned int origin,
+ struct pack_entry *e)
{
struct packed_git *p;
@@ -2318,11 +2355,12 @@ static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e)
if (!packed_git)
return 0;
- if (last_found_pack && fill_pack_entry(sha1, e, last_found_pack))
+ if (last_found_pack &&
+ fill_pack_entry(sha1, origin, e, last_found_pack))
return 1;
for (p = packed_git; p; p = p->next) {
- if (p == last_found_pack || !fill_pack_entry(sha1, e, p))
+ if (p == last_found_pack || !fill_pack_entry(sha1, origin, e, p))
continue;
last_found_pack = p;
@@ -2352,7 +2390,7 @@ static int sha1_loose_object_info(const unsigned char *sha1, unsigned long *size
git_zstream stream;
char hdr[32];
- map = map_sha1_file(sha1, &mapsize);
+ map = map_sha1_file(sha1, odb_default, &mapsize);
if (!map)
return error("unable to find %s", sha1_to_hex(sha1));
if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0)
@@ -2373,8 +2411,9 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi)
struct cached_object *co;
struct pack_entry e;
int status, rtype;
+ unsigned int origin = odb_default;
- co = find_cached_object(sha1);
+ co = find_cached_object(sha1, origin);
if (co) {
if (oi->sizep)
*(oi->sizep) = co->size;
@@ -2382,7 +2421,7 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi)
return co->type;
}
- if (!find_pack_entry(sha1, &e)) {
+ if (!find_pack_entry(sha1, origin, &e)) {
/* Most likely it's a loose object. */
status = sha1_loose_object_info(sha1, oi->sizep);
if (status >= 0) {
@@ -2392,7 +2431,7 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi)
/* Not a loose object; someone else may have just packed it. */
reprepare_packed_git();
- if (!find_pack_entry(sha1, &e))
+ if (!find_pack_entry(sha1, origin, &e))
return status;
}
@@ -2422,15 +2461,26 @@ int sha1_object_info(const unsigned char *sha1, unsigned long *sizep)
}
static void *read_packed_sha1(const unsigned char *sha1,
+ unsigned int origin,
enum object_type *type, unsigned long *size)
{
struct pack_entry e;
void *data;
- if (!find_pack_entry(sha1, &e))
+ if (!find_pack_entry(sha1, origin, &e))
return NULL;
- data = cache_or_unpack_entry(e.p, e.offset, size, type, 1);
- if (!data) {
+ data = cache_or_unpack_entry(e.p, origin, e.offset, size, type, 1);
+ if (!data &&
+ /*
+ * If a caller restricts the read to a single source via
+ * read_sha1_file_extended() and the object has its base in
+ * another source, we end up here. That is not as bad as the
+ * corrupted pack handled in the block below: the caller
+ * asked for it and must be able to deal with the result,
+ * so just return NULL without marking the sha-1 "bad".
+ * NOTE(review): the test below looks inverted -- confirm.
+ */
+ !(origin & ODB_DEFAULT)) {
/*
* We're probably in deep shit, but let's try to fetch
* the required object anyway from another pack or loose.
@@ -2440,7 +2490,7 @@ static void *read_packed_sha1(const unsigned char *sha1,
error("failed to read object %s at offset %"PRIuMAX" from %s",
sha1_to_hex(sha1), (uintmax_t)e.offset, e.p->pack_name);
mark_bad_packed_object(e.p, sha1);
- data = read_object(sha1, type, size);
+ data = read_object(sha1, origin, type, size);
}
return data;
}
@@ -2451,7 +2501,7 @@ int pretend_sha1_file(void *buf, unsigned long len, enum object_type type,
struct cached_object *co;
hash_sha1_file(buf, len, typename(type), sha1);
- if (has_sha1_file(sha1) || find_cached_object(sha1))
+ if (has_sha1_file(sha1) || find_cached_object(sha1, odb_default))
return 0;
if (cached_object_alloc <= cached_object_nr) {
cached_object_alloc = alloc_nr(cached_object_alloc);
@@ -2468,31 +2518,33 @@ int pretend_sha1_file(void *buf, unsigned long len, enum object_type type,
return 0;
}
-static void *read_object(const unsigned char *sha1, enum object_type *type,
+static void *read_object(const unsigned char *sha1,
+ unsigned int origin,
+ enum object_type *type,
unsigned long *size)
{
unsigned long mapsize;
void *map, *buf;
struct cached_object *co;
- co = find_cached_object(sha1);
+ co = find_cached_object(sha1, origin);
if (co) {
*type = co->type;
*size = co->size;
return xmemdupz(co->buf, co->size);
}
- buf = read_packed_sha1(sha1, type, size);
+ buf = read_packed_sha1(sha1, origin, type, size);
if (buf)
return buf;
- map = map_sha1_file(sha1, &mapsize);
+ map = map_sha1_file(sha1, origin, &mapsize);
if (map) {
buf = unpack_sha1_file(map, mapsize, type, size, sha1);
munmap(map, mapsize);
return buf;
}
reprepare_packed_git();
- return read_packed_sha1(sha1, type, size);
+ return read_packed_sha1(sha1, origin, type, size);
}
/*
@@ -2501,6 +2553,7 @@ static void *read_object(const unsigned char *sha1, enum object_type *type,
* messages themselves.
*/
void *read_sha1_file_extended(const unsigned char *sha1,
+ unsigned int origin,
enum object_type *type,
unsigned long *size,
unsigned flag)
@@ -2512,7 +2565,7 @@ void *read_sha1_file_extended(const unsigned char *sha1,
? lookup_replace_object(sha1) : sha1;
errno = 0;
- data = read_object(repl, type, size);
+ data = read_object(repl, origin, type, size);
if (data)
return data;
@@ -2524,7 +2577,7 @@ void *read_sha1_file_extended(const unsigned char *sha1,
die("replacement %s not found for %s",
sha1_to_hex(repl), sha1_to_hex(sha1));
- if (has_loose_object(repl)) {
+ if (has_loose_object_extended(repl, origin)) {
path = sha1_file_name(sha1);
die("loose object %s (stored in %s) is corrupt",
sha1_to_hex(repl), path);
@@ -2809,9 +2862,9 @@ int force_object_loose(const unsigned char *sha1, time_t mtime)
int hdrlen;
int ret;
- if (has_loose_object(sha1))
+ if (has_loose_object_extended(sha1, odb_default))
return 0;
- buf = read_packed_sha1(sha1, &type, &len);
+ buf = read_packed_sha1(sha1, odb_default, &type, &len);
if (!buf)
return error("cannot read sha1_file for %s", sha1_to_hex(sha1));
hdrlen = sprintf(hdr, "%s %lu", typename(type), len) + 1;
@@ -2832,16 +2885,22 @@ int has_pack_index(const unsigned char *sha1)
int has_sha1_pack(const unsigned char *sha1)
{
struct pack_entry e;
- return find_pack_entry(sha1, &e);
+ return find_pack_entry(sha1, odb_default, &e);
}
-int has_sha1_file(const unsigned char *sha1)
+static int has_sha1_file_extended(const unsigned char *sha1,
+ unsigned origin)
{
struct pack_entry e;
- if (find_pack_entry(sha1, &e))
+ if (find_pack_entry(sha1, origin, &e))
return 1;
- return has_loose_object(sha1);
+ return has_loose_object_extended(sha1, origin);
+}
+
+int has_sha1_file(const unsigned char *sha1)
+{
+ return has_sha1_file_extended(sha1, odb_default);
}
static void check_tree(const void *buf, size_t size)
diff --git a/streaming.c b/streaming.c
index cabcd9d..975002c 100644
--- a/streaming.c
+++ b/streaming.c
@@ -332,7 +332,7 @@ static struct stream_vtbl loose_vtbl = {
static open_method_decl(loose)
{
- st->u.loose.mapped = map_sha1_file(sha1, &st->u.loose.mapsize);
+ st->u.loose.mapped = map_sha1_file(sha1, odb_default, &st->u.loose.mapsize);
if (!st->u.loose.mapped)
return -1;
if (unpack_sha1_header(&st->z,
@@ -483,7 +483,7 @@ static struct stream_vtbl incore_vtbl = {
static open_method_decl(incore)
{
- st->u.incore.buf = read_sha1_file_extended(sha1, type, &st->size, 0);
+ st->u.incore.buf = read_sha1_file_extended(sha1, odb_default, type, &st->size, 0);
st->u.incore.read_ptr = 0;
st->vtbl = &incore_vtbl;
--
1.8.2.83.gc99314b
next prev parent reply other threads:[~2013-04-30 3:42 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-04-30 3:42 [PATCH v2 0/8] Some object db protection when add_submodule_odb is used Nguyễn Thái Ngọc Duy
2013-04-30 3:42 ` Nguyễn Thái Ngọc Duy [this message]
2013-04-30 6:01 ` [PATCH v2 1/8] sha1_file: allow to select pack origin when looking up an object Eric Sunshine
2013-04-30 3:42 ` [PATCH v2 2/8] sha1_file: keep track of alternate source of objects Nguyễn Thái Ngọc Duy
2013-04-30 3:42 ` [PATCH v2 3/8] sha1_file: mark alt object database from add_submodule_odb() Nguyễn Thái Ngọc Duy
2013-04-30 6:03 ` Eric Sunshine
2013-04-30 3:42 ` [PATCH v2 4/8] sha1_file: new object source for submodule's alt object database Nguyễn Thái Ngọc Duy
2013-04-30 6:07 ` Eric Sunshine
2013-04-30 3:42 ` [PATCH v2 5/8] commit.c: refuse to write commits referring to external objects Nguyễn Thái Ngọc Duy
2013-04-30 3:42 ` [PATCH v2 6/8] cache-tree.c: refuse to write trees " Nguyễn Thái Ngọc Duy
2013-04-30 3:42 ` [PATCH v2 7/8] mktag: refuse to write tags " Nguyễn Thái Ngọc Duy
2013-04-30 3:42 ` [PATCH v2 8/8] sha1_file: do write objects even if found in ODB_EXTALT database Nguyễn Thái Ngọc Duy
2013-04-30 8:43 ` [PATCH v2 0/8] Some object db protection when add_submodule_odb is used Thomas Rast
2013-04-30 10:32 ` Duy Nguyen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1367293372-1958-2-git-send-email-pclouds@gmail.com \
--to=pclouds@gmail.com \
--cc=Jens.Lehmann@web.de \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.