* [PATCH v2 17/26] git-compat-util.h: implement checked size_t to uint32_t conversion
From: Taylor Blau @ 2023-12-14 22:24 UTC (permalink / raw)
To: git; +Cc: Jeff King, Patrick Steinhardt, Junio C Hamano
In-Reply-To: <cover.1702592603.git.me@ttaylorr.com>
In a similar fashion as other checked cast functions in this header
(such as `cast_size_t_to_ulong()` and `cast_size_t_to_int()`), implement
a checked cast function for going from a size_t to a uint32_t value.
This function will be utilized in a future commit which needs to make
such a conversion.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
git-compat-util.h | 9 +++++++++
1 file changed, 9 insertions(+)
diff --git a/git-compat-util.h b/git-compat-util.h
index 3e7a59b5ff..c3b6c2c226 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -1013,6 +1013,15 @@ static inline unsigned long cast_size_t_to_ulong(size_t a)
return (unsigned long)a;
}
+static inline uint32_t cast_size_t_to_uint32_t(size_t a)
+{
+ if (a != (uint32_t)a)
+ die("object too large to read on this platform: %"
+ PRIuMAX" is cut off to %u",
+ (uintmax_t)a, (uint32_t)a);
+ return (uint32_t)a;
+}
+
static inline int cast_size_t_to_int(size_t a)
{
if (a > INT_MAX)
--
2.43.0.102.ga31d690331.dirty
^ permalink raw reply related
* [PATCH v2 18/26] midx: implement `midx_preferred_pack()`
From: Taylor Blau @ 2023-12-14 22:24 UTC (permalink / raw)
To: git; +Cc: Jeff King, Patrick Steinhardt, Junio C Hamano
In-Reply-To: <cover.1702592603.git.me@ttaylorr.com>
When performing a binary search over the objects in a MIDX's bitmap
(i.e. in pseudo-pack order), the reader reconstructs the pseudo-pack
ordering using a combination of (a) the preferred pack, (b) the pack's
lexical position in the MIDX based on pack names, and (c) the object
offset within the pack.
In order to perform this binary search, the reader must know the
identity of the preferred pack. This could be stored in the MIDX, but
isn't for historical reasons, mostly because it can easily be inferred
at read-time by looking at the object in the first bit position and
finding out which pack it was selected from in the MIDX, like so:
nth_midxed_pack_int_id(m, pack_pos_to_midx(m, 0));
In midx_to_pack_pos() which performs this binary search, we look up the
identity of the preferred pack before each search. This is relatively
quick, since it involves two table-driven lookups (one in the MIDX's
revindex for `pack_pos_to_midx()`, and another in the MIDX's object
table for `nth_midxed_pack_int_id()`).
But since the preferred pack does not change after the MIDX is written,
it is safe to cache this value on the MIDX itself.
Write a helper to do just that, and rewrite all of the existing
call-sites that care about the identity of the preferred pack in terms
of this new helper.
This will prepare us for a subsequent patch where we will need to binary
search through the MIDX's pseudo-pack order multiple times.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
midx.c | 20 ++++++++++++++++++++
midx.h | 2 ++
pack-bitmap.c | 17 +++++++----------
pack-bitmap.h | 1 -
pack-revindex.c | 4 +++-
t/helper/test-read-midx.c | 13 +++++--------
6 files changed, 37 insertions(+), 20 deletions(-)
diff --git a/midx.c b/midx.c
index beaf0c0de4..85e1c2cd12 100644
--- a/midx.c
+++ b/midx.c
@@ -21,6 +21,7 @@
#include "refs.h"
#include "revision.h"
#include "list-objects.h"
+#include "pack-revindex.h"
#define MIDX_SIGNATURE 0x4d494458 /* "MIDX" */
#define MIDX_VERSION 1
@@ -177,6 +178,8 @@ struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local
m->num_packs = get_be32(m->data + MIDX_BYTE_NUM_PACKS);
+ m->preferred_pack_idx = -1;
+
cf = init_chunkfile(NULL);
if (read_table_of_contents(cf, m->data, midx_size,
@@ -460,6 +463,23 @@ int midx_contains_pack(struct multi_pack_index *m, const char *idx_or_pack_name)
return midx_locate_pack(m, idx_or_pack_name, NULL);
}
+int midx_preferred_pack(struct multi_pack_index *m, uint32_t *pack_int_id)
+{
+ if (m->preferred_pack_idx == -1) {
+ if (load_midx_revindex(m) < 0) {
+ m->preferred_pack_idx = -2;
+ return -1;
+ }
+
+ m->preferred_pack_idx =
+ nth_midxed_pack_int_id(m, pack_pos_to_midx(m, 0));
+ } else if (m->preferred_pack_idx == -2)
+ return -1; /* no revindex */
+
+ *pack_int_id = m->preferred_pack_idx;
+ return 0;
+}
+
int prepare_multi_pack_index_one(struct repository *r, const char *object_dir, int local)
{
struct multi_pack_index *m;
diff --git a/midx.h b/midx.h
index 89c5aa637e..f87a8fff26 100644
--- a/midx.h
+++ b/midx.h
@@ -29,6 +29,7 @@ struct multi_pack_index {
unsigned char num_chunks;
uint32_t num_packs;
uint32_t num_objects;
+ int preferred_pack_idx;
int local;
@@ -74,6 +75,7 @@ int midx_contains_pack(struct multi_pack_index *m,
const char *idx_or_pack_name);
int midx_locate_pack(struct multi_pack_index *m, const char *idx_or_pack_name,
uint32_t *pos);
+int midx_preferred_pack(struct multi_pack_index *m, uint32_t *pack_int_id);
int prepare_multi_pack_index_one(struct repository *r, const char *object_dir, int local);
/*
diff --git a/pack-bitmap.c b/pack-bitmap.c
index 4d5a484678..1682f99596 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -338,7 +338,7 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git,
struct stat st;
char *bitmap_name = midx_bitmap_filename(midx);
int fd = git_open(bitmap_name);
- uint32_t i;
+ uint32_t i, preferred_pack;
struct packed_git *preferred;
if (fd < 0) {
@@ -393,7 +393,12 @@ static int open_midx_bitmap_1(struct bitmap_index *bitmap_git,
}
}
- preferred = bitmap_git->midx->packs[midx_preferred_pack(bitmap_git)];
+ if (midx_preferred_pack(bitmap_git->midx, &preferred_pack) < 0) {
+ warning(_("could not determine MIDX preferred pack"));
+ goto cleanup;
+ }
+
+ preferred = bitmap_git->midx->packs[preferred_pack];
if (!is_pack_valid(preferred)) {
warning(_("preferred pack (%s) is invalid"),
preferred->pack_name);
@@ -1926,14 +1931,6 @@ static int try_partial_reuse(struct bitmapped_pack *pack,
return 0;
}
-uint32_t midx_preferred_pack(struct bitmap_index *bitmap_git)
-{
- struct multi_pack_index *m = bitmap_git->midx;
- if (!m)
- BUG("midx_preferred_pack: requires non-empty MIDX");
- return nth_midxed_pack_int_id(m, pack_pos_to_midx(bitmap_git->midx, 0));
-}
-
static void reuse_partial_packfile_from_bitmap_1(struct bitmap_index *bitmap_git,
struct bitmapped_pack *pack,
struct bitmap *reuse)
diff --git a/pack-bitmap.h b/pack-bitmap.h
index 7a12a2ce81..179b343912 100644
--- a/pack-bitmap.h
+++ b/pack-bitmap.h
@@ -77,7 +77,6 @@ int test_bitmap_hashes(struct repository *r);
struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
int filter_provided_objects);
-uint32_t midx_preferred_pack(struct bitmap_index *bitmap_git);
void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
struct bitmapped_pack **packs_out,
size_t *packs_nr_out,
diff --git a/pack-revindex.c b/pack-revindex.c
index acf1dd9786..7dc6c776d5 100644
--- a/pack-revindex.c
+++ b/pack-revindex.c
@@ -542,7 +542,9 @@ int midx_to_pack_pos(struct multi_pack_index *m, uint32_t at, uint32_t *pos)
* implicitly is preferred (and includes all its objects, since ties are
* broken first by pack identifier).
*/
- key.preferred_pack = nth_midxed_pack_int_id(m, pack_pos_to_midx(m, 0));
+ if (midx_preferred_pack(key.midx, &key.preferred_pack) < 0)
+ return error(_("could not determine preferred pack"));
+
found = bsearch(&key, m->revindex_data, m->num_objects,
sizeof(*m->revindex_data), midx_pack_order_cmp);
diff --git a/t/helper/test-read-midx.c b/t/helper/test-read-midx.c
index e48557aba1..4acae41bb9 100644
--- a/t/helper/test-read-midx.c
+++ b/t/helper/test-read-midx.c
@@ -6,6 +6,7 @@
#include "pack-bitmap.h"
#include "packfile.h"
#include "setup.h"
+#include "gettext.h"
static int read_midx_file(const char *object_dir, int show_objects)
{
@@ -79,7 +80,7 @@ static int read_midx_checksum(const char *object_dir)
static int read_midx_preferred_pack(const char *object_dir)
{
struct multi_pack_index *midx = NULL;
- struct bitmap_index *bitmap = NULL;
+ uint32_t preferred_pack;
setup_git_directory();
@@ -87,16 +88,12 @@ static int read_midx_preferred_pack(const char *object_dir)
if (!midx)
return 1;
- bitmap = prepare_bitmap_git(the_repository);
- if (!bitmap)
- return 1;
- if (!bitmap_is_midx(bitmap)) {
- free_bitmap_index(bitmap);
+ if (midx_preferred_pack(midx, &preferred_pack) < 0) {
+ warning(_("could not determine MIDX preferred pack"));
return 1;
}
- printf("%s\n", midx->pack_names[midx_preferred_pack(bitmap)]);
- free_bitmap_index(bitmap);
+ printf("%s\n", midx->pack_names[preferred_pack]);
return 0;
}
--
2.43.0.102.ga31d690331.dirty
^ permalink raw reply related
* [PATCH v2 19/26] pack-revindex: factor out `midx_key_to_pack_pos()` helper
From: Taylor Blau @ 2023-12-14 22:24 UTC (permalink / raw)
To: git; +Cc: Jeff King, Patrick Steinhardt, Junio C Hamano
In-Reply-To: <cover.1702592603.git.me@ttaylorr.com>
The `midx_to_pack_pos()` function implements a binary search over
objects in the MIDX between lexical and pseudo-pack order. It does this
by taking in an index into the lexical order (i.e. the same argument
you'd use for `nth_midxed_object_id()` and similar) and spits out a
position in the pseudo-pack order.
This works for all callers, since they currently all are translating
from lexical order to pseudo-pack order. But future callers may want to
translate a known (offset, pack_id) tuple into an index into the
psuedo-pack order, without knowing where that (offset, pack_id) tuple
appears in lexical order.
Prepare for implementing a function that translates between a (offset,
pack_id) tuple into an index into the psuedo-pack order by extracting a
helper function which does just that, and then reimplementing
midx_to_pack_pos() in terms of it.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
pack-revindex.c | 39 ++++++++++++++++++++++++---------------
1 file changed, 24 insertions(+), 15 deletions(-)
diff --git a/pack-revindex.c b/pack-revindex.c
index 7dc6c776d5..baa4657ed3 100644
--- a/pack-revindex.c
+++ b/pack-revindex.c
@@ -520,19 +520,12 @@ static int midx_pack_order_cmp(const void *va, const void *vb)
return 0;
}
-int midx_to_pack_pos(struct multi_pack_index *m, uint32_t at, uint32_t *pos)
+static int midx_key_to_pack_pos(struct multi_pack_index *m,
+ struct midx_pack_key *key,
+ uint32_t *pos)
{
- struct midx_pack_key key;
uint32_t *found;
- if (!m->revindex_data)
- BUG("midx_to_pack_pos: reverse index not yet loaded");
- if (m->num_objects <= at)
- BUG("midx_to_pack_pos: out-of-bounds object at %"PRIu32, at);
-
- key.pack = nth_midxed_pack_int_id(m, at);
- key.offset = nth_midxed_offset(m, at);
- key.midx = m;
/*
* The preferred pack sorts first, so determine its identifier by
* looking at the first object in pseudo-pack order.
@@ -542,16 +535,32 @@ int midx_to_pack_pos(struct multi_pack_index *m, uint32_t at, uint32_t *pos)
* implicitly is preferred (and includes all its objects, since ties are
* broken first by pack identifier).
*/
- if (midx_preferred_pack(key.midx, &key.preferred_pack) < 0)
+ if (midx_preferred_pack(key->midx, &key->preferred_pack) < 0)
return error(_("could not determine preferred pack"));
-
- found = bsearch(&key, m->revindex_data, m->num_objects,
- sizeof(*m->revindex_data), midx_pack_order_cmp);
+ found = bsearch(key, m->revindex_data, m->num_objects,
+ sizeof(*m->revindex_data),
+ midx_pack_order_cmp);
if (!found)
- return error("bad offset for revindex");
+ return -1;
*pos = found - m->revindex_data;
return 0;
}
+
+int midx_to_pack_pos(struct multi_pack_index *m, uint32_t at, uint32_t *pos)
+{
+ struct midx_pack_key key;
+
+ if (!m->revindex_data)
+ BUG("midx_to_pack_pos: reverse index not yet loaded");
+ if (m->num_objects <= at)
+ BUG("midx_to_pack_pos: out-of-bounds object at %"PRIu32, at);
+
+ key.pack = nth_midxed_pack_int_id(m, at);
+ key.offset = nth_midxed_offset(m, at);
+ key.midx = m;
+
+ return midx_key_to_pack_pos(m, &key, pos);
+}
--
2.43.0.102.ga31d690331.dirty
^ permalink raw reply related
* [PATCH v2 20/26] pack-revindex: implement `midx_pair_to_pack_pos()`
From: Taylor Blau @ 2023-12-14 22:24 UTC (permalink / raw)
To: git; +Cc: Jeff King, Patrick Steinhardt, Junio C Hamano
In-Reply-To: <cover.1702592603.git.me@ttaylorr.com>
Now that we have extracted the `midx_key_to_pack_pos()` function, we can
implement the `midx_pair_to_pack_pos()` function which accepts (pack_id,
offset) tuples and returns an index into the psuedo-pack order.
This will be used in a following commit in order to figure out whether
or not the MIDX chose a given delta's base object from the same pack as
the delta resides in. It will do so by locating the base object's offset
in the pack, and then performing a binary search using the same pack ID
with the base object's offset.
If (and only if) it finds a match (at any position) we can guarantee
that the MIDX selected both halves of the delta/base pair from the same
pack.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
pack-revindex.c | 11 +++++++++++
pack-revindex.h | 3 +++
2 files changed, 14 insertions(+)
diff --git a/pack-revindex.c b/pack-revindex.c
index baa4657ed3..a7624d8be8 100644
--- a/pack-revindex.c
+++ b/pack-revindex.c
@@ -564,3 +564,14 @@ int midx_to_pack_pos(struct multi_pack_index *m, uint32_t at, uint32_t *pos)
return midx_key_to_pack_pos(m, &key, pos);
}
+
+int midx_pair_to_pack_pos(struct multi_pack_index *m, uint32_t pack_int_id,
+ off_t ofs, uint32_t *pos)
+{
+ struct midx_pack_key key = {
+ .pack = pack_int_id,
+ .offset = ofs,
+ .midx = m,
+ };
+ return midx_key_to_pack_pos(m, &key, pos);
+}
diff --git a/pack-revindex.h b/pack-revindex.h
index 6dd47efea1..422c2487ae 100644
--- a/pack-revindex.h
+++ b/pack-revindex.h
@@ -142,4 +142,7 @@ uint32_t pack_pos_to_midx(struct multi_pack_index *m, uint32_t pos);
*/
int midx_to_pack_pos(struct multi_pack_index *midx, uint32_t at, uint32_t *pos);
+int midx_pair_to_pack_pos(struct multi_pack_index *midx, uint32_t pack_id,
+ off_t ofs, uint32_t *pos);
+
#endif
--
2.43.0.102.ga31d690331.dirty
^ permalink raw reply related
* [PATCH v2 21/26] pack-bitmap: prepare to mark objects from multiple packs for reuse
From: Taylor Blau @ 2023-12-14 22:24 UTC (permalink / raw)
To: git; +Cc: Jeff King, Patrick Steinhardt, Junio C Hamano
In-Reply-To: <cover.1702592603.git.me@ttaylorr.com>
Now that the pack-objects code is equipped to handle reusing objects
from multiple packs, prepare the pack-bitmap code to mark objects from
multiple packs as reuse candidates.
In order to prepare the pack-bitmap code for this change, remove the
same set of assumptions we unwound in previous commits from the helper
function `reuse_partial_packfile_from_bitmap_1()`, in preparation for it
to be called in a loop over the set of bitmapped packs in a following
commit.
Most importantly, we can no longer assume that the bit position
corresponding to the first object in a given reuse pack candidate is at
the beginning of the bitmap itself.
For the single pack that this assumption is still true for (in MIDX
bitmaps, this is the preferred pack, in single-pack bitmaps it is the
pack the bitmap is tied to), we can still use our whole-words
optimization.
But for all subsequent packs, we can not make use of this optimization,
since it assumes that all delta bases are being sent from the same pack,
which would break if we are sending OFS_DELTAs down to the client. To
understand why, consider two packs, P1 and P2 where:
- P1 has object A which is a delta on base B
- P2 has its own copy of B, in addition to other objects
Suppose that the MIDX which covers P1 and P2 selected its copy of A from
P1, but selected its copy of B from P2. Since A is a delta of B, but the
base was selected from a different pack, sending the bytes corresponding
to A as an OFS_DELTA verbatim from P1 would be incorrect, since we don't
guarantee that B is in the same place relative to A in the generated
pack as in P1.
For now, we detect and reject these cross-pack deltas by searching for
the (pack_id, offset) pair for the delta's base object (using the same
pack_id as the pack containing the delta'd object) in the MIDX. If we
find a match, that means that the MIDX did indeed pick the base object
from the same pack, and we are OK to reuse the delta.
If we don't find a match, however, that means that the base object was
selected from a different pack in the MIDX, and we can let the slower
path handle re-delta'ing our candidate object.
In the future, there are a couple of other things we could do, namely:
- Turn any cross-pack deltas (which are stored as OFS_DELTAs) into
REF_DELTAs. We already do this today when reusing an OFS_DELTA
without `--delta-base-offset` enabled, so it's not a huge stretch to
do the same for cross-pack deltas even when `--delta-base-offset` is
enabled.
This would work, but would obviously result in larger-than-necessary
packs, as we in theory *could* represent these cross-pack deltas by
patching an existing OFS_DELTA. But it's not clear how much that
would matter in practice. I suspect it would have a lot to do with
how you pack your repository in the first place.
- Finally, we could patch OFS_DELTAs across packs in a similar fashion
as we do today for OFS_DELTAs within a single pack on either side of
a gap. This would result in the smallest packs of the three options
here, but implementing this would be more involved.
At minimum, you'd have to keep the reusable chunks list for all
reused packs, not just the one we're currently processing. And you'd
have to ensure that any bases which are a part of cross-pack deltas
appear before the delta. I think this is possible to do, but would
require assembling the reusable chunks list potentially in a
different order than they appear in the source packs.
For now, let's pursue the simplest approach and reject any cross-pack
deltas.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
pack-bitmap.c | 172 +++++++++++++++++++++++++++++++-------------------
1 file changed, 106 insertions(+), 66 deletions(-)
diff --git a/pack-bitmap.c b/pack-bitmap.c
index 1682f99596..242a5908f7 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -1841,8 +1841,10 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
* -1 means "stop trying further objects"; 0 means we may or may not have
* reused, but you can keep feeding bits.
*/
-static int try_partial_reuse(struct bitmapped_pack *pack,
- size_t pos,
+static int try_partial_reuse(struct bitmap_index *bitmap_git,
+ struct bitmapped_pack *pack,
+ size_t bitmap_pos,
+ uint32_t pack_pos,
struct bitmap *reuse,
struct pack_window **w_curs)
{
@@ -1850,33 +1852,10 @@ static int try_partial_reuse(struct bitmapped_pack *pack,
enum object_type type;
unsigned long size;
- /*
- * try_partial_reuse() is called either on (a) objects in the
- * bitmapped pack (in the case of a single-pack bitmap) or (b)
- * objects in the preferred pack of a multi-pack bitmap.
- * Importantly, the latter can pretend as if only a single pack
- * exists because:
- *
- * - The first pack->num_objects bits of a MIDX bitmap are
- * reserved for the preferred pack, and
- *
- * - Ties due to duplicate objects are always resolved in
- * favor of the preferred pack.
- *
- * Therefore we do not need to ever ask the MIDX for its copy of
- * an object by OID, since it will always select it from the
- * preferred pack. Likewise, the selected copy of the base
- * object for any deltas will reside in the same pack.
- *
- * This means that we can reuse pos when looking up the bit in
- * the reuse bitmap, too, since bits corresponding to the
- * preferred pack precede all bits from other packs.
- */
+ if (pack_pos >= pack->p->num_objects)
+ return -1; /* not actually in the pack */
- if (pos >= pack->p->num_objects)
- return -1; /* not actually in the pack or MIDX preferred pack */
-
- offset = delta_obj_offset = pack_pos_to_offset(pack->p, pos);
+ offset = delta_obj_offset = pack_pos_to_offset(pack->p, pack_pos);
type = unpack_object_header(pack->p, w_curs, &offset, &size);
if (type < 0)
return -1; /* broken packfile, punt */
@@ -1884,6 +1863,7 @@ static int try_partial_reuse(struct bitmapped_pack *pack,
if (type == OBJ_REF_DELTA || type == OBJ_OFS_DELTA) {
off_t base_offset;
uint32_t base_pos;
+ uint32_t base_bitmap_pos;
/*
* Find the position of the base object so we can look it up
@@ -1897,20 +1877,44 @@ static int try_partial_reuse(struct bitmapped_pack *pack,
delta_obj_offset);
if (!base_offset)
return 0;
- if (offset_to_pack_pos(pack->p, base_offset, &base_pos) < 0)
- return 0;
- /*
- * We assume delta dependencies always point backwards. This
- * lets us do a single pass, and is basically always true
- * due to the way OFS_DELTAs work. You would not typically
- * find REF_DELTA in a bitmapped pack, since we only bitmap
- * packs we write fresh, and OFS_DELTA is the default). But
- * let's double check to make sure the pack wasn't written with
- * odd parameters.
- */
- if (base_pos >= pos)
- return 0;
+ offset_to_pack_pos(pack->p, base_offset, &base_pos);
+
+ if (bitmap_is_midx(bitmap_git)) {
+ /*
+ * Cross-pack deltas are rejected for now, but could
+ * theoretically be supported in the future.
+ *
+ * We would need to ensure that we're sending both
+ * halves of the delta/base pair, regardless of whether
+ * or not the two cross a pack boundary. If they do,
+ * then we must convert the delta to an REF_DELTA to
+ * refer back to the base in the other pack.
+ * */
+ if (midx_pair_to_pack_pos(bitmap_git->midx,
+ pack->pack_int_id,
+ base_offset,
+ &base_bitmap_pos) < 0) {
+ return 0;
+ }
+ } else {
+ if (offset_to_pack_pos(pack->p, base_offset,
+ &base_pos) < 0)
+ return 0;
+ /*
+ * We assume delta dependencies always point backwards.
+ * This lets us do a single pass, and is basically
+ * always true due to the way OFS_DELTAs work. You would
+ * not typically find REF_DELTA in a bitmapped pack,
+ * since we only bitmap packs we write fresh, and
+ * OFS_DELTA is the default). But let's double check to
+ * make sure the pack wasn't written with odd
+ * parameters.
+ */
+ if (base_pos >= pack_pos)
+ return 0;
+ base_bitmap_pos = pack->bitmap_pos + base_pos;
+ }
/*
* And finally, if we're not sending the base as part of our
@@ -1920,14 +1924,14 @@ static int try_partial_reuse(struct bitmapped_pack *pack,
* to REF_DELTA on the fly. Better to just let the normal
* object_entry code path handle it.
*/
- if (!bitmap_get(reuse, pack->bitmap_pos + base_pos))
+ if (!bitmap_get(reuse, base_bitmap_pos))
return 0;
}
/*
* If we got here, then the object is OK to reuse. Mark it.
*/
- bitmap_set(reuse, pack->bitmap_pos + pos);
+ bitmap_set(reuse, bitmap_pos);
return 0;
}
@@ -1937,36 +1941,72 @@ static void reuse_partial_packfile_from_bitmap_1(struct bitmap_index *bitmap_git
{
struct bitmap *result = bitmap_git->result;
struct pack_window *w_curs = NULL;
- size_t i = 0;
+ size_t pos = pack->bitmap_pos / BITS_IN_EWORD;
- while (i < result->word_alloc && result->words[i] == (eword_t)~0)
- i++;
+ if (!pack->bitmap_pos) {
+ /*
+ * If we're processing the first (in the case of a MIDX, the
+ * preferred pack) or the only (in the case of single-pack
+ * bitmaps) pack, then we can reuse whole words at a time.
+ *
+ * This is because we know that any deltas in this range *must*
+ * have their bases chosen from the same pack, since:
+ *
+ * - In the single pack case, there is no other pack to choose
+ * them from.
+ *
+ * - In the MIDX case, the first pack is the preferred pack, so
+ * all ties are broken in favor of that pack (i.e. the one
+ * we're currently processing). So any duplicate bases will be
+ * resolved in favor of the pack we're processing.
+ */
+ while (pos < result->word_alloc &&
+ pos < pack->bitmap_nr / BITS_IN_EWORD &&
+ result->words[pos] == (eword_t)~0)
+ pos++;
+ memset(reuse->words, 0xFF, pos * sizeof(eword_t));
+ }
- /*
- * Don't mark objects not in the packfile or preferred pack. This bitmap
- * marks objects eligible for reuse, but the pack-reuse code only
- * understands how to reuse a single pack. Since the preferred pack is
- * guaranteed to have all bases for its deltas (in a multi-pack bitmap),
- * we use it instead of another pack. In single-pack bitmaps, the choice
- * is made for us.
- */
- if (i > pack->p->num_objects / BITS_IN_EWORD)
- i = pack->p->num_objects / BITS_IN_EWORD;
-
- memset(reuse->words, 0xFF, i * sizeof(eword_t));
-
- for (; i < result->word_alloc; ++i) {
- eword_t word = result->words[i];
- size_t pos = (i * BITS_IN_EWORD);
+ for (; pos < result->word_alloc; pos++) {
+ eword_t word = result->words[pos];
size_t offset;
- for (offset = 0; offset < BITS_IN_EWORD; ++offset) {
- if ((word >> offset) == 0)
+ for (offset = 0; offset < BITS_IN_EWORD; offset++) {
+ size_t bit_pos;
+ uint32_t pack_pos;
+
+ if (word >> offset == 0)
break;
offset += ewah_bit_ctz64(word >> offset);
- if (try_partial_reuse(pack, pos + offset,
- reuse, &w_curs) < 0) {
+
+ bit_pos = pos * BITS_IN_EWORD + offset;
+ if (bit_pos < pack->bitmap_pos)
+ continue;
+ if (bit_pos >= pack->bitmap_pos + pack->bitmap_nr)
+ goto done;
+
+ if (bitmap_is_midx(bitmap_git)) {
+ uint32_t midx_pos;
+ off_t ofs;
+
+ midx_pos = pack_pos_to_midx(bitmap_git->midx, bit_pos);
+ ofs = nth_midxed_offset(bitmap_git->midx, midx_pos);
+
+ if (offset_to_pack_pos(pack->p, ofs, &pack_pos) < 0)
+ BUG("could not find object in pack %s "
+ "at offset %"PRIuMAX" in MIDX",
+ pack_basename(pack->p), (uintmax_t)ofs);
+ } else {
+ pack_pos = cast_size_t_to_uint32_t(st_sub(bit_pos, pack->bitmap_pos));
+ if (pack_pos >= pack->p->num_objects)
+ BUG("advanced beyond the end of pack %s (%"PRIuMAX" > %"PRIu32")",
+ pack_basename(pack->p), (uintmax_t)pack_pos,
+ pack->p->num_objects);
+ }
+
+ if (try_partial_reuse(bitmap_git, pack, bit_pos,
+ pack_pos, reuse, &w_curs) < 0) {
/*
* try_partial_reuse indicated we couldn't reuse
* any bits, so there is no point in trying more
--
2.43.0.102.ga31d690331.dirty
^ permalink raw reply related
* [PATCH v2 22/26] pack-objects: add tracing for various packfile metrics
From: Taylor Blau @ 2023-12-14 22:24 UTC (permalink / raw)
To: git; +Cc: Jeff King, Patrick Steinhardt, Junio C Hamano
In-Reply-To: <cover.1702592603.git.me@ttaylorr.com>
As part of the multi-pack reuse effort, we will want to add some tests
that assert that we reused a certain number of objects from a certain
number of packs.
We could do this by grepping through the stderr output of
`pack-objects`, but doing so would be brittle in case the output format
changed.
Instead, let's use the trace2 mechanism to log various pieces of
information about the generated packfile, which we can then use to
compare against desired values.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
builtin/pack-objects.c | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 7eb035eb7d..7aae9f104b 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -4595,6 +4595,13 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
reuse_packfile_objects,
(uintmax_t)reuse_packfiles_used_nr);
+ trace2_data_intmax("pack-objects", the_repository, "written", written);
+ trace2_data_intmax("pack-objects", the_repository, "written/delta", written_delta);
+ trace2_data_intmax("pack-objects", the_repository, "reused", reused);
+ trace2_data_intmax("pack-objects", the_repository, "reused/delta", reused_delta);
+ trace2_data_intmax("pack-objects", the_repository, "pack-reused", reuse_packfile_objects);
+ trace2_data_intmax("pack-objects", the_repository, "packs-reused", reuse_packfiles_used_nr);
+
cleanup:
clear_packing_data(&to_pack);
list_objects_filter_release(&filter_options);
--
2.43.0.102.ga31d690331.dirty
^ permalink raw reply related
* [PATCH v2 23/26] t/test-lib-functions.sh: implement `test_trace2_data` helper
From: Taylor Blau @ 2023-12-14 22:24 UTC (permalink / raw)
To: git; +Cc: Jeff King, Patrick Steinhardt, Junio C Hamano
In-Reply-To: <cover.1702592603.git.me@ttaylorr.com>
Introduce a helper function which looks for a specific (category, key,
value) tuple in the output of a trace2 event stream.
We will use this function in a future patch to ensure that the expected
number of objects are reused from an expected number of packs.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
t/test-lib-functions.sh | 14 ++++++++++++++
1 file changed, 14 insertions(+)
diff --git a/t/test-lib-functions.sh b/t/test-lib-functions.sh
index 9c3cf12b26..93fe819b0a 100644
--- a/t/test-lib-functions.sh
+++ b/t/test-lib-functions.sh
@@ -1874,6 +1874,20 @@ test_region () {
return 0
}
+# Check that the given data fragment was included as part of the
+# trace2-format trace on stdin.
+#
+# test_trace2_data <category> <key> <value>
+#
+# For example, to look for trace2_data_intmax("pack-objects", repo,
+# "reused", N) in an invocation of "git pack-objects", run:
+#
+# GIT_TRACE2_EVENT="$(pwd)/trace.txt" git pack-objects ... &&
+# test_trace2_data pack-objects reused N <trace2.txt
+test_trace2_data () {
+ grep -e '"category":"'"$1"'","key":"'"$2"'","value":"'"$3"'"'
+}
+
# Given a GIT_TRACE2_EVENT log over stdin, writes to stdout a list of URLs
# sent to git-remote-https child processes.
test_remote_https_urls() {
--
2.43.0.102.ga31d690331.dirty
^ permalink raw reply related
* [PATCH v2 24/26] pack-objects: allow setting `pack.allowPackReuse` to "single"
From: Taylor Blau @ 2023-12-14 22:24 UTC (permalink / raw)
To: git; +Cc: Jeff King, Patrick Steinhardt, Junio C Hamano
In-Reply-To: <cover.1702592603.git.me@ttaylorr.com>
In e704fc7978 (pack-objects: introduce pack.allowPackReuse, 2019-12-18),
the `pack.allowPackReuse` configuration option was introduced, allowing
users to disable the pack reuse mechanism.
To prepare for debugging multi-pack reuse, allow setting configuration
to "single" in addition to the usual bool-or-int values.
"single" implies the same behavior as "true", "1", "yes", and so on. But
it will complement a new "multi" value (to be introduced in a future
commit). When set to "single", we will only perform pack reuse on a
single pack, regardless of whether or not there are multiple MIDX'd
packs.
This requires no code changes (yet), since we only support single pack
reuse.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
Documentation/config/pack.txt | 2 +-
builtin/pack-objects.c | 19 ++++++++++++++++---
2 files changed, 17 insertions(+), 4 deletions(-)
diff --git a/Documentation/config/pack.txt b/Documentation/config/pack.txt
index f50df9dbce..fe100d0fb7 100644
--- a/Documentation/config/pack.txt
+++ b/Documentation/config/pack.txt
@@ -28,7 +28,7 @@ all existing objects. You can force recompression by passing the -F option
to linkgit:git-repack[1].
pack.allowPackReuse::
- When true, and when reachability bitmaps are enabled,
+ When true or "single", and when reachability bitmaps are enabled,
pack-objects will try to send parts of the bitmapped packfile
verbatim. This can reduce memory and CPU usage to serve fetches,
but might result in sending a slightly larger pack. Defaults to
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 7aae9f104b..684698f679 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -229,7 +229,10 @@ static struct bitmap *reuse_packfile_bitmap;
static int use_bitmap_index_default = 1;
static int use_bitmap_index = -1;
-static int allow_pack_reuse = 1;
+static enum {
+ NO_PACK_REUSE = 0,
+ SINGLE_PACK_REUSE,
+} allow_pack_reuse = SINGLE_PACK_REUSE;
static enum {
WRITE_BITMAP_FALSE = 0,
WRITE_BITMAP_QUIET,
@@ -3244,7 +3247,17 @@ static int git_pack_config(const char *k, const char *v,
return 0;
}
if (!strcmp(k, "pack.allowpackreuse")) {
- allow_pack_reuse = git_config_bool(k, v);
+ int res = git_parse_maybe_bool_text(v);
+ if (res < 0) {
+ if (!strcasecmp(v, "single"))
+ allow_pack_reuse = SINGLE_PACK_REUSE;
+ else
+ die(_("invalid pack.allowPackReuse value: '%s'"), v);
+ } else if (res) {
+ allow_pack_reuse = SINGLE_PACK_REUSE;
+ } else {
+ allow_pack_reuse = NO_PACK_REUSE;
+ }
return 0;
}
if (!strcmp(k, "pack.threads")) {
@@ -3999,7 +4012,7 @@ static void loosen_unused_packed_objects(void)
*/
static int pack_options_allow_reuse(void)
{
- return allow_pack_reuse &&
+ return allow_pack_reuse != NO_PACK_REUSE &&
pack_to_stdout &&
!ignore_packed_keep_on_disk &&
!ignore_packed_keep_in_core &&
--
2.43.0.102.ga31d690331.dirty
^ permalink raw reply related
* [PATCH v2 25/26] pack-bitmap: enable reuse from all bitmapped packs
From: Taylor Blau @ 2023-12-14 22:24 UTC (permalink / raw)
To: git; +Cc: Jeff King, Patrick Steinhardt, Junio C Hamano
In-Reply-To: <cover.1702592603.git.me@ttaylorr.com>
Now that both the pack-bitmap and pack-objects code are prepared to
handle marking and using objects from multiple bitmapped packs for
verbatim reuse, allow marking objects from all bitmapped packs as
eligible for reuse.
Within the `reuse_partial_packfile_from_bitmap()` function, we no longer
only mark the pack whose first object is at bit position zero for reuse,
and instead mark any pack contained in the MIDX as a reuse candidate.
Provide a handful of test cases in a new script (t5332) exercising
interesting behavior for multi-pack reuse to ensure that we performed
all of the previous steps correctly.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
Documentation/config/pack.txt | 16 ++-
builtin/pack-objects.c | 6 +-
pack-bitmap.c | 34 ++++--
pack-bitmap.h | 3 +-
t/t5332-multi-pack-reuse.sh | 203 ++++++++++++++++++++++++++++++++++
5 files changed, 245 insertions(+), 17 deletions(-)
create mode 100755 t/t5332-multi-pack-reuse.sh
diff --git a/Documentation/config/pack.txt b/Documentation/config/pack.txt
index fe100d0fb7..9c630863e6 100644
--- a/Documentation/config/pack.txt
+++ b/Documentation/config/pack.txt
@@ -28,11 +28,17 @@ all existing objects. You can force recompression by passing the -F option
to linkgit:git-repack[1].
pack.allowPackReuse::
- When true or "single", and when reachability bitmaps are enabled,
- pack-objects will try to send parts of the bitmapped packfile
- verbatim. This can reduce memory and CPU usage to serve fetches,
- but might result in sending a slightly larger pack. Defaults to
- true.
+ When true or "single", and when reachability bitmaps are
+ enabled, pack-objects will try to send parts of the bitmapped
+ packfile verbatim. When "multi", and when a multi-pack
+ reachability bitmap is available, pack-objects will try to send
+ parts of all packs in the MIDX.
++
+ If only a single pack bitmap is available, and
+ `pack.allowPackReuse` is set to "multi", reuse parts of just the
+ bitmapped packfile. This can reduce memory and CPU usage to
+ serve fetches, but might result in sending a slightly larger
+ pack. Defaults to true.
pack.island::
An extended regular expression configuring a set of delta
diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 684698f679..5d3c42035b 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -232,6 +232,7 @@ static int use_bitmap_index = -1;
static enum {
NO_PACK_REUSE = 0,
SINGLE_PACK_REUSE,
+ MULTI_PACK_REUSE,
} allow_pack_reuse = SINGLE_PACK_REUSE;
static enum {
WRITE_BITMAP_FALSE = 0,
@@ -3251,6 +3252,8 @@ static int git_pack_config(const char *k, const char *v,
if (res < 0) {
if (!strcasecmp(v, "single"))
allow_pack_reuse = SINGLE_PACK_REUSE;
+ else if (!strcasecmp(v, "multi"))
+ allow_pack_reuse = MULTI_PACK_REUSE;
else
die(_("invalid pack.allowPackReuse value: '%s'"), v);
} else if (res) {
@@ -4029,7 +4032,8 @@ static int get_object_list_from_bitmap(struct rev_info *revs)
reuse_partial_packfile_from_bitmap(bitmap_git,
&reuse_packfiles,
&reuse_packfiles_nr,
- &reuse_packfile_bitmap);
+ &reuse_packfile_bitmap,
+ allow_pack_reuse == MULTI_PACK_REUSE);
if (reuse_packfiles) {
reuse_packfile_objects = bitmap_popcount(reuse_packfile_bitmap);
diff --git a/pack-bitmap.c b/pack-bitmap.c
index 242a5908f7..229a11fb00 100644
--- a/pack-bitmap.c
+++ b/pack-bitmap.c
@@ -2040,7 +2040,8 @@ static int bitmapped_pack_cmp(const void *va, const void *vb)
void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
struct bitmapped_pack **packs_out,
size_t *packs_nr_out,
- struct bitmap **reuse_out)
+ struct bitmap **reuse_out,
+ int multi_pack_reuse)
{
struct repository *r = the_repository;
struct bitmapped_pack *packs = NULL;
@@ -2064,15 +2065,30 @@ void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
free(packs);
return;
}
+
if (!pack.bitmap_nr)
- continue; /* no objects from this pack */
- if (pack.bitmap_pos)
- continue; /* not preferred pack */
+ continue;
+
+ if (!multi_pack_reuse && pack.bitmap_pos) {
+ /*
+ * If we're only reusing a single pack, skip
+ * over any packs which are not positioned at
+ * the beginning of the MIDX bitmap.
+ *
+ * This is consistent with the existing
+ * single-pack reuse behavior, which only reuses
+ * parts of the MIDX's preferred pack.
+ */
+ continue;
+ }
ALLOC_GROW(packs, packs_nr + 1, packs_alloc);
memcpy(&packs[packs_nr++], &pack, sizeof(pack));
objects_nr += pack.p->num_objects;
+
+ if (!multi_pack_reuse)
+ break;
}
QSORT(packs, packs_nr, bitmapped_pack_cmp);
@@ -2080,10 +2096,10 @@ void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
ALLOC_GROW(packs, packs_nr + 1, packs_alloc);
packs[packs_nr].p = bitmap_git->pack;
- packs[packs_nr].bitmap_pos = 0;
packs[packs_nr].bitmap_nr = bitmap_git->pack->num_objects;
+ packs[packs_nr].bitmap_pos = 0;
- objects_nr = packs[packs_nr++].p->num_objects;
+ objects_nr = packs[packs_nr++].bitmap_nr;
}
word_alloc = objects_nr / BITS_IN_EWORD;
@@ -2091,10 +2107,8 @@ void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
word_alloc++;
reuse = bitmap_word_alloc(word_alloc);
- if (packs_nr != 1)
- BUG("pack reuse not yet implemented for multiple packs");
-
- reuse_partial_packfile_from_bitmap_1(bitmap_git, packs, reuse);
+ for (i = 0; i < packs_nr; i++)
+ reuse_partial_packfile_from_bitmap_1(bitmap_git, &packs[i], reuse);
if (bitmap_is_empty(reuse)) {
free(packs);
diff --git a/pack-bitmap.h b/pack-bitmap.h
index 179b343912..c7dea13217 100644
--- a/pack-bitmap.h
+++ b/pack-bitmap.h
@@ -80,7 +80,8 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs,
void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
struct bitmapped_pack **packs_out,
size_t *packs_nr_out,
- struct bitmap **reuse_out);
+ struct bitmap **reuse_out,
+ int multi_pack_reuse);
int rebuild_existing_bitmaps(struct bitmap_index *, struct packing_data *mapping,
kh_oid_map_t *reused_bitmaps, int show_progress);
void free_bitmap_index(struct bitmap_index *);
diff --git a/t/t5332-multi-pack-reuse.sh b/t/t5332-multi-pack-reuse.sh
new file mode 100755
index 0000000000..2ba788b042
--- /dev/null
+++ b/t/t5332-multi-pack-reuse.sh
@@ -0,0 +1,203 @@
+#!/bin/sh
+
+test_description='pack-objects multi-pack reuse'
+
+. ./test-lib.sh
+. "$TEST_DIRECTORY"/lib-bitmap.sh
+
+objdir=.git/objects
+packdir=$objdir/pack
+
+test_pack_reused () {
+ test_trace2_data pack-objects pack-reused "$1"
+}
+
+test_packs_reused () {
+ test_trace2_data pack-objects packs-reused "$1"
+}
+
+
+# pack_position <object> </path/to/pack.idx
+pack_position () {
+ git show-index >objects &&
+ grep "$1" objects | cut -d" " -f1
+}
+
+test_expect_success 'preferred pack is reused for single-pack reuse' '
+ test_config pack.allowPackReuse single &&
+
+ for i in A B
+ do
+ test_commit "$i" &&
+ git repack -d || return 1
+ done &&
+
+ git multi-pack-index write --bitmap &&
+
+ : >trace2.txt &&
+ GIT_TRACE2_EVENT="$PWD/trace2.txt" \
+ git pack-objects --stdout --revs --all >/dev/null &&
+
+ test_pack_reused 3 <trace2.txt &&
+ test_packs_reused 1 <trace2.txt
+'
+
+test_expect_success 'enable multi-pack reuse' '
+ git config pack.allowPackReuse multi
+'
+
+test_expect_success 'reuse all objects from subset of bitmapped packs' '
+ test_commit C &&
+ git repack -d &&
+
+ git multi-pack-index write --bitmap &&
+
+ cat >in <<-EOF &&
+ $(git rev-parse C)
+ ^$(git rev-parse A)
+ EOF
+
+ : >trace2.txt &&
+ GIT_TRACE2_EVENT="$PWD/trace2.txt" \
+ git pack-objects --stdout --revs <in >/dev/null &&
+
+ test_pack_reused 6 <trace2.txt &&
+ test_packs_reused 2 <trace2.txt
+'
+
+test_expect_success 'reuse all objects from all packs' '
+ : >trace2.txt &&
+ GIT_TRACE2_EVENT="$PWD/trace2.txt" \
+ git pack-objects --stdout --revs --all >/dev/null &&
+
+ test_pack_reused 9 <trace2.txt &&
+ test_packs_reused 3 <trace2.txt
+'
+
+test_expect_success 'reuse objects from first pack with middle gap' '
+ for i in D E F
+ do
+ test_commit "$i" || return 1
+ done &&
+
+ # Set "pack.window" to zero to ensure that we do not create any
+ # deltas, which could alter the amount of pack reuse we perform
+ # (if, for e.g., we are not sending one or more bases).
+ D="$(git -c pack.window=0 pack-objects --all --unpacked $packdir/pack)" &&
+
+ d_pos="$(pack_position $(git rev-parse D) <$packdir/pack-$D.idx)" &&
+ e_pos="$(pack_position $(git rev-parse E) <$packdir/pack-$D.idx)" &&
+ f_pos="$(pack_position $(git rev-parse F) <$packdir/pack-$D.idx)" &&
+
+ # commits F, E, and D, should appear in that order at the
+ # beginning of the pack
+ test $f_pos -lt $e_pos &&
+ test $e_pos -lt $d_pos &&
+
+ # Ensure that the pack we are constructing sorts ahead of any
+ # other packs in lexical/bitmap order by choosing it as the
+ # preferred pack.
+ git multi-pack-index write --bitmap --preferred-pack="pack-$D.idx" &&
+
+ cat >in <<-EOF &&
+ $(git rev-parse E)
+ ^$(git rev-parse D)
+ EOF
+
+ : >trace2.txt &&
+ GIT_TRACE2_EVENT="$PWD/trace2.txt" \
+ git pack-objects --stdout --delta-base-offset --revs <in >/dev/null &&
+
+ test_pack_reused 3 <trace2.txt &&
+ test_packs_reused 1 <trace2.txt
+'
+
+test_expect_success 'reuse objects from middle pack with middle gap' '
+ rm -fr $packdir/multi-pack-index* &&
+
+ # Ensure that the pack we are constructing sort into any
+ # position *but* the first one, by choosing a different pack as
+ # the preferred one.
+ git multi-pack-index write --bitmap --preferred-pack="pack-$A.idx" &&
+
+ cat >in <<-EOF &&
+ $(git rev-parse E)
+ ^$(git rev-parse D)
+ EOF
+
+ : >trace2.txt &&
+ GIT_TRACE2_EVENT="$PWD/trace2.txt" \
+ git pack-objects --stdout --delta-base-offset --revs <in >/dev/null &&
+
+ test_pack_reused 3 <trace2.txt &&
+ test_packs_reused 1 <trace2.txt
+'
+
+test_expect_success 'omit delta with uninteresting base (same pack)' '
+ git repack -adk &&
+
+ test_seq 32 >f &&
+ git add f &&
+ test_tick &&
+ git commit -m "delta" &&
+ delta="$(git rev-parse HEAD)" &&
+
+ test_seq 64 >f &&
+ test_tick &&
+ git commit -a -m "base" &&
+ base="$(git rev-parse HEAD)" &&
+
+ test_commit other &&
+
+ git repack -d &&
+
+ have_delta "$(git rev-parse $delta:f)" "$(git rev-parse $base:f)" &&
+
+ git multi-pack-index write --bitmap &&
+
+ cat >in <<-EOF &&
+ $(git rev-parse other)
+ ^$base
+ EOF
+
+ : >trace2.txt &&
+ GIT_TRACE2_EVENT="$PWD/trace2.txt" \
+ git pack-objects --stdout --delta-base-offset --revs <in >/dev/null &&
+
+ # We can only reuse the 3 objects corresponding to "other" from
+ # the latest pack.
+ #
+ # This is because even though we want "delta", we do not want
+ # "base", meaning that we have to inflate the delta/base-pair
+ # corresponding to the blob in commit "delta", which bypasses
+ # the pack-reuse mechanism.
+ #
+ # The remaining objects from the other pack are similarly not
+ # reused because their objects are on the uninteresting side of
+ # the query.
+ test_pack_reused 3 <trace2.txt &&
+ test_packs_reused 1 <trace2.txt
+'
+
+test_expect_success 'omit delta from uninteresting base (cross pack)' '
+ cat >in <<-EOF &&
+ $(git rev-parse $base)
+ ^$(git rev-parse $delta)
+ EOF
+
+ P="$(git pack-objects --revs $packdir/pack <in)" &&
+
+ git multi-pack-index write --bitmap --preferred-pack="pack-$P.idx" &&
+
+ : >trace2.txt &&
+ GIT_TRACE2_EVENT="$PWD/trace2.txt" \
+ git pack-objects --stdout --delta-base-offset --all >/dev/null &&
+
+ packs_nr="$(find $packdir -type f -name "pack-*.pack" | wc -l)" &&
+ objects_nr="$(git rev-list --count --all --objects)" &&
+
+ test_pack_reused $(($objects_nr - 1)) <trace2.txt &&
+ test_packs_reused $packs_nr <trace2.txt
+'
+
+test_done
--
2.43.0.102.ga31d690331.dirty
^ permalink raw reply related
* [PATCH v2 26/26] t/perf: add performance tests for multi-pack reuse
From: Taylor Blau @ 2023-12-14 22:24 UTC (permalink / raw)
To: git; +Cc: Jeff King, Patrick Steinhardt, Junio C Hamano
In-Reply-To: <cover.1702592603.git.me@ttaylorr.com>
To ensure that we don't regress either the size or runtime performance
of multi-pack reuse, add a performance test to measure both of these.
The test partitions the objects in GIT_TEST_PERF_LARGE_REPO into 1, 10,
and 100 packs, and then tries to perform a "clone" at each stage with
both single- and multi-pack reuse enabled.
Note that the `repack_into_n_chunks()` function in this new test script
differs from the existing `repack_into_n()`. The former partitions the
repository into N equal-sized chunks, while the latter produces N packs
of five commits each (plus their objects), and then another pack with
the remainder.
On git.git, I can produce the following results on my machine:
Test this tree
--------------------------------------------------------------------------------
5332.3: clone for 1-pack scenario (single-pack reuse) 1.57(2.99+0.15)
5332.4: clone size for 1-pack scenario (single-pack reuse) 231.8M
5332.5: clone for 1-pack scenario (multi-pack reuse) 1.79(2.96+0.21)
5332.6: clone size for 1-pack scenario (multi-pack reuse) 231.7M
5332.9: clone for 10-pack scenario (single-pack reuse) 3.89(16.75+0.35)
5332.10: clone size for 10-pack scenario (single-pack reuse) 209.9M
5332.11: clone for 10-pack scenario (multi-pack reuse) 1.56(2.99+0.17)
5332.12: clone size for 10-pack scenario (multi-pack reuse) 224.4M
5332.15: clone for 100-pack scenario (single-pack reuse) 8.24(54.31+0.59)
5332.16: clone size for 100-pack scenario (single-pack reuse) 278.3M
5332.17: clone for 100-pack scenario (multi-pack reuse) 2.13(2.44+0.33)
5332.18: clone size for 100-pack scenario (multi-pack reuse) 357.9M
Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
t/perf/p5332-multi-pack-reuse.sh | 81 ++++++++++++++++++++++++++++++++
1 file changed, 81 insertions(+)
create mode 100755 t/perf/p5332-multi-pack-reuse.sh
diff --git a/t/perf/p5332-multi-pack-reuse.sh b/t/perf/p5332-multi-pack-reuse.sh
new file mode 100755
index 0000000000..5c6c575d62
--- /dev/null
+++ b/t/perf/p5332-multi-pack-reuse.sh
@@ -0,0 +1,81 @@
+#!/bin/sh
+
+test_description='tests pack performance with multi-pack reuse'
+
+. ./perf-lib.sh
+. "${TEST_DIRECTORY}/perf/lib-pack.sh"
+
+packdir=.git/objects/pack
+
+test_perf_large_repo
+
+find_pack () {
+ for idx in $packdir/pack-*.idx
+ do
+ if git show-index <$idx | grep -q "$1"
+ then
+ basename $idx
+ fi || return 1
+ done
+}
+
+repack_into_n_chunks () {
+ git repack -adk &&
+
+ test "$1" -eq 1 && return ||
+
+ find $packdir -type f | sort >packs.before &&
+
+ # partition the repository into $1 chunks of consecutive commits, and
+ # then create $1 packs with the objects reachable from each chunk
+ # (excluding any objects reachable from the previous chunks)
+ sz="$(($(git rev-list --count --all) / $1))"
+ for rev in $(git rev-list --all | awk "NR % $sz == 0" | tac)
+ do
+ pack="$(echo "$rev" | git pack-objects --revs \
+ --honor-pack-keep --delta-base-offset $packdir/pack)" &&
+ touch $packdir/pack-$pack.keep || return 1
+ done
+
+ # grab any remaining objects not packed by the previous step(s)
+ git pack-objects --revs --all --honor-pack-keep --delta-base-offset \
+ $packdir/pack &&
+
+ find $packdir -type f | sort >packs.after &&
+
+ # and install the whole thing
+ for f in $(comm -12 packs.before packs.after)
+ do
+ rm -f "$f" || return 1
+ done
+ rm -fr $packdir/*.keep
+}
+
+for nr_packs in 1 10 100
+do
+ test_expect_success "create $nr_packs-pack scenario" '
+ repack_into_n_chunks $nr_packs
+ '
+
+ test_expect_success "setup bitmaps for $nr_packs-pack scenario" '
+ find $packdir -type f -name "*.idx" | sed -e "s/.*\/\(.*\)$/+\1/g" |
+ git multi-pack-index write --stdin-packs --bitmap \
+ --preferred-pack="$(find_pack $(git rev-parse HEAD))"
+ '
+
+ for reuse in single multi
+ do
+ test_perf "clone for $nr_packs-pack scenario ($reuse-pack reuse)" "
+ git for-each-ref --format='%(objectname)' refs/heads refs/tags >in &&
+ git -c pack.allowPackReuse=$reuse pack-objects \
+ --revs --delta-base-offset --use-bitmap-index \
+ --stdout <in >result
+ "
+
+ test_size "clone size for $nr_packs-pack scenario ($reuse-pack reuse)" '
+ wc -c <result
+ '
+ done
+done
+
+test_done
--
2.43.0.102.ga31d690331.dirty
^ permalink raw reply related
* Re: [PATCH v2 00/26] pack-objects: multi-pack verbatim reuse
From: Junio C Hamano @ 2023-12-15 0:06 UTC (permalink / raw)
To: Taylor Blau; +Cc: git, Jeff King, Patrick Steinhardt
In-Reply-To: <cover.1702592603.git.me@ttaylorr.com>
I haven't looked into the details yet, but it seems that
t5309-pack-delta-cycles.sh fails under
$ SANITIZE=leak GIT_TEST_PASSING_SANITIZE_LEAK=true make -j16 test
^ permalink raw reply
* Re: [PATCH v2 00/26] pack-objects: multi-pack verbatim reuse
From: Taylor Blau @ 2023-12-15 0:38 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git, Jeff King, Patrick Steinhardt
In-Reply-To: <xmqqbkasnxba.fsf@gitster.g>
On Thu, Dec 14, 2023 at 04:06:49PM -0800, Junio C Hamano wrote:
> I haven't looked into the details yet, but it seems that
> t5309-pack-delta-cycles.sh fails under
>
> $ SANITIZE=leak GIT_TEST_PASSING_SANITIZE_LEAK=true make -j16 test
Hrm. I tried to reproduce this, but I'm not seeing it. I have:
$ make SANITIZE=leak GIT_TEST_PASSING_SANITIZE_LEAK=true test
[ ... ]
All tests successful.
Files=1001, Tests=14558, 48 wallclock secs ( 8.34 usr 2.10 sys + 391.60 cusr 319.67 csys = 721.71 CPU)
Result: PASS
With this series applied on top of 1a87c842ec (Start the 2.44 cycle,
2023-12-09). The tree I get at the end is d148e16f5cfba405a9823cb68540a8c83004f98f.
Did we apply onto different bases?
Thanks,
Taylor
^ permalink raw reply
* Re: [PATCH v2 00/26] pack-objects: multi-pack verbatim reuse
From: Junio C Hamano @ 2023-12-15 0:40 UTC (permalink / raw)
To: Taylor Blau; +Cc: git, Jeff King, Patrick Steinhardt
In-Reply-To: <xmqqbkasnxba.fsf@gitster.g>
Junio C Hamano <gitster@pobox.com> writes:
> I haven't looked into the details yet, but it seems that
> t5309-pack-delta-cycles.sh fails under
>
> $ SANITIZE=leak GIT_TEST_PASSING_SANITIZE_LEAK=true make -j16 test
Hmph, this seems to be elusive. I tried it again and then it did
not fail this time.
^ permalink raw reply
* Re: [PATCH v2 00/26] pack-objects: multi-pack verbatim reuse
From: Taylor Blau @ 2023-12-15 1:25 UTC (permalink / raw)
To: Junio C Hamano; +Cc: git, Jeff King, Patrick Steinhardt
In-Reply-To: <xmqq7clgnvqv.fsf@gitster.g>
On Thu, Dec 14, 2023 at 04:40:40PM -0800, Junio C Hamano wrote:
> Junio C Hamano <gitster@pobox.com> writes:
>
> > I haven't looked into the details yet, but it seems that
> > t5309-pack-delta-cycles.sh fails under
> >
> > $ SANITIZE=leak GIT_TEST_PASSING_SANITIZE_LEAK=true make -j16 test
>
> Hmph, this seems to be elusive. I tried it again and then it did
> not fail this time.
Indeed, but I was able to reproduce the failure both on my branch and on
'master' under --stress, yielding the following failure in t5309.6:
+ git index-pack --fix-thin --stdin
fatal: REF_DELTA at offset 46 already resolved (duplicate base 01d7713666f4de822776c7622c10f1b07de280dc?)
=================================================================
==3904583==ERROR: LeakSanitizer: detected memory leaks
Direct leak of 32 byte(s) in 1 object(s) allocated from:
#0 0x7fa790d01986 in __interceptor_realloc ../../../../src/libsanitizer/lsan/lsan_interceptors.cpp:98
#1 0x7fa790add769 in __pthread_getattr_np nptl/pthread_getattr_np.c:180
#2 0x7fa790d117c5 in __sanitizer::GetThreadStackTopAndBottom(bool, unsigned long*, unsigned long*) ../../../../src/libsanitizer/sanitizer_common/sanitizer_linux_libcdep.cpp:150
#3 0x7fa790d11957 in __sanitizer::GetThreadStackAndTls(bool, unsigned long*, unsigned long*, unsigned long*, unsigned long*) ../../../../src/libsanitizer/sanitizer_common/sanitizer_linux_libcdep.cpp:598
#4 0x7fa790d03fe8 in __lsan::ThreadStart(unsigned int, unsigned long long, __sanitizer::ThreadType) ../../../../src/libsanitizer/lsan/lsan_posix.cpp:51
#5 0x7fa790d013fd in __lsan_thread_start_func ../../../../src/libsanitizer/lsan/lsan_interceptors.cpp:440
#6 0x7fa790adc3eb in start_thread nptl/pthread_create.c:444
#7 0x7fa790b5ca5b in clone3 ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81
SUMMARY: LeakSanitizer: 32 byte(s) leaked in 1 allocation(s).
Aborted
The duplicate base thing is a red-herring, and is an expected result of
the test. But the leak is definitely not, and I'm not sure what's going
on here since the frames listed above are in the LSan runtime, not Git.
I'll try to dig into this a bit more, but I'm not quite sure what's
going on yet.
Thanks,
Taylor
^ permalink raw reply
* Re: [PATCH] tests: prefer host Git to verify chainlint self-checks
From: Patrick Steinhardt @ 2023-12-15 5:33 UTC (permalink / raw)
To: Eric Sunshine; +Cc: Junio C Hamano, git
In-Reply-To: <CAPig+cQvcSeSKVE=0kDyNiSztNAgVwhfAzoL5K7uYHEKe=0f_A@mail.gmail.com>
[-- Attachment #1: Type: text/plain, Size: 1539 bytes --]
On Thu, Dec 14, 2023 at 01:10:48PM -0500, Eric Sunshine wrote:
> On Thu, Dec 14, 2023 at 11:16 AM Junio C Hamano <gitster@pobox.com> wrote:
> > Eric Sunshine <sunshine@sunshineco.com> writes:
> > > I sent a reply[1] in the other thread explaining why I'm still leaning
> > > toward `sed` to smooth over these minor differences rather than
> > > churning the "expect" files, especially since the minor differences
> > > are not significant to what is actually being tested.
> >
> > If it is just one time bulk conversion under t/chainlint/ to match
> > what the chainlint.pl script produces, with the possibility of
> > similar bulk updates in the future when the script gets updated, I
> > tend to agree with Patrick that getting rid of the fuzzy comparison
> > will be the best way forward.
>
> Okay, that's fine. If we take this approach, though, then it would
> make sense to eliminate _all_ gratuitous postprocessing of the
> "expect" files[1] so that we really are comparing the direct output of
> chainlint.pl with the "expect" files, rather than merely munging the
> inline whitespace of the "expect" files slightly as Patrick's proposed
> patch does[2].
>
> (The only postprocessing of "expect" files which needs to stay is the
> bit which removes the "# LINT:" comments which litter the "expect"
> files explaining to human readers why the linter should insert a
> "???FOO???" annotation at that particular point.)
Okay. I'll send a v3 to also drop the other post-processing steps then.
Patrick
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply
* Re: [PATCH] tests: drop dependency on `git diff` in check-chainlint
From: Patrick Steinhardt @ 2023-12-15 5:36 UTC (permalink / raw)
To: Junio C Hamano; +Cc: Eric Sunshine, git, Eric Sunshine, Eric Sunshine
In-Reply-To: <xmqqo7esohjm.fsf@gitster.g>
[-- Attachment #1: Type: text/plain, Size: 1541 bytes --]
On Thu, Dec 14, 2023 at 08:49:49AM -0800, Junio C Hamano wrote:
> Patrick Steinhardt <ps@pks.im> writes:
> > This strongly reminds me of the thread at [1], where a similar issue was
> > discussed for git-grep(1). Quoting Junio:
> >
> >> I actually do not think these "we are allowing Git tools to be used
> >> on random garbage" is a good idea to begin with X-<. If we invented
> >> something nice for our variant in "git grep" and wish we can use it
> >> outside the repository, contributing the feature to implementations
> >> of "grep" would have been the right way to move forward, instead of
> >> contaminating the codebase with things that are not related to Git.
> >
> > So this might not be the best way to go.
>
> That is not a conclusion I want people to draw.
>
> Like it or not, "git diff --no-index" will be with us to stay, and
> "--no-index" being "we have abused the rest of Git code to implement
> 'diff' that works _outside_ a Git repository---now go and do your
> thing", we would eventually want to correct it, if it is misbehaving
> when a repository it finds is in a shape it does not like, no?
>
> We should have what you quoted in mind as a general principle, and
> think twice when we are tempted to hoard useful features for another
> tool we initially wrote for Git and allow them to be used with the
> "--no-index" option, instead of contributing them to the tool that
> does not know or care "git" repositories (like "diff" and "grep").
Okay, thanks for clarifying!
Patrick
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply
* [PATCH v3] tests: adjust whitespace in chainlint expectations
From: Patrick Steinhardt @ 2023-12-15 6:04 UTC (permalink / raw)
To: git; +Cc: Junio C Hamano, Eric Sunshine
In-Reply-To: <4112adbe467c14a8f22a87ea41aa4705f8760cf6.1702380646.git.ps@pks.im>
[-- Attachment #1: Type: text/plain, Size: 19823 bytes --]
The "check-chainlint" target runs automatically when running tests and
performs self-checks to verify that the chainlinter itself produces the
expected output. Originally, the chainlinter was implemented via sed,
but the infrastructure has been rewritten in fb41727b7e (t: retire
unused chainlint.sed, 2022-09-01) to use a Perl script instead.
The rewrite caused some slight whitespace changes in the output that are
ultimately not of much importance. In order to be able to assert that
the actual chainlinter errors match our expectations we thus have to
ignore whitespace characters when diffing them. As the `-w` flag is not
in POSIX we try to use `git diff -w --no-index` before we fall back to
`diff -w -u`.
To accomodate for cases where the host system has no Git installation we
use the locally-compiled version of Git. This can result in problems
though when the Git project's repository is using extensions that the
locally-compiled version of Git doesn't understand. It will refuse to
run and thus cause the checks to fail.
Instead of improving the detection logic, fix our ".expect" files so
that we do not need any post-processing at all anymore. This allows us
to drop the `-w` flag when diffing so that we can always use diff(1)
now.
Note that we leave the post-processing of `chainlint.pl` output intact.
All we do here is to strip leading line numbers that it would otherwise
generate. Having these would cause a rippling effect whenever we add a
new test that sorts into the middle of existing tests and would require
us to renumerate all subsequent lines, which seems rather pointless.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
t/Makefile | 12 ++-------
t/chainlint/blank-line-before-esac.expect | 12 +++------
t/chainlint/block.expect | 6 ++---
t/chainlint/chain-break-background.expect | 4 +--
t/chainlint/chain-break-continue.expect | 1 -
t/chainlint/chain-break-return-exit.expect | 15 +++++------
t/chainlint/chain-break-status.expect | 5 ++--
t/chainlint/chained-block.expect | 1 -
t/chainlint/chained-subshell.expect | 5 ++--
.../command-substitution-subsubshell.expect | 2 +-
t/chainlint/cuddled.expect | 4 ---
t/chainlint/dqstring-line-splice.expect | 4 +--
t/chainlint/dqstring-no-interpolate.expect | 7 ++---
t/chainlint/empty-here-doc.expect | 4 +--
t/chainlint/exclamation.expect | 2 +-
t/chainlint/for-loop-abbreviated.expect | 2 +-
t/chainlint/function.expect | 6 ++---
t/chainlint/here-doc-indent-operator.expect | 2 --
t/chainlint/here-doc.expect | 7 ++---
| 1 -
t/chainlint/loop-detect-failure.expect | 1 -
t/chainlint/loop-detect-status.expect | 20 +++++++-------
t/chainlint/negated-one-liner.expect | 1 -
t/chainlint/nested-here-doc.expect | 3 ---
t/chainlint/nested-loop-detect-failure.expect | 27 +++++++++----------
t/chainlint/not-heredoc.expect | 1 -
t/chainlint/one-liner.expect | 2 --
t/chainlint/subshell-here-doc.expect | 6 ++---
t/chainlint/token-pasting.expect | 18 +++++--------
29 files changed, 65 insertions(+), 116 deletions(-)
diff --git a/t/Makefile b/t/Makefile
index 225aaf78ed..ab620cf732 100644
--- a/t/Makefile
+++ b/t/Makefile
@@ -103,20 +103,12 @@ check-chainlint:
echo "# chainlint: $(CHAINLINTTMP_SQ)/tests" && \
for i in $(CHAINLINTTESTS); do \
echo "# chainlint: $$i" && \
- sed -e '/^[ ]*$$/d' chainlint/$$i.expect; \
+ cat chainlint/$$i.expect; \
done \
} >'$(CHAINLINTTMP_SQ)'/expect && \
$(CHAINLINT) --emit-all '$(CHAINLINTTMP_SQ)'/tests | \
sed -e 's/^[1-9][0-9]* //;/^[ ]*$$/d' >'$(CHAINLINTTMP_SQ)'/actual && \
- if test -f ../GIT-BUILD-OPTIONS; then \
- . ../GIT-BUILD-OPTIONS; \
- fi && \
- if test -x ../git$$X; then \
- DIFFW="../git$$X --no-pager diff -w --no-index"; \
- else \
- DIFFW="diff -w -u"; \
- fi && \
- $$DIFFW '$(CHAINLINTTMP_SQ)'/expect '$(CHAINLINTTMP_SQ)'/actual
+ diff -u '$(CHAINLINTTMP_SQ)'/expect '$(CHAINLINTTMP_SQ)'/actual
test-lint: test-lint-duplicates test-lint-executable test-lint-shell-syntax \
test-lint-filenames
diff --git a/t/chainlint/blank-line-before-esac.expect b/t/chainlint/blank-line-before-esac.expect
index 48ed4eb124..7400a1df7e 100644
--- a/t/chainlint/blank-line-before-esac.expect
+++ b/t/chainlint/blank-line-before-esac.expect
@@ -1,18 +1,14 @@
-test_done ( ) {
+test_done () {
case "$test_failure" in
- 0 )
+ 0)
test_at_end_hook_
-
exit 0 ;;
-
- * )
+ *)
if test $test_external_has_tap -eq 0
then
say_color error "# failed $test_failure among $msg"
say "1..$test_count"
fi
-
exit 1 ;;
-
- esac
+ esac
}
diff --git a/t/chainlint/block.expect b/t/chainlint/block.expect
index a3bcea492a..569211de08 100644
--- a/t/chainlint/block.expect
+++ b/t/chainlint/block.expect
@@ -10,12 +10,10 @@
} ?!AMP?!
baz
) &&
-
{
- echo a ; ?!AMP?! echo b
+ echo a; ?!AMP?! echo b
} &&
-{ echo a ; ?!AMP?! echo b ; } &&
-
+{ echo a; ?!AMP?! echo b; } &&
{
echo "${var}9" &&
echo "done"
diff --git a/t/chainlint/chain-break-background.expect b/t/chainlint/chain-break-background.expect
index 28f9114f42..20d0bb5333 100644
--- a/t/chainlint/chain-break-background.expect
+++ b/t/chainlint/chain-break-background.expect
@@ -1,9 +1,9 @@
JGIT_DAEMON_PID= &&
git init --bare empty.git &&
-> empty.git/git-daemon-export-ok &&
+>empty.git/git-daemon-export-ok &&
mkfifo jgit_daemon_output &&
{
- jgit daemon --port="$JGIT_DAEMON_PORT" . > jgit_daemon_output &
+ jgit daemon --port="$JGIT_DAEMON_PORT" . >jgit_daemon_output &
JGIT_DAEMON_PID=$!
} &&
test_expect_code 2 git ls-remote --exit-code git://localhost:$JGIT_DAEMON_PORT/empty.git
diff --git a/t/chainlint/chain-break-continue.expect b/t/chainlint/chain-break-continue.expect
index 47a3457710..fa00c9fdda 100644
--- a/t/chainlint/chain-break-continue.expect
+++ b/t/chainlint/chain-break-continue.expect
@@ -4,7 +4,6 @@ do
test "$path" = "foobar/non-note.txt" && continue
test "$path" = "deadbeef" && continue
test "$path" = "de/adbeef" && continue
-
if test $(expr length "$path") -ne $hexsz
then
return 1
diff --git a/t/chainlint/chain-break-return-exit.expect b/t/chainlint/chain-break-return-exit.expect
index 1732d221c3..bedc0711e4 100644
--- a/t/chainlint/chain-break-return-exit.expect
+++ b/t/chainlint/chain-break-return-exit.expect
@@ -1,18 +1,17 @@
case "$(git ls-files)" in
-one ) echo pass one ;;
-* ) echo bad one ; return 1 ;;
+one) echo pass one ;;
+*) echo bad one; return 1 ;;
esac &&
(
case "$(git ls-files)" in
- two ) echo pass two ;;
- * ) echo bad two ; exit 1 ;;
-esac
+ two) echo pass two ;;
+ *) echo bad two; exit 1 ;;
+ esac
) &&
case "$(git ls-files)" in
-dir/two"$LF"one ) echo pass both ;;
-* ) echo bad ; return 1 ;;
+dir/two"$LF"one) echo pass both ;;
+*) echo bad; return 1 ;;
esac &&
-
for i in 1 2 3 4 ; do
git checkout main -b $i || return $?
test_commit $i $i $i tag$i || return $?
diff --git a/t/chainlint/chain-break-status.expect b/t/chainlint/chain-break-status.expect
index f4bada9463..53b3f42e7a 100644
--- a/t/chainlint/chain-break-status.expect
+++ b/t/chainlint/chain-break-status.expect
@@ -1,7 +1,6 @@
-OUT=$(( ( large_git ; echo $? 1 >& 3 ) | : ) 3 >& 1) &&
+OUT=$( ((large_git; echo $? 1>&3) | :) 3>&1 ) &&
test_match_signal 13 "$OUT" &&
-
-{ test-tool sigchain > actual ; ret=$? ; } &&
+{ test-tool sigchain >actual; ret=$?; } &&
{
test_match_signal 15 "$ret" ||
test "$ret" = 3
diff --git a/t/chainlint/chained-block.expect b/t/chainlint/chained-block.expect
index 574cdceb07..2416e0fc80 100644
--- a/t/chainlint/chained-block.expect
+++ b/t/chainlint/chained-block.expect
@@ -2,7 +2,6 @@ echo nobody home && {
test the doohicky ?!AMP?!
right now
} &&
-
GIT_EXTERNAL_DIFF=echo git diff | {
read path oldfile oldhex oldmode newfile newhex newmode &&
test "z$oh" = "z$oldhex"
diff --git a/t/chainlint/chained-subshell.expect b/t/chainlint/chained-subshell.expect
index af0369d328..2a44845759 100644
--- a/t/chainlint/chained-subshell.expect
+++ b/t/chainlint/chained-subshell.expect
@@ -3,8 +3,7 @@ mkdir sub && (
foo the bar ?!AMP?!
nuff said
) &&
-
-cut "-d " -f actual | ( read s1 s2 s3 &&
+cut "-d " -f actual | (read s1 s2 s3 &&
test -f $s1 ?!AMP?!
test $(cat $s2) = tree2path1 &&
-test $(cat $s3) = tree3path1 )
+test $(cat $s3) = tree3path1)
diff --git a/t/chainlint/command-substitution-subsubshell.expect b/t/chainlint/command-substitution-subsubshell.expect
index ab2f79e845..ec42f2c30c 100644
--- a/t/chainlint/command-substitution-subsubshell.expect
+++ b/t/chainlint/command-substitution-subsubshell.expect
@@ -1,2 +1,2 @@
-OUT=$(( ( large_git 1 >& 3 ) | : ) 3 >& 1) &&
+OUT=$( ((large_git 1>&3) | :) 3>&1 ) &&
test_match_signal 13 "$OUT"
diff --git a/t/chainlint/cuddled.expect b/t/chainlint/cuddled.expect
index c3e0be4047..9558362917 100644
--- a/t/chainlint/cuddled.expect
+++ b/t/chainlint/cuddled.expect
@@ -1,17 +1,13 @@
(cd foo &&
bar
) &&
-
(cd foo ?!AMP?!
bar
) &&
-
(
cd foo &&
bar) &&
-
(cd foo &&
bar) &&
-
(cd foo ?!AMP?!
bar)
diff --git a/t/chainlint/dqstring-line-splice.expect b/t/chainlint/dqstring-line-splice.expect
index bf9ced60d4..f8fa3bc969 100644
--- a/t/chainlint/dqstring-line-splice.expect
+++ b/t/chainlint/dqstring-line-splice.expect
@@ -1,3 +1,3 @@
-echo 'fatal: reword option of --fixup is mutually exclusive with' '--patch/--interactive/--all/--include/--only' > expect &&
-test_must_fail git commit --fixup=reword:HEAD~ $1 2 > actual &&
+echo 'fatal: reword option of --fixup is mutually exclusive with' '--patch/--interactive/--all/--include/--only' >expect &&
+test_must_fail git commit --fixup=reword:HEAD~ $1 2>actual &&
test_cmp expect actual
diff --git a/t/chainlint/dqstring-no-interpolate.expect b/t/chainlint/dqstring-no-interpolate.expect
index 10724987a5..bd60121b47 100644
--- a/t/chainlint/dqstring-no-interpolate.expect
+++ b/t/chainlint/dqstring-no-interpolate.expect
@@ -1,11 +1,8 @@
grep "^ ! [rejected][ ]*$BRANCH -> $BRANCH (non-fast-forward)$" out &&
-
grep "^\.git$" output.txt &&
-
-
(
cd client$version &&
GIT_TEST_PROTOCOL_VERSION=$version git fetch-pack --no-progress .. $(cat ../input)
-) > output &&
- cut -d ' ' -f 2 < output | sort > actual &&
+) >output &&
+ cut -d ' ' -f 2 <output | sort >actual &&
test_cmp expect actual
diff --git a/t/chainlint/empty-here-doc.expect b/t/chainlint/empty-here-doc.expect
index e8733c97c6..8507721192 100644
--- a/t/chainlint/empty-here-doc.expect
+++ b/t/chainlint/empty-here-doc.expect
@@ -1,4 +1,4 @@
-git ls-tree $tree path > current &&
-cat > expected <<\EOF &&
+git ls-tree $tree path >current &&
+cat >expected <<\EOF &&
EOF
test_output
diff --git a/t/chainlint/exclamation.expect b/t/chainlint/exclamation.expect
index 2d961a58c6..765a35bb4c 100644
--- a/t/chainlint/exclamation.expect
+++ b/t/chainlint/exclamation.expect
@@ -1,4 +1,4 @@
-if ! condition ; then echo nope ; else yep ; fi &&
+if ! condition; then echo nope; else yep; fi &&
test_prerequisite !MINGW &&
mail uucp!address &&
echo !whatever!
diff --git a/t/chainlint/for-loop-abbreviated.expect b/t/chainlint/for-loop-abbreviated.expect
index a21007a63f..02c0d15cca 100644
--- a/t/chainlint/for-loop-abbreviated.expect
+++ b/t/chainlint/for-loop-abbreviated.expect
@@ -1,5 +1,5 @@
for it
do
- path=$(expr "$it" : ( [^:]*) ) &&
+ path=$(expr "$it" : ([^:]*)) &&
git update-index --add "$path" || exit
done
diff --git a/t/chainlint/function.expect b/t/chainlint/function.expect
index a14388e6b9..826b2ccc95 100644
--- a/t/chainlint/function.expect
+++ b/t/chainlint/function.expect
@@ -1,11 +1,9 @@
-sha1_file ( ) {
+sha1_file() {
echo "$*" | sed "s#..#.git/objects/&/#"
} &&
-
-remove_object ( ) {
+remove_object() {
file=$(sha1_file "$*") &&
test -e "$file" ?!AMP?!
rm -f "$file"
} ?!AMP?!
-
sha1_file arg && remove_object arg
diff --git a/t/chainlint/here-doc-indent-operator.expect b/t/chainlint/here-doc-indent-operator.expect
index f92a7ce999..bbaad024d2 100644
--- a/t/chainlint/here-doc-indent-operator.expect
+++ b/t/chainlint/here-doc-indent-operator.expect
@@ -3,9 +3,7 @@ header: 43475048 1 $(test_oid oid_version) $NUM_CHUNKS 0
num_commits: $1
chunks: oid_fanout oid_lookup commit_metadata generation_data bloom_indexes bloom_data
EOF
-
cat >expect << -EOF ?!AMP?!
this is not indented
-EOF
-
cleanup
diff --git a/t/chainlint/here-doc.expect b/t/chainlint/here-doc.expect
index 1df3f78282..9e612ac8b1 100644
--- a/t/chainlint/here-doc.expect
+++ b/t/chainlint/here-doc.expect
@@ -1,22 +1,19 @@
boodle wobba \
- gorgo snoot \
- wafta snurb <<EOF &&
+ gorgo snoot \
+ wafta snurb <<EOF &&
quoth the raven,
nevermore...
EOF
-
cat <<-Arbitrary_Tag_42 >foo &&
snoz
boz
woz
Arbitrary_Tag_42
-
cat <<"zump" >boo &&
snoz
boz
woz
zump
-
horticulture <<\EOF
gomez
morticia
--git a/t/chainlint/inline-comment.expect b/t/chainlint/inline-comment.expect
index 6bad218530..c7fc411f91 100644
--- a/t/chainlint/inline-comment.expect
+++ b/t/chainlint/inline-comment.expect
@@ -3,6 +3,5 @@
barfoo ?!AMP?! # wrong position for &&
flibble "not a # comment"
) &&
-
(cd foo &&
flibble "not a # comment")
diff --git a/t/chainlint/loop-detect-failure.expect b/t/chainlint/loop-detect-failure.expect
index a66025c39d..7a1a1ff0bf 100644
--- a/t/chainlint/loop-detect-failure.expect
+++ b/t/chainlint/loop-detect-failure.expect
@@ -5,7 +5,6 @@ do
git -C r1 add file.$n &&
git -C r1 commit -m "$n" || return 1
done &&
-
git init r2 &&
for n in 1000 10000
do
diff --git a/t/chainlint/loop-detect-status.expect b/t/chainlint/loop-detect-status.expect
index 24da9e86d5..7ce3a34806 100644
--- a/t/chainlint/loop-detect-status.expect
+++ b/t/chainlint/loop-detect-status.expect
@@ -1,18 +1,18 @@
-( while test $i -le $blobcount
-do
- printf "Generating blob $i/$blobcount\r" >& 2 &&
+(while test $i -le $blobcount
+ do
+ printf "Generating blob $i/$blobcount\r" >&2 &&
printf "blob\nmark :$i\ndata $blobsize\n" &&
#test-tool genrandom $i $blobsize &&
printf "%-${blobsize}s" $i &&
echo "M 100644 :$i $i" >> commit &&
i=$(($i+1)) ||
echo $? > exit-status
-done &&
-echo "commit refs/heads/main" &&
-echo "author A U Thor <author@email.com> 123456789 +0000" &&
-echo "committer C O Mitter <committer@email.com> 123456789 +0000" &&
-echo "data 5" &&
-echo ">2gb" &&
-cat commit ) |
+ done &&
+ echo "commit refs/heads/main" &&
+ echo "author A U Thor <author@email.com> 123456789 +0000" &&
+ echo "committer C O Mitter <committer@email.com> 123456789 +0000" &&
+ echo "data 5" &&
+ echo ">2gb" &&
+ cat commit) |
git fast-import --big-file-threshold=2 &&
test ! -f exit-status
diff --git a/t/chainlint/negated-one-liner.expect b/t/chainlint/negated-one-liner.expect
index ad4c2d949e..7ae60eb770 100644
--- a/t/chainlint/negated-one-liner.expect
+++ b/t/chainlint/negated-one-liner.expect
@@ -1,5 +1,4 @@
! (foo && bar) &&
! (foo && bar) >baz &&
-
! (foo; ?!AMP?! bar) &&
! (foo; ?!AMP?! bar) >baz
diff --git a/t/chainlint/nested-here-doc.expect b/t/chainlint/nested-here-doc.expect
index 29b3832a98..93eeded9f4 100644
--- a/t/chainlint/nested-here-doc.expect
+++ b/t/chainlint/nested-here-doc.expect
@@ -6,7 +6,6 @@ fub <<EOF
EOF
formp
ARBITRARY
-
(
cat <<-\INPUT_END &&
fish are mice
@@ -17,7 +16,6 @@ ARBITRARY
EOF
toink
INPUT_END
-
cat <<-\EOT ?!AMP?!
text goes here
data <<EOF
@@ -25,6 +23,5 @@ ARBITRARY
EOF
more test here
EOT
-
foobar
)
diff --git a/t/chainlint/nested-loop-detect-failure.expect b/t/chainlint/nested-loop-detect-failure.expect
index 4793a0e8e1..211fdb7479 100644
--- a/t/chainlint/nested-loop-detect-failure.expect
+++ b/t/chainlint/nested-loop-detect-failure.expect
@@ -1,31 +1,28 @@
-for i in 0 1 2 3 4 5 6 7 8 9 ;
+for i in 0 1 2 3 4 5 6 7 8 9;
do
- for j in 0 1 2 3 4 5 6 7 8 9 ;
+ for j in 0 1 2 3 4 5 6 7 8 9;
do
- echo "$i$j" > "path$i$j" ?!LOOP?!
+ echo "$i$j" >"path$i$j" ?!LOOP?!
done ?!LOOP?!
done &&
-
-for i in 0 1 2 3 4 5 6 7 8 9 ;
+for i in 0 1 2 3 4 5 6 7 8 9;
do
- for j in 0 1 2 3 4 5 6 7 8 9 ;
+ for j in 0 1 2 3 4 5 6 7 8 9;
do
- echo "$i$j" > "path$i$j" || return 1
+ echo "$i$j" >"path$i$j" || return 1
done
done &&
-
-for i in 0 1 2 3 4 5 6 7 8 9 ;
+for i in 0 1 2 3 4 5 6 7 8 9;
do
- for j in 0 1 2 3 4 5 6 7 8 9 ;
+ for j in 0 1 2 3 4 5 6 7 8 9;
do
- echo "$i$j" > "path$i$j" ?!LOOP?!
+ echo "$i$j" >"path$i$j" ?!LOOP?!
done || return 1
done &&
-
-for i in 0 1 2 3 4 5 6 7 8 9 ;
+for i in 0 1 2 3 4 5 6 7 8 9;
do
- for j in 0 1 2 3 4 5 6 7 8 9 ;
+ for j in 0 1 2 3 4 5 6 7 8 9;
do
- echo "$i$j" > "path$i$j" || return 1
+ echo "$i$j" >"path$i$j" || return 1
done || return 1
done
diff --git a/t/chainlint/not-heredoc.expect b/t/chainlint/not-heredoc.expect
index 2e9bb135fe..47311f4c2d 100644
--- a/t/chainlint/not-heredoc.expect
+++ b/t/chainlint/not-heredoc.expect
@@ -3,7 +3,6 @@ echo ourside &&
echo "=======" &&
echo theirside &&
echo ">>>>>>> theirs" &&
-
(
echo "<<<<<<< ours" &&
echo ourside &&
diff --git a/t/chainlint/one-liner.expect b/t/chainlint/one-liner.expect
index 57a7a444c1..cdb45231f1 100644
--- a/t/chainlint/one-liner.expect
+++ b/t/chainlint/one-liner.expect
@@ -1,9 +1,7 @@
(foo && bar) &&
(foo && bar) |
(foo && bar) >baz &&
-
(foo; ?!AMP?! bar) &&
(foo; ?!AMP?! bar) |
(foo; ?!AMP?! bar) >baz &&
-
(foo "bar; baz")
diff --git a/t/chainlint/subshell-here-doc.expect b/t/chainlint/subshell-here-doc.expect
index 52789278d1..fa7cb0288a 100644
--- a/t/chainlint/subshell-here-doc.expect
+++ b/t/chainlint/subshell-here-doc.expect
@@ -1,15 +1,13 @@
(
echo wobba \
- gorgo snoot \
- wafta snurb <<-EOF &&
+ gorgo snoot \
+ wafta snurb <<-EOF &&
quoth the raven,
nevermore...
EOF
-
cat <<EOF >bip ?!AMP?!
fish fly high
EOF
-
echo <<-\EOF >bop
gomez
morticia
diff --git a/t/chainlint/token-pasting.expect b/t/chainlint/token-pasting.expect
index 342360bcd0..cda7d54037 100644
--- a/t/chainlint/token-pasting.expect
+++ b/t/chainlint/token-pasting.expect
@@ -1,26 +1,22 @@
git config filter.rot13.smudge ./rot13.sh &&
git config filter.rot13.clean ./rot13.sh &&
-
{
echo "*.t filter=rot13" ?!AMP?!
echo "*.i ident"
-} > .gitattributes &&
-
+} >.gitattributes &&
{
echo a b c d e f g h i j k l m ?!AMP?!
echo n o p q r s t u v w x y z ?!AMP?!
echo '$Id$'
-} > test &&
-cat test > test.t &&
-cat test > test.o &&
-cat test > test.i &&
+} >test &&
+cat test >test.t &&
+cat test >test.o &&
+cat test >test.i &&
git add test test.t test.i &&
rm -f test test.t test.i &&
git checkout -- test test.t test.i &&
-
-echo "content-test2" > test2.o &&
-echo "content-test3 - filename with special characters" > "test3 'sq',$x=.o" ?!AMP?!
-
+echo "content-test2" >test2.o &&
+echo "content-test3 - filename with special characters" >"test3 'sq',$x=.o" ?!AMP?!
downstream_url_for_sed=$(
printf "%sn" "$downstream_url" |
sed -e 's/\/\\/g' -e 's/[[/.*^$]/\&/g'
--
2.43.GIT
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply related
* Re: [PATCH v3] tests: adjust whitespace in chainlint expectations
From: Eric Sunshine @ 2023-12-15 6:24 UTC (permalink / raw)
To: Patrick Steinhardt; +Cc: git, Junio C Hamano
In-Reply-To: <8c3e1cb5eae13210070cc14f5f2a3f8c0dfc39c3.1702620230.git.ps@pks.im>
On Fri, Dec 15, 2023 at 1:04 AM Patrick Steinhardt <ps@pks.im> wrote:
> [...]
> Instead of improving the detection logic, fix our ".expect" files so
> that we do not need any post-processing at all anymore. This allows us
> to drop the `-w` flag when diffing so that we can always use diff(1)
> now.
>
> Note that we leave the post-processing of `chainlint.pl` output intact.
> All we do here is to strip leading line numbers that it would otherwise
> generate.
Hmm, okay, but... (see below)
> Having these would cause a rippling effect whenever we add a
> new test that sorts into the middle of existing tests and would require
> us to renumerate all subsequent lines, which seems rather pointless.
Just an aside, not strictly relevant at this time: Ævar has proposed
that check-chainlint should not be creating conglomerate "test",
"expect", and "actual" files, but should instead let `make` run
chainlint.pl separately on each chainlint self-test file, thus
benefiting from `make`'s innate parallelism rather than baking
parallelism into chainlint.pl itself. More importantly, `make`'s
dependency tracking would ensure that a chainlint self-test file only
gets rechecked if its timestamp changes. That differs from the current
situation in which _all_ of the chainlint self-test files are checked
on _every_ `make test` which is wasteful if none of them have changed.
Anyhow, with his proposed approach, there wouldn't be cascading line
number changes just because a new self-test file was added.
> Signed-off-by: Patrick Steinhardt <ps@pks.im>
> ---
> diff --git a/t/Makefile b/t/Makefile
> @@ -103,20 +103,12 @@ check-chainlint:
> $(CHAINLINT) --emit-all '$(CHAINLINTTMP_SQ)'/tests | \
> sed -e 's/^[1-9][0-9]* //;/^[ ]*$$/d' >'$(CHAINLINTTMP_SQ)'/actual && \
The commit message claims that this is only stripping the line numbers
which prefix each emitted line, but the `/^[ ]*$$/d` bit is also
deleting blank lines from the output of chainlint.pl. Thus, this ought
to be:
sed -e 's/^[1-9][0-9]* //' >'$(CHAINLINTTMP_SQ)'/actual && \
^ permalink raw reply
* Re: [PATCH v3] tests: adjust whitespace in chainlint expectations
From: Patrick Steinhardt @ 2023-12-15 6:29 UTC (permalink / raw)
To: Eric Sunshine; +Cc: git, Junio C Hamano
In-Reply-To: <CAPig+cQozi+aiTc5Bve4OHrfuSRGUCSkKmhoYtkGTmn64Ps-rw@mail.gmail.com>
[-- Attachment #1: Type: text/plain, Size: 2603 bytes --]
On Fri, Dec 15, 2023 at 01:24:20AM -0500, Eric Sunshine wrote:
> On Fri, Dec 15, 2023 at 1:04 AM Patrick Steinhardt <ps@pks.im> wrote:
> > [...]
> > Instead of improving the detection logic, fix our ".expect" files so
> > that we do not need any post-processing at all anymore. This allows us
> > to drop the `-w` flag when diffing so that we can always use diff(1)
> > now.
> >
> > Note that we leave the post-processing of `chainlint.pl` output intact.
> > All we do here is to strip leading line numbers that it would otherwise
> > generate.
>
> Hmm, okay, but... (see below)
>
> > Having these would cause a rippling effect whenever we add a
> > new test that sorts into the middle of existing tests and would require
> > us to renumerate all subsequent lines, which seems rather pointless.
>
> Just an aside, not strictly relevant at this time: Ævar has proposed
> that check-chainlint should not be creating conglomerate "test",
> "expect", and "actual" files, but should instead let `make` run
> chainlint.pl separately on each chainlint self-test file, thus
> benefiting from `make`'s innate parallelism rather than baking
> parallelism into chainlint.pl itself. More importantly, `make`'s
> dependency tracking would ensure that a chainlint self-test file only
> gets rechecked if its timestamp changes. That differs from the current
> situation in which _all_ of the chainlint self-test files are checked
> on _every_ `make test` which is wasteful if none of them have changed.
> Anyhow, with his proposed approach, there wouldn't be cascading line
> number changes just because a new self-test file was added.
I was indeed also thinking along this way and would tend to agree. I
punted on it as I honestly only really care for fixing the immediate
issue that the post-processing causes for me.
Are you fine with deferring this bigger refactoring?
> > Signed-off-by: Patrick Steinhardt <ps@pks.im>
> > ---
> > diff --git a/t/Makefile b/t/Makefile
> > @@ -103,20 +103,12 @@ check-chainlint:
> > $(CHAINLINT) --emit-all '$(CHAINLINTTMP_SQ)'/tests | \
> > sed -e 's/^[1-9][0-9]* //;/^[ ]*$$/d' >'$(CHAINLINTTMP_SQ)'/actual && \
>
> The commit message claims that this is only stripping the line numbers
> which prefix each emitted line, but the `/^[ ]*$$/d` bit is also
> deleting blank lines from the output of chainlint.pl. Thus, this ought
> to be:
>
> sed -e 's/^[1-9][0-9]* //' >'$(CHAINLINTTMP_SQ)'/actual && \
Gah, you're right, I missed the second part. Will fix in another round.
Patrick
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply
* Re: [PATCH v3] tests: adjust whitespace in chainlint expectations
From: Eric Sunshine @ 2023-12-15 6:40 UTC (permalink / raw)
To: Patrick Steinhardt; +Cc: git, Junio C Hamano
In-Reply-To: <ZXvyL2wtoTIt6OVc@tanuki>
On Fri, Dec 15, 2023 at 1:29 AM Patrick Steinhardt <ps@pks.im> wrote:
> On Fri, Dec 15, 2023 at 01:24:20AM -0500, Eric Sunshine wrote:
> > Just an aside, not strictly relevant at this time: Ævar has proposed
> > that check-chainlint should not be creating conglomerate "test",
> > "expect", and "actual" files, but should instead let `make` run
> > chainlint.pl separately on each chainlint self-test file, thus
> > benefiting from `make`'s innate parallelism rather than baking
> > parallelism into chainlint.pl itself. More importantly, `make`'s
> > dependency tracking would ensure that a chainlint self-test file only
> > gets rechecked if its timestamp changes. That differs from the current
> > situation in which _all_ of the chainlint self-test files are checked
> > on _every_ `make test` which is wasteful if none of them have changed.
> > Anyhow, with his proposed approach, there wouldn't be cascading line
> > number changes just because a new self-test file was added.
>
> I was indeed also thinking along this way and would tend to agree. I
> punted on it as I honestly only really care for fixing the immediate
> issue that the post-processing causes for me.
>
> Are you fine with deferring this bigger refactoring?
Oh, yes, of course. Please don't read my aside-comment as a suggestion
that you should tackle such a change or that there is any urgent need
to change how this all works. The currently proposed simple
solution(s) to allow you to get past this issue are perfectly
acceptable.
(As a further aside, just for completeness since I already mentioned
part of his plan, Ævar also really intended that the test scripts
themselves, "t/t????-*.sh", should be run individually through
chainlint.pl by the Makefile rather than sending all of them through
it in one go, thus once again taking advantage of `make`'s innate
parallelism, and only checking for &&-chain breakage if a test
script's timestamp has actually changed rather than checking all test
scripts unconditionally on every `make test`[1,2,3].)
[1]: https://lore.kernel.org/git/220901.86bkrzjm6e.gmgdl@evledraar.gmail.com/
[2]: https://lore.kernel.org/git/221122.86cz9fbyln.gmgdl@evledraar.gmail.com/
[3]: https://github.com/avar/git/commit/ff8b4a12b30ac5ca521a64e74b0fd968ab2d4585
^ permalink raw reply
* [PATCH v4] tests: adjust whitespace in chainlint expectations
From: Patrick Steinhardt @ 2023-12-15 6:42 UTC (permalink / raw)
To: git; +Cc: Junio C Hamano, Eric Sunshine
In-Reply-To: <4112adbe467c14a8f22a87ea41aa4705f8760cf6.1702380646.git.ps@pks.im>
[-- Attachment #1: Type: text/plain, Size: 17894 bytes --]
The "check-chainlint" target runs automatically when running tests and
performs self-checks to verify that the chainlinter itself produces the
expected output. Originally, the chainlinter was implemented via sed,
but the infrastructure has been rewritten in fb41727b7e (t: retire
unused chainlint.sed, 2022-09-01) to use a Perl script instead.
The rewrite caused some slight whitespace changes in the output that are
ultimately not of much importance. In order to be able to assert that
the actual chainlinter errors match our expectations we thus have to
ignore whitespace characters when diffing them. As the `-w` flag is not
in POSIX we try to use `git diff -w --no-index` before we fall back to
`diff -w -u`.
To accomodate for cases where the host system has no Git installation we
use the locally-compiled version of Git. This can result in problems
though when the Git project's repository is using extensions that the
locally-compiled version of Git doesn't understand. It will refuse to
run and thus cause the checks to fail.
Instead of improving the detection logic, fix our ".expect" files so
that we do not need any post-processing at all anymore. This allows us
to drop the `-w` flag when diffing so that we can always use diff(1)
now.
Note that we keep some of the post-processing of `chainlint.pl` output
intact to strip leading line numbers generated by the script. Having
these would cause a rippling effect whenever we add a new test that
sorts into the middle of existing tests and would require us to
renumerate all subsequent lines, which seems rather pointless.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
t/Makefile | 14 +++--------
t/chainlint/blank-line-before-esac.expect | 8 +++----
t/chainlint/blank-line.expect | 4 ++++
t/chainlint/block.expect | 4 ++--
t/chainlint/chain-break-background.expect | 4 ++--
t/chainlint/chain-break-return-exit.expect | 14 +++++------
t/chainlint/chain-break-status.expect | 4 ++--
t/chainlint/chained-subshell.expect | 4 ++--
.../command-substitution-subsubshell.expect | 2 +-
t/chainlint/dqstring-line-splice.expect | 6 +++--
t/chainlint/dqstring-no-interpolate.expect | 5 ++--
t/chainlint/empty-here-doc.expect | 4 ++--
t/chainlint/exclamation.expect | 2 +-
t/chainlint/for-loop-abbreviated.expect | 2 +-
t/chainlint/for-loop.expect | 1 +
t/chainlint/function.expect | 4 ++--
t/chainlint/here-doc.expect | 4 ++--
t/chainlint/loop-detect-status.expect | 20 ++++++++--------
t/chainlint/nested-cuddled-subshell.expect | 6 +++++
t/chainlint/nested-loop-detect-failure.expect | 24 +++++++++----------
t/chainlint/nested-subshell.expect | 1 +
t/chainlint/pipe.expect | 2 ++
t/chainlint/subshell-here-doc.expect | 4 ++--
t/chainlint/subshell-one-liner.expect | 5 ++++
t/chainlint/t7900-subtree.expect | 1 +
t/chainlint/token-pasting.expect | 14 +++++------
t/chainlint/while-loop.expect | 1 +
27 files changed, 90 insertions(+), 74 deletions(-)
diff --git a/t/Makefile b/t/Makefile
index 225aaf78ed..b7a6fefe28 100644
--- a/t/Makefile
+++ b/t/Makefile
@@ -103,20 +103,12 @@ check-chainlint:
echo "# chainlint: $(CHAINLINTTMP_SQ)/tests" && \
for i in $(CHAINLINTTESTS); do \
echo "# chainlint: $$i" && \
- sed -e '/^[ ]*$$/d' chainlint/$$i.expect; \
+ cat chainlint/$$i.expect; \
done \
} >'$(CHAINLINTTMP_SQ)'/expect && \
$(CHAINLINT) --emit-all '$(CHAINLINTTMP_SQ)'/tests | \
- sed -e 's/^[1-9][0-9]* //;/^[ ]*$$/d' >'$(CHAINLINTTMP_SQ)'/actual && \
- if test -f ../GIT-BUILD-OPTIONS; then \
- . ../GIT-BUILD-OPTIONS; \
- fi && \
- if test -x ../git$$X; then \
- DIFFW="../git$$X --no-pager diff -w --no-index"; \
- else \
- DIFFW="diff -w -u"; \
- fi && \
- $$DIFFW '$(CHAINLINTTMP_SQ)'/expect '$(CHAINLINTTMP_SQ)'/actual
+ sed -e 's/^[1-9][0-9]* //' >'$(CHAINLINTTMP_SQ)'/actual && \
+ diff -u '$(CHAINLINTTMP_SQ)'/expect '$(CHAINLINTTMP_SQ)'/actual
test-lint: test-lint-duplicates test-lint-executable test-lint-shell-syntax \
test-lint-filenames
diff --git a/t/chainlint/blank-line-before-esac.expect b/t/chainlint/blank-line-before-esac.expect
index 48ed4eb124..056e03003d 100644
--- a/t/chainlint/blank-line-before-esac.expect
+++ b/t/chainlint/blank-line-before-esac.expect
@@ -1,11 +1,11 @@
-test_done ( ) {
+test_done () {
case "$test_failure" in
- 0 )
+ 0)
test_at_end_hook_
exit 0 ;;
- * )
+ *)
if test $test_external_has_tap -eq 0
then
say_color error "# failed $test_failure among $msg"
@@ -14,5 +14,5 @@ test_done ( ) {
exit 1 ;;
- esac
+ esac
}
diff --git a/t/chainlint/blank-line.expect b/t/chainlint/blank-line.expect
index f76fde1ffb..b47827d749 100644
--- a/t/chainlint/blank-line.expect
+++ b/t/chainlint/blank-line.expect
@@ -1,4 +1,8 @@
(
+
nothing &&
+
something
+
+
)
diff --git a/t/chainlint/block.expect b/t/chainlint/block.expect
index a3bcea492a..1c87326364 100644
--- a/t/chainlint/block.expect
+++ b/t/chainlint/block.expect
@@ -12,9 +12,9 @@
) &&
{
- echo a ; ?!AMP?! echo b
+ echo a; ?!AMP?! echo b
} &&
-{ echo a ; ?!AMP?! echo b ; } &&
+{ echo a; ?!AMP?! echo b; } &&
{
echo "${var}9" &&
diff --git a/t/chainlint/chain-break-background.expect b/t/chainlint/chain-break-background.expect
index 28f9114f42..20d0bb5333 100644
--- a/t/chainlint/chain-break-background.expect
+++ b/t/chainlint/chain-break-background.expect
@@ -1,9 +1,9 @@
JGIT_DAEMON_PID= &&
git init --bare empty.git &&
-> empty.git/git-daemon-export-ok &&
+>empty.git/git-daemon-export-ok &&
mkfifo jgit_daemon_output &&
{
- jgit daemon --port="$JGIT_DAEMON_PORT" . > jgit_daemon_output &
+ jgit daemon --port="$JGIT_DAEMON_PORT" . >jgit_daemon_output &
JGIT_DAEMON_PID=$!
} &&
test_expect_code 2 git ls-remote --exit-code git://localhost:$JGIT_DAEMON_PORT/empty.git
diff --git a/t/chainlint/chain-break-return-exit.expect b/t/chainlint/chain-break-return-exit.expect
index 1732d221c3..4cd18e2edf 100644
--- a/t/chainlint/chain-break-return-exit.expect
+++ b/t/chainlint/chain-break-return-exit.expect
@@ -1,16 +1,16 @@
case "$(git ls-files)" in
-one ) echo pass one ;;
-* ) echo bad one ; return 1 ;;
+one) echo pass one ;;
+*) echo bad one; return 1 ;;
esac &&
(
case "$(git ls-files)" in
- two ) echo pass two ;;
- * ) echo bad two ; exit 1 ;;
-esac
+ two) echo pass two ;;
+ *) echo bad two; exit 1 ;;
+ esac
) &&
case "$(git ls-files)" in
-dir/two"$LF"one ) echo pass both ;;
-* ) echo bad ; return 1 ;;
+dir/two"$LF"one) echo pass both ;;
+*) echo bad; return 1 ;;
esac &&
for i in 1 2 3 4 ; do
diff --git a/t/chainlint/chain-break-status.expect b/t/chainlint/chain-break-status.expect
index f4bada9463..e6b3b2193e 100644
--- a/t/chainlint/chain-break-status.expect
+++ b/t/chainlint/chain-break-status.expect
@@ -1,7 +1,7 @@
-OUT=$(( ( large_git ; echo $? 1 >& 3 ) | : ) 3 >& 1) &&
+OUT=$( ((large_git; echo $? 1>&3) | :) 3>&1 ) &&
test_match_signal 13 "$OUT" &&
-{ test-tool sigchain > actual ; ret=$? ; } &&
+{ test-tool sigchain >actual; ret=$?; } &&
{
test_match_signal 15 "$ret" ||
test "$ret" = 3
diff --git a/t/chainlint/chained-subshell.expect b/t/chainlint/chained-subshell.expect
index af0369d328..83810ea7ec 100644
--- a/t/chainlint/chained-subshell.expect
+++ b/t/chainlint/chained-subshell.expect
@@ -4,7 +4,7 @@ mkdir sub && (
nuff said
) &&
-cut "-d " -f actual | ( read s1 s2 s3 &&
+cut "-d " -f actual | (read s1 s2 s3 &&
test -f $s1 ?!AMP?!
test $(cat $s2) = tree2path1 &&
-test $(cat $s3) = tree3path1 )
+test $(cat $s3) = tree3path1)
diff --git a/t/chainlint/command-substitution-subsubshell.expect b/t/chainlint/command-substitution-subsubshell.expect
index ab2f79e845..ec42f2c30c 100644
--- a/t/chainlint/command-substitution-subsubshell.expect
+++ b/t/chainlint/command-substitution-subsubshell.expect
@@ -1,2 +1,2 @@
-OUT=$(( ( large_git 1 >& 3 ) | : ) 3 >& 1) &&
+OUT=$( ((large_git 1>&3) | :) 3>&1 ) &&
test_match_signal 13 "$OUT"
diff --git a/t/chainlint/dqstring-line-splice.expect b/t/chainlint/dqstring-line-splice.expect
index bf9ced60d4..37eab80738 100644
--- a/t/chainlint/dqstring-line-splice.expect
+++ b/t/chainlint/dqstring-line-splice.expect
@@ -1,3 +1,5 @@
-echo 'fatal: reword option of --fixup is mutually exclusive with' '--patch/--interactive/--all/--include/--only' > expect &&
-test_must_fail git commit --fixup=reword:HEAD~ $1 2 > actual &&
+
+echo 'fatal: reword option of --fixup is mutually exclusive with' '--patch/--interactive/--all/--include/--only' >expect &&
+test_must_fail git commit --fixup=reword:HEAD~ $1 2>actual &&
test_cmp expect actual
+
diff --git a/t/chainlint/dqstring-no-interpolate.expect b/t/chainlint/dqstring-no-interpolate.expect
index 10724987a5..087eda15e4 100644
--- a/t/chainlint/dqstring-no-interpolate.expect
+++ b/t/chainlint/dqstring-no-interpolate.expect
@@ -6,6 +6,7 @@ grep "^\.git$" output.txt &&
(
cd client$version &&
GIT_TEST_PROTOCOL_VERSION=$version git fetch-pack --no-progress .. $(cat ../input)
-) > output &&
- cut -d ' ' -f 2 < output | sort > actual &&
+) >output &&
+ cut -d ' ' -f 2 <output | sort >actual &&
test_cmp expect actual
+
diff --git a/t/chainlint/empty-here-doc.expect b/t/chainlint/empty-here-doc.expect
index e8733c97c6..8507721192 100644
--- a/t/chainlint/empty-here-doc.expect
+++ b/t/chainlint/empty-here-doc.expect
@@ -1,4 +1,4 @@
-git ls-tree $tree path > current &&
-cat > expected <<\EOF &&
+git ls-tree $tree path >current &&
+cat >expected <<\EOF &&
EOF
test_output
diff --git a/t/chainlint/exclamation.expect b/t/chainlint/exclamation.expect
index 2d961a58c6..765a35bb4c 100644
--- a/t/chainlint/exclamation.expect
+++ b/t/chainlint/exclamation.expect
@@ -1,4 +1,4 @@
-if ! condition ; then echo nope ; else yep ; fi &&
+if ! condition; then echo nope; else yep; fi &&
test_prerequisite !MINGW &&
mail uucp!address &&
echo !whatever!
diff --git a/t/chainlint/for-loop-abbreviated.expect b/t/chainlint/for-loop-abbreviated.expect
index a21007a63f..02c0d15cca 100644
--- a/t/chainlint/for-loop-abbreviated.expect
+++ b/t/chainlint/for-loop-abbreviated.expect
@@ -1,5 +1,5 @@
for it
do
- path=$(expr "$it" : ( [^:]*) ) &&
+ path=$(expr "$it" : ([^:]*)) &&
git update-index --add "$path" || exit
done
diff --git a/t/chainlint/for-loop.expect b/t/chainlint/for-loop.expect
index d65c82129a..d2237f1e38 100644
--- a/t/chainlint/for-loop.expect
+++ b/t/chainlint/for-loop.expect
@@ -6,6 +6,7 @@
bar
EOF
done ?!AMP?!
+
for i in a b c; do
echo $i &&
cat $i ?!LOOP?!
diff --git a/t/chainlint/function.expect b/t/chainlint/function.expect
index a14388e6b9..dd7c997a3c 100644
--- a/t/chainlint/function.expect
+++ b/t/chainlint/function.expect
@@ -1,8 +1,8 @@
-sha1_file ( ) {
+sha1_file() {
echo "$*" | sed "s#..#.git/objects/&/#"
} &&
-remove_object ( ) {
+remove_object() {
file=$(sha1_file "$*") &&
test -e "$file" ?!AMP?!
rm -f "$file"
diff --git a/t/chainlint/here-doc.expect b/t/chainlint/here-doc.expect
index 1df3f78282..91b961242a 100644
--- a/t/chainlint/here-doc.expect
+++ b/t/chainlint/here-doc.expect
@@ -1,6 +1,6 @@
boodle wobba \
- gorgo snoot \
- wafta snurb <<EOF &&
+ gorgo snoot \
+ wafta snurb <<EOF &&
quoth the raven,
nevermore...
EOF
diff --git a/t/chainlint/loop-detect-status.expect b/t/chainlint/loop-detect-status.expect
index 24da9e86d5..7ce3a34806 100644
--- a/t/chainlint/loop-detect-status.expect
+++ b/t/chainlint/loop-detect-status.expect
@@ -1,18 +1,18 @@
-( while test $i -le $blobcount
-do
- printf "Generating blob $i/$blobcount\r" >& 2 &&
+(while test $i -le $blobcount
+ do
+ printf "Generating blob $i/$blobcount\r" >&2 &&
printf "blob\nmark :$i\ndata $blobsize\n" &&
#test-tool genrandom $i $blobsize &&
printf "%-${blobsize}s" $i &&
echo "M 100644 :$i $i" >> commit &&
i=$(($i+1)) ||
echo $? > exit-status
-done &&
-echo "commit refs/heads/main" &&
-echo "author A U Thor <author@email.com> 123456789 +0000" &&
-echo "committer C O Mitter <committer@email.com> 123456789 +0000" &&
-echo "data 5" &&
-echo ">2gb" &&
-cat commit ) |
+ done &&
+ echo "commit refs/heads/main" &&
+ echo "author A U Thor <author@email.com> 123456789 +0000" &&
+ echo "committer C O Mitter <committer@email.com> 123456789 +0000" &&
+ echo "data 5" &&
+ echo ">2gb" &&
+ cat commit) |
git fast-import --big-file-threshold=2 &&
test ! -f exit-status
diff --git a/t/chainlint/nested-cuddled-subshell.expect b/t/chainlint/nested-cuddled-subshell.expect
index 2a86885ee6..3836049cc4 100644
--- a/t/chainlint/nested-cuddled-subshell.expect
+++ b/t/chainlint/nested-cuddled-subshell.expect
@@ -2,18 +2,24 @@
(cd foo &&
bar
) &&
+
(cd foo &&
bar
) ?!AMP?!
+
(
cd foo &&
bar) &&
+
(
cd foo &&
bar) ?!AMP?!
+
(cd foo &&
bar) &&
+
(cd foo &&
bar) ?!AMP?!
+
foobar
)
diff --git a/t/chainlint/nested-loop-detect-failure.expect b/t/chainlint/nested-loop-detect-failure.expect
index 4793a0e8e1..3461df40e5 100644
--- a/t/chainlint/nested-loop-detect-failure.expect
+++ b/t/chainlint/nested-loop-detect-failure.expect
@@ -1,31 +1,31 @@
-for i in 0 1 2 3 4 5 6 7 8 9 ;
+for i in 0 1 2 3 4 5 6 7 8 9;
do
- for j in 0 1 2 3 4 5 6 7 8 9 ;
+ for j in 0 1 2 3 4 5 6 7 8 9;
do
- echo "$i$j" > "path$i$j" ?!LOOP?!
+ echo "$i$j" >"path$i$j" ?!LOOP?!
done ?!LOOP?!
done &&
-for i in 0 1 2 3 4 5 6 7 8 9 ;
+for i in 0 1 2 3 4 5 6 7 8 9;
do
- for j in 0 1 2 3 4 5 6 7 8 9 ;
+ for j in 0 1 2 3 4 5 6 7 8 9;
do
- echo "$i$j" > "path$i$j" || return 1
+ echo "$i$j" >"path$i$j" || return 1
done
done &&
-for i in 0 1 2 3 4 5 6 7 8 9 ;
+for i in 0 1 2 3 4 5 6 7 8 9;
do
- for j in 0 1 2 3 4 5 6 7 8 9 ;
+ for j in 0 1 2 3 4 5 6 7 8 9;
do
- echo "$i$j" > "path$i$j" ?!LOOP?!
+ echo "$i$j" >"path$i$j" ?!LOOP?!
done || return 1
done &&
-for i in 0 1 2 3 4 5 6 7 8 9 ;
+for i in 0 1 2 3 4 5 6 7 8 9;
do
- for j in 0 1 2 3 4 5 6 7 8 9 ;
+ for j in 0 1 2 3 4 5 6 7 8 9;
do
- echo "$i$j" > "path$i$j" || return 1
+ echo "$i$j" >"path$i$j" || return 1
done || return 1
done
diff --git a/t/chainlint/nested-subshell.expect b/t/chainlint/nested-subshell.expect
index 02e0a9f1bb..73ff28546a 100644
--- a/t/chainlint/nested-subshell.expect
+++ b/t/chainlint/nested-subshell.expect
@@ -4,6 +4,7 @@
echo a &&
echo b
) >file &&
+
cd foo &&
(
echo a ?!AMP?!
diff --git a/t/chainlint/pipe.expect b/t/chainlint/pipe.expect
index 2cfc028297..811971b1a3 100644
--- a/t/chainlint/pipe.expect
+++ b/t/chainlint/pipe.expect
@@ -2,7 +2,9 @@
foo |
bar |
baz &&
+
fish |
cow ?!AMP?!
+
sunder
)
diff --git a/t/chainlint/subshell-here-doc.expect b/t/chainlint/subshell-here-doc.expect
index 52789278d1..75d6f607e2 100644
--- a/t/chainlint/subshell-here-doc.expect
+++ b/t/chainlint/subshell-here-doc.expect
@@ -1,7 +1,7 @@
(
echo wobba \
- gorgo snoot \
- wafta snurb <<-EOF &&
+ gorgo snoot \
+ wafta snurb <<-EOF &&
quoth the raven,
nevermore...
EOF
diff --git a/t/chainlint/subshell-one-liner.expect b/t/chainlint/subshell-one-liner.expect
index b7015361bf..8f694990e8 100644
--- a/t/chainlint/subshell-one-liner.expect
+++ b/t/chainlint/subshell-one-liner.expect
@@ -2,13 +2,18 @@
(foo && bar) &&
(foo && bar) |
(foo && bar) >baz &&
+
(foo; ?!AMP?! bar) &&
(foo; ?!AMP?! bar) |
(foo; ?!AMP?! bar) >baz &&
+
(foo || exit 1) &&
(foo || exit 1) |
(foo || exit 1) >baz &&
+
(foo && bar) ?!AMP?!
+
(foo && bar; ?!AMP?! baz) ?!AMP?!
+
foobar
)
diff --git a/t/chainlint/t7900-subtree.expect b/t/chainlint/t7900-subtree.expect
index 71b3b3bc20..02f3129232 100644
--- a/t/chainlint/t7900-subtree.expect
+++ b/t/chainlint/t7900-subtree.expect
@@ -15,6 +15,7 @@ main-sub4" &&
$chkms
TXT
) &&
+
subfiles=$(git ls-files) &&
check_equal "$subfiles" "$chkms
$chks"
diff --git a/t/chainlint/token-pasting.expect b/t/chainlint/token-pasting.expect
index 342360bcd0..6a387917a7 100644
--- a/t/chainlint/token-pasting.expect
+++ b/t/chainlint/token-pasting.expect
@@ -4,22 +4,22 @@ git config filter.rot13.clean ./rot13.sh &&
{
echo "*.t filter=rot13" ?!AMP?!
echo "*.i ident"
-} > .gitattributes &&
+} >.gitattributes &&
{
echo a b c d e f g h i j k l m ?!AMP?!
echo n o p q r s t u v w x y z ?!AMP?!
echo '$Id$'
-} > test &&
-cat test > test.t &&
-cat test > test.o &&
-cat test > test.i &&
+} >test &&
+cat test >test.t &&
+cat test >test.o &&
+cat test >test.i &&
git add test test.t test.i &&
rm -f test test.t test.i &&
git checkout -- test test.t test.i &&
-echo "content-test2" > test2.o &&
-echo "content-test3 - filename with special characters" > "test3 'sq',$x=.o" ?!AMP?!
+echo "content-test2" >test2.o &&
+echo "content-test3 - filename with special characters" >"test3 'sq',$x=.o" ?!AMP?!
downstream_url_for_sed=$(
printf "%sn" "$downstream_url" |
diff --git a/t/chainlint/while-loop.expect b/t/chainlint/while-loop.expect
index 1f5eaea0fd..06c1567f48 100644
--- a/t/chainlint/while-loop.expect
+++ b/t/chainlint/while-loop.expect
@@ -6,6 +6,7 @@
bar
EOF
done ?!AMP?!
+
while true; do
echo foo &&
cat bar ?!LOOP?!
--
2.43.GIT
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply related
* Re: [PATCH v4] tests: adjust whitespace in chainlint expectations
From: Eric Sunshine @ 2023-12-15 7:17 UTC (permalink / raw)
To: Patrick Steinhardt; +Cc: git, Junio C Hamano
In-Reply-To: <fb312f559de7b99244e4c86a995250599cd9be06.1702622508.git.ps@pks.im>
On Fri, Dec 15, 2023 at 1:42 AM Patrick Steinhardt <ps@pks.im> wrote:
> [...]
> Instead of improving the detection logic, fix our ".expect" files so
> that we do not need any post-processing at all anymore. This allows us
> to drop the `-w` flag when diffing so that we can always use diff(1)
> now.
>
> Note that we keep some of the post-processing of `chainlint.pl` output
> intact to strip leading line numbers generated by the script. Having
> these would cause a rippling effect whenever we add a new test that
> sorts into the middle of existing tests and would require us to
> renumerate all subsequent lines, which seems rather pointless.
>
> Signed-off-by: Patrick Steinhardt <ps@pks.im>
> ---
> diff --git a/t/Makefile b/t/Makefile
> @@ -103,20 +103,12 @@ check-chainlint:
> for i in $(CHAINLINTTESTS); do \
> echo "# chainlint: $$i" && \
> - sed -e '/^[ ]*$$/d' chainlint/$$i.expect; \
> + cat chainlint/$$i.expect; \
> done \
> $(CHAINLINT) --emit-all '$(CHAINLINTTMP_SQ)'/tests | \
> - sed -e 's/^[1-9][0-9]* //;/^[ ]*$$/d' >'$(CHAINLINTTMP_SQ)'/actual && \
> + sed -e 's/^[1-9][0-9]* //' >'$(CHAINLINTTMP_SQ)'/actual && \
> + diff -u '$(CHAINLINTTMP_SQ)'/expect '$(CHAINLINTTMP_SQ)'/actual
Thanks, this version looks fine. FWIW, you may consider this:
Reviewed-by: Eric Sunshine <sunshine@sunshineco.com>
Aside: I was rather surprised to see this output from git-am when applying v4:
Applying: tests: adjust whitespace in chainlint expectations
.git/rebase-apply/patch:205: new blank line at EOF.
+
.git/rebase-apply/patch:219: new blank line at EOF.
+
warning: 2 lines add whitespace errors.
But upon investigating the two "test" files in question,
dqstring-line-splice.test and dqstring-no-interpolate.test, I recalled
that I had to play tricks to escape the single-quote context created
by the Makefile when generating t/chainlinttmp/tests in order to allow
chainlint.pl to see a double-quoted string. So, the abovementioned
blank lines are indeed expected output from chainlint.pl given the
tricks played.
^ permalink raw reply
* Re: [PATCH 0/2] avoiding recursion in mailinfo
From: Patrick Steinhardt @ 2023-12-15 7:58 UTC (permalink / raw)
To: Jeff King; +Cc: git, Taylor Blau, Carlos Andrés Ramírez Cataño
In-Reply-To: <20231214214444.GB2297853@coredump.intra.peff.net>
[-- Attachment #1: Type: text/plain, Size: 1414 bytes --]
On Thu, Dec 14, 2023 at 04:44:44PM -0500, Jeff King wrote:
> On Thu, Dec 14, 2023 at 08:41:20AM +0100, Patrick Steinhardt wrote:
>
> > > @@ -72,11 +73,14 @@ static const char *unquote_comment(struct strbuf *outbuf, const char *in)
> > > take_next_literally = 1;
> > > continue;
> > > case '(':
> > > - in = unquote_comment(outbuf, in);
> > > + strbuf_addch(outbuf, '(');
> > > + depth++;
> > > continue;
> > > case ')':
> > > strbuf_addch(outbuf, ')');
> > > - return in;
> > > + if (!--depth)
> > > + return in;
> > > + continue;
> > > }
> > > }
> > >
> > > I doubt it's a big deal either way, but if it's that easy to do it might
> > > be worth it.
> >
> > Isn't this only protecting against unbalanced braces? That might be a
> > sensible check to do regardless, but does it protect against recursion
> > blowing the stack if you just happen to have many opening braces?
> >
> > Might be I'm missing something.
>
> It protects against recrusion blowing the stack because it removes the
> recursive call to unquote_comment(). ;)
Doh. Of course it does, I completely missed the removals.
> The "depth" stuff is there because without recursion, we have to know
> when we've hit the correct closing paren (as opposed to an embedded
> one).
Yes, makes sense now. The patches look good to me, thanks!
Patrick
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply
* Re: [PATCH v2 2/2] fetch: no redundant error message for atomic fetch
From: Patrick Steinhardt @ 2023-12-15 9:56 UTC (permalink / raw)
To: Jiang Xin; +Cc: Git List, Junio C Hamano, Jiang Xin
In-Reply-To: <6fb83a00000563a79f3948f9087c634ae507b9f5.1702556642.git.zhiyou.jx@alibaba-inc.com>
[-- Attachment #1: Type: text/plain, Size: 4346 bytes --]
On Thu, Dec 14, 2023 at 08:33:12PM +0800, Jiang Xin wrote:
> From: Jiang Xin <zhiyou.jx@alibaba-inc.com>
>
> If an error occurs during an atomic fetch, a redundant error message
> will appear at the end of do_fetch(). It was introduced in b3a804663c
> (fetch: make `--atomic` flag cover backfilling of tags, 2022-02-17).
>
> In function do_fetch(), a failure message is already shown before the
> retcode is set, so we should not call additional error() at the end of
> this function.
>
> We can remove the redundant error() function, because we know that
> the function ref_transaction_abort() never fails.
Okay, so this still suffers from the same issue as discussed in the
thread at <ZTYue-3gAS1aGXNa@tanuki>, but now it's documented in the
commit message. I'm still not convinced that is a good argument to say
that the function never fails, and if it ever would it would populate
the error message. Especially now where there's churn to introduce the
new reftable backend this could change any time.
For the record, I'm proposing to do something like the following:
diff --git a/builtin/fetch.c b/builtin/fetch.c
index fd134ba74d..80b8bc549d 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -1651,7 +1651,7 @@ static int do_fetch(struct transport *transport,
if (atomic_fetch) {
transaction = ref_transaction_begin(&err);
if (!transaction) {
- retcode = error("%s", err.buf);
+ retcode = -1;
goto cleanup;
}
}
@@ -1711,7 +1711,6 @@ static int do_fetch(struct transport *transport,
retcode = ref_transaction_commit(transaction, &err);
if (retcode) {
- error("%s", err.buf);
ref_transaction_free(transaction);
transaction = NULL;
goto cleanup;
@@ -1775,9 +1774,13 @@ static int do_fetch(struct transport *transport,
}
cleanup:
+ if (retcode && err.len)
+ error("%s", err.buf);
if (retcode && transaction) {
+ strbuf_reset(&err);
ref_transaction_abort(transaction, &err);
- error("%s", err.buf);
+ if (err.len)
+ error("%s", err.buf);
}
display_state_release(&display_state);
This would both fix the issue you observed, but also fixes issues in
case the ref backend failed without writing an error message to the
buffer. It also fixes issues if there were multiple failures, where we'd
print the initial error printed to the buffer twice.
I know this is mostly solidifying us against potential future changes,
but if it's comparatively easy like this I don't see much of a reason
against it.
Patrick
> While we can find a
> common pattern for calling ref_transaction_abort() by running command
> "git grep -A1 ref_transaction_abort", e.g.:
>
> if (ref_transaction_abort(transaction, &error))
> error("abort: %s", error.buf);
>
> We can fix this issue follow this pattern, and the test case "fetch
> porcelain output (atomic)" in t5574 will also be fixed. If in the future
> we decide that we don't need to check the return value of the function
> ref_transaction_abort(), this change can be fixed along with it.
>
> Signed-off-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
> ---
> builtin/fetch.c | 4 +---
> t/t5574-fetch-output.sh | 2 +-
> 2 files changed, 2 insertions(+), 4 deletions(-)
>
> diff --git a/builtin/fetch.c b/builtin/fetch.c
> index fd134ba74d..01a573cf8d 100644
> --- a/builtin/fetch.c
> +++ b/builtin/fetch.c
> @@ -1775,10 +1775,8 @@ static int do_fetch(struct transport *transport,
> }
>
> cleanup:
> - if (retcode && transaction) {
> - ref_transaction_abort(transaction, &err);
> + if (retcode && transaction && ref_transaction_abort(transaction, &err))
> error("%s", err.buf);
> - }
>
> display_state_release(&display_state);
> close_fetch_head(&fetch_head);
> diff --git a/t/t5574-fetch-output.sh b/t/t5574-fetch-output.sh
> index bc747efefc..8d01e36b3d 100755
> --- a/t/t5574-fetch-output.sh
> +++ b/t/t5574-fetch-output.sh
> @@ -98,7 +98,7 @@ do
> opt=
> ;;
> esac
> - test_expect_failure "fetch porcelain output ${opt:+(atomic)}" '
> + test_expect_success "fetch porcelain output ${opt:+(atomic)}" '
> test_when_finished "rm -rf porcelain" &&
>
> # Clone and pre-seed the repositories. We fetch references into two
> --
> 2.41.0.232.g2f6f0bca4f.agit.8.0.4.dev
>
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply related
* Re: [PATCH v2 1/2] t5574: test porcelain output of atomic fetch
From: Patrick Steinhardt @ 2023-12-15 9:56 UTC (permalink / raw)
To: Jiang Xin; +Cc: Git List, Junio C Hamano, Jiang Xin
In-Reply-To: <210191917bcfa9293622908c291652059576f3e5.1702556642.git.zhiyou.jx@alibaba-inc.com>
[-- Attachment #1: Type: text/plain, Size: 2011 bytes --]
On Thu, Dec 14, 2023 at 08:33:11PM +0800, Jiang Xin wrote:
> From: Jiang Xin <zhiyou.jx@alibaba-inc.com>
[snip]
> @@ -91,36 +85,61 @@ test_expect_success 'fetch porcelain output' '
> git checkout force-updated &&
> git reset --hard HEAD~ &&
> test_commit --no-tag force-update-new &&
> - FORCE_UPDATED_NEW=$(git rev-parse HEAD) &&
> -
> - cat >expect <<-EOF &&
> - - $MAIN_OLD $ZERO_OID refs/forced/deleted-branch
> - - $MAIN_OLD $ZERO_OID refs/unforced/deleted-branch
> - $MAIN_OLD $FAST_FORWARD_NEW refs/unforced/fast-forward
> - ! $FORCE_UPDATED_OLD $FORCE_UPDATED_NEW refs/unforced/force-updated
> - * $ZERO_OID $MAIN_OLD refs/unforced/new-branch
> - $MAIN_OLD $FAST_FORWARD_NEW refs/forced/fast-forward
> - + $FORCE_UPDATED_OLD $FORCE_UPDATED_NEW refs/forced/force-updated
> - * $ZERO_OID $MAIN_OLD refs/forced/new-branch
> - $MAIN_OLD $FAST_FORWARD_NEW refs/remotes/origin/fast-forward
> - + $FORCE_UPDATED_OLD $FORCE_UPDATED_NEW refs/remotes/origin/force-updated
> - * $ZERO_OID $MAIN_OLD refs/remotes/origin/new-branch
> - EOF
> -
> - # Execute a dry-run fetch first. We do this to assert that the dry-run
> - # and non-dry-run fetches produces the same output. Execution of the
> - # fetch is expected to fail as we have a rejected reference update.
> - test_must_fail git -C porcelain fetch \
> - --porcelain --dry-run --prune origin $refspecs >actual &&
> - test_cmp expect actual &&
> -
> - # And now we perform a non-dry-run fetch.
> - test_must_fail git -C porcelain fetch \
> - --porcelain --prune origin $refspecs >actual 2>stderr &&
> - test_cmp expect actual &&
> - test_must_be_empty stderr
> + FORCE_UPDATED_NEW=$(git rev-parse HEAD)
> '
>
> +for opt in off on
> +do
> + case $opt in
> + on)
> + opt=--atomic
> + ;;
> + off)
> + opt=
> + ;;
> + esac
Nit: you could also do `for opt in "--atomic" ""` directly to get rid of
this case statement. Not sure whether this is worth a reroll though,
probably not.
Patrick
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox