Git development

Git development
 help / color / mirror / Atom feed

* [PATCH v2 1/2] diff --no-index: follow symlinks
From: Dennis Kaarsemaker @ 2017-01-13 10:20 UTC (permalink / raw)
  To: git; +Cc: Dennis Kaarsemaker
In-Reply-To: <20170113102021.6054-1-dennis@kaarsemaker.net>

Git's diff machinery does not follow symlinks, which makes sense as git
itself also does not, but stores the symlink destination.

In --no-index mode however, it is useful for diff to to follow symlinks,
matching the behaviour of ordinary diff. A new --no-dereference (name
copied from diff) option has been added to disable this behaviour.

Signed-off-by: Dennis Kaarsemaker <dennis@kaarsemaker.net>
---
 Documentation/diff-options.txt |  7 +++++++
 diff-no-index.c                |  7 ++++---
 diff.c                         | 10 ++++++++--
 diff.h                         |  2 +-
 t/t4053-diff-no-index.sh       | 30 ++++++++++++++++++++++++++++++
 5 files changed, 50 insertions(+), 6 deletions(-)

diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
index 2d77a19626..48bcf3cc5e 100644
--- a/Documentation/diff-options.txt
+++ b/Documentation/diff-options.txt
@@ -216,6 +216,13 @@ any of those replacements occurred.
 	commit range.  Defaults to `diff.submodule` or the 'short' format
 	if the config option is unset.
 
+ifdef::git-diff[]
+--no-dereference::
+	Normally, "git diff --no-index" will dereference symlinks and compare
+	the contents of the linked files, mimicking ordinary diff. This
+	option disables that behaviour.
+endif::git-diff[]
+
 --color[=<when>]::
 	Show colored diff.
 	`--color` (i.e. without '=<when>') is the same as `--color=always`.
diff --git a/diff-no-index.c b/diff-no-index.c
index f420786039..826fe97ffc 100644
--- a/diff-no-index.c
+++ b/diff-no-index.c
@@ -40,7 +40,7 @@ static int read_directory_contents(const char *path, struct string_list *list)
  */
 static const char file_from_standard_input[] = "-";
 
-static int get_mode(const char *path, int *mode)
+static int get_mode(const char *path, int *mode, int no_dereference)
 {
 	struct stat st;
 
@@ -52,7 +52,7 @@ static int get_mode(const char *path, int *mode)
 #endif
 	else if (path == file_from_standard_input)
 		*mode = create_ce_mode(0666);
-	else if (lstat(path, &st))
+	else if (no_dereference ? lstat(path, &st) : stat(path, &st))
 		return error("Could not access '%s'", path);
 	else
 		*mode = st.st_mode;
@@ -93,7 +93,8 @@ static int queue_diff(struct diff_options *o,
 {
 	int mode1 = 0, mode2 = 0;
 
-	if (get_mode(name1, &mode1) || get_mode(name2, &mode2))
+	if (get_mode(name1, &mode1, DIFF_OPT_TST(o, NO_DEREFERENCE)) ||
+		get_mode(name2, &mode2, DIFF_OPT_TST(o, NO_DEREFERENCE)))
 		return -1;
 
 	if (mode1 && mode2 && S_ISDIR(mode1) != S_ISDIR(mode2)) {
diff --git a/diff.c b/diff.c
index be11e4ef2b..2fc0226338 100644
--- a/diff.c
+++ b/diff.c
@@ -2815,7 +2815,7 @@ int diff_populate_filespec(struct diff_filespec *s, unsigned int flags)
 		s->size = xsize_t(st.st_size);
 		if (!s->size)
 			goto empty;
-		if (S_ISLNK(st.st_mode)) {
+		if (S_ISLNK(s->mode)) {
 			struct strbuf sb = STRBUF_INIT;
 
 			if (strbuf_readlink(&sb, s->path, s->size))
@@ -2825,6 +2825,10 @@ int diff_populate_filespec(struct diff_filespec *s, unsigned int flags)
 			s->should_free = 1;
 			return 0;
 		}
+		if (S_ISLNK(st.st_mode)) {
+			stat(s->path, &st);
+			s->size = xsize_t(st.st_size);
+		}
 		if (size_only)
 			return 0;
 		if ((flags & CHECK_BINARY) &&
@@ -3884,7 +3888,9 @@ int diff_opt_parse(struct diff_options *options,
 	else if (!strcmp(arg, "--no-follow")) {
 		DIFF_OPT_CLR(options, FOLLOW_RENAMES);
 		DIFF_OPT_CLR(options, DEFAULT_FOLLOW_RENAMES);
-	} else if (!strcmp(arg, "--color"))
+	} else if (!strcmp(arg, "--no-dereference"))
+		DIFF_OPT_SET(options, NO_DEREFERENCE);
+	else if (!strcmp(arg, "--color"))
 		options->use_color = 1;
 	else if (skip_prefix(arg, "--color=", &arg)) {
 		int value = git_config_colorbool(NULL, arg);
diff --git a/diff.h b/diff.h
index 25ae60d5ff..74883db1eb 100644
--- a/diff.h
+++ b/diff.h
@@ -69,7 +69,7 @@ typedef struct strbuf *(*diff_prefix_fn_t)(struct diff_options *opt, void *data)
 #define DIFF_OPT_FIND_COPIES_HARDER  (1 <<  6)
 #define DIFF_OPT_FOLLOW_RENAMES      (1 <<  7)
 #define DIFF_OPT_RENAME_EMPTY        (1 <<  8)
-/* (1 <<  9) unused */
+#define DIFF_OPT_NO_DEREFERENCE      (1 <<  9)
 #define DIFF_OPT_HAS_CHANGES         (1 << 10)
 #define DIFF_OPT_QUICK               (1 << 11)
 #define DIFF_OPT_NO_INDEX            (1 << 12)
diff --git a/t/t4053-diff-no-index.sh b/t/t4053-diff-no-index.sh
index 453e6c35eb..c6046fef19 100755
--- a/t/t4053-diff-no-index.sh
+++ b/t/t4053-diff-no-index.sh
@@ -127,4 +127,34 @@ test_expect_success 'diff --no-index from repo subdir respects config (implicit)
 	test_cmp expect actual.head
 '
 
+test_expect_success SYMLINKS 'diff --no-index follows symlinks' '
+	echo a >1 &&
+	echo b >2 &&
+	ln -s 1 3 &&
+	ln -s 2 4 &&
+	cat >expect <<\EOF
+		--- a/3
+		+++ b/4
+		@@ -1 +1 @@
+		-a
+		+b
+	EOF
+	test_expect_code 1 git diff --no-index 3 4 | tail -n +3 >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success SYMLINKS 'diff --no-index --no-dereference does not follow symlinks' '
+	cat >expect <<\EOF
+		--- a/3
+		+++ b/4
+		@@ -1 +1 @@
+		-1
+		\ No newline at end of file
+		+2
+		\ No newline at end of file
+	EOF
+	test_expect_code 1 git diff --no-index --no-dereference 3 4 | tail -n +3 > actual &&
+	test_cmp expect actual
+'
+
 test_done
-- 
2.11.0-234-gaf85957


^ permalink raw reply related

* Re: gitk: "lime" color incompatible with older Tk versions
From: David Aguilar @ 2017-01-13 11:20 UTC (permalink / raw)
  To: Stefan Beller; +Cc: Andrew Janke, Paul Mackerras, git@vger.kernel.org
In-Reply-To: <CAGZ79kaO9T+Qc=M6s_ZdpAfLZCVQEYNF=zNxDWArDmsA7jjCWg@mail.gmail.com>

On Mon, May 02, 2016 at 09:20:43AM -0700, Stefan Beller wrote:
> + Paul Mackerras, who maintains gitk
> 
> On Sun, May 1, 2016 at 10:03 AM, Andrew Janke <floss@apjanke.net> wrote:
> > Hi, git folks,
> >
> > I'm having trouble running gitk on Mac OS X 10.9.5. The gitk program uses
> > the color "lime", which is not present in older versions of Tk, apparently
> > including the Tk 8.5 which ships with 10.9.

Ping.. it would be nice to get this patch applied.
I can verify that gitk on Mac OS X 10.11 also has this problem.
gitk is usually pretty good about backwards-compatibility.

> > This compatibility problem was noted before back in 2012, in
> > http://www.mail-archive.com/git%40vger.kernel.org/msg14496.html.
> >
> > Would you consider switching from lime to a hex value color, for
> > compatibility with users of older versions of Tk? A patch to do so is below;
> > only the file gitk-git/gitk needs to be changed.

I can recreate and resend this patch if needed; it's simply:
:%s/lime/"#99FF00"/g

Would a re-roll of this patch be accepted, or is it not worth
bothering?

Google for "gitk lime" to get a taste for some of the fallout
caused by this problem.

The fact that multiple pages, with different OS's, have examples
of users stumbling over this change is a good hint that it's
worth fixing.

Thoughts?
-- 
David

^ permalink raw reply

* [PATCH] Documentation/bisect: improve on (bad|new) and (good|bad)
From: Christian Couder @ 2017-01-13 14:44 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Manuel Ullmann, Matthieu Moy, Christian Couder

The following part of the description:

git bisect (bad|new) [<rev>]
git bisect (good|old) [<rev>...]

may be a bit confusing, as a reader may wonder if instead it should be:

git bisect (bad|good) [<rev>]
git bisect (old|new) [<rev>...]

Of course the difference between "[<rev>]" and "[<rev>...]" should hint
that there is a good reason for the way it is.

But we can further clarify and complete the description by adding
"<term-new>" and "<term-old>" to the "bad|new" and "good|old"
alternatives.

Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
---
 Documentation/git-bisect.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/git-bisect.txt b/Documentation/git-bisect.txt
index 2bb9a577a2..bdd915a66b 100644
--- a/Documentation/git-bisect.txt
+++ b/Documentation/git-bisect.txt
@@ -18,8 +18,8 @@ on the subcommand:
 
  git bisect start [--term-{old,good}=<term> --term-{new,bad}=<term>]
 		  [--no-checkout] [<bad> [<good>...]] [--] [<paths>...]
- git bisect (bad|new) [<rev>]
- git bisect (good|old) [<rev>...]
+ git bisect (bad|new|<term-new>) [<rev>]
+ git bisect (good|old|<term-old>) [<rev>...]
  git bisect terms [--term-good | --term-bad]
  git bisect skip [(<rev>|<range>)...]
  git bisect reset [<commit>]
-- 
2.11.0.313.g11b7cc88e6.dirty


^ permalink raw reply related

* Re: [PATCH] Remove dependency on deprecated Net::SMTP::SSL
From: Renato Botelho @ 2017-01-13 14:59 UTC (permalink / raw)
  To: brian m. carlson; +Cc: Mike Fisher, git
In-Reply-To: <20161120215344.jaqt4owlhovig3hz@genre.crustytoothpaste.net>

> On 20 Nov 2016, at 19:53, brian m. carlson <sandals@crustytoothpaste.net> wrote:
> 
> On Sun, Nov 20, 2016 at 04:18:16PM -0500, Mike Fisher wrote:
>> Refactor send_message() to remove dependency on deprecated
>> Net::SMTP::SSL:
>> 
>> <http://search.cpan.org/~rjbs/Net-SMTP-SSL-1.04/lib/Net/SMTP/SSL.pm#DEPRECATED>
> 
> As much as I hate to say this, I think this is going to cause
> compatibility problems.  Net::SMTP is part of core Perl (as of v5.7.3),
> but the version you want to rely on (which you did not provide an
> explicit dependency on) is from October 2014.
> 
> That basically means that no Perl on a Red Hat or CentOS system is going
> to provide that support, since RHEL 7 was released in June 2014.
> Providing an updated Git on those platforms would require replacing the
> system Perl or parts of it, which would be undesirable.  This would
> affect Debian 7 as well.
> 
> We currently support Perl 5.8 [0], so if you want to remove support for
> Net::SMTP::SSL, I'd recommend a solution that works with that version.
> 
> [0] I personally believe we should drop support for Perl older than
> 5.10.1 (if not newer), but that's my opinion and it isn't shared by
> other list regulars.
> -- 
> brian m. carlson / brian with sandals: Houston, Texas, US
> +1 832 623 2791 | https://www.crustytoothpaste.net/~bmc | My opinion only
> OpenPGP: https://keybase.io/bk2204

Net::SMTP::SSL is marked as DEPRECATED on FreeBSD ports tree and will be removed in 2017-03-31. When it happens users will not be able to run git-send-email anymore. I’m considering to add Mike’s patch to FreeBSD ports tree as an alternative but it would be good to have a official solution for this problem.

FreeBSD bug report can be found at https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=214335

--
Renato Botelho


^ permalink raw reply

* Re: [PATCH 2/2] Use 'env' to find perl instead of fixed path
From: Johannes Schindelin @ 2017-01-13 15:21 UTC (permalink / raw)
  To: Pat Pannuto; +Cc: Junio C Hamano, Johannes Sixt, git
In-Reply-To: <CAAnLKaGvz4Wzs36gMSdoYCg+tzx6KFCe59FNnk5zNQ-L58ww1g@mail.gmail.com>

Hi Pat,

On Thu, 12 Jan 2017, Pat Pannuto wrote:

> You may still want the 1/2 patch in this series, just to make things
> internally consistent with "-w" vs "use warnings;" inside git's perl
> scripts.

Yes, I like that patch.

Ciao,
Johannes

^ permalink raw reply

* Re: [PATCH 0/3] updates to gitk & git-gui doc now gitview has gone
From: Johannes Schindelin @ 2017-01-13 15:23 UTC (permalink / raw)
  To: Philip Oakley; +Cc: GitList, Junio C Hamano
In-Reply-To: <20170112213240.7972-1-philipoakley@iee.org>

Hi Philip,

On Thu, 12 Jan 2017, Philip Oakley wrote:

> gitview was removed recently.
> 
> > Sent: Tuesday, January 10, 2017 11:48 PM
> > Subject: What's cooking in git.git (Jan 2017, #01; Tue, 10)
> 
> > * sb/remove-gitview (2017-01-07) 1 commit
> >   (merged to 'next' on 2017-01-10 at dcb3abd146)
> >  + contrib: remove gitview
> 
> > Will merge to 'master'.
> 
> 
> Lets remove the reference in the gitk man page, and do some other
> fixups while in the area.

These changes all look sensible to me.

Ciao,
Dscho

^ permalink raw reply

* Re: [PATCH 2/2] Use 'env' to find perl instead of fixed path
From: Johannes Schindelin @ 2017-01-13 15:27 UTC (permalink / raw)
  To: Eric Wong; +Cc: Pat Pannuto, Junio C Hamano, Johannes Sixt, git
In-Reply-To: <20170113024842.GA20572@starla>

Hi Eric,

On Fri, 13 Jan 2017, Eric Wong wrote:

> Pat Pannuto <pat.pannuto@gmail.com> wrote:
> > You may still want the 1/2 patch in this series, just to make things
> > internally consistent with "-w" vs "use warnings;" inside git's perl
> > scripts.
> 
> No, that is a step back.  "-w" affects the entire process, so it
> spots more potential problems.

I guess I do not understand, still, what the difference is between using
-w and adding `use warnings` *very early* in the script... Could you give
an example where it makes a difference?

Ciao,
Dscho

^ permalink raw reply

* [ANNOUNCE] Git for Windows 2.11.0(2)
From: Johannes Schindelin @ 2017-01-13 15:43 UTC (permalink / raw)
  To: git-for-windows, git; +Cc: Johannes Schindelin

MIME-Version: 1.0

Fcc: Sent

Dear Git users,

It is my pleasure to announce that Git for Windows 2.11.0(2) is available from:

	https://git-for-windows.github.io/

Changes since Git for Windows v2.11.0 (December 1st 2016)

New Features

  • Reading a large index has been speeded up using pthreads.
  • The checkout operation was speeded up for the common cases.
  • The status operation was made faster in large worktrees with many
    changes.
  • The diff operation saw performance improvements when working on a
    huge number of renamed files.
  • PuTTY's plink.exe can now be used in GIT_SSH_COMMAND without
    jumping through hoops, too.
  • The MSYS2 runtime was synchronized with Cygwin 2.6.1.

Bug Fixes

  • Non-ASCII characters are now shown properly again in Git Bash.
  • Implicit NTLM authentication works again when accessing a remote
    repository via HTTP/HTTPS without having to specify empty user name
    and password.
  • Our poll() emulation now uses 64-bit tick counts to avoid the (very
    rare) wraparound issue where it could miscalculate time differences
    every 49 days.
  • The --no-lock-index option of git status is now also respected also
    in submodules.
  • The regression of v2.11.0 where Git could no longer push to shared
    folders via UNC paths is fixed.
  • A bug in the MSYS2 runtime where it performed POSIX->Windows
    argument conversion incorrectly was fixed.
  • The MSYS2 runtime was prepared to access the FAST_CWD internal data
    structure in upcoming Windows versions.
  • Fixed a bug in the experimental builtin difftool where it would not
    handle copied/renamed files properly.
Filename | SHA-256
-------- | -------
Git-2.11.0.2-64-bit.exe | c7c6f8ba88a6da491117e03df559abd0fece1352f40d8b2d9bffc9c6c12c5800
Git-2.11.0.2-32-bit.exe | f7862331c994072402f9d7f03a4a6e2caec8ce0e91581ffbc6114631e920d9c9
PortableGit-2.11.0.2-64-bit.7z.exe | d4de119186bd63535fb792d73437cd0e2eb640ad50572ef7e04013f96aa70270
PortableGit-2.11.0.2-32-bit.7z.exe | 1f871552d1736bf86b08f81a55a29ff5aba9943d3c77b16294bdffca8c066f09
MinGit-2.11.0.2-64-bit.zip | 77558f381d21175dc017e90bc9cab90c8850bff6348a7dd112780f4f1f256449
MinGit-2.11.0.2-32-bit.zip | 23167f05f1274e169d42677a99bbc3e03e879250c71d5dcce6b0fbf3164013b8
Git-2.11.0.2-64-bit.tar.bz2 | 28d6d35db9f706c0439a55183ff68d9bbef9b67d66b11ada3775b2492a1d67bb
Git-2.11.0.2-32-bit.tar.bz2 | 592bbfd08c91b5fc826116b0ee29d9949e8e95b3094b81f0d1acea045e98fb64
Ciao,
Johannes















^ permalink raw reply

* [RFC] Add support for downloading blobs on demand
From: Ben Peart @ 2017-01-13 15:52 UTC (permalink / raw)
  To: git; +Cc: benpeart

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain, Size: 4344 bytes --]

Goal
~~~~

To be able to better handle repos with many files that any individual 
developer doesn’t need it would be nice if clone/fetch only brought down 
those files that were actually needed.  

To enable that, we are proposing adding a flag to clone/fetch that will
instruct the server to limit the objects it sends to commits and trees
and to not send any blobs.  

When git performs an operation that requires a blob that isn’t currently
available locally, it will download the missing blob and add it to the
local object store.

Design
~~~~~~

Clone and fetch will pass a “--lazy-clone” flag (open to a better name 
here) similar to “--depth” that instructs the server to only return 
commits and trees and to ignore blobs.

Later during git operations like checkout, when a blob cannot be found
after checking all the regular places (loose, pack, alternates, etc), 
git will download the missing object and place it into the local object 
store (currently as a loose object) then resume the operation.

To prevent git from accidentally downloading all missing blobs, some git
operations are updated to be aware of the potential for missing blobs.  
The most obvious being check_connected which will return success as if 
everything in the requested commits is available locally.

To minimize the impact on the server, the existing dumb HTTP protocol 
endpoint “objects/<sha>” can be used to retrieve the individual missing
blobs when needed.

Performance considerations
~~~~~~~~~~~~~~~~~~~~~~~~~~

We found that downloading commits and trees on demand had a significant 
negative performance impact.  In addition, many git commands assume all 
commits and trees are available locally so they quickly got pulled down 
anyway.  Even in very large repos the commits and trees are relatively 
small so bringing them down with the initial commit and subsequent fetch 
commands was reasonable.  

After cloning, the developer can use sparse-checkout to limit the set of 
files to the subset they need (typically only 1-10% in these large 
repos).  This allows the initial checkout to only download the set of 
files actually needed to complete their task.  At any point, the 
sparse-checkout file can be updated to include additional files which 
will be fetched transparently on demand.

Typical source files are relatively small so the overhead of connecting 
and authenticating to the server for a single file at a time is 
substantial.  As a result, having a long running process that is started 
with the first request and can cache connection information between 
requests is a significant performance win.

Now some numbers
~~~~~~~~~~~~~~~~

One repo has 3+ million files at tip across 500K folders with 5-6K 
active developers.  They have done a lot of work to remove large files 
from the repo so it is down to < 100GB.

Before changes: clone took hours to transfer the 87GB .pack + 119MB .idx

After changes: clone took 4 minutes to transfer 305MB .pack + 37MB .idx

After hydrating 35K files (the typical number any individual developer 
needs to do their work), there was an additional 460 MB of loose files 
downloaded.

Total savings: 86.24 GB * 6000 developers = 517 Terabytes saved!

We have another repo (3.1 M files, 618 GB at tip with no history with 
3K+ active developers) where the savings are even greater.

Future Work
~~~~~~~~~~~

The current prototype calls a new hook proc in sha1_object_info_extended 
and read_object, to download each missing blob.  A better solution would 
be to implement this via a long running process that is spawned on the 
first download and listens for requests to download additional objects 
until it terminates when the parent git operation exits (similar to the 
recent long running smudge and clean filter work).

Need to do more investigation into possible code paths that can trigger 
unnecessary blobs to be downloaded.  For example, we have determined 
that the rename detection logic in status can also trigger unnecessary 
blobs to be downloaded making status slow.

Need to investigate an alternate batching scheme where we can make a 
single request for a set of "related" blobs and receive single a 
packfile (especially during checkout).

Need to investigate adding a new endpoint in the smart protocol that can 
download both individual blobs as well as a batch of blobs.

^ permalink raw reply

* [PATCH 2/3] xdiff: -W: include immediately preceding non-empty lines in context
From: Vegard Nossum @ 2017-01-13 16:15 UTC (permalink / raw)
  To: Junio C Hamano, git; +Cc: Vegard Nossum, René Scharfe
In-Reply-To: <1484324112-17773-1-git-send-email-vegard.nossum@oracle.com>

When using -W to include the whole function in the diff context, you
are typically doing this to be able to review the change in its entirety
within the context of the function. It is therefore almost always
desirable to include any comments that immediately precede the function.

This also the fixes the case for C where the declaration is split across
multiple lines (where the first line of the declaration would not be
included in the output), e.g.:

	void
	dummy(void)
	{
		...
	}

We can include these lines by simply scanning upwards from the place of
the detected function start until we hit the first non-blank line.

Cc: René Scharfe <l.s.r@web.de>
Signed-off-by: Vegard Nossum <vegard.nossum@oracle.com>
---
 xdiff/xemit.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/xdiff/xemit.c b/xdiff/xemit.c
index 8c88dbd..3a3060d 100644
--- a/xdiff/xemit.c
+++ b/xdiff/xemit.c
@@ -200,6 +200,8 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
 			}
 
 			fs1 = get_func_line(xe, xecfg, NULL, i1, -1);
+			while (fs1 > 0 && !is_empty_rec(&xe->xdf1, fs1 - 1))
+				fs1--;
 			if (fs1 < 0)
 				fs1 = 0;
 			if (fs1 < s1) {
@@ -220,6 +222,8 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
 			long fe1 = get_func_line(xe, xecfg, NULL,
 						 xche->i1 + xche->chg1,
 						 xe->xdf1.nrec);
+			while (fe1 > 0 && !is_empty_rec(&xe->xdf1, fe1 - 1))
+				fe1--;
 			while (fe1 > 0 && is_empty_rec(&xe->xdf1, fe1 - 1))
 				fe1--;
 			if (fe1 < 0)
-- 
2.7.4


^ permalink raw reply related

* [PATCH 1/3] xdiff: -W: relax end-of-file function detection
From: Vegard Nossum @ 2017-01-13 16:15 UTC (permalink / raw)
  To: Junio C Hamano, git; +Cc: Vegard Nossum, René Scharfe

When adding a new function to the end of a file, it's enough to know
that 1) the addition is at the end of the file; and 2) there is a
function _somewhere_ in there.

If we had simply been changing the end of an existing function, then we
would also be deleting something from the old version.

This fixes the case where we add e.g.

	// Begin of dummy
	static int dummy(void)
	{
	}

to the end of the file.

Cc: René Scharfe <l.s.r@web.de>
Signed-off-by: Vegard Nossum <vegard.nossum@oracle.com>
---
 xdiff/xemit.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/xdiff/xemit.c b/xdiff/xemit.c
index 7389ce4..8c88dbd 100644
--- a/xdiff/xemit.c
+++ b/xdiff/xemit.c
@@ -183,16 +183,14 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
 
 				/*
 				 * We don't need additional context if
-				 * a whole function was added, possibly
-				 * starting with empty lines.
+				 * a whole function was added.
 				 */
-				while (i2 < xe->xdf2.nrec &&
-				       is_empty_rec(&xe->xdf2, i2))
+				while (i2 < xe->xdf2.nrec) {
+					if (match_func_rec(&xe->xdf2, xecfg, i2,
+						dummy, sizeof(dummy)) >= 0)
+						goto post_context_calculation;
 					i2++;
-				if (i2 < xe->xdf2.nrec &&
-				    match_func_rec(&xe->xdf2, xecfg, i2,
-						   dummy, sizeof(dummy)) >= 0)
-					goto post_context_calculation;
+				}
 
 				/*
 				 * Otherwise get more context from the
-- 
2.7.4


^ permalink raw reply related

* [PATCH 3/3] t/t4051-diff-function-context: improve tests for new diff -W behaviour
From: Vegard Nossum @ 2017-01-13 16:15 UTC (permalink / raw)
  To: Junio C Hamano, git; +Cc: Vegard Nossum, René Scharfe
In-Reply-To: <1484324112-17773-1-git-send-email-vegard.nossum@oracle.com>

We now include non-empty lines immediately before (and after) a function
as belonging to the function.

We can test this new functionality by moving the "// Begin" markers on
each function to the previous line.

This commit is intentionally not part of the previous commits in order
to show that the tests do not break even when changing the behaviour of
'diff -W' in the previous commits.

Cc: René Scharfe <l.s.r@web.de>
Signed-off-by: Vegard Nossum <vegard.nossum@oracle.com>
---
 t/t4051-diff-function-context.sh | 2 +-
 t/t4051/appended1.c              | 3 ++-
 t/t4051/dummy.c                  | 3 ++-
 t/t4051/hello.c                  | 3 ++-
 4 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/t/t4051-diff-function-context.sh b/t/t4051-diff-function-context.sh
index 6154acb..d1a646f 100755
--- a/t/t4051-diff-function-context.sh
+++ b/t/t4051-diff-function-context.sh
@@ -72,7 +72,7 @@ test_expect_success 'setup' '
 
 	# overlap function context of 1st change and -u context of 2nd change
 	grep -v "delete me from hello" <"$dir/hello.c" >file.c &&
-	sed 2p <"$dir/dummy.c" >>file.c &&
+	sed 3p <"$dir/dummy.c" >>file.c &&
 	commit_and_tag changed_hello_dummy file.c &&
 
 	git checkout initial &&
diff --git a/t/t4051/appended1.c b/t/t4051/appended1.c
index a9f56f1..8683983 100644
--- a/t/t4051/appended1.c
+++ b/t/t4051/appended1.c
@@ -1,5 +1,6 @@
 
-int appended(void) // Begin of first part
+// Begin of first part
+int appended(void)
 {
 	int i;
 	char *s = "a string";
diff --git a/t/t4051/dummy.c b/t/t4051/dummy.c
index a43016e..db227a8 100644
--- a/t/t4051/dummy.c
+++ b/t/t4051/dummy.c
@@ -1,5 +1,6 @@
 
-static int dummy(void)	// Begin of dummy
+// Begin of dummy
+static int dummy(void)
 {
 	int rc = 0;
 
diff --git a/t/t4051/hello.c b/t/t4051/hello.c
index 63b1a1e..75eac1d 100644
--- a/t/t4051/hello.c
+++ b/t/t4051/hello.c
@@ -1,5 +1,6 @@
 
-static void hello(void)	// Begin of hello
+// Begin of hello
+static void hello(void)
 {
 	/*
 	 * Classic.
-- 
2.7.4


^ permalink raw reply related

* Re: [PATCH 2/2] Use 'env' to find perl instead of fixed path
From: Eric Wong @ 2017-01-13 16:58 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: Pat Pannuto, Junio C Hamano, Johannes Sixt, git
In-Reply-To: <alpine.DEB.2.20.1701131626160.3469@virtualbox>

Johannes Schindelin <Johannes.Schindelin@gmx.de> wrote:
> I guess I do not understand, still, what the difference is between using
> -w and adding `use warnings` *very early* in the script... Could you give
> an example where it makes a difference?

"use warnings" won't leak across files/modules.  In the following
example, only the "useless use of join or string in void context"
from void.perl gets shown w/o -w.  The VoidExample.pm warning
can get lost.

----- VoidExample.pm ------
package VoidExample;
use strict;
# use warnings; # uncomment to trigger warning on next line:
join('', qw(a b c));

1;
------ void.perl ------
#!/usr/bin/perl
use strict;
use warnings;
use VoidExample;

join('', qw(a b c)); # warns
----------8<----------

$ perl -I . void.perl    # 1 warning
$ perl -w -I . void.perl # 2 warnings

^ permalink raw reply

* Re: [PATCH 2/2] Use 'env' to find perl instead of fixed path
From: Johannes Schindelin @ 2017-01-13 17:13 UTC (permalink / raw)
  To: Eric Wong; +Cc: Pat Pannuto, Junio C Hamano, Johannes Sixt, git
In-Reply-To: <20170113165819.GA6069@starla>

Hi Eric,

On Fri, 13 Jan 2017, Eric Wong wrote:

> Johannes Schindelin <Johannes.Schindelin@gmx.de> wrote:
> > I guess I do not understand, still, what the difference is between using
> > -w and adding `use warnings` *very early* in the script... Could you give
> > an example where it makes a difference?
> 
> "use warnings" won't leak across files/modules.  In the following
> example, only the "useless use of join or string in void context"
> from void.perl gets shown w/o -w.  The VoidExample.pm warning
> can get lost.

Okay, thanks!
Dscho

^ permalink raw reply

* Re: [RFC/PATCH 0/4] working tree operations: support superprefix
From: Brian J. Davis @ 2017-01-13 17:56 UTC (permalink / raw)
  To: sbeller; +Cc: bmwill, git, novalis
In-Reply-To: <152c0fbf-084c-847f-2a30-a45ea3dd26f2@gmail.com>

Resending original as plain text as git@verger.kernel.org rejected HTML 
encoding as potential virus.  Apologies for dupes in inbox. Hopefully 
this works.

In response to a post at:

https://groups.google.com/forum/#!topic/git-users/BVLcKHhSUKo

I was asked to post here to explain a potential problem with current 
modules implementation.  Apologies if I am posting in the wrong place... 
so good bad or otherwise here goes:

+-------------------------------
With:

git push origin branchname

or

git push server2 branchname

I can push or pull from multiple servers so no problem as git supports this.

Now the problem with use of submodules

submodules are listed:

submodule.directory_to_
checkout/proj1_dir.url=https://git.someserver1/git/proj1_dir 
<https://git.someserver1/git/proj1_dir>

but if say I want to pull from some server 2 and do a

git submodule update --init --recursive

what I would get is from someserver1 not someserver2 as there is no 
"origin" support for submodules.  Is this correct?  If so can origin 
support be added to submodules?
+-------------------------------

So above was post from google group.  Maybe above is enough to explain 
the problem that can result, but if not here is a discussion as to why 
this can be confusing resulting to pushing or pulling from the incorrect 
server.

Lets say projects starts at origin on https://server0.org. Project is 
then brought in house to server https://indhouse.corp by developer A.

Developer A then changes the submodule projects to point to indhouse and 
changes submodules in super project to point to indhouse so everything 
is great.

Then Developer B clones from indhouse makes changes to submodule1 and 
submodule1 and pushes them to indhouse.  Dev A tells Dev B hey thoes 
changes are great why don't you push to public server0 so every one can 
get thoes changes.  Dev B then git push origin branch_name on submodule1 
and push origin on submodule 2 and superproject.  And everything is 
great ... right?

Yes by now those who know git know what dev B  forgot or more to the 
point what git does not support in a "clean" way.  For those who don't 
enter the life of dev C

So dev C clones from server0 and performs a git submodule update --init 
--recursive.  Now Dev C is on the www and can't see indhouse.corp and 
... kerpow... Dev B and Dev C just experienced one of the many SW mines 
on the battlefield.

I ask that git devs first see if I am correct with this assessment as I 
could be wrong... maybe there is a way of doing this... and if not add a 
feature to git to handle submodules and multiple origins cleanly.... and 
yes beating dev B with rubber chicken might be a solution though I am 
looking for a slightly better option.

^ permalink raw reply

* [PATCH 0/6] loose-object fsck fixes/tightening
From: Jeff King @ 2017-01-13 17:52 UTC (permalink / raw)
  To: John Szakmeister; +Cc: Dennis Kaarsemaker, git
In-Reply-To: <CAEBDL5Vf=rvb4fZF87pNYci4sicmzhS_qPJYHHOGcnPTMBhhWg@mail.gmail.com>

On Fri, Jan 13, 2017 at 04:15:42AM -0500, John Szakmeister wrote:

> > I did notice another interesting case when looking at this. Fsck ends up
> > in fsck_loose(), which has the sha1 and path of the loose object. It
> > passes the sha1 to fsck_sha1(), and ignores the path entirely!
> >
> > So if you have a duplicate copy of the object in a pack, we'd actually
> > find and check the duplicate. This can happen, e.g., if you had a loose
> > object and fetched a thin-pack which made a copy of the loose object to
> > complete the pack).
> >
> > Probably fsck_loose() should be more picky about making sure we are
> > reading the data from the loose version we found.
> 
> Interesting find!  Thanks for the information Peff!

So I figured I would knock this out as a fun morning exercise. But
sheesh, it turned out to be a slog, because most of the functions rely
on map_sha1_file() to convert the sha1 to an object path at the lowest
level.

But I finally got something working, so here it is. I found another bug
on the way, along with a few cleanups. And then I did the trailing
garbage detection at the end, because by that point I knew right where
it needed to go. :)

  [1/6]: t1450: refactor loose-object removal
  [2/6]: sha1_file: fix error message for alternate objects
  [3/6]: t1450: test fsck of packed objects
  [4/6]: sha1_file: add read_loose_object() function
  [5/6]: fsck: parse loose object paths directly
  [6/6]: fsck: detect trailing garbage in all object types

 builtin/fsck.c  |  46 +++++++++++----
 cache.h         |  13 ++++
 sha1_file.c     | 180 +++++++++++++++++++++++++++++++++++++++++++++++++++-----
 t/t1450-fsck.sh |  86 +++++++++++++++++++++++----
 4 files changed, 284 insertions(+), 41 deletions(-)

-Peff

^ permalink raw reply

* [PATCH 1/6] t1450: refactor loose-object removal
From: Jeff King @ 2017-01-13 17:54 UTC (permalink / raw)
  To: John Szakmeister; +Cc: Dennis Kaarsemaker, git
In-Reply-To: <20170113175258.e66taigy4wpokohk@sigill.intra.peff.net>

Commit 90cf590f5 (fsck: optionally show more helpful info
for broken links, 2016-07-17) added a remove_loose_object()
helper, but we already had a remove_object() helper that did
the same thing. Let's combine these into one.

The implementations had a few subtle differences, so I've
tried to take the best of both:

  - the original used "sed", but the newer version avoids
    spawning an extra process

  - the original processed "$*", which was nonsense, as it
    assumed only a single sha1. Use "$1" to make that more
    clear.

  - the newer version ran an extra rev-parse, but it was not
    necessary; it's sole caller already converted the
    argument into a raw sha1

  - the original used "rm -f", whereas the new one uses
    "rm". The latter is better because it may notice a bug
    or other unexpected failure in the test. (The original
    does check that the object exists before we remove it,
    which is good, but that's a subset of the possible
    unexpected conditions).

Signed-off-by: Jeff King <peff@peff.net>
---
 t/t1450-fsck.sh | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh
index ee7d4736d..3297d4cb2 100755
--- a/t/t1450-fsck.sh
+++ b/t/t1450-fsck.sh
@@ -43,13 +43,13 @@ test_expect_success 'HEAD is part of refs, valid objects appear valid' '
 
 test_expect_success 'setup: helpers for corruption tests' '
 	sha1_file() {
-		echo "$*" | sed "s#..#.git/objects/&/#"
+		remainder=${1#??} &&
+		firsttwo=${1%$remainder} &&
+		echo ".git/objects/$firsttwo/$remainder"
 	} &&
 
 	remove_object() {
-		file=$(sha1_file "$*") &&
-		test -e "$file" &&
-		rm -f "$file"
+		rm "$(sha1_file "$1")"
 	}
 '
 
@@ -535,13 +535,6 @@ test_expect_success 'fsck --connectivity-only' '
 	)
 '
 
-remove_loose_object () {
-	sha1="$(git rev-parse "$1")" &&
-	remainder=${sha1#??} &&
-	firsttwo=${sha1%$remainder} &&
-	rm .git/objects/$firsttwo/$remainder
-}
-
 test_expect_success 'fsck --name-objects' '
 	rm -rf name-objects &&
 	git init name-objects &&
@@ -550,7 +543,7 @@ test_expect_success 'fsck --name-objects' '
 		test_commit julius caesar.t &&
 		test_commit augustus &&
 		test_commit caesar &&
-		remove_loose_object $(git rev-parse julius:caesar.t) &&
+		remove_object $(git rev-parse julius:caesar.t) &&
 		test_must_fail git fsck --name-objects >out &&
 		tree=$(git rev-parse --verify julius:) &&
 		grep "$tree (\(refs/heads/master\|HEAD\)@{[0-9]*}:" out
-- 
2.11.0.629.g10075098c


^ permalink raw reply related

* [PATCH 2/6] sha1_file: fix error message for alternate objects
From: Jeff King @ 2017-01-13 17:54 UTC (permalink / raw)
  To: John Szakmeister; +Cc: Dennis Kaarsemaker, git
In-Reply-To: <20170113175258.e66taigy4wpokohk@sigill.intra.peff.net>

When we fail to open a corrupt loose object, we report an
error and mention the filename via sha1_file_name().
However, that function will always give us a path in the
local repository, whereas the corrupt object may have come
from an alternate. The result is a very misleading error
message.

Teach the open_sha1_file() and stat_sha1_file() helpers to
pass back the path they found, so that we can report it
correctly.

Note that the pointers we return go to static storage (e.g.,
from sha1_file_name()), which is slightly dangerous.
However, these helpers are static local helpers, and the
names are used for immediately generating error messages.
The simplicity is an acceptable tradeoff for the danger.

Signed-off-by: Jeff King <peff@peff.net>
---
 sha1_file.c     | 46 +++++++++++++++++++++++++++++++---------------
 t/t1450-fsck.sh | 10 ++++++++++
 2 files changed, 41 insertions(+), 15 deletions(-)

diff --git a/sha1_file.c b/sha1_file.c
index 1eb47f611..c6b990f41 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -1630,39 +1630,54 @@ int git_open_cloexec(const char *name, int flags)
 	return fd;
 }
 
-static int stat_sha1_file(const unsigned char *sha1, struct stat *st)
+/*
+ * Find "sha1" as a loose object in the local repository or in an alternate.
+ * Returns 0 on success, negative on failure.
+ *
+ * The "path" out-parameter will give the path of the object we found (if any).
+ * Note that it may point to static storage and is only valid until another
+ * call to sha1_file_name(), etc.
+ */
+static int stat_sha1_file(const unsigned char *sha1, struct stat *st,
+			  const char **path)
 {
 	struct alternate_object_database *alt;
 
-	if (!lstat(sha1_file_name(sha1), st))
+	*path = sha1_file_name(sha1);
+	if (!lstat(*path, st))
 		return 0;
 
 	prepare_alt_odb();
 	errno = ENOENT;
 	for (alt = alt_odb_list; alt; alt = alt->next) {
-		const char *path = alt_sha1_path(alt, sha1);
-		if (!lstat(path, st))
+		*path = alt_sha1_path(alt, sha1);
+		if (!lstat(*path, st))
 			return 0;
 	}
 
 	return -1;
 }
 
-static int open_sha1_file(const unsigned char *sha1)
+/*
+ * Like stat_sha1_file(), but actually open the object and return the
+ * descriptor. See the caveats on the "path" parameter above.
+ */
+static int open_sha1_file(const unsigned char *sha1, const char **path)
 {
 	int fd;
 	struct alternate_object_database *alt;
 	int most_interesting_errno;
 
-	fd = git_open(sha1_file_name(sha1));
+	*path = sha1_file_name(sha1);
+	fd = git_open(*path);
 	if (fd >= 0)
 		return fd;
 	most_interesting_errno = errno;
 
 	prepare_alt_odb();
 	for (alt = alt_odb_list; alt; alt = alt->next) {
-		const char *path = alt_sha1_path(alt, sha1);
-		fd = git_open(path);
+		*path = alt_sha1_path(alt, sha1);
+		fd = git_open(*path);
 		if (fd >= 0)
 			return fd;
 		if (most_interesting_errno == ENOENT)
@@ -1674,10 +1689,11 @@ static int open_sha1_file(const unsigned char *sha1)
 
 void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
 {
+	const char *path;
 	void *map;
 	int fd;
 
-	fd = open_sha1_file(sha1);
+	fd = open_sha1_file(sha1, &path);
 	map = NULL;
 	if (fd >= 0) {
 		struct stat st;
@@ -1686,7 +1702,7 @@ void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
 			*size = xsize_t(st.st_size);
 			if (!*size) {
 				/* mmap() is forbidden on empty files */
-				error("object file %s is empty", sha1_file_name(sha1));
+				error("object file %s is empty", path);
 				return NULL;
 			}
 			map = xmmap(NULL, *size, PROT_READ, MAP_PRIVATE, fd, 0);
@@ -2806,8 +2822,9 @@ static int sha1_loose_object_info(const unsigned char *sha1,
 	 * object even exists.
 	 */
 	if (!oi->typep && !oi->typename && !oi->sizep) {
+		const char *path;
 		struct stat st;
-		if (stat_sha1_file(sha1, &st) < 0)
+		if (stat_sha1_file(sha1, &st, &path) < 0)
 			return -1;
 		if (oi->disk_sizep)
 			*oi->disk_sizep = st.st_size;
@@ -3003,6 +3020,8 @@ void *read_sha1_file_extended(const unsigned char *sha1,
 {
 	void *data;
 	const struct packed_git *p;
+	const char *path;
+	struct stat st;
 	const unsigned char *repl = lookup_replace_object_extended(sha1, flag);
 
 	errno = 0;
@@ -3018,12 +3037,9 @@ void *read_sha1_file_extended(const unsigned char *sha1,
 		die("replacement %s not found for %s",
 		    sha1_to_hex(repl), sha1_to_hex(sha1));
 
-	if (has_loose_object(repl)) {
-		const char *path = sha1_file_name(sha1);
-
+	if (!stat_sha1_file(repl, &st, &path))
 		die("loose object %s (stored in %s) is corrupt",
 		    sha1_to_hex(repl), path);
-	}
 
 	if ((p = has_packed_and_bad(repl)) != NULL)
 		die("packed object %s (stored in %s) is corrupt",
diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh
index 3297d4cb2..f95174c9d 100755
--- a/t/t1450-fsck.sh
+++ b/t/t1450-fsck.sh
@@ -550,4 +550,14 @@ test_expect_success 'fsck --name-objects' '
 	)
 '
 
+test_expect_success 'alternate objects are correctly blamed' '
+	test_when_finished "rm -rf alt.git .git/objects/info/alternates" &&
+	git init --bare alt.git &&
+	echo "../../alt.git/objects" >.git/objects/info/alternates &&
+	mkdir alt.git/objects/12 &&
+	>alt.git/objects/12/34567890123456789012345678901234567890 &&
+	test_must_fail git fsck >out 2>&1 &&
+	grep alt.git out
+'
+
 test_done
-- 
2.11.0.629.g10075098c


^ permalink raw reply related

* [PATCH 3/6] t1450: test fsck of packed objects
From: Jeff King @ 2017-01-13 17:55 UTC (permalink / raw)
  To: John Szakmeister; +Cc: Dennis Kaarsemaker, git
In-Reply-To: <20170113175258.e66taigy4wpokohk@sigill.intra.peff.net>

The code paths in fsck for packed and loose objects are
quite different, and it is not immediately obvious that the
packed case behaves well. In particular:

  1. The fsck_loose() function always returns "0" to tell the
     iterator to keep checking more objects. Whereas
     fsck_obj_buffer() (which handles packed objects)
     returns -1. This is OK, because the callback machinery
     for verify_pack() does not stop when it sees a non-zero
     return.

  2. The fsck_loose() function sets the ERROR_OBJECT bit
     when fsck_obj() fails, whereas fsck_obj_buffer() sets it
     only when it sees a corrupt object. This turns out not
     to matter. We don't actually do anything with this bit
     except exit the program with a non-zero code, and that
     is handled already by the non-zero return from the
     function.

So there are no bugs here, but it was certainly confusing to
me. And we do not test either of the properties in t1450
(neither that a non-corruption error will caused a non-zero
exit for a packed object, nor that we keep going after
seeing the first error). Let's test both of those
conditions, so that we'll notice if any of those assumptions
becomes invalid.

Signed-off-by: Jeff King <peff@peff.net>
---
 t/t1450-fsck.sh | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh
index f95174c9d..c39d42120 100755
--- a/t/t1450-fsck.sh
+++ b/t/t1450-fsck.sh
@@ -560,4 +560,25 @@ test_expect_success 'alternate objects are correctly blamed' '
 	grep alt.git out
 '
 
+test_expect_success 'fsck errors in packed objects' '
+	git cat-file commit HEAD >basis &&
+	sed "s/</one/" basis >one &&
+	sed "s/</foo/" basis >two &&
+	one=$(git hash-object -t commit -w one) &&
+	two=$(git hash-object -t commit -w two) &&
+	pack=$(
+		{
+			echo $one &&
+			echo $two
+		} | git pack-objects .git/objects/pack/pack
+	) &&
+	test_when_finished "rm -f .git/objects/pack/pack-$pack.*" &&
+	remove_object $one &&
+	remove_object $two &&
+	test_must_fail git fsck 2>out &&
+	grep "error in commit $one.* - bad name" out &&
+	grep "error in commit $two.* - bad name" out &&
+	! grep corrupt out
+'
+
 test_done
-- 
2.11.0.629.g10075098c


^ permalink raw reply related

* Re: [PATCH 1/3] xdiff: -W: relax end-of-file function detection
From: René Scharfe @ 2017-01-13 17:49 UTC (permalink / raw)
  To: Vegard Nossum, Junio C Hamano, git
In-Reply-To: <1484324112-17773-1-git-send-email-vegard.nossum@oracle.com>

Am 13.01.2017 um 17:15 schrieb Vegard Nossum:
> When adding a new function to the end of a file, it's enough to know
> that 1) the addition is at the end of the file; and 2) there is a
> function _somewhere_ in there.
>
> If we had simply been changing the end of an existing function, then we
> would also be deleting something from the old version.

That makes sense, thanks.

> This fixes the case where we add e.g.
>
> 	// Begin of dummy
> 	static int dummy(void)
> 	{
> 	}
>
> to the end of the file.

Without this patch the unchanged function before the added lines is 
shown in its entirety as (uncalled for) context.

>
> Cc: René Scharfe <l.s.r@web.de>
> Signed-off-by: Vegard Nossum <vegard.nossum@oracle.com>
> ---
>  xdiff/xemit.c | 14 ++++++--------
>  1 file changed, 6 insertions(+), 8 deletions(-)
>
> diff --git a/xdiff/xemit.c b/xdiff/xemit.c
> index 7389ce4..8c88dbd 100644
> --- a/xdiff/xemit.c
> +++ b/xdiff/xemit.c
> @@ -183,16 +183,14 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
>
>  				/*
>  				 * We don't need additional context if
> -				 * a whole function was added, possibly
> -				 * starting with empty lines.
> +				 * a whole function was added.
>  				 */
> -				while (i2 < xe->xdf2.nrec &&
> -				       is_empty_rec(&xe->xdf2, i2))
> +				while (i2 < xe->xdf2.nrec) {
> +					if (match_func_rec(&xe->xdf2, xecfg, i2,
> +						dummy, sizeof(dummy)) >= 0)

Nit: I don't like the indentation here.  Giving "dummy" its own line is 
also not exactly pretty, but at least would allow the parameters to be 
aligned on the opening parenthesis.

> +						goto post_context_calculation;
>  					i2++;
> -				if (i2 < xe->xdf2.nrec &&
> -				    match_func_rec(&xe->xdf2, xecfg, i2,
> -						   dummy, sizeof(dummy)) >= 0)
> -					goto post_context_calculation;
> +				}
>
>  				/*
>  				 * Otherwise get more context from the
>

^ permalink raw reply

* [PATCHv3 2/2] builtin/commit.c: switch to xstrfmt(), instead of snprintf,
From: Elia Pinto @ 2017-01-13 17:58 UTC (permalink / raw)
  To: git; +Cc: Elia Pinto
In-Reply-To: <20170113175801.39468-1-gitter.spiros@gmail.com>

In this patch, instead of using xnprintf instead of snprintf, which asserts
that we don't truncate, we are switching to dynamic allocation with  xstrfmt(),
, so we can avoid dealing with magic numbers in the code and reduce the cognitive burden from
the programmers, because they no longer have to count bytes needed for static allocation.
As a side effect of this patch we have also reduced the snprintf() calls, that may silently truncate 
results if the programmer is not careful.

Helped-by: Junio C Hamano <gitster@pobox.com>
Helped-by: Jeff King <peff@peff.net> 
Signed-off-by: Elia Pinto <gitter.spiros@gmail.com>
---
This is the third  version of the patch.

Changes from the first version: I have split the original commit in two, as discussed here
http://public-inbox.org/git/20161213132717.42965-1-gitter.spiros@gmail.com/.

Changes from the second version:
- Changed the commit message to clarify the purpose of the patch (
as suggested by Junio)
https://public-inbox.org/git/xmqqtw95mfo3.fsf@gitster.mtv.corp.google.com/T/#m2e6405a8a78a8ca1ed770614c91398290574c4a1



 builtin/commit.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/builtin/commit.c b/builtin/commit.c
index 09bcc0f13..37228330c 100644
--- a/builtin/commit.c
+++ b/builtin/commit.c
@@ -1526,12 +1526,10 @@ static int git_commit_config(const char *k, const char *v, void *cb)
 static int run_rewrite_hook(const unsigned char *oldsha1,
 			    const unsigned char *newsha1)
 {
-	/* oldsha1 SP newsha1 LF NUL */
-	static char buf[2*40 + 3];
+	char *buf;
 	struct child_process proc = CHILD_PROCESS_INIT;
 	const char *argv[3];
 	int code;
-	size_t n;
 
 	argv[0] = find_hook("post-rewrite");
 	if (!argv[0])
@@ -1547,11 +1545,11 @@ static int run_rewrite_hook(const unsigned char *oldsha1,
 	code = start_command(&proc);
 	if (code)
 		return code;
-	n = snprintf(buf, sizeof(buf), "%s %s\n",
-		     sha1_to_hex(oldsha1), sha1_to_hex(newsha1));
+	buf = xstrfmt("%s %s\n", sha1_to_hex(oldsha1), sha1_to_hex(newsha1));
 	sigchain_push(SIGPIPE, SIG_IGN);
-	write_in_full(proc.in, buf, n);
+	write_in_full(proc.in, buf, strlen(buf));
 	close(proc.in);
+	free(buf);
 	sigchain_pop(SIGPIPE);
 	return finish_command(&proc);
 }
-- 
2.11.0.154.g5f5f154


^ permalink raw reply related

* [PATCHv3 1/2] builtin/commit.c: removes the PATH_MAX limitation via dynamic allocation
From: Elia Pinto @ 2017-01-13 17:58 UTC (permalink / raw)
  To: git; +Cc: Elia Pinto

This patch removes the PATH_MAX limitation from the environment 
setting that points to a filename, we'd want to handle larger
paths anyway, so we switch to dynamic allocation. As a side effect 
of this patch we have also reduced the snprintf() calls, that 
may silently truncate results if the programmer is not careful.

Helped-by: Junio C Hamano <gitster@pobox.com>
Helped-by: Jeff King <peff@peff.net>
Signed-off-by: Elia Pinto <gitter.spiros@gmail.com>
---
This is the third  version of the patch.

Changes from the first version: I have split the original commit in two, as discussed here
http://public-inbox.org/git/20161213132717.42965-1-gitter.spiros@gmail.com/.

Changes from the second version:
- Changed the commit message to clarify the purpose of the patch (
as suggested by Junio)
- drop the arg_array_clear call after exit(1)
https://public-inbox.org/git/xmqqtw95mfo3.fsf@gitster.mtv.corp.google.com/T/#m2e6405a8a78a8ca1ed770614c91398290574c4a1


 builtin/commit.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/builtin/commit.c b/builtin/commit.c
index 0ed634b26..09bcc0f13 100644
--- a/builtin/commit.c
+++ b/builtin/commit.c
@@ -960,15 +960,15 @@ static int prepare_to_commit(const char *index_file, const char *prefix,
 		return 0;
 
 	if (use_editor) {
-		char index[PATH_MAX];
-		const char *env[2] = { NULL };
-		env[0] =  index;
-		snprintf(index, sizeof(index), "GIT_INDEX_FILE=%s", index_file);
-		if (launch_editor(git_path_commit_editmsg(), NULL, env)) {
+		struct argv_array env = ARGV_ARRAY_INIT;
+
+		argv_array_pushf(&env, "GIT_INDEX_FILE=%s", index_file);
+		if (launch_editor(git_path_commit_editmsg(), NULL, env.argv)) {
 			fprintf(stderr,
 			_("Please supply the message using either -m or -F option.\n"));
 			exit(1);
 		}
+		argv_array_clear(&env);
 	}
 
 	if (!no_verify &&
@@ -1557,23 +1557,22 @@ static int run_rewrite_hook(const unsigned char *oldsha1,
 
 int run_commit_hook(int editor_is_used, const char *index_file, const char *name, ...)
 {
-	const char *hook_env[3] =  { NULL };
-	char index[PATH_MAX];
+	struct argv_array hook_env = ARGV_ARRAY_INIT;
 	va_list args;
 	int ret;
 
-	snprintf(index, sizeof(index), "GIT_INDEX_FILE=%s", index_file);
-	hook_env[0] = index;
+	argv_array_pushf(&hook_env, "GIT_INDEX_FILE=%s", index_file);
 
 	/*
 	 * Let the hook know that no editor will be launched.
 	 */
 	if (!editor_is_used)
-		hook_env[1] = "GIT_EDITOR=:";
+		argv_array_push(&hook_env, "GIT_EDITOR=:");
 
 	va_start(args, name);
-	ret = run_hook_ve(hook_env, name, args);
+	ret = run_hook_ve(hook_env.argv,name, args);
 	va_end(args);
+	argv_array_clear(&hook_env);
 
 	return ret;
 }
-- 
2.11.0.154.g5f5f154


^ permalink raw reply related

* [PATCH 4/6] sha1_file: add read_loose_object() function
From: Jeff King @ 2017-01-13 17:58 UTC (permalink / raw)
  To: John Szakmeister; +Cc: Dennis Kaarsemaker, git
In-Reply-To: <20170113175258.e66taigy4wpokohk@sigill.intra.peff.net>

It's surprisingly hard to ask the sha1_file code to open a
_specific_ incarnation of a loose object. Most of the
functions take a sha1, and loop over the various object
types (packed versus loose) and locations (local versus
alternates) at a low level.

However, some tools like fsck need to look at a specific
file. This patch gives them a function they can use to open
the loose object at a given path.

The implementation unfortunately ends up repeating bits of
related functions, but there's not a good way around it
without some major refactoring of the whole sha1_file stack.
We need to mmap the specific file, then partially read the
zlib stream to know whether we're streaming or not, and then
finally either stream it or copy the data to a buffer.

We can do that by assembling some of the more arcane
internal sha1_file functions, but we end up having to
essentially reimplement unpack_sha1_file(), along with the
streaming bits of check_sha1_signature().

Still, most of the ugliness is contained in the new
function, and the interface is clean enough that it may be
reusable (though it seems unlikely anything but git-fsck
would care about opening a specific file).

Signed-off-by: Jeff King <peff@peff.net>
---
 cache.h     |  13 ++++++
 sha1_file.c | 133 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 143 insertions(+), 3 deletions(-)

diff --git a/cache.h b/cache.h
index 1b67f078d..33f1c2fa7 100644
--- a/cache.h
+++ b/cache.h
@@ -1140,6 +1140,19 @@ extern int finalize_object_file(const char *tmpfile, const char *filename);
 
 extern int has_sha1_pack(const unsigned char *sha1);
 
+/*
+ * Open the loose object at path, check its sha1, and return the contents,
+ * type, and size. If the object is a blob, then "contents" may return NULL,
+ * to allow streaming of large blobs.
+ *
+ * Returns 0 on success, negative on error (details may be written to stderr).
+ */
+int read_loose_object(const char *path,
+		      const unsigned char *expected_sha1,
+		      enum object_type *type,
+		      unsigned long *size,
+		      void **contents);
+
 /*
  * Return true iff we have an object named sha1, whether local or in
  * an alternate object database, and whether packed or loose.  This
diff --git a/sha1_file.c b/sha1_file.c
index c6b990f41..c0fccb73c 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -1687,13 +1687,21 @@ static int open_sha1_file(const unsigned char *sha1, const char **path)
 	return -1;
 }
 
-void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
+/*
+ * Map the loose object at "path" if it is not NULL, or the path found by
+ * searching for a loose object named "sha1".
+ */
+static void *map_sha1_file_1(const char *path,
+			     const unsigned char *sha1,
+			     unsigned long *size)
 {
-	const char *path;
 	void *map;
 	int fd;
 
-	fd = open_sha1_file(sha1, &path);
+	if (path)
+		fd = git_open(path);
+	else
+		fd = open_sha1_file(sha1, &path);
 	map = NULL;
 	if (fd >= 0) {
 		struct stat st;
@@ -1712,6 +1720,11 @@ void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
 	return map;
 }
 
+void *map_sha1_file(const unsigned char *sha1, unsigned long *size)
+{
+	return map_sha1_file_1(NULL, sha1, size);
+}
+
 unsigned long unpack_object_header_buffer(const unsigned char *buf,
 		unsigned long len, enum object_type *type, unsigned long *sizep)
 {
@@ -3809,3 +3822,117 @@ int for_each_packed_object(each_packed_object_fn cb, void *data, unsigned flags)
 	}
 	return r ? r : pack_errors;
 }
+
+static int check_stream_sha1(git_zstream *stream,
+			     const char *hdr,
+			     unsigned long size,
+			     const char *path,
+			     const unsigned char *expected_sha1)
+{
+	git_SHA_CTX c;
+	unsigned char real_sha1[GIT_SHA1_RAWSZ];
+	unsigned char buf[4096];
+	unsigned long total_read;
+	int status = Z_OK;
+
+	git_SHA1_Init(&c);
+	git_SHA1_Update(&c, hdr, stream->total_out);
+
+	/*
+	 * We already read some bytes into hdr, but the ones up to the NUL
+	 * do not count against the object's content size.
+	 */
+	total_read = stream->total_out - strlen(hdr) - 1;
+
+	/*
+	 * This size comparison must be "<=" to read the final zlib packets;
+	 * see the comment in unpack_sha1_rest for details.
+	 */
+	while (total_read <= size &&
+	       (status == Z_OK || status == Z_BUF_ERROR)) {
+		stream->next_out = buf;
+		stream->avail_out = sizeof(buf);
+		if (size - total_read < stream->avail_out)
+			stream->avail_out = size - total_read;
+		status = git_inflate(stream, Z_FINISH);
+		git_SHA1_Update(&c, buf, stream->next_out - buf);
+		total_read += stream->next_out - buf;
+	}
+	git_inflate_end(stream);
+
+	if (status != Z_STREAM_END) {
+		error("corrupt loose object '%s'", sha1_to_hex(expected_sha1));
+		return -1;
+	}
+
+	git_SHA1_Final(real_sha1, &c);
+	if (hashcmp(expected_sha1, real_sha1)) {
+		error("sha1 mismatch for %s (expected %s)", path,
+		      sha1_to_hex(expected_sha1));
+		return -1;
+	}
+
+	return 0;
+}
+
+int read_loose_object(const char *path,
+		      const unsigned char *expected_sha1,
+		      enum object_type *type,
+		      unsigned long *size,
+		      void **contents)
+{
+	int ret = -1;
+	int fd = -1;
+	void *map = NULL;
+	unsigned long mapsize;
+	git_zstream stream;
+	char hdr[32];
+
+	*contents = NULL;
+
+	map = map_sha1_file_1(path, NULL, &mapsize);
+	if (!map) {
+		error_errno("unable to mmap %s", path);
+		goto out;
+	}
+
+	if (unpack_sha1_header(&stream, map, mapsize, hdr, sizeof(hdr)) < 0) {
+		error("unable to unpack header of %s", path);
+		goto out;
+	}
+
+	*type = parse_sha1_header(hdr, size);
+	if (*type < 0) {
+		error("unable to parse header of %s", path);
+		git_inflate_end(&stream);
+		goto out;
+	}
+
+	if (*type == OBJ_BLOB) {
+		if (check_stream_sha1(&stream, hdr, *size, path, expected_sha1) < 0)
+			goto out;
+	} else {
+		*contents = unpack_sha1_rest(&stream, hdr, *size, expected_sha1);
+		if (!*contents) {
+			error("unable to unpack contents of %s", path);
+			git_inflate_end(&stream);
+			goto out;
+		}
+		if (check_sha1_signature(expected_sha1, *contents,
+					 *size, typename(*type))) {
+			error("sha1 mismatch for %s (expected %s)", path,
+			      sha1_to_hex(expected_sha1));
+			free(*contents);
+			goto out;
+		}
+	}
+
+	ret = 0; /* everything checks out */
+
+out:
+	if (map)
+		munmap(map, mapsize);
+	if (fd >= 0)
+		close(fd);
+	return ret;
+}
-- 
2.11.0.629.g10075098c


^ permalink raw reply related

* [PATCH 5/6] fsck: parse loose object paths directly
From: Jeff King @ 2017-01-13 17:59 UTC (permalink / raw)
  To: John Szakmeister; +Cc: Dennis Kaarsemaker, git
In-Reply-To: <20170113175258.e66taigy4wpokohk@sigill.intra.peff.net>

When we iterate over the list of loose objects to check, we
get the actual path of each object. But we then throw it
away and pass just the sha1 to fsck_sha1(), which will do a
fresh lookup. Usually it would find the same object, but it
may not if an object exists both as a loose and a packed
object. We may end up checking the packed object twice, and
never look at the loose one.

In practice this isn't too terrible, because if fsck doesn't
complain, it means you have at least one good copy. But
since the point of fsck is to look for corruption, we should
be thorough.

The new read_loose_object() interface can help us get the
data from disk, and then we replace parse_object() with
parse_object_buffer(). As a bonus, our error messages now
mention the path to a corrupted object, which should make it
easier to track down errors when they do happen.

Signed-off-by: Jeff King <peff@peff.net>
---
 builtin/fsck.c  | 46 +++++++++++++++++++++++++++++++++-------------
 t/t1450-fsck.sh | 16 ++++++++++++++++
 2 files changed, 49 insertions(+), 13 deletions(-)

diff --git a/builtin/fsck.c b/builtin/fsck.c
index f01b81eeb..4b91ee95e 100644
--- a/builtin/fsck.c
+++ b/builtin/fsck.c
@@ -362,18 +362,6 @@ static int fsck_obj(struct object *obj)
 	return 0;
 }
 
-static int fsck_sha1(const unsigned char *sha1)
-{
-	struct object *obj = parse_object(sha1);
-	if (!obj) {
-		errors_found |= ERROR_OBJECT;
-		return error("%s: object corrupt or missing",
-			     sha1_to_hex(sha1));
-	}
-	obj->flags |= HAS_OBJ;
-	return fsck_obj(obj);
-}
-
 static int fsck_obj_buffer(const unsigned char *sha1, enum object_type type,
 			   unsigned long size, void *buffer, int *eaten)
 {
@@ -488,9 +476,41 @@ static void get_default_heads(void)
 	}
 }
 
+static struct object *parse_loose_object(const unsigned char *sha1,
+					 const char *path)
+{
+	struct object *obj;
+	void *contents;
+	enum object_type type;
+	unsigned long size;
+	int eaten;
+
+	if (read_loose_object(path, sha1, &type, &size, &contents) < 0)
+		return NULL;
+
+	if (!contents && type != OBJ_BLOB)
+		die("BUG: read_loose_object streamed a non-blob");
+
+	obj = parse_object_buffer(sha1, type, size, contents, &eaten);
+
+	if (!eaten)
+		free(contents);
+	return obj;
+}
+
 static int fsck_loose(const unsigned char *sha1, const char *path, void *data)
 {
-	if (fsck_sha1(sha1))
+	struct object *obj = parse_loose_object(sha1, path);
+
+	if (!obj) {
+		errors_found |= ERROR_OBJECT;
+		error("%s: object corrupt or missing: %s",
+		      sha1_to_hex(sha1), path);
+		return 0; /* keep checking other objects */
+	}
+
+	obj->flags = HAS_OBJ;
+	if (fsck_obj(obj))
 		errors_found |= ERROR_OBJECT;
 	return 0;
 }
diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh
index c39d42120..455c186fe 100755
--- a/t/t1450-fsck.sh
+++ b/t/t1450-fsck.sh
@@ -581,4 +581,20 @@ test_expect_success 'fsck errors in packed objects' '
 	! grep corrupt out
 '
 
+test_expect_success 'fsck finds problems in duplicate loose objects' '
+	rm -rf broken-duplicate &&
+	git init broken-duplicate &&
+	(
+		cd broken-duplicate &&
+		test_commit duplicate &&
+		# no "-d" here, so we end up with duplicates
+		git repack &&
+		# now corrupt the loose copy
+		file=$(sha1_file "$(git rev-parse HEAD)") &&
+		rm "$file" &&
+		echo broken >"$file" &&
+		test_must_fail git fsck
+	)
+'
+
 test_done
-- 
2.11.0.629.g10075098c


^ permalink raw reply related

* [PATCH 6/6] fsck: detect trailing garbage in all object types
From: Jeff King @ 2017-01-13 18:00 UTC (permalink / raw)
  To: John Szakmeister; +Cc: Dennis Kaarsemaker, git
In-Reply-To: <20170113175258.e66taigy4wpokohk@sigill.intra.peff.net>

When a loose tree or commit is read by fsck (or any git
program), unpack_sha1_rest() checks whether there is extra
cruft at the end of the object file, after the zlib data.
Blobs that are streamed, however, do not have this check.

For normal git operations, it's not a big deal. We know the
sha1 and size checked out, so we have the object bytes we
wanted.  The trailing garbage doesn't affect what we're
trying to do.

But since the point of fsck is to find corruption or other
problems, it should be more thorough. This patch teaches its
loose-sha1 reader to detect extra bytes after the zlib
stream and complain.

Signed-off-by: Jeff King <peff@peff.net>
---
 sha1_file.c     |  5 +++++
 t/t1450-fsck.sh | 22 ++++++++++++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/sha1_file.c b/sha1_file.c
index c0fccb73c..b77ab6d5c 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -3864,6 +3864,11 @@ static int check_stream_sha1(git_zstream *stream,
 		error("corrupt loose object '%s'", sha1_to_hex(expected_sha1));
 		return -1;
 	}
+	if (stream->avail_in) {
+		error("garbage at end of loose object '%s'",
+		      sha1_to_hex(expected_sha1));
+		return -1;
+	}
 
 	git_SHA1_Final(real_sha1, &c);
 	if (hashcmp(expected_sha1, real_sha1)) {
diff --git a/t/t1450-fsck.sh b/t/t1450-fsck.sh
index 455c186fe..8975b4d1b 100755
--- a/t/t1450-fsck.sh
+++ b/t/t1450-fsck.sh
@@ -597,4 +597,26 @@ test_expect_success 'fsck finds problems in duplicate loose objects' '
 	)
 '
 
+test_expect_success 'fsck detects trailing loose garbage (commit)' '
+	git cat-file commit HEAD >basis &&
+	echo bump-commit-sha1 >>basis &&
+	commit=$(git hash-object -w -t commit basis) &&
+	file=$(sha1_file $commit) &&
+	test_when_finished "remove_object $commit" &&
+	chmod +w "$file" &&
+	echo garbage >>"$file" &&
+	test_must_fail git fsck 2>out &&
+	test_i18ngrep "garbage.*$commit" out
+'
+
+test_expect_success 'fsck detects trailing loose garbage (blob)' '
+	blob=$(echo trailing | git hash-object -w --stdin) &&
+	file=$(sha1_file $blob) &&
+	test_when_finished "remove_object $blob" &&
+	chmod +w "$file" &&
+	echo garbage >>"$file" &&
+	test_must_fail git fsck 2>out &&
+	test_i18ngrep "garbage.*$blob" out
+'
+
 test_done
-- 
2.11.0.629.g10075098c

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox