git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2] Allow git mv FileA fILEa on case ignore file systems
@ 2011-03-19 14:28 Torsten Bögershausen
  2011-03-19 18:20 ` Erik Faye-Lund
  2011-03-20  5:50 ` Junio C Hamano
  0 siblings, 2 replies; 8+ messages in thread
From: Torsten Bögershausen @ 2011-03-19 14:28 UTC (permalink / raw)
  To: kusmabite, git; +Cc: tboegi

The typical use case is when a file "FileA" should be renamed into fILEa
and we are on a case insenstive file system (system core.ignorecase = true).
Source and destination are the same file, it can be accessed under both names.
This makes git think that the destination file exists.
Unless used with --forced, git will refuse the "git mv FileA fILEa".
This change will allow "git mv FileA fILEa" under the following condition:
On Linux/Unix/Mac OS X the move is allowed when the inode of the source and
destination are equal (and they are on the same device).
This allows renames of MÄRCHEN into Märchen on Mac OS X.
(As a side effect, a file can be renamed to a name which is already
hard-linked to the same inode).
On Windows, the function win_is_same_file() from compat/win32/same-file.c
is used.
It calls GetFileInformationByHandle() to check if both files are
"the same".

Signed-off-by: Torsten Bögershausen <tboegi@web.de>
---
 Makefile                 |    8 +++++---
 builtin/mv.c             |    2 +-
 compat/win32/same-file.c |   26 ++++++++++++++++++++++++++
 git-compat-util.h        |   15 +++++++++++++++
 t/t7001-mv.sh            |   29 +++++++++++++++++++++++++++++
 5 files changed, 76 insertions(+), 4 deletions(-)
 create mode 100644 compat/win32/same-file.c

diff --git a/Makefile b/Makefile
index 5c2b797..55b9a05 100644
--- a/Makefile
+++ b/Makefile
@@ -924,7 +924,7 @@ ifeq ($(uname_O),Cygwin)
 	# Try commenting this out if you suspect MMAP is more efficient
 	NO_MMAP = YesPlease
 	X = .exe
-	COMPAT_OBJS += compat/cygwin.o
+	COMPAT_OBJS += compat/cygwin.o compat/win32/same-file.o
 	UNRELIABLE_FSTAT = UnfortunatelyYes
 endif
 ifeq ($(uname_S),FreeBSD)
@@ -1104,7 +1104,8 @@ ifeq ($(uname_S),Windows)
 	BASIC_CFLAGS = -nologo -I. -I../zlib -Icompat/vcbuild -Icompat/vcbuild/include -DWIN32 -D_CONSOLE -DHAVE_STRING_H -D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_DEPRECATE
 	COMPAT_OBJS = compat/msvc.o compat/winansi.o \
 		compat/win32/pthread.o compat/win32/syslog.o \
-		compat/win32/sys/poll.o compat/win32/dirent.o
+		compat/win32/sys/poll.o compat/win32/dirent.o \
+		compat/win32/same-file.o
 	COMPAT_CFLAGS = -D__USE_MINGW_ACCESS -DNOGDI -DHAVE_STRING_H -DHAVE_ALLOCA_H -Icompat -Icompat/regex -Icompat/win32 -DSTRIP_EXTENSION=\".exe\"
 	BASIC_LDFLAGS = -IGNORE:4217 -IGNORE:4049 -NOLOGO -SUBSYSTEM:CONSOLE -NODEFAULTLIB:MSVCRT.lib
 	EXTLIBS = user32.lib advapi32.lib shell32.lib wininet.lib ws2_32.lib
@@ -1177,7 +1178,8 @@ ifneq (,$(findstring MINGW,$(uname_S)))
 	COMPAT_CFLAGS += -DSTRIP_EXTENSION=\".exe\"
 	COMPAT_OBJS += compat/mingw.o compat/winansi.o \
 		compat/win32/pthread.o compat/win32/syslog.o \
-		compat/win32/sys/poll.o compat/win32/dirent.o
+		compat/win32/sys/poll.o compat/win32/dirent.o \
+		compat/win32/same-file.o
 	EXTLIBS += -lws2_32
 	PTHREAD_LIBS =
 	X = .exe
diff --git a/builtin/mv.c b/builtin/mv.c
index 93e8995..96792bd 100644
--- a/builtin/mv.c
+++ b/builtin/mv.c
@@ -166,7 +166,7 @@ int cmd_mv(int argc, const char **argv, const char *prefix)
 			bad = "not under version control";
 		else if (lstat(dst, &st) == 0) {
 			bad = "destination exists";
-			if (force) {
+			if (force || is_same_file(src, dst)) {
 				/*
 				 * only files can overwrite each other:
 				 * check both source and destination
diff --git a/compat/win32/same-file.c b/compat/win32/same-file.c
new file mode 100644
index 0000000..bb1a791
--- /dev/null
+++ b/compat/win32/same-file.c
@@ -0,0 +1,26 @@
+#include "../../git-compat-util.h"
+#include "../win32.h"
+
+int win_is_same_file(const char *a, const char *b)
+{
+	BY_HANDLE_FILE_INFORMATION hia, hib;
+	HANDLE h;
+
+	h = CreateFile(a, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, 0, NULL);
+	if (INVALID_HANDLE_VALUE == h)
+		return 0;
+	if (!(GetFileInformationByHandle(h,&hia)))
+		return 0;
+  CloseHandle(h);
+
+	h = CreateFile(b, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, 0, NULL);
+	if (INVALID_HANDLE_VALUE == h)
+		return 0;
+	if (!(GetFileInformationByHandle(h,&hib)))
+		return 0;
+  CloseHandle(h);
+
+	return hia.dwVolumeSerialNumber == hib.dwVolumeSerialNumber &&
+	       hia.nFileSizeLow == hib.nFileSizeLow &&
+	       hia.nFileSizeHigh == hib.nFileSizeHigh;
+}
diff --git a/git-compat-util.h b/git-compat-util.h
index 49b50ee..df95458 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -152,6 +152,21 @@
 #include "compat/msvc.h"
 #endif
 
+#if defined (WIN32) || defined(__CYGWIN__)
+/* MinGW or MSVC or cygwin */
+int win_is_same_file(const char *a, const char *b);
+#define is_same_file(a,b) win_is_same_file((a),(b))
+#else
+static inline int is_same_file(const char *a, const char *b)
+{
+	struct stat sta, stb;
+	if (lstat(a, &sta) ||
+	    lstat(b, &stb))
+		return 0;
+	return sta.st_ino && sta.st_dev == stb.st_dev && sta.st_ino == stb.st_ino;
+}
+#endif
+
 #ifndef NO_LIBGEN_H
 #include <libgen.h>
 #else
diff --git a/t/t7001-mv.sh b/t/t7001-mv.sh
index a845b15..d0e73ee 100755
--- a/t/t7001-mv.sh
+++ b/t/t7001-mv.sh
@@ -255,4 +255,33 @@ test_expect_success SYMLINKS 'git mv should overwrite file with a symlink' '
 
 rm -f moved symlink
 
+touch x
+if ln x y 2>/dev/null; then
+	hardlinks=1
+fi
+rm -f x y
+
+if test "$(git config --bool core.ignorecase)" = true -o "$hardlinks"; then
+	test_expect_success 'git mv FileA fILEa' '
+
+		rm -fr .git * &&
+		git init &&
+		echo FileA > FileA &&
+		git add FileA &&
+		git commit -m add FileA &&
+		{
+			if ! test -f fILEa; then
+				ln FileA fILEa
+			fi
+		} &&
+		git mv FileA fILEa &&
+		git commit -m "mv FileA fILEa" &&
+		rm -f FileA fILEa &&
+		git reset --hard &&
+		test "$(echo *)" = fILEa
+	'
+else
+	say "Neither ignorecase nor hardlinks, skipping git mv FileA fILEa"
+fi
+
 test_done
-- 
1.7.4

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] Allow git mv FileA fILEa on case ignore file systems
  2011-03-19 14:28 [PATCH v2] Allow git mv FileA fILEa on case ignore file systems Torsten Bögershausen
@ 2011-03-19 18:20 ` Erik Faye-Lund
  2011-03-19 19:30   ` Piotr Krukowiecki
  2011-03-20  5:50 ` Junio C Hamano
  1 sibling, 1 reply; 8+ messages in thread
From: Erik Faye-Lund @ 2011-03-19 18:20 UTC (permalink / raw)
  To: Torsten Bögershausen; +Cc: git

2011/3/19 Torsten Bögershausen <tboegi@web.de>:
> diff --git a/compat/win32/same-file.c b/compat/win32/same-file.c
> new file mode 100644
> index 0000000..bb1a791
> --- /dev/null
> +++ b/compat/win32/same-file.c
> @@ -0,0 +1,26 @@
> +#include "../../git-compat-util.h"
> +#include "../win32.h"
> +
> +int win_is_same_file(const char *a, const char *b)

The compat/win32-folder is usually used to provide implementations of
POSIX APIs lacking on Windows. I don't think this is appropriate
functionality to be put there...

> +{
> +       BY_HANDLE_FILE_INFORMATION hia, hib;
> +       HANDLE h;
> +
> +       h = CreateFile(a, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, 0, NULL);
> +       if (INVALID_HANDLE_VALUE == h)
> +               return 0;
> +       if (!(GetFileInformationByHandle(h,&hia)))
> +               return 0;
> +  CloseHandle(h);

Indentation-slip?

> +
> +       h = CreateFile(b, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, 0, NULL);
> +       if (INVALID_HANDLE_VALUE == h)
> +               return 0;
> +       if (!(GetFileInformationByHandle(h,&hib)))
> +               return 0;
> +  CloseHandle(h);
> +

Same as above...?

> +#if defined (WIN32) || defined(__CYGWIN__)
> +/* MinGW or MSVC or cygwin */
> +int win_is_same_file(const char *a, const char *b);
> +#define is_same_file(a,b) win_is_same_file((a),(b))
> +#else
> +static inline int is_same_file(const char *a, const char *b)
> +{
> +       struct stat sta, stb;
> +       if (lstat(a, &sta) ||
> +           lstat(b, &stb))
> +               return 0;
> +       return sta.st_ino && sta.st_dev == stb.st_dev && sta.st_ino == stb.st_ino;
> +}
> +#endif

This isn't how we usually do things like this. We usually define stuff
in compat/mingw.h (which should really be called compat/win32.h
instead, since it's used by our MSVC builds as well. But that's a
different discussion), and check if it's defined near the bottom of
git-compat-util.h.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] Allow git mv FileA fILEa on case ignore file systems
  2011-03-19 18:20 ` Erik Faye-Lund
@ 2011-03-19 19:30   ` Piotr Krukowiecki
  0 siblings, 0 replies; 8+ messages in thread
From: Piotr Krukowiecki @ 2011-03-19 19:30 UTC (permalink / raw)
  To: kusmabite; +Cc: Torsten Bögershausen, git

W dniu 19.03.2011 19:20, Erik Faye-Lund pisze:
> 2011/3/19 Torsten Bögershausen <tboegi@web.de>:
>> +       if (!(GetFileInformationByHandle(h,&hia)))
>> +               return 0;
>> +  CloseHandle(h);
> 
> Indentation-slip?

I suspect so. CloseHandle line does not use tabs.

Out of curiosity I've roughly counted number of lines that have
correct and incorrect indentation. 

About 8% (~12600 out of 151000 indented lines) are incorrectly indented.
Don't know if that good or bad :)


I have used following to get the numbers:

Bad indentation (includes "Correctly indented comments"):
git$ TAB="`echo -e '\t'`" && grep -E "^[ $TAB]* [ $TAB]*[^ $TAB]" *.sh *.[ch] */*.sh */*.[ch] | wc -l
22495

Correctly indented comments:
git$ TAB="`echo -e '\t'`" && grep -E "^[ $TAB]* [*]" *.sh *.[ch] */*.sh */*.[ch] | wc -l
9911

Good indentation (does not include "Correctly indented comments"):
git$ TAB="`echo -e '\t'`" && grep -E "^$TAB+[^ $TAB]" *.sh *.[ch] */*.sh */*.[ch] | wc -l
128173


-- 
Piotr Krukowiecki

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] Allow git mv FileA fILEa on case ignore file systems
  2011-03-19 14:28 [PATCH v2] Allow git mv FileA fILEa on case ignore file systems Torsten Bögershausen
  2011-03-19 18:20 ` Erik Faye-Lund
@ 2011-03-20  5:50 ` Junio C Hamano
  2011-04-10  5:48   ` Torsten Bögershausen
  1 sibling, 1 reply; 8+ messages in thread
From: Junio C Hamano @ 2011-03-20  5:50 UTC (permalink / raw)
  To: Torsten Bögershausen; +Cc: kusmabite, git

Torsten Bögershausen <tboegi@web.de> writes:

> The typical use case is when a file "FileA" should be renamed into fILEa
> and we are on a case insenstive file system (system core.ignorecase = true).
> Source and destination are the same file, it can be accessed under both names.
> This makes git think that the destination file exists.
> Unless used with --forced, git will refuse the "git mv FileA fILEa".
> This change will allow "git mv FileA fILEa" under the following condition:
> On Linux/Unix/Mac OS X the move is allowed when the inode of the source and
> destination are equal (and they are on the same device).
> This allows renames of MÄRCHEN into Märchen on Mac OS X.
> (As a side effect, a file can be renamed to a name which is already
> hard-linked to the same inode).
> On Windows, the function win_is_same_file() from compat/win32/same-file.c
> is used.
> It calls GetFileInformationByHandle() to check if both files are
> "the same".

Yeek; is it just me or the above single block of text too dense to be
readable? Can you use paragraph breaks?

> The typical use case is when a file "FileA" should be renamed into fILEa
> and we are on a case insenstive file system (system core.ignorecase = true).

Huh? I don't think renaming "FileA" to "fILEa" is typical at all. It is
very rarely done.

> (As a side effect, a file can be renamed to a name which is already
> hard-linked to the same inode).

It is unclear "a side effect" means "an added bonus" or "a regression" in
this sentence. I think this is latter.

Allowing filesystem specific logic to detect that two different "names"
actually refer to a single "file" and help renaming succeed is a sane
approach, but I think this particular implementation is flawed.

The important thing to notice is that "names" and "file" above refer to
the entities from the end user's point of view. Two files hardlinked
together on a filesystem with sane pathname handling are never the same
"file". I would probably have called it equivalent_filenames() to stress
the fact that two _different_ names alias to the same file. is_same_file()
sounds more like you got two different filenames from the filesystem
(i.e. readdir() gave you both at the same time) and you are trying to see
if they are the same file, but that is not the case here.

I also find it a bad taste to make this feature depend on win32; doesn't a
Linux box mounting a vfat filesystem have the same issue that we should be
able to solve with the same code?  Can't we instead have a configuration
variable that tells git that the working tree is on a filesystem with
broken pathname semantics, and what kind of workaround is needed?  Isn't
core.ignorecase already that configuration variable for case insensitve
filesystems [*1*]?

I would imagine that the implementation of equivalent_filenames(a,b) may
be !strcmp(a,b) for a sane filesystem [*2*] and !strcasecmp(a,b) for a
case insensitive filesystem.  For a totally wacky filesystem, your
lstat(2) based one might end up to be the best we could do [*3*].

When two different _names_ "A" and "a" refer to a single file, the only
thing that should happen for "git mv A a" is for the cache entry for "A"
to be moved to cache entry for "a", and no rename("A", "a") should be run,
but I don't see any such change in the code. It may happen to work (or be
a no-op) on Windows, but because builtin/mv.c is supposed to be generic
(and that is the reason you introduced the is_same_file() abstraction in
the first place), I'd still see this as a breakage.


[Footnote]

*1* Off the top of my head, perhaps core.ignorecase may have to grow into
boolean plus extra to cover "this is not just case insensitive, but isn't
even case preserving" kind of broken filesystems like HFS+, but I didn't
think things through.

*2* Incidentally, wouldn't "!strcmp(a,b)" solution suddenly start allowing
"git mv Makefile Makefile" that we currently disallow? Is it a regression
(less safety against an unexpected input) or a good change?

*3* If we can find a solution that does not involve any calls to the
filesystem, it would be ideal, as we can reuse it later in codepaths where
neither file "a" or file "b" exists on the filesystem yet (think: "we are
about to create 'a' and 'b'---is that sensible, or will they overwrite
with each other on this filesystem?").

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] Allow git mv FileA fILEa on case ignore file systems
  2011-03-20  5:50 ` Junio C Hamano
@ 2011-04-10  5:48   ` Torsten Bögershausen
  2011-04-11 16:55     ` Junio C Hamano
  0 siblings, 1 reply; 8+ messages in thread
From: Torsten Bögershausen @ 2011-04-10  5:48 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: kusmabite, git

On 20.03.11 06:50, Junio C Hamano wrote:
========================
>Torsten Bögershausen <tboegi@web.de> writes:
>
>> > The typical use case is when a file "FileA" should be renamed into fILEa
>> > and we are on a case insenstive file system (system core.ignorecase = true).
>> > Source and destination are the same file, it can be accessed under both names.
>> > This makes git think that the destination file exists.
>> > Unless used with --forced, git will refuse the "git mv FileA fILEa".
>> > This change will allow "git mv FileA fILEa" under the following condition:
>> > On Linux/Unix/Mac OS X the move is allowed when the inode of the source and
>> > destination are equal (and they are on the same device).
>> > This allows renames of MÄRCHEN into Märchen on Mac OS X.
>> > (As a side effect, a file can be renamed to a name which is already
>> > hard-linked to the same inode).
>> > On Windows, the function win_is_same_file() from compat/win32/same-file.c
>> > is used.
>> > It calls GetFileInformationByHandle() to check if both files are
>> > "the same".
>Yeek; is it just me or the above single block of text too dense to be
>readable? Can you use paragraph breaks?
Yes
>
>> > The typical use case is when a file "FileA" should be renamed into fILEa
>> > and we are on a case insenstive file system (system core.ignorecase = true).
>Huh? I don't think renaming "FileA" to "fILEa" is typical at all. It is
>very rarely done.
Probably this is not a good example, I changed it to "mv FILE File"

>
>> > (As a side effect, a file can be renamed to a name which is already
>> > hard-linked to the same inode).
>It is unclear "a side effect" means "an added bonus" or "a regression" in
>this sentence. I think this is latter.
If it is too much regression, we can check that the link count in struct stat
"if (1 == st_nlink)" .


>
>Allowing filesystem specific logic to detect that two different "names"
>actually refer to a single "file" and help renaming succeed is a sane
>approach, but I think this particular implementation is flawed.
>
>The important thing to notice is that "names" and "file" above refer to
>the entities from the end user's point of view. Two files hardlinked
>together on a filesystem with sane pathname handling are never the same
>"file". I would probably have called it equivalent_filenames() to stress
>the fact that two _different_ names alias to the same file. is_same_file()
>sounds more like you got two different filenames from the filesystem
>(i.e. readdir() gave you both at the same time) and you are trying to see
>if they are the same file, but that is not the case here.
Ok, I need to distinct beetwen file names and files.


>I also find it a bad taste to make this feature depend on win32; doesn't a
>Linux box mounting a vfat filesystem have the same issue that we should be
>able to solve with the same code?  Can't we instead have a configuration
>variable that tells git that the working tree is on a filesystem with
>broken pathname semantics, and what kind of workaround is needed?  Isn't
>core.ignorecase already that configuration variable for case insensitve
>filesystems [*1*]?
Agreed about the bad taste ;-)
The suggested patch works for Linux/UNIX/MAC OS X and Windows, 
but the description wasn't to clear about it.
The same code is used for all OS, except Windows that needs some specific code 
which is located in cygwin.c and mingw.c

>
>I would imagine that the implementation of equivalent_filenames(a,b) may
>be !strcmp(a,b) for a sane filesystem [*2*] and !strcasecmp(a,b) for a
>case insensitive filesystem.  For a totally wacky filesystem, your
>lstat(2) based one might end up to be the best we could do [*3*].
>
>When two different _names_ "A" and "a" refer to a single file, the only
>thing that should happen for "git mv A a" is for the cache entry for "A"
>to be moved to cache entry for "a", and no rename("A", "a") should be run,
>but I don't see any such change in the code. It may happen to work (or be
>a no-op) on Windows, but because builtin/mv.c is supposed to be generic
>(and that is the reason you introduced the is_same_file() abstraction in
>the first place), I'd still see this as a breakage.
>
Why shouldn't the rename() be done?
"git mv A B" changes both the indes and the file system.
Isn't it natural to have file name  "a" both in the index and in the 
file system after "git mv A a"?
Note: Windows and MAC OS X allow "mv A a" from command line, 
while Linux on VFAT gives an error "'A' and 'a' are the same file".

>
>[Footnote]
>
>*1* Off the top of my head, perhaps core.ignorecase may have to grow into
>boolean plus extra to cover "this is not just case insensitive, but isn't
>even case preserving" kind of broken filesystems like HFS+, but I didn't
>think things through.
It is not that bad.
My HFS+ here is both case insenstive and case preserving,
so that core.ignorecase can be used as it is.

Note: HFS+ can be formated to be case sensitive (at least at MAC OS X 10.6),
but the default is case insensitive.

>
>*2* Incidentally, wouldn't "!strcmp(a,b)" solution suddenly start allowing
>"git mv Makefile Makefile" that we currently disallow? Is it a regression
>(less safety against an unexpected input) or a good change?

When running "git mv git.c git.c", the current git says:
"fatal: can not move directory into itself, source=git.c, destination=git.c"
That test is done earlier in the code path, 
so that this move is still not allowed.
The error message is somewhat confusing, as git.c is a file.
Is it worth to send a separate patch for that?

>
>*3* If we can find a solution that does not involve any calls to the
>filesystem, it would be ideal, as we can reuse it later in codepaths where
>neither file "a" or file "b" exists on the filesystem yet (think: "we are
>about to create 'a' and 'b'---is that sensible, or will they overwrite
>with each other on this filesystem?").
>
That would mean, that git needs to know the encoding of the local filesystem.
But we don't have that at the moment.
What do you have in mind?


I run a test script (see at the end of the mail, save it under test-mv.sh and
execute it with /bin/sh test-mv.sh).
I tested Linux, MAC OS X, cygwin 1.5, cygwin 1.7 and current msys(git).
Short summary: There are different flavors which filenames are equivalent.

I send out a new patch, thank you for reading this long stuff.

/Torsten


=======================
Linux 2.6.34.7-0.7-default
/dev/sda6 on /D type vfat (rw,noexec,nosuid,nodev,gid=100,umask=0002,utf8=true)
enc=UTF-8
a=A    Case ignored
mv: `a' and `A' are the same file

æ=Æ    Case ignored
mv: `æ' and `Æ' are the same file

ø=ø    Case sensitive
mv:  ø  -->  Ø  OK

ä=Ä    Case ignored
mv: `ä' and `Ä' are the same file
==================================================================
Linux 2.6.34.7-0.7-default
/dev/sda6 on /D type vfat (rw)
enc=UTF-8
a=A    Case ignored
mv: `a' and `A' are the same file

æ=æ    Case sensitive
mv:  æ  -->  Æ  OK

ø=ø    Case sensitive
mv:  ø  -->  Ø  OK

ä=ä    Case sensitive
mv:  ä  -->  Ä  OK
==================================================================
Linux 2.6.34.7-0.7-default
//birne/tb on /birne/tb type cifs (rw)
enc=UTF-8
a=A    Case ignored
mv:  a  -->  A  OK

æ=Æ    Case ignored
mv:  æ  -->  Æ  OK

ø=Ø    Case ignored
mv:  ø  -->  Ø  OK

ä=Ä    Case ignored
mv:  ä  -->  Ä  OK
==================================================================
Darwin 10.7.0
/dev/disk0s2 on / (hfs, local, journaled)
enc=UTF-8
a=A    Case ignored
mv:  a  -->  A  OK

æ=Æ    Case ignored
mv:  æ  -->  Æ  OK

ø=Ø    Case ignored
mv:  ø  -->  Ø  OK

ä=Ä    Case ignored
mv:  ä  -->  Ä  OK
==================================================================
Darwin 10.7.0
/dev/disk1s3 on /Volumes/LACIEFAT (msdos, local, nodev, nosuid, noowners)
enc=UTF-8
a=A    Case ignored
mv:  a  -->  A  OK

æ=Æ    Case ignored
mv:  æ  -->  Æ  OK

ø=Ø    Case ignored
mv:  ø  -->  Ø  OK

ä=Ä    Case ignored
mv:  ä  -->  Ä  OK
==================================================================
Darwin 10.7.0
/dev/disk1s2 on /Volumes/LacieMacOS (hfs, local, nodev, nosuid, journaled)
enc=UTF-8
a=a    Case sensitive
mv:  a  -->  A  OK

æ=æ    Case sensitive
mv:  æ  -->  Æ  OK

ø=ø    Case sensitive
mv:  ø  -->  Ø  OK

ä=ä    Case sensitive
mv:  ä  -->  Ä  OK
==================================================================
CYGWIN_NT-5.1 1.5.25(0.156/4/2)
C:\cygwin on / type system (binmode)
enc=ISO-8859-1
a=A    Case ignored
mv:  a  -->  A  OK

æ=Æ    Case ignored
mv: `æ' and `Æ' are the same file

ø=Ø    Case ignored
mv: `ø' and `Ø' are the same file

ä=Ä    Case ignored
mv: `ä' and `Ä' are the same file
==================================================================
CYGWIN_NT-5.1 1.7.8(0.236/5/3)
C:/cygwin on / type ntfs (binary,auto)
enc=UTF-8
a=A    Case ignored
mv:  a  -->  A  OK

æ=Æ    Case ignored
mv: `æ' and `Æ' are the same file

ø=Ø    Case ignored
mv: `ø' and `Ø' are the same file

ä=Ä    Case ignored
mv: `ä' and `Ä' are the same file
==================================================================
MINGW32_NT-5.1 1.0.12(0.46/3/2)
C:\msysgit\msysgit on / type user (binmode,noumount)
enc=ISO-8859-1
a=A    Case ignored
mv:  a  -->  A  OK

æ=Æ    Case ignored
mv:  æ  -->  Æ  OK

ø=Ø    Case ignored
mv:  ø  -->  Ø  OK

ä=Ä    Case ignored
mv:  ä  -->  Ä  OK





==========================================================
#!/bin/sh
testmv() {
  (echo $1 > $1 && echo $2 > $2) || {
    echo >&2 "Wrong encoding $enc"
    cd ..
    rm -rf $$.trash
    exit 1
  }
  a=$(cat $1)
  printf "$1=$a"
  if test $(cat $1) != $1; then
    echo "    Case ignored"
  else
    echo "    Case sensitive"
  fi
  rm -f $1 $2

  echo $1 > $1
  if mv $1 $2; then
    echo "mv:  $1  -->  $2  OK"
  fi
  rm -f $1 $2
  echo
  return 0
}

rm -rf $$.trash
mkdir $$.trash || {
  echo >&2 "mkdir $$.trash failed"
  exit 1
}

cd $$.trash || {
  echo >&2 "cd $$.trash failed"
  exit 1
}
uname -sr
# get root dir
rdir=$PWD
while test "$rdir" != ""
do
  if mount | grep $rdir; then
    break
  fi
  rdir=${rdir%/*}
done
if test -z $rdir; then
  mount | grep " / "
fi

enc=ISO-8859-1
#Find out if utf-8 is used
case "$LANG" in
  *[uU][tT][fF]*8)
  enc=UTF-8
  ;;
esac
case "$LC_CTYPE" in
  *[uU][tT][fF]*8)
  enc=UTF-8
  ;;
esac

case $(uname) in
  Darwin)
  enc=UTF-8
  ;;
esac

echo enc=$enc

case "$enc" in
  UTF-8)
  ae=$(printf '\303\246')
  AE=$(printf '\303\206')
  oe=$(printf '\303\270')
  OE=$(printf '\303\230')
  auml=$(printf '\303\244')
  Auml=$(printf '\303\204')
  ;;
  *)
  ae=$(printf '\346')
  AE=$(printf '\306')
  oe=$(printf '\370')
  OE=$(printf '\330')
  auml=$(printf '\344')
  Auml=$(printf '\304')
  ;;
esac

testmv a A
testmv $ae $AE
testmv $oe $OE
testmv $auml $Auml

cd ..
rm -rf $$.trash

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] Allow git mv FileA fILEa on case ignore file systems
  2011-04-10  5:48   ` Torsten Bögershausen
@ 2011-04-11 16:55     ` Junio C Hamano
  2011-04-11 20:05       ` Torsten Bögershausen
  2011-04-12  6:16       ` Johannes Sixt
  0 siblings, 2 replies; 8+ messages in thread
From: Junio C Hamano @ 2011-04-11 16:55 UTC (permalink / raw)
  To: Torsten Bögershausen; +Cc: kusmabite, git

Torsten Bögershausen <tboegi@web.de> writes:

>>When two different _names_ "A" and "a" refer to a single file, the only
>>thing that should happen for "git mv A a" is for the cache entry for "A"
>>to be moved to cache entry for "a", and no rename("A", "a") should be run,
>>but I don't see any such change in the code. It may happen to work (or be
>>a no-op) on Windows, but because builtin/mv.c is supposed to be generic
>>(and that is the reason you introduced the is_same_file() abstraction in
>>the first place), I'd still see this as a breakage.
>>
> Why shouldn't the rename() be done?
> "git mv A B" changes both the indes and the file system.
> Isn't it natural to have file name  "a" both in the index and in the 
> file system after "git mv A a"?
> Note: Windows and MAC OS X allow "mv A a" from command line, 
> while Linux on VFAT gives an error "'A' and 'a' are the same file".

Yeah, I forgot about the primary thing we are trying to do in this
discussion.  Sorry about that.  My thinking stopped at 'if we rename "A"
to "a in the index, that is sufficient.  We already know that we can still
open("A") because the filesystem is case insensitive.'

In fact, we want both the index entry "A" renamed to "a" _and_ also we
want to see next "/bin/ls" to show "a", not "A".  For the latter, we do
want to run rename(2) on them.

There was another thing that made me worry about running rename(2) on two
equivalent filenames.  You said on Linux VFAT "mv A a" fails.  Does the
underlying rename(2) fail when you do this?

	status = rename("A", "a");

If old and new resolve to the same existing directory entry or different
directory entries for the same existing file, POSIX says that rename(old,
new) should succeed and perform no other action, so the above should
succeed on correctly implemented case insensitive filesystems.

But we know not all FS implementations are perfect.  If this can result in
an unnecessary failure, it would be far better if we are careful to avoid
running rename("A", "a") and just rename the index entry to make sure "git
mv A a" succeeds, than trying to fulfil the "we want to see next '/bin/ls'
to show 'a' not 'A'" wish and make the whole "git mv A a" fail.

But of course we can always blame the breakage on filesystems.  I am
leaning to prefer running rename("A", "a") unconditionally.

Thanks.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] Allow git mv FileA fILEa on case ignore file systems
  2011-04-11 16:55     ` Junio C Hamano
@ 2011-04-11 20:05       ` Torsten Bögershausen
  2011-04-12  6:16       ` Johannes Sixt
  1 sibling, 0 replies; 8+ messages in thread
From: Torsten Bögershausen @ 2011-04-11 20:05 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: kusmabite, Git Mailing List, tboegi

On 04/11/2011 06:55 PM, Junio C Hamano wrote:
> Torsten Bögershausen<tboegi@web.de>  writes:
>
>>> When two different _names_ "A" and "a" refer to a single file, the only
>>> thing that should happen for "git mv A a" is for the cache entry for "A"
>>> to be moved to cache entry for "a", and no rename("A", "a") should be run,
>>> but I don't see any such change in the code. It may happen to work (or be
>>> a no-op) on Windows, but because builtin/mv.c is supposed to be generic
>>> (and that is the reason you introduced the is_same_file() abstraction in
>>> the first place), I'd still see this as a breakage.
>>>
>> Why shouldn't the rename() be done?
>> "git mv A B" changes both the indes and the file system.
>> Isn't it natural to have file name  "a" both in the index and in the
>> file system after "git mv A a"?
>> Note: Windows and MAC OS X allow "mv A a" from command line,
>> while Linux on VFAT gives an error "'A' and 'a' are the same file".
> Yeah, I forgot about the primary thing we are trying to do in this
> discussion.  Sorry about that.  My thinking stopped at 'if we rename "A"
> to "a in the index, that is sufficient.  We already know that we can still
> open("A") because the filesystem is case insensitive.'
>
> In fact, we want both the index entry "A" renamed to "a" _and_ also we
> want to see next "/bin/ls" to show "a", not "A".  For the latter, we do
> want to run rename(2) on them.
>
> There was another thing that made me worry about running rename(2) on two
> equivalent filenames.  You said on Linux VFAT "mv A a" fails.  Does the
> underlying rename(2) fail when you do this?
>
> 	status = rename("A", "a");
>
> If old and new resolve to the same existing directory entry or different
> directory entries for the same existing file, POSIX says that rename(old,
> new) should succeed and perform no other action, so the above should
> succeed on correctly implemented case insensitive filesystems.
>
> But we know not all FS implementations are perfect.  If this can result in
> an unnecessary failure, it would be far better if we are careful to avoid
> running rename("A", "a") and just rename the index entry to make sure "git
> mv A a" succeeds, than trying to fulfil the "we want to see next '/bin/ls'
> to show 'a' not 'A'" wish and make the whole "git mv A a" fail.
>
> But of course we can always blame the breakage on filesystems.  I am
> leaning to prefer running rename("A", "a") unconditionally.
>
> Thanks.
Thanks for reading and for the reply.

>You said on Linux VFAT "mv A a" fails

My excuses for being unclear, as "fails" often means returning -1 and 
setting errno.
But this is not the case here. I should have said:
rename("A", "a") returns 0 but does not rename the file.

Here comes the  long version. I wrote a short program called "rename",  
and run some tests.
The /D is a VFAT partition, mounted writable for root only.
-----------------------------------
tb@wanderer:/D/test> ls
A  B  x
tb@wanderer:/D/test> rename A A
rename A A res=0 Success
tb@wanderer:/D/test> ls
A  B  x
tb@wanderer:/D/test> rename A a
rename A a res=0 Success
tb@wanderer:/D/test> ls
A  B  x
tb@wanderer:/D/test> rename A D
rename A D res=-1 Permission denied
tb@wanderer:/D/test>
------------------------------------

I had a short look into the Linux kernel. A comment in namei_vfat.c 
indicates that
rename "filename" "FILENAME" is not supported for now.
(and all respect and thanks from my side to the people who made VFAT 
working).
(and of course to the git people)

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2] Allow git mv FileA fILEa on case ignore file systems
  2011-04-11 16:55     ` Junio C Hamano
  2011-04-11 20:05       ` Torsten Bögershausen
@ 2011-04-12  6:16       ` Johannes Sixt
  1 sibling, 0 replies; 8+ messages in thread
From: Johannes Sixt @ 2011-04-12  6:16 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Torsten Bögershausen, kusmabite, git

Am 4/11/2011 18:55, schrieb Junio C Hamano:
> Yeah, I forgot about the primary thing we are trying to do in this
> discussion.  Sorry about that.  My thinking stopped at 'if we rename "A"
> to "a in the index, that is sufficient.  We already know that we can still
> open("A") because the filesystem is case insensitive.'
> 
> In fact, we want both the index entry "A" renamed to "a" _and_ also we
> want to see next "/bin/ls" to show "a", not "A".  For the latter, we do
> want to run rename(2) on them.

I wonder why we need this at all. You can always break this into two
operations:

  git mv A a1
  git mv a1 a

Is 'git mv A a' on a case-insensitive filesystem such an every-day
operation that we have to provide special support for it in git?

-- Hannes

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2011-04-12  6:16 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-03-19 14:28 [PATCH v2] Allow git mv FileA fILEa on case ignore file systems Torsten Bögershausen
2011-03-19 18:20 ` Erik Faye-Lund
2011-03-19 19:30   ` Piotr Krukowiecki
2011-03-20  5:50 ` Junio C Hamano
2011-04-10  5:48   ` Torsten Bögershausen
2011-04-11 16:55     ` Junio C Hamano
2011-04-11 20:05       ` Torsten Bögershausen
2011-04-12  6:16       ` Johannes Sixt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).