From: "René Scharfe" <l.s.r@web.de>
To: Ulrike Fischer <luatex@nililand.de>, git@vger.kernel.org
Cc: Junio C Hamano <gitster@pobox.com>
Subject: [PATCH v2] archive-zip: mark text files in archives
Date: Thu, 05 Mar 2015 20:06:02 +0100 [thread overview]
Message-ID: <54F8A91A.2000003@web.de> (raw)
In-Reply-To: <54F77573.80109@web.de>
Set the text flag for ZIP archive entries that look like text files so
that unzip -a can be used to perform end-of-line conversions. Info-ZIP
zip does the same.
Detect binary files the same way as git diff and git grep do, namely by
checking for the attribute "diff" and its negation "-diff" and, if
neither is set, by falling back to checking for the presence of NUL
bytes in the first few bytes of the file contents.
7-Zip, Windows' built-in ZIP functionality and Info-ZIP unzip without
the switch -a are not affected by the change and still extract text
files without doing any end-of-line conversions.
NB: The actual end-of-line style used in the archive entries doesn't
matter to unzip -a, as it converts any CR, CRLF and LF to the line end
characters appropriate for the platform it is running on.
Suggested-by: Ulrike Fischer <luatex@nililand.de>
Signed-off-by: Rene Scharfe <l.s.r@web.de>
---
archive-zip.c | 25 ++++++++++++++++++++++++-
t/t5003-archive-zip.sh | 47 ++++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 70 insertions(+), 2 deletions(-)
diff --git a/archive-zip.c b/archive-zip.c
index 4bde019..0f9e87f 100644
--- a/archive-zip.c
+++ b/archive-zip.c
@@ -5,6 +5,8 @@
#include "archive.h"
#include "streaming.h"
#include "utf8.h"
+#include "userdiff.h"
+#include "xdiff-interface.h"
static int zip_date;
static int zip_time;
@@ -189,6 +191,16 @@ static int has_only_ascii(const char *s)
}
}
+static int entry_is_binary(const char *path, const void *buffer, size_t size)
+{
+ struct userdiff_driver *driver = userdiff_find_by_path(path);
+ if (!driver)
+ driver = userdiff_find_by_name("default");
+ if (driver->binary != -1)
+ return driver->binary;
+ return buffer_is_binary(buffer, size);
+}
+
#define STREAM_BUFFER_SIZE (1024 * 16)
static int write_zip_entry(struct archiver_args *args,
@@ -210,6 +222,8 @@ static int write_zip_entry(struct archiver_args *args,
struct git_istream *stream = NULL;
unsigned long flags = 0;
unsigned long size;
+ int is_binary = -1;
+ const char *path_without_prefix = path + args->baselen;
crc = crc32(0, NULL, 0);
@@ -256,6 +270,8 @@ static int write_zip_entry(struct archiver_args *args,
return error("cannot read %s",
sha1_to_hex(sha1));
crc = crc32(crc, buffer, size);
+ is_binary = entry_is_binary(path_without_prefix,
+ buffer, size);
out = buffer;
}
compressed_size = (method == 0) ? size : 0;
@@ -300,7 +316,6 @@ static int write_zip_entry(struct archiver_args *args,
copy_le16(dirent.extra_length, ZIP_EXTRA_MTIME_SIZE);
copy_le16(dirent.comment_length, 0);
copy_le16(dirent.disk, 0);
- copy_le16(dirent.attr1, 0);
copy_le32(dirent.attr2, attr2);
copy_le32(dirent.offset, zip_offset);
@@ -328,6 +343,9 @@ static int write_zip_entry(struct archiver_args *args,
if (readlen <= 0)
break;
crc = crc32(crc, buf, readlen);
+ if (is_binary == -1)
+ is_binary = entry_is_binary(path_without_prefix,
+ buf, readlen);
write_or_die(1, buf, readlen);
}
close_istream(stream);
@@ -361,6 +379,9 @@ static int write_zip_entry(struct archiver_args *args,
if (readlen <= 0)
break;
crc = crc32(crc, buf, readlen);
+ if (is_binary == -1)
+ is_binary = entry_is_binary(path_without_prefix,
+ buf, readlen);
zstream.next_in = buf;
zstream.avail_in = readlen;
@@ -405,6 +426,8 @@ static int write_zip_entry(struct archiver_args *args,
free(deflated);
free(buffer);
+ copy_le16(dirent.attr1, !is_binary);
+
memcpy(zip_dir + zip_dir_offset, &dirent, ZIP_DIR_HEADER_SIZE);
zip_dir_offset += ZIP_DIR_HEADER_SIZE;
memcpy(zip_dir + zip_dir_offset, path, pathlen);
diff --git a/t/t5003-archive-zip.sh b/t/t5003-archive-zip.sh
index c929db5..14744b2 100755
--- a/t/t5003-archive-zip.sh
+++ b/t/t5003-archive-zip.sh
@@ -33,6 +33,37 @@ check_zip() {
test_expect_success UNZIP " validate file contents" "
diff -r a ${dir_with_prefix}a
"
+
+ dir=eol_$1
+ dir_with_prefix=$dir/$2
+ extracted=${dir_with_prefix}a
+ original=a
+
+ test_expect_success UNZIP " extract ZIP archive with EOL conversion" '
+ (mkdir $dir && cd $dir && "$GIT_UNZIP" -a ../$zipfile)
+ '
+
+ test_expect_success UNZIP " validate that text files are converted" "
+ test_cmp_bin $extracted/text.cr $extracted/text.crlf &&
+ test_cmp_bin $extracted/text.cr $extracted/text.lf
+ "
+
+ test_expect_success UNZIP " validate that binary files are unchanged" "
+ test_cmp_bin $original/binary.cr $extracted/binary.cr &&
+ test_cmp_bin $original/binary.crlf $extracted/binary.crlf &&
+ test_cmp_bin $original/binary.lf $extracted/binary.lf
+ "
+
+ test_expect_success UNZIP " validate that diff files are converted" "
+ test_cmp_bin $extracted/diff.cr $extracted/diff.crlf &&
+ test_cmp_bin $extracted/diff.cr $extracted/diff.lf
+ "
+
+ test_expect_success UNZIP " validate that -diff files are unchanged" "
+ test_cmp_bin $original/nodiff.cr $extracted/nodiff.cr &&
+ test_cmp_bin $original/nodiff.crlf $extracted/nodiff.crlf &&
+ test_cmp_bin $original/nodiff.lf $extracted/nodiff.lf
+ "
}
test_expect_success \
@@ -41,6 +72,18 @@ test_expect_success \
echo simple textfile >a/a &&
mkdir a/bin &&
cp /bin/sh a/bin &&
+ printf "text\r" >a/text.cr &&
+ printf "text\r\n" >a/text.crlf &&
+ printf "text\n" >a/text.lf &&
+ printf "text\r" >a/nodiff.cr &&
+ printf "text\r\n" >a/nodiff.crlf &&
+ printf "text\n" >a/nodiff.lf &&
+ printf "\0\r" >a/binary.cr &&
+ printf "\0\r\n" >a/binary.crlf &&
+ printf "\0\n" >a/binary.lf &&
+ printf "\0\r" >a/diff.cr &&
+ printf "\0\r\n" >a/diff.crlf &&
+ printf "\0\n" >a/diff.lf &&
printf "A\$Format:%s\$O" "$SUBSTFORMAT" >a/substfile1 &&
printf "A not substituted O" >a/substfile2 &&
(p=long_path_to_a_file && cd a &&
@@ -66,7 +109,9 @@ test_expect_success 'add files to repository' '
GIT_COMMITTER_DATE="2005-05-27 22:00" git commit -m initial
'
-test_expect_success 'setup export-subst' '
+test_expect_success 'setup export-subst and diff attributes' '
+ echo "a/nodiff.* -diff" >>.git/info/attributes &&
+ echo "a/diff.* diff" >>.git/info/attributes &&
echo "substfile?" export-subst >>.git/info/attributes &&
git log --max-count=1 "--pretty=format:A${SUBSTFORMAT}O" HEAD \
>a/substfile1
--
2.3.1
prev parent reply other threads:[~2015-03-05 19:06 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-02-23 13:58 zip files created with git archive flags text files as binaries Ulrike Fischer
2015-02-23 19:30 ` René Scharfe
2015-03-04 21:13 ` René Scharfe
2015-03-04 21:13 ` [PATCH] archive-zip: add --text parameter René Scharfe
2015-03-05 2:16 ` Junio C Hamano
2015-03-05 15:27 ` René Scharfe
2015-03-05 15:27 ` René Scharfe
2015-03-05 19:06 ` René Scharfe [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=54F8A91A.2000003@web.de \
--to=l.s.r@web.de \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=luatex@nililand.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).