From 4aff646d17e81084000100da586bef8f9d6a17a5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ren=C3=A9=20Scharfe?=
Date: Thu, 5 Mar 2015 20:06:02 +0100
Subject: archive-zip: mark text files in archives

Set the text flag for ZIP archive entries that look like text files so
that unzip -a can be used to perform end-of-line conversions. Info-ZIP
zip does the same.

Detect binary files the same way as git diff and git grep do, namely by
checking for the attribute "diff" and its negation "-diff", and if none
is found by falling back to checking for the presence of NUL bytes in
the first few bytes of the file contents.

7-Zip, Windows' built-in ZIP functionality and Info-ZIP unzip without
the switch -a are not affected by the change and still extract text
files without doing any end-of-line conversions.

NB: The actual end-of-line style used in the archive entries doesn't
matter to unzip -a, as it converts any CR, CRLF and LF to the line end
characters appropriate for the platform it is running on.

Suggested-by: Ulrike Fischer
Signed-off-by: Rene Scharfe
Signed-off-by: Junio C Hamano
---
 archive-zip.c | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

(limited to 'archive-zip.c')

diff --git a/archive-zip.c b/archive-zip.c
index 4bde019bc..0f9e87f46 100644
--- a/archive-zip.c
+++ b/archive-zip.c
@@ -5,6 +5,8 @@
 #include "archive.h"
 #include "streaming.h"
 #include "utf8.h"
+#include "userdiff.h"
+#include "xdiff-interface.h"
 
 static int zip_date;
 static int zip_time;
@@ -189,6 +191,16 @@ static int has_only_ascii(const char *s)
 	}
 }
 
+static int entry_is_binary(const char *path, const void *buffer, size_t size)
+{
+	struct userdiff_driver *driver = userdiff_find_by_path(path);
+	if (!driver)
+		driver = userdiff_find_by_name("default");
+	if (driver->binary != -1)
+		return driver->binary;
+	return buffer_is_binary(buffer, size);
+}
+
 #define STREAM_BUFFER_SIZE (1024 * 16)
 
 static int write_zip_entry(struct archiver_args *args,
@@ -210,6 +222,8 @@ static int write_zip_entry(struct archiver_args *args,
 	struct git_istream *stream = NULL;
 	unsigned long flags = 0;
 	unsigned long size;
+	int is_binary = -1;
+	const char *path_without_prefix = path + args->baselen;
 
 	crc = crc32(0, NULL, 0);
 
@@ -256,6 +270,8 @@ static int write_zip_entry(struct archiver_args *args,
 				return error("cannot read %s",
 					     sha1_to_hex(sha1));
 			crc = crc32(crc, buffer, size);
+			is_binary = entry_is_binary(path_without_prefix,
+						    buffer, size);
 			out = buffer;
 		}
 		compressed_size = (method == 0) ? size : 0;
@@ -300,7 +316,6 @@ static int write_zip_entry(struct archiver_args *args,
 	copy_le16(dirent.extra_length, ZIP_EXTRA_MTIME_SIZE);
 	copy_le16(dirent.comment_length, 0);
 	copy_le16(dirent.disk, 0);
-	copy_le16(dirent.attr1, 0);
 	copy_le32(dirent.attr2, attr2);
 	copy_le32(dirent.offset, zip_offset);
 
@@ -328,6 +343,9 @@ static int write_zip_entry(struct archiver_args *args,
 			if (readlen <= 0)
 				break;
 			crc = crc32(crc, buf, readlen);
+			if (is_binary == -1)
+				is_binary = entry_is_binary(path_without_prefix,
+							    buf, readlen);
 			write_or_die(1, buf, readlen);
 		}
 		close_istream(stream);
@@ -361,6 +379,9 @@ static int write_zip_entry(struct archiver_args *args,
 			if (readlen <= 0)
 				break;
 			crc = crc32(crc, buf, readlen);
+			if (is_binary == -1)
+				is_binary = entry_is_binary(path_without_prefix,
+							    buf, readlen);
 
 			zstream.next_in = buf;
 			zstream.avail_in = readlen;
@@ -405,6 +426,8 @@ static int write_zip_entry(struct archiver_args *args,
 	free(deflated);
 	free(buffer);
 
+	copy_le16(dirent.attr1, !is_binary);
+
 	memcpy(zip_dir + zip_dir_offset, &dirent, ZIP_DIR_HEADER_SIZE);
 	zip_dir_offset += ZIP_DIR_HEADER_SIZE;
 	memcpy(zip_dir + zip_dir_offset, path, pathlen);
-- 
cgit v1.2.1