aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- Makefile 3
-rw-r--r-- cache.h 5
-rw-r--r-- config.c 5
-rw-r--r-- convert.c 186
-rw-r--r-- diff.c 17
-rw-r--r-- entry.c 16
-rw-r--r-- environment.c 1
-rw-r--r-- sha1_file.c 23
8 files changed, 251 insertions, 5 deletions
diff --git a/Makefile b/Makefile
index 40bdcff69..60496ff95 100644
--- a/Makefile
+++ b/Makefile
@@ -262,7 +262,8 @@ LIB_OBJS = \
revision.o pager.o tree-walk.o xdiff-interface.o \
write_or_die.o trace.o list-objects.o grep.o \
alloc.o merge-file.o path-list.o help.o unpack-trees.o $(DIFF_OBJS) \
- color.o wt-status.o archive-zip.o archive-tar.o shallow.o utf8.o
+ color.o wt-status.o archive-zip.o archive-tar.o shallow.o utf8.o \
+ convert.o
BUILTIN_OBJS = \
builtin-add.o \
diff --git a/cache.h b/cache.h
index c62b0b090..9c019e8bb 100644
--- a/cache.h
+++ b/cache.h
@@ -201,6 +201,7 @@ extern const char *apply_default_whitespace;
extern int zlib_compression_level;
extern size_t packed_git_window_size;
extern size_t packed_git_limit;
+extern int auto_crlf;
#define GIT_REPO_VERSION 0
extern int repository_format_version;
@@ -468,4 +469,8 @@ extern int nfvasprintf(char **str, const char *fmt, va_list va);
extern void trace_printf(const char *format, ...);
extern void trace_argv_printf(const char **argv, int count, const char *format, ...);
+/* convert.c */
+extern int convert_to_git(const char *path, char **bufp, unsigned long *sizep);
+extern int convert_to_working_tree(const char *path, char **bufp, unsigned long *sizep);
+
#endif /* CACHE_H */
diff --git a/config.c b/config.c
index d82107124..ffe02129a 100644
--- a/config.c
+++ b/config.c
@@ -324,6 +324,11 @@ int git_default_config(const char *var, const char *value)
return 0;
}
+ if (!strcmp(var, "core.autocrlf")) {
+ auto_crlf = git_config_bool(var, value);
+ return 0;
+ }
+
if (!strcmp(var, "user.name")) {
strlcpy(git_default_name, value, sizeof(git_default_name));
return 0;
diff --git a/convert.c b/convert.c
new file mode 100644
index 000000000..13beb7058
--- /dev/null
+++ b/convert.c
@@ -0,0 +1,186 @@
+#include "cache.h"
+/*
+ * convert.c - convert a file when checking it out and checking it in.
+ *
+ * This should use the pathname to decide on whether it wants to do some
+ * more interesting conversions (automatic gzip/unzip, general format
+ * conversions etc etc), but by default it just does automatic CRLF<->LF
+ * translation when the "auto_crlf" option is set.
+ */
+
+/*
+ * Per-buffer character statistics collected by gather_stats().
+ *
+ * The counters are "unsigned long" to match the width of the buffer
+ * sizes used throughout this file.  The converters derive the size of
+ * the output buffer from these counts (size - crlf, size + lf - crlf),
+ * so a counter that wrapped on a buffer with 2^32 or more line endings
+ * would produce an undersized allocation.
+ */
+struct text_stat {
+	/*
+	 * CR, LF and CRLF counts.  A CRLF pair is counted once in "crlf"
+	 * and additionally once each in "cr" and "lf".
+	 */
+	unsigned long cr, lf, crlf;
+
+	/* These are just approximations! */
+	unsigned long printable, nonprintable;
+};
+
+/*
+ * Walk the first "size" bytes of "buf" once and tally them into *stats,
+ * which is cleared first.
+ *
+ *  - '\r' bumps "cr"; when the very next byte is '\n' it bumps "crlf" too.
+ *  - '\n' bumps "lf" (also when it is the second half of a CRLF pair).
+ *  - BS, HT, ESC and FF are counted as printable.
+ *  - DEL (127) and every other byte below 32 are counted as nonprintable.
+ *  - Everything else, including bytes >= 128, is counted as printable.
+ */
+static void gather_stats(const char *buf, unsigned long size, struct text_stat *stats)
+{
+	const char *p = buf;
+	unsigned long left;
+
+	memset(stats, 0, sizeof(*stats));
+
+	for (left = size; left; left--, p++) {
+		unsigned char ch = *p;
+
+		switch (ch) {
+		case '\r':
+			stats->cr++;
+			/* Only peek ahead while a following byte exists. */
+			if (left > 1 && p[1] == '\n')
+				stats->crlf++;
+			break;
+		case '\n':
+			stats->lf++;
+			break;
+		/* BS, HT, ESC and FF */
+		case '\b': case '\t': case '\033': case '\014':
+			stats->printable++;
+			break;
+		/* DEL */
+		case 127:
+			stats->nonprintable++;
+			break;
+		default:
+			if (ch < 32)
+				stats->nonprintable++;
+			else
+				stats->printable++;
+			break;
+		}
+	}
+}
+
+/*
+ * Binary-vs-text guess; the same heuristics as diff.c::mmfile_is_binary().
+ *
+ * Returns 1 ("binary") when the nonprintable count exceeds 1/128th of
+ * the printable count (rounded down), and 0 otherwise.  "size" is
+ * accepted but not consulted by the current heuristic.
+ *
+ * Other heuristics?  Average line length might be relevant, as might
+ * LF vs CR vs CRLF counts..
+ *
+ * NOTE! It might be normal to have a low ratio of CRLF to LF
+ * (somebody starts with a LF-only file and edits it with an editor
+ * that adds CRLF only to lines that are added..). But do we
+ * want to support CR-only? Probably not.
+ */
+static int is_binary(unsigned long size, struct text_stat *stats)
+{
+	unsigned long allowed = stats->printable >> 7;
+
+	return allowed < stats->nonprintable;
+}
+
+/*
+ * Convert working-tree content to the form stored in git: when
+ * "auto_crlf" is set and the buffer looks like CRLF text, every CR is
+ * dropped so that only LF line endings remain.
+ *
+ * On entry *bufp / *sizep describe the input.  Returns 0 and leaves both
+ * untouched when nothing is converted.  Returns 1 after pointing *bufp
+ * at a freshly xmalloc()ed buffer and storing its length in *sizep; the
+ * original buffer is not released here, that is left to the caller.
+ *
+ * "path" is not looked at yet.
+ * FIXME! Other pluggable conversions should go here, based on filename
+ * patterns. Right now we just do the stupid auto-CRLF one.
+ */
+int convert_to_git(const char *path, char **bufp, unsigned long *sizep)
+{
+	struct text_stat stats;
+	unsigned long len, out_len, i;
+	char *src, *dst;
+
+	if (!auto_crlf)
+		return 0;
+
+	len = *sizep;
+	if (len == 0)
+		return 0;
+	src = *bufp;
+
+	gather_stats(src, len, &stats);
+
+	/*
+	 * Leave the buffer alone when
+	 *  - it has no CR at all (nothing to convert),
+	 *  - it has bare CR characters (we do not even try; does anybody
+	 *    do that crazy stuff?), or
+	 *  - the binary-vs-text heuristic says it is binary.
+	 */
+	if (stats.cr == 0 || stats.cr != stats.crlf || is_binary(len, &stats))
+		return 0;
+
+	/*
+	 * Every CR belongs to a CRLF pair here, so the output is exactly
+	 * one byte shorter per pair.  Hand the new buffer to the caller
+	 * and report the switch by returning true.
+	 */
+	out_len = len - stats.crlf;
+	dst = xmalloc(out_len);
+	*bufp = dst;
+	*sizep = out_len;
+
+	for (i = 0; i < len; i++) {
+		if (src[i] != '\r')
+			*dst++ = src[i];
+	}
+
+	return 1;
+}
+
+/*
+ * Convert git-internal content to working-tree form: when "auto_crlf"
+ * is set and the buffer looks like text, every LF that is not already
+ * preceded by CR gets a CR inserted in front of it.
+ *
+ * On entry *bufp / *sizep describe the input.  Returns 0 and leaves both
+ * untouched when nothing is converted.  Returns 1 after pointing *bufp
+ * at a freshly xmalloc()ed buffer and storing its length in *sizep; the
+ * original buffer is not released here, that is left to the caller.
+ *
+ * "path" is not looked at yet.
+ * FIXME! Other pluggable conversions should go here, based on filename
+ * patterns. Right now we just do the stupid auto-CRLF one.
+ */
+int convert_to_working_tree(const char *path, char **bufp, unsigned long *sizep)
+{
+	struct text_stat stats;
+	unsigned long len, out_len, i;
+	unsigned char prev = 0;
+	char *src, *dst;
+
+	if (!auto_crlf)
+		return 0;
+
+	len = *sizep;
+	if (len == 0)
+		return 0;
+	src = *bufp;
+
+	gather_stats(src, len, &stats);
+
+	/* No LF? Nothing to convert, regardless. */
+	if (stats.lf == 0)
+		return 0;
+
+	/* Every LF already part of a CRLF pair: it was in CRLF format. */
+	if (stats.lf == stats.crlf)
+		return 0;
+
+	/* Bare CR characters, or binary-looking data: do not touch it. */
+	if (stats.cr != stats.crlf || is_binary(len, &stats))
+		return 0;
+
+	/*
+	 * One extra byte is needed for each LF that lacks its CR.  Hand
+	 * the new buffer to the caller and report the switch by returning
+	 * true.
+	 */
+	out_len = len + stats.lf - stats.crlf;
+	dst = xmalloc(out_len);
+	*bufp = dst;
+	*sizep = out_len;
+
+	for (i = 0; i < len; i++) {
+		unsigned char ch = src[i];
+
+		if (ch == '\n' && prev != '\r')
+			*dst++ = '\r';
+		*dst++ = ch;
+		prev = ch;
+	}
+
+	return 1;
+}
diff --git a/diff.c b/diff.c
index 13b9b6c56..561587cac 100644
--- a/diff.c
+++ b/diff.c
@@ -1332,6 +1332,9 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only)
reuse_worktree_file(s->path, s->sha1, 0)) {
struct stat st;
int fd;
+ char *buf;
+ unsigned long size;
+
if (lstat(s->path, &st) < 0) {
if (errno == ENOENT) {
err_empty:
@@ -1364,7 +1367,19 @@ int diff_populate_filespec(struct diff_filespec *s, int size_only)
s->data = xmmap(NULL, s->size, PROT_READ, MAP_PRIVATE, fd, 0);
close(fd);
s->should_munmap = 1;
- /* FIXME! CRLF -> LF conversion goes here, based on "s->path" */
+
+ /*
+ * Convert from working tree format to canonical git format
+ */
+ buf = s->data;
+ size = s->size;
+ if (convert_to_git(s->path, &buf, &size)) {
+ munmap(s->data, s->size);
+ s->should_munmap = 0;
+ s->data = buf;
+ s->size = size;
+ s->should_free = 1;
+ }
}
else {
char type[20];
diff --git a/entry.c b/entry.c
index c2641ddef..472a9ef32 100644
--- a/entry.c
+++ b/entry.c
@@ -78,6 +78,9 @@ static int write_entry(struct cache_entry *ce, char *path, struct checkout *stat
path, sha1_to_hex(ce->sha1));
}
switch (ntohl(ce->ce_mode) & S_IFMT) {
+ char *buf;
+ unsigned long nsize;
+
case S_IFREG:
if (to_tempfile) {
strcpy(path, ".merge_file_XXXXXX");
@@ -89,7 +92,18 @@ static int write_entry(struct cache_entry *ce, char *path, struct checkout *stat
return error("git-checkout-index: unable to create file %s (%s)",
path, strerror(errno));
}
- /* FIXME: LF -> CRLF conversion goes here, based on "ce->name" */
+
+ /*
+ * Convert from git internal format to working tree format
+ */
+ buf = new;
+ nsize = size;
+ if (convert_to_working_tree(ce->name, &buf, &nsize)) {
+ free(new);
+ new = buf;
+ size = nsize;
+ }
+
wrote = write_in_full(fd, new, size);
close(fd);
free(new);
diff --git a/environment.c b/environment.c
index 54c22f824..2fa096041 100644
--- a/environment.c
+++ b/environment.c
@@ -28,6 +28,7 @@ size_t packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE;
size_t packed_git_limit = DEFAULT_PACKED_GIT_LIMIT;
int pager_in_use;
int pager_use_color = 1;
+int auto_crlf = 0;
static const char *git_dir;
static char *git_object_dir, *git_index_file, *git_refs_dir, *git_graft_file;
diff --git a/sha1_file.c b/sha1_file.c
index 8ad7fad82..6ec67b292 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -2082,7 +2082,7 @@ int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object, con
{
unsigned long size = st->st_size;
void *buf;
- int ret;
+ int ret, re_allocated = 0;
buf = "";
if (size)
@@ -2091,11 +2091,30 @@ int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object, con
if (!type)
type = blob_type;
- /* FIXME: CRLF -> LF conversion here for blobs! We'll need the path! */
+
+ /*
+ * Convert blobs to git internal format
+ */
+ if (!strcmp(type, blob_type)) {
+ unsigned long nsize = size;
+ char *nbuf = buf;
+ if (convert_to_git(NULL, &nbuf, &nsize)) {
+ if (size)
+ munmap(buf, size);
+ size = nsize;
+ buf = nbuf;
+ re_allocated = 1;
+ }
+ }
+
if (write_object)
ret = write_sha1_file(buf, size, type, sha1);
else
ret = hash_sha1_file(buf, size, type, sha1);
+ if (re_allocated) {
+ free(buf);
+ return ret;
+ }
if (size)
munmap(buf, size);
return ret;