aboutsummaryrefslogtreecommitdiff
path: root/convert.c
diff options
context:
space:
mode:
Diffstat (limited to 'convert.c')
-rw-r--r--convert.c131
1 files changed, 100 insertions, 31 deletions
diff --git a/convert.c b/convert.c
index 9a5612e93..8cd6222a2 100644
--- a/convert.c
+++ b/convert.c
@@ -13,6 +13,11 @@
* translation when the "text" attribute or "auto_crlf" option is set.
*/
+/* Stat bits: When BIN is set, the txt bits are unset */
+#define CONVERT_STAT_BITS_TXT_LF 0x1
+#define CONVERT_STAT_BITS_TXT_CRLF 0x2
+#define CONVERT_STAT_BITS_BIN 0x4
+
enum crlf_action {
CRLF_GUESS = -1,
CRLF_BINARY = 0,
@@ -75,26 +80,75 @@ static void gather_stats(const char *buf, unsigned long size, struct text_stat *
/*
* The same heuristics as diff.c::mmfile_is_binary()
+ * We treat files with bare CR as binary
*/
-static int is_binary(unsigned long size, struct text_stat *stats)
+static int convert_is_binary(unsigned long size, const struct text_stat *stats)
{
-
+ if (stats->cr != stats->crlf)
+ return 1;
if (stats->nul)
return 1;
if ((stats->printable >> 7) < stats->nonprintable)
return 1;
- /*
- * Other heuristics? Average line length might be relevant,
- * as might LF vs CR vs CRLF counts..
- *
- * NOTE! It might be normal to have a low ratio of CRLF to LF
- * (somebody starts with a LF-only file and edits it with an editor
- * that adds CRLF only to lines that are added..). But do we
- * want to support CR-only? Probably not.
- */
return 0;
}
+static unsigned int gather_convert_stats(const char *data, unsigned long size)
+{
+ struct text_stat stats;
+ if (!data || !size)
+ return 0;
+ gather_stats(data, size, &stats);
+ if (convert_is_binary(size, &stats))
+ return CONVERT_STAT_BITS_BIN;
+ else if (stats.crlf && stats.crlf == stats.lf)
+ return CONVERT_STAT_BITS_TXT_CRLF;
+ else if (stats.crlf && stats.lf)
+ return CONVERT_STAT_BITS_TXT_CRLF | CONVERT_STAT_BITS_TXT_LF;
+ else if (stats.lf)
+ return CONVERT_STAT_BITS_TXT_LF;
+ else
+ return 0;
+}
+
+static const char *gather_convert_stats_ascii(const char *data, unsigned long size)
+{
+ unsigned int convert_stats = gather_convert_stats(data, size);
+
+ if (convert_stats & CONVERT_STAT_BITS_BIN)
+ return "-text";
+ switch (convert_stats) {
+ case CONVERT_STAT_BITS_TXT_LF:
+ return "lf";
+ case CONVERT_STAT_BITS_TXT_CRLF:
+ return "crlf";
+ case CONVERT_STAT_BITS_TXT_LF | CONVERT_STAT_BITS_TXT_CRLF:
+ return "mixed";
+ default:
+ return "none";
+ }
+}
+
+const char *get_cached_convert_stats_ascii(const char *path)
+{
+ const char *ret;
+ unsigned long sz;
+ void *data = read_blob_data_from_cache(path, &sz);
+ ret = gather_convert_stats_ascii(data, sz);
+ free(data);
+ return ret;
+}
+
+const char *get_wt_convert_stats_ascii(const char *path)
+{
+ const char *ret = "";
+ struct strbuf sb = STRBUF_INIT;
+ if (strbuf_read_file(&sb, path, 0) >= 0)
+ ret = gather_convert_stats_ascii(sb.buf, sb.len);
+ strbuf_release(&sb);
+ return ret;
+}
+
static enum eol output_eol(enum crlf_action crlf_action)
{
switch (crlf_action) {
@@ -187,18 +241,7 @@ static int crlf_to_git(const char *path, const char *src, size_t len,
gather_stats(src, len, &stats);
if (crlf_action == CRLF_AUTO || crlf_action == CRLF_GUESS) {
- /*
- * We're currently not going to even try to convert stuff
- * that has bare CR characters. Does anybody do that crazy
- * stuff?
- */
- if (stats.cr != stats.crlf)
- return 0;
-
- /*
- * And add some heuristics for binary vs text, of course...
- */
- if (is_binary(len, &stats))
+ if (convert_is_binary(len, &stats))
return 0;
if (crlf_action == CRLF_GUESS) {
@@ -277,11 +320,7 @@ static int crlf_to_worktree(const char *path, const char *src, size_t len,
return 0;
}
- /* If we have any bare CR characters, we're not going to touch it */
- if (stats.cr != stats.crlf)
- return 0;
-
- if (is_binary(len, &stats))
+ if (convert_is_binary(len, &stats))
return 0;
}
@@ -356,9 +395,14 @@ static int filter_buffer_or_fd(int in, int out, void *data)
sigchain_push(SIGPIPE, SIG_IGN);
if (params->src) {
- write_err = (write_in_full(child_process.in, params->src, params->size) < 0);
+ write_err = (write_in_full(child_process.in,
+ params->src, params->size) < 0);
+ if (errno == EPIPE)
+ write_err = 0;
} else {
write_err = copy_fd(params->fd, child_process.in);
+ if (write_err == COPY_WRITE_ERROR && errno == EPIPE)
+ write_err = 0;
}
if (close(child_process.in))
@@ -390,7 +434,7 @@ static int apply_filter(const char *path, const char *src, size_t len, int fd,
struct async async;
struct filter_params params;
- if (!cmd)
+ if (!cmd || !*cmd)
return 0;
if (!dst)
@@ -772,6 +816,30 @@ int would_convert_to_git_filter_fd(const char *path)
return apply_filter(path, NULL, 0, -1, NULL, ca.drv->clean);
}
+const char *get_convert_attr_ascii(const char *path)
+{
+ struct conv_attrs ca;
+ enum crlf_action crlf_action;
+
+ convert_attrs(&ca, path);
+ crlf_action = input_crlf_action(ca.crlf_action, ca.eol_attr);
+ switch (crlf_action) {
+ case CRLF_GUESS:
+ return "";
+ case CRLF_BINARY:
+ return "-text";
+ case CRLF_TEXT:
+ return "text";
+ case CRLF_INPUT:
+ return "text eol=lf";
+ case CRLF_CRLF:
+ return "text=auto eol=crlf";
+ case CRLF_AUTO:
+ return "text=auto";
+ }
+ return "";
+}
+
int convert_to_git(const char *path, const char *src, size_t len,
struct strbuf *dst, enum safe_crlf checksafe)
{
@@ -1284,7 +1352,8 @@ static struct stream_filter *ident_filter(const unsigned char *sha1)
{
struct ident_filter *ident = xmalloc(sizeof(*ident));
- sprintf(ident->ident, ": %s $", sha1_to_hex(sha1));
+ xsnprintf(ident->ident, sizeof(ident->ident),
+ ": %s $", sha1_to_hex(sha1));
strbuf_init(&ident->left, 0);
ident->filter.vtbl = &ident_vtbl;
ident->state = 0;