diff options
author | Junio C Hamano <gitster@pobox.com> | 2012-07-13 15:37:51 -0700 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2012-07-13 15:37:51 -0700 |
commit | b856ad623e4f686815986c0b9341dd1bfd791e71 (patch) | |
tree | ddce49b4afdb7428c1cba49628020bd796704661 /compat | |
parent | 6a9aa0c9b224517db0549d9252fdbc5177e6c0e2 (diff) | |
parent | 76759c7dff53e8c84e975b88cb8245587c14c7ba (diff) | |
download | git-b856ad623e4f686815986c0b9341dd1bfd791e71.tar.gz git-b856ad623e4f686815986c0b9341dd1bfd791e71.tar.xz |
Merge branch 'tb/sanitize-decomposed-utf-8-pathname'
Teaches git to normalize pathnames read from readdir(3) and all
arguments from the command line into precomposed UTF-8 (assuming
that they come as decomposed UTF-8) to work around issues on Mac OS.
I think there still are other places that need conversion
(e.g. paths that are read from stdin for some commands), but this
should be a good first step in the right direction.
* tb/sanitize-decomposed-utf-8-pathname:
git on Mac OS and precomposed unicode
Diffstat (limited to 'compat')
-rw-r--r-- | compat/precompose_utf8.c | 190 | ||||
-rw-r--r-- | compat/precompose_utf8.h | 45 |
2 files changed, 235 insertions, 0 deletions
diff --git a/compat/precompose_utf8.c b/compat/precompose_utf8.c new file mode 100644 index 000000000..d40d1b380 --- /dev/null +++ b/compat/precompose_utf8.c @@ -0,0 +1,190 @@ +/* + * Converts filenames from decomposed unicode into precomposed unicode. + * Used on MacOS X. +*/ + + +#define PRECOMPOSE_UNICODE_C + +#include "cache.h" +#include "utf8.h" +#include "precompose_utf8.h" + +typedef char *iconv_ibp; +const static char *repo_encoding = "UTF-8"; +const static char *path_encoding = "UTF-8-MAC"; + + +static size_t has_utf8(const char *s, size_t maxlen, size_t *strlen_c) +{ + const uint8_t *utf8p = (const uint8_t*) s; + size_t strlen_chars = 0; + size_t ret = 0; + + if ((!utf8p) || (!*utf8p)) { + return 0; + } + + while((*utf8p) && maxlen) { + if (*utf8p & 0x80) + ret++; + strlen_chars++; + utf8p++; + maxlen--; + } + if (strlen_c) + *strlen_c = strlen_chars; + + return ret; +} + + +void probe_utf8_pathname_composition(char *path, int len) +{ + const static char *auml_nfc = "\xc3\xa4"; + const static char *auml_nfd = "\x61\xcc\x88"; + int output_fd; + if (precomposed_unicode != -1) + return; /* We found it defined in the global config, respect it */ + path[len] = 0; + strcpy(path + len, auml_nfc); + output_fd = open(path, O_CREAT|O_EXCL|O_RDWR, 0600); + if (output_fd >=0) { + close(output_fd); + path[len] = 0; + strcpy(path + len, auml_nfd); + /* Indicate to the user, that we can configure it to true */ + if (0 == access(path, R_OK)) + git_config_set("core.precomposeunicode", "false"); + /* To be backward compatible, set precomposed_unicode to 0 */ + precomposed_unicode = 0; + path[len] = 0; + strcpy(path + len, auml_nfc); + unlink(path); + } +} + + +void precompose_argv(int argc, const char **argv) +{ + int i = 0; + const char *oldarg; + char *newarg; + iconv_t ic_precompose; + + if (precomposed_unicode != 1) + return; + + ic_precompose = iconv_open(repo_encoding, path_encoding); + if (ic_precompose == (iconv_t) -1) + return; + + while (i < argc) { + size_t namelen; + oldarg = argv[i]; + if (has_utf8(oldarg, (size_t)-1, &namelen)) { + newarg = reencode_string_iconv(oldarg, namelen, ic_precompose); + if (newarg) + argv[i] = newarg; + } + i++; + } + iconv_close(ic_precompose); +} + + +PREC_DIR *precompose_utf8_opendir(const char *dirname) +{ + PREC_DIR *prec_dir = xmalloc(sizeof(PREC_DIR)); + prec_dir->dirent_nfc = xmalloc(sizeof(dirent_prec_psx)); + prec_dir->dirent_nfc->max_name_len = sizeof(prec_dir->dirent_nfc->d_name); + + prec_dir->dirp = opendir(dirname); + if (!prec_dir->dirp) { + free(prec_dir->dirent_nfc); + free(prec_dir); + return NULL; + } else { + int ret_errno = errno; + prec_dir->ic_precompose = iconv_open(repo_encoding, path_encoding); + /* if iconv_open() fails, die() in readdir() if needed */ + errno = ret_errno; + } + + return prec_dir; +} + +struct dirent_prec_psx *precompose_utf8_readdir(PREC_DIR *prec_dir) +{ + struct dirent *res; + res = readdir(prec_dir->dirp); + if (res) { + size_t namelenz = strlen(res->d_name) + 1; /* \0 */ + size_t new_maxlen = namelenz; + + int ret_errno = errno; + + if (new_maxlen > prec_dir->dirent_nfc->max_name_len) { + size_t new_len = sizeof(dirent_prec_psx) + new_maxlen - + sizeof(prec_dir->dirent_nfc->d_name); + + prec_dir->dirent_nfc = xrealloc(prec_dir->dirent_nfc, new_len); + prec_dir->dirent_nfc->max_name_len = new_maxlen; + } + + prec_dir->dirent_nfc->d_ino = res->d_ino; + prec_dir->dirent_nfc->d_type = res->d_type; + + if ((precomposed_unicode == 1) && has_utf8(res->d_name, (size_t)-1, NULL)) { + if (prec_dir->ic_precompose == (iconv_t)-1) { + die("iconv_open(%s,%s) failed, but needed:\n" + " precomposed unicode is not supported.\n" + " If you wnat to use decomposed unicode, run\n" + " \"git config core.precomposeunicode false\"\n", + repo_encoding, path_encoding); + } else { + iconv_ibp cp = (iconv_ibp)res->d_name; + size_t inleft = namelenz; + char *outpos = &prec_dir->dirent_nfc->d_name[0]; + size_t outsz = prec_dir->dirent_nfc->max_name_len; + size_t cnt; + errno = 0; + cnt = iconv(prec_dir->ic_precompose, &cp, &inleft, &outpos, &outsz); + if (errno || inleft) { + /* + * iconv() failed and errno could be E2BIG, EILSEQ, EINVAL, EBADF + * MacOS X avoids illegal byte sequemces. + * If they occur on a mounted drive (e.g. NFS) it is not worth to + * die() for that, but rather let the user see the original name + */ + namelenz = 0; /* trigger strlcpy */ + } + } + } + else + namelenz = 0; + + if (!namelenz) + strlcpy(prec_dir->dirent_nfc->d_name, res->d_name, + prec_dir->dirent_nfc->max_name_len); + + errno = ret_errno; + return prec_dir->dirent_nfc; + } + return NULL; +} + + +int precompose_utf8_closedir(PREC_DIR *prec_dir) +{ + int ret_value; + int ret_errno; + ret_value = closedir(prec_dir->dirp); + ret_errno = errno; + if (prec_dir->ic_precompose != (iconv_t)-1) + iconv_close(prec_dir->ic_precompose); + free(prec_dir->dirent_nfc); + free(prec_dir); + errno = ret_errno; + return ret_value; +} diff --git a/compat/precompose_utf8.h b/compat/precompose_utf8.h new file mode 100644 index 000000000..3b73585fc --- /dev/null +++ b/compat/precompose_utf8.h @@ -0,0 +1,45 @@ +#ifndef PRECOMPOSE_UNICODE_H +#include <sys/stat.h> +#include <sys/types.h> +#include <dirent.h> +#include <iconv.h> + + +typedef struct dirent_prec_psx { + ino_t d_ino; /* Posix */ + size_t max_name_len; /* See below */ + unsigned char d_type; /* available on all systems git runs on */ + + /* + * See http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/dirent.h.html + * NAME_MAX + 1 should be enough, but some systems have + * NAME_MAX=255 and strlen(d_name) may return 508 or 510 + * Solution: allocate more when needed, see precompose_utf8_readdir() + */ + char d_name[NAME_MAX+1]; +} dirent_prec_psx; + + +typedef struct { + iconv_t ic_precompose; + DIR *dirp; + struct dirent_prec_psx *dirent_nfc; +} PREC_DIR; + +void precompose_argv(int argc, const char **argv); +void probe_utf8_pathname_composition(char *, int); + +PREC_DIR *precompose_utf8_opendir(const char *dirname); +struct dirent_prec_psx *precompose_utf8_readdir(PREC_DIR *dirp); +int precompose_utf8_closedir(PREC_DIR *dirp); + +#ifndef PRECOMPOSE_UNICODE_C +#define dirent dirent_prec_psx +#define opendir(n) precompose_utf8_opendir(n) +#define readdir(d) precompose_utf8_readdir(d) +#define closedir(d) precompose_utf8_closedir(d) +#define DIR PREC_DIR +#endif /* PRECOMPOSE_UNICODE_C */ + +#define PRECOMPOSE_UNICODE_H +#endif /* PRECOMPOSE_UNICODE_H */ |