diff options
author | Beat Bolli <dev+git@drbeat.li> | 2016-12-14 00:31:39 +0100 |
---|---|---|
committer | Junio C Hamano <gitster@pobox.com> | 2016-12-13 16:12:47 -0800 |
commit | f3eb54920eff356a30df144d50b45a5581e5eb13 (patch) | |
tree | 34be6ffc87b5ec37419a587fe00c0a613da39b08 /contrib | |
parent | 32c239d1fb23d47aa95cc8a47f99dc25f152021d (diff) | |
download | git-f3eb54920eff356a30df144d50b45a5581e5eb13.tar.gz git-f3eb54920eff356a30df144d50b45a5581e5eb13.tar.xz |
update_unicode.sh: move it into contrib/update-unicode
As it's used only by a tiny minority of the Git developer population,
this script does not belong in the main Git source directory.
Move it into contrib/ and adjust the paths to account for the new
location.
Signed-off-by: Beat Bolli <dev+git@drbeat.li>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'contrib')
-rw-r--r-- | contrib/update-unicode/.gitignore | 3 | ||||
-rw-r--r-- | contrib/update-unicode/README | 20 | ||||
-rwxr-xr-x | contrib/update-unicode/update_unicode.sh | 38 |
3 files changed, 61 insertions, 0 deletions
diff --git a/contrib/update-unicode/.gitignore b/contrib/update-unicode/.gitignore
new file mode 100644
index 000000000..b0ebc6aad
--- /dev/null
+++ b/contrib/update-unicode/.gitignore
@@ -0,0 +1,3 @@
+uniset/
+UnicodeData.txt
+EastAsianWidth.txt
diff --git a/contrib/update-unicode/README b/contrib/update-unicode/README
new file mode 100644
index 000000000..b9e2fc854
--- /dev/null
+++ b/contrib/update-unicode/README
@@ -0,0 +1,20 @@
+TL;DR: Run update_unicode.sh after the publication of a new Unicode
+standard and commit the resulting unicode_width.h file.
+
+The long version
+================
+
+The Git source code ships the file unicode_width.h which contains
+tables of zero and double width Unicode code points, respectively.
+These tables are generated using update_unicode.sh in this directory.
+update_unicode.sh itself uses a third-party tool, uniset, to query two
+Unicode data files for the interesting code points.
+
+On first run, update_unicode.sh clones uniset from GitHub and builds it.
+This requires a current-ish version of autoconf (2.69 works as of December
+2016).
+
+On each run, update_unicode.sh checks whether more recent Unicode data
+files are available from the Unicode consortium, and rebuilds the header
+unicode_width.h with the new data. The new header can then be
+committed.
diff --git a/contrib/update-unicode/update_unicode.sh b/contrib/update-unicode/update_unicode.sh
new file mode 100755
index 000000000..7b901266c
--- /dev/null
+++ b/contrib/update-unicode/update_unicode.sh
@@ -0,0 +1,38 @@
+#!/bin/sh
+#See http://www.unicode.org/reports/tr44/
+# General_Category values selected for the zero_width table below:
+#Me Enclosing_Mark  an enclosing combining mark
+#Mn Nonspacing_Mark a nonspacing combining mark (zero advance width)
+#Cf Format          a format control character
+# Run from this script's directory; bail out if it cannot be entered.
+cd "$(dirname "$0")" || exit 1
+UNICODEWIDTH_H=$(git rev-parse --show-toplevel)/unicode_width.h
+(
+	if ! test -f UnicodeData.txt; then
+		wget http://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
+	fi &&
+	if ! test -f EastAsianWidth.txt; then
+		wget http://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt
+	fi &&
+	if ! test -d uniset; then
+		git clone https://github.com/depp/uniset.git
+	fi &&
+	(
+		cd uniset &&
+		if ! test -x uniset; then
+			autoreconf -i &&
+			./configure --enable-warnings=-Werror CFLAGS='-O0 -ggdb'
+		fi &&
+		make
+	) &&
+	UNICODE_DIR=. && export UNICODE_DIR &&
+	cat >"$UNICODEWIDTH_H" <<-EOF
+	static const struct interval zero_width[] = {
+		$(uniset/uniset --32 cat:Me,Mn,Cf + U+1160..U+11FF - U+00AD |
+		  grep -v plane)
+	};
+	static const struct interval double_width[] = {
+		$(uniset/uniset --32 eaw:F,W)
+	};
+	EOF
+)