Merge branch 'bb/unicode-9.0'

The character width table has been updated to match Unicode 9.0.

* bb/unicode-9.0:
  unicode_width.h: update the width tables to Unicode 9.0
  update_unicode.sh: remove the plane filter
  update_unicode.sh: automatically download newer definition files
  update_unicode.sh: pin the uniset repo to a known good commit
  update_unicode.sh: remove an unnecessary subshell level
  update_unicode.sh: move it into contrib/update-unicode
author: Junio C Hamano <gitster@pobox.com> 2016-12-19 14:45:36 -0800
committer: Junio C Hamano <gitster@pobox.com> 2016-12-19 14:45:36 -0800
commit: 0cfdda3479443c2c9282547796b59c4e727b5343 (patch)
tree: 8790c24cd2b9a4c51748d624a470b719d6c9e6d1 /contrib
parent: 3da9366eb086f8cdfc9328c3638d67a01ea69cc9 (diff)
parent: 9e6e9aefdfd4b0625900b65c825a4149b1b834e1 (diff)
download: git-0cfdda3479443c2c9282547796b59c4e727b5343.tar.gz
git-0cfdda3479443c2c9282547796b59c4e727b5343.tar.xz
3 files changed, 56 insertions, 0 deletions
diff --git a/contrib/update-unicode/.gitignore b/contrib/update-unicode/.gitignore
new file mode 100644
index 000000000..b0ebc6aad
--- /dev/null
+++ b/contrib/update-unicode/.gitignore
@@ -0,0 +1,3 @@
+uniset/
+UnicodeData.txt
+EastAsianWidth.txt
diff --git a/contrib/update-unicode/README b/contrib/update-unicode/README
new file mode 100644
index 000000000..b9e2fc854
--- /dev/null
+++ b/contrib/update-unicode/README
@@ -0,0 +1,20 @@
+TL;DR: Run update_unicode.sh after the publication of a new Unicode
+standard and commit the resulting unicode_width.h file.
+
+The long version
+================
+
+The Git source code ships the file unicode_width.h, which contains
+tables of zero-width and double-width Unicode code points.
+These tables are generated using update_unicode.sh in this directory.
+update_unicode.sh itself uses a third-party tool, uniset, to query two
+Unicode data files for the interesting code points.
+
+On first run, update_unicode.sh clones uniset from GitHub and builds it.
+This requires a current-ish version of autoconf (2.69 works as of
+December 2016).
+
+On each run, update_unicode.sh checks whether more recent Unicode data
+files are available from the Unicode Consortium, and rebuilds the header
+unicode_width.h with the new data. The new header can then be
+committed.
diff --git a/contrib/update-unicode/update_unicode.sh b/contrib/update-unicode/update_unicode.sh
new file mode 100755
index 000000000..e05db92d3
--- /dev/null
+++ b/contrib/update-unicode/update_unicode.sh
@@ -0,0 +1,33 @@
+#!/bin/sh
+# Regenerate unicode_width.h at the top level of the Git work tree.
+#
+# The script downloads UnicodeData.txt and EastAsianWidth.txt into its
+# own directory, clones and builds the third-party "uniset" tool there
+# if it is not already present, and then writes the zero_width[] and
+# double_width[] interval tables produced by uniset into the header.
+#
+#See http://www.unicode.org/reports/tr44/
+#
+#Me Enclosing_Mark  an enclosing combining mark
+#Mn Nonspacing_Mark a nonspacing combining mark (zero advance width)
+#Cf Format          a format control character
+#
+# Everything below uses paths relative to the script's own directory,
+# so stop right away if we cannot get there; otherwise the download,
+# clone and build would happen wherever the caller happened to be.
+cd "$(dirname "$0")" || exit 1
+# If this fails (e.g. not inside a work tree) the output is empty and
+# the header path would silently become "/unicode_width.h".
+toplevel=$(git rev-parse --show-toplevel) || exit 1
+UNICODEWIDTH_H=$toplevel/unicode_width.h
+
+# wget -N (timestamping) skips files whose local copy is up to date.
+wget -N http://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt \
+	http://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt &&
+if ! test -d uniset; then
+	# First run: fetch uniset and check out one fixed commit of it.
+	git clone https://github.com/depp/uniset.git &&
+	( cd uniset && git checkout 4b186196dd )
+fi &&
+(
+	cd uniset &&
+	# Only (re)configure when no uniset executable has been built yet.
+	if ! test -x uniset; then
+		autoreconf -i &&
+		./configure --enable-warnings=-Werror CFLAGS='-O0 -ggdb'
+	fi &&
+	make
+) &&
+# NOTE(review): presumably uniset reads the downloaded data files from
+# $UNICODE_DIR -- confirm against the uniset documentation.
+UNICODE_DIR=. && export UNICODE_DIR &&
+# Run uniset before touching the header: a command substitution inside
+# the here-document would have its exit status ignored, and a failing
+# uniset would then overwrite the header with empty tables.
+zero_width=$(uniset/uniset --32 cat:Me,Mn,Cf + U+1160..U+11FF - U+00AD) &&
+double_width=$(uniset/uniset --32 eaw:F,W) &&
+cat >"$UNICODEWIDTH_H" <<-EOF
+static const struct interval zero_width[] = {
+	$zero_width
+};
+static const struct interval double_width[] = {
+	$double_width
+};
+EOF
author	Junio C Hamano <gitster@pobox.com>	2016-12-19 14:45:36 -0800
committer	Junio C Hamano <gitster@pobox.com>	2016-12-19 14:45:36 -0800
commit	0cfdda3479443c2c9282547796b59c4e727b5343 (patch)
tree	8790c24cd2b9a4c51748d624a470b719d6c9e6d1 /contrib
parent	3da9366eb086f8cdfc9328c3638d67a01ea69cc9 (diff)
parent	9e6e9aefdfd4b0625900b65c825a4149b1b834e1 (diff)
download	git-0cfdda3479443c2c9282547796b59c4e727b5343.tar.gz git-0cfdda3479443c2c9282547796b59c4e727b5343.tar.xz