Bug 1463251 - Part 2. Revendor dependencies. r?ato, jgraham
author: Bastien Orivel <eijebong@bananium.fr>
date: Mon, 21 May 2018 22:34:18 +0200
changeset: 797864 59217bdcab741f23bb5ec851c7d9bbae07c47224
parent: 797863 15296312f69bdd293c1a0686a7b0b7e8ad86c5b8
push id: 110608
push user: bmo:eijebong@bananium.fr
push date: Mon, 21 May 2018 21:16:37 +0000
reviewers: ato, jgraham
bugs: 1463251
milestone: 62.0a1
Bug 1463251 - Part 2. Revendor dependencies. r?ato, jgraham MozReview-Commit-ID: 6eeqvTqSxam
third_party/rust/memchr-1.0.2/.cargo-checksum.json
third_party/rust/memchr-1.0.2/.travis.yml
third_party/rust/memchr-1.0.2/COPYING
third_party/rust/memchr-1.0.2/Cargo.toml
third_party/rust/memchr-1.0.2/LICENSE-MIT
third_party/rust/memchr-1.0.2/Makefile
third_party/rust/memchr-1.0.2/README.md
third_party/rust/memchr-1.0.2/UNLICENSE
third_party/rust/memchr-1.0.2/appveyor.yml
third_party/rust/memchr-1.0.2/benches/bench.rs
third_party/rust/memchr-1.0.2/ctags.rust
third_party/rust/memchr-1.0.2/session.vim
third_party/rust/memchr-1.0.2/src/lib.rs
third_party/rust/memchr/.cargo-checksum.json
third_party/rust/memchr/Cargo.toml
third_party/rust/memchr/src/lib.rs
third_party/rust/regex-0.2.2/.cargo-checksum.json
third_party/rust/regex-0.2.2/.travis.yml
third_party/rust/regex-0.2.2/CHANGELOG.md
third_party/rust/regex-0.2.2/Cargo.toml
third_party/rust/regex-0.2.2/HACKING.md
third_party/rust/regex-0.2.2/LICENSE-APACHE
third_party/rust/regex-0.2.2/LICENSE-MIT
third_party/rust/regex-0.2.2/PERFORMANCE.md
third_party/rust/regex-0.2.2/README.md
third_party/rust/regex-0.2.2/appveyor.yml
third_party/rust/regex-0.2.2/ci/after_success.sh
third_party/rust/regex-0.2.2/ci/run-kcov
third_party/rust/regex-0.2.2/ci/run-shootout-test
third_party/rust/regex-0.2.2/ci/script.sh
third_party/rust/regex-0.2.2/examples/bug347.rs
third_party/rust/regex-0.2.2/examples/regexdna-input.txt
third_party/rust/regex-0.2.2/examples/regexdna-output.txt
third_party/rust/regex-0.2.2/examples/shootout-regex-dna-bytes.rs
third_party/rust/regex-0.2.2/examples/shootout-regex-dna-cheat.rs
third_party/rust/regex-0.2.2/examples/shootout-regex-dna-replace.rs
third_party/rust/regex-0.2.2/examples/shootout-regex-dna-single-cheat.rs
third_party/rust/regex-0.2.2/examples/shootout-regex-dna-single.rs
third_party/rust/regex-0.2.2/examples/shootout-regex-dna.rs
third_party/rust/regex-0.2.2/examples/shootout-regex-redux-1.rs
third_party/rust/regex-0.2.2/examples/shootout-regex-redux-chunked.rs
third_party/rust/regex-0.2.2/examples/shootout-regex-redux.rs
third_party/rust/regex-0.2.2/scripts/frequencies.py
third_party/rust/regex-0.2.2/scripts/regex-match-tests.py
third_party/rust/regex-0.2.2/scripts/unicode.py
third_party/rust/regex-0.2.2/src/backtrack.rs
third_party/rust/regex-0.2.2/src/compile.rs
third_party/rust/regex-0.2.2/src/dfa.rs
third_party/rust/regex-0.2.2/src/error.rs
third_party/rust/regex-0.2.2/src/exec.rs
third_party/rust/regex-0.2.2/src/expand.rs
third_party/rust/regex-0.2.2/src/freqs.rs
third_party/rust/regex-0.2.2/src/input.rs
third_party/rust/regex-0.2.2/src/lib.rs
third_party/rust/regex-0.2.2/src/literals.rs
third_party/rust/regex-0.2.2/src/pattern.rs
third_party/rust/regex-0.2.2/src/pikevm.rs
third_party/rust/regex-0.2.2/src/prog.rs
third_party/rust/regex-0.2.2/src/re_builder.rs
third_party/rust/regex-0.2.2/src/re_bytes.rs
third_party/rust/regex-0.2.2/src/re_plugin.rs
third_party/rust/regex-0.2.2/src/re_set.rs
third_party/rust/regex-0.2.2/src/re_trait.rs
third_party/rust/regex-0.2.2/src/re_unicode.rs
third_party/rust/regex-0.2.2/src/simd_accel/mod.rs
third_party/rust/regex-0.2.2/src/simd_accel/teddy128.rs
third_party/rust/regex-0.2.2/src/simd_fallback/mod.rs
third_party/rust/regex-0.2.2/src/simd_fallback/teddy128.rs
third_party/rust/regex-0.2.2/src/sparse.rs
third_party/rust/regex-0.2.2/src/testdata/LICENSE
third_party/rust/regex-0.2.2/src/testdata/README
third_party/rust/regex-0.2.2/src/testdata/basic.dat
third_party/rust/regex-0.2.2/src/testdata/nullsubexpr.dat
third_party/rust/regex-0.2.2/src/testdata/repetition.dat
third_party/rust/regex-0.2.2/src/utf8.rs
third_party/rust/regex-0.2.2/tests/api.rs
third_party/rust/regex-0.2.2/tests/api_str.rs
third_party/rust/regex-0.2.2/tests/bytes.rs
third_party/rust/regex-0.2.2/tests/crazy.rs
third_party/rust/regex-0.2.2/tests/flags.rs
third_party/rust/regex-0.2.2/tests/fowler.rs
third_party/rust/regex-0.2.2/tests/macros.rs
third_party/rust/regex-0.2.2/tests/macros_bytes.rs
third_party/rust/regex-0.2.2/tests/macros_str.rs
third_party/rust/regex-0.2.2/tests/misc.rs
third_party/rust/regex-0.2.2/tests/multiline.rs
third_party/rust/regex-0.2.2/tests/noparse.rs
third_party/rust/regex-0.2.2/tests/plugin.rs
third_party/rust/regex-0.2.2/tests/regression.rs
third_party/rust/regex-0.2.2/tests/replace.rs
third_party/rust/regex-0.2.2/tests/searcher.rs
third_party/rust/regex-0.2.2/tests/set.rs
third_party/rust/regex-0.2.2/tests/shortest_match.rs
third_party/rust/regex-0.2.2/tests/suffix_reverse.rs
third_party/rust/regex-0.2.2/tests/test_backtrack.rs
third_party/rust/regex-0.2.2/tests/test_backtrack_bytes.rs
third_party/rust/regex-0.2.2/tests/test_backtrack_utf8bytes.rs
third_party/rust/regex-0.2.2/tests/test_default.rs
third_party/rust/regex-0.2.2/tests/test_default_bytes.rs
third_party/rust/regex-0.2.2/tests/test_nfa.rs
third_party/rust/regex-0.2.2/tests/test_nfa_bytes.rs
third_party/rust/regex-0.2.2/tests/test_nfa_utf8bytes.rs
third_party/rust/regex-0.2.2/tests/test_plugin.rs
third_party/rust/regex-0.2.2/tests/unicode.rs
third_party/rust/regex-0.2.2/tests/word_boundary.rs
third_party/rust/regex-0.2.2/tests/word_boundary_ascii.rs
third_party/rust/regex-0.2.2/tests/word_boundary_unicode.rs
third_party/rust/regex-syntax-0.4.1/.cargo-checksum.json
third_party/rust/regex-syntax-0.4.1/Cargo.toml
third_party/rust/regex-syntax-0.4.1/src/lib.rs
third_party/rust/regex-syntax-0.4.1/src/literals.rs
third_party/rust/regex-syntax-0.4.1/src/parser.rs
third_party/rust/regex-syntax-0.4.1/src/properties.rs
third_party/rust/regex-syntax-0.4.1/src/unicode.rs
third_party/rust/regex-syntax/.cargo-checksum.json
third_party/rust/regex-syntax/Cargo.toml
third_party/rust/regex-syntax/LICENSE-APACHE
third_party/rust/regex-syntax/LICENSE-MIT
third_party/rust/regex-syntax/benches/bench.rs
third_party/rust/regex-syntax/src/ast/mod.rs
third_party/rust/regex-syntax/src/ast/parse.rs
third_party/rust/regex-syntax/src/ast/print.rs
third_party/rust/regex-syntax/src/ast/visitor.rs
third_party/rust/regex-syntax/src/either.rs
third_party/rust/regex-syntax/src/error.rs
third_party/rust/regex-syntax/src/hir/interval.rs
third_party/rust/regex-syntax/src/hir/literal/mod.rs
third_party/rust/regex-syntax/src/hir/mod.rs
third_party/rust/regex-syntax/src/hir/print.rs
third_party/rust/regex-syntax/src/hir/translate.rs
third_party/rust/regex-syntax/src/hir/visitor.rs
third_party/rust/regex-syntax/src/lib.rs
third_party/rust/regex-syntax/src/literals.rs
third_party/rust/regex-syntax/src/parser.rs
third_party/rust/regex-syntax/src/properties.rs
third_party/rust/regex-syntax/src/unicode.rs
third_party/rust/regex-syntax/src/unicode_tables/age.rs
third_party/rust/regex-syntax/src/unicode_tables/case_folding_simple.rs
third_party/rust/regex-syntax/src/unicode_tables/general_category.rs
third_party/rust/regex-syntax/src/unicode_tables/mod.rs
third_party/rust/regex-syntax/src/unicode_tables/perl_word.rs
third_party/rust/regex-syntax/src/unicode_tables/property_bool.rs
third_party/rust/regex-syntax/src/unicode_tables/property_names.rs
third_party/rust/regex-syntax/src/unicode_tables/property_values.rs
third_party/rust/regex-syntax/src/unicode_tables/script.rs
third_party/rust/regex-syntax/src/unicode_tables/script_extension.rs
third_party/rust/regex/.cargo-checksum.json
third_party/rust/regex/.travis.yml
third_party/rust/regex/CHANGELOG.md
third_party/rust/regex/Cargo.toml
third_party/rust/regex/HACKING.md
third_party/rust/regex/PERFORMANCE.md
third_party/rust/regex/README.md
third_party/rust/regex/UNICODE.md
third_party/rust/regex/appveyor.yml
third_party/rust/regex/build.rs
third_party/rust/regex/ci/after_success.sh
third_party/rust/regex/ci/run-kcov
third_party/rust/regex/ci/script.sh
third_party/rust/regex/examples/bug347.rs
third_party/rust/regex/examples/shootout-regex-dna-bytes.rs
third_party/rust/regex/examples/shootout-regex-dna-single.rs
third_party/rust/regex/examples/shootout-regex-dna.rs
third_party/rust/regex/examples/shootout-regex-redux-1.rs
third_party/rust/regex/examples/shootout-regex-redux-chunked.rs
third_party/rust/regex/examples/shootout-regex-redux.rs
third_party/rust/regex/scripts/unicode.py
third_party/rust/regex/src/backtrack.rs
third_party/rust/regex/src/compile.rs
third_party/rust/regex/src/dfa.rs
third_party/rust/regex/src/error.rs
third_party/rust/regex/src/exec.rs
third_party/rust/regex/src/expand.rs
third_party/rust/regex/src/input.rs
third_party/rust/regex/src/lib.rs
third_party/rust/regex/src/literal/mod.rs
third_party/rust/regex/src/literal/teddy_avx2/fallback.rs
third_party/rust/regex/src/literal/teddy_avx2/imp.rs
third_party/rust/regex/src/literal/teddy_avx2/mod.rs
third_party/rust/regex/src/literal/teddy_ssse3/fallback.rs
third_party/rust/regex/src/literal/teddy_ssse3/imp.rs
third_party/rust/regex/src/literal/teddy_ssse3/mod.rs
third_party/rust/regex/src/literals.rs
third_party/rust/regex/src/pikevm.rs
third_party/rust/regex/src/prog.rs
third_party/rust/regex/src/re_builder.rs
third_party/rust/regex/src/re_bytes.rs
third_party/rust/regex/src/re_plugin.rs
third_party/rust/regex/src/re_set.rs
third_party/rust/regex/src/re_trait.rs
third_party/rust/regex/src/re_unicode.rs
third_party/rust/regex/src/simd_accel/mod.rs
third_party/rust/regex/src/simd_accel/teddy128.rs
third_party/rust/regex/src/simd_fallback/mod.rs
third_party/rust/regex/src/simd_fallback/teddy128.rs
third_party/rust/regex/src/utf8.rs
third_party/rust/regex/src/vector/avx2.rs
third_party/rust/regex/src/vector/mod.rs
third_party/rust/regex/src/vector/ssse3.rs
third_party/rust/regex/tests/api_str.rs
third_party/rust/regex/tests/bytes.rs
third_party/rust/regex/tests/crazy.rs
third_party/rust/regex/tests/macros.rs
third_party/rust/regex/tests/macros_bytes.rs
third_party/rust/regex/tests/macros_str.rs
third_party/rust/regex/tests/noparse.rs
third_party/rust/regex/tests/plugin.rs
third_party/rust/regex/tests/regression.rs
third_party/rust/regex/tests/replace.rs
third_party/rust/regex/tests/test_default.rs
third_party/rust/regex/tests/test_default_bytes.rs
third_party/rust/regex/tests/test_plugin.rs
third_party/rust/regex/tests/unicode.rs
third_party/rust/regex/tests/word_boundary_unicode.rs
third_party/rust/ucd-util/.cargo-checksum.json
third_party/rust/ucd-util/Cargo.toml
third_party/rust/ucd-util/LICENSE-APACHE
third_party/rust/ucd-util/LICENSE-MIT
third_party/rust/ucd-util/README.md
third_party/rust/ucd-util/src/hangul.rs
third_party/rust/ucd-util/src/ideograph.rs
third_party/rust/ucd-util/src/lib.rs
third_party/rust/ucd-util/src/name.rs
third_party/rust/ucd-util/src/property.rs
third_party/rust/ucd-util/src/unicode_tables/jamo_short_name.rs
third_party/rust/ucd-util/src/unicode_tables/mod.rs
third_party/rust/ucd-util/src/unicode_tables/property_names.rs
third_party/rust/ucd-util/src/unicode_tables/property_values.rs
copy from third_party/rust/memchr/.cargo-checksum.json
copy to third_party/rust/memchr-1.0.2/.cargo-checksum.json
new file mode 100644
--- /dev/null
+++ b/third_party/rust/memchr-1.0.2/.travis.yml
@@ -0,0 +1,14 @@
+language: rust
+rust:
+  - 1.12.0
+  - stable
+  - beta
+  - nightly
+script:
+  - cargo build --verbose --no-default-features
+  - cargo build --verbose
+  - cargo test --verbose
+  - cargo doc
+  - if [ "$TRAVIS_RUST_VERSION" = "nightly" ]; then
+      cargo bench --verbose;
+    fi
new file mode 100644
--- /dev/null
+++ b/third_party/rust/memchr-1.0.2/COPYING
@@ -0,0 +1,3 @@
+This project is dual-licensed under the Unlicense and MIT licenses.
+
+You may use this code under the terms of either license.
copy from third_party/rust/memchr/Cargo.toml
copy to third_party/rust/memchr-1.0.2/Cargo.toml
new file mode 100644
--- /dev/null
+++ b/third_party/rust/memchr-1.0.2/LICENSE-MIT
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2015 Andrew Gallant
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
new file mode 100644
--- /dev/null
+++ b/third_party/rust/memchr-1.0.2/Makefile
@@ -0,0 +1,14 @@
+all:
+	echo Nothing to do...
+
+ctags:
+	ctags --recurse --options=ctags.rust --languages=Rust
+
+docs:
+	cargo doc
+	in-dir ./target/doc fix-perms
+	rscp ./target/doc/* gopher:~/www/burntsushi.net/rustdoc/
+
+push:
+	git push origin master
+	git push github master
new file mode 100644
--- /dev/null
+++ b/third_party/rust/memchr-1.0.2/README.md
@@ -0,0 +1,36 @@
+This crate provides a safe interface `libc`'s `memchr` and `memrchr`.
+This crate also provides fallback implementations when either function is
+unavailable.
+
+[![Build status](https://api.travis-ci.org/BurntSushi/rust-memchr.png)](https://travis-ci.org/BurntSushi/rust-memchr)
+[![Build status](https://ci.appveyor.com/api/projects/status/8i9484t8l4w7uql0/branch/master?svg=true)](https://ci.appveyor.com/project/BurntSushi/rust-memchr/branch/master)
+[![](http://meritbadge.herokuapp.com/memchr)](https://crates.io/crates/memchr)
+
+Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org).
+
+
+### Documentation
+
+[https://docs.rs/memchr](https://docs.rs/memchr)
+
+### no_std
+
+memchr links to the standard library by default, but you can disable the
+`use_std` feature if you want to use it in a `#![no_std]` crate:
+
+```toml
+[dependencies]
+memchr = { version = "1.0", default-features = false }
+```
+
+### Performance
+
+On my system (Linux/amd64), `memchr` is about an order of magnitude faster than
+the more idiomatic `haystack.iter().position(|&b| b == needle)`:
+
+```
+test iterator          ... bench:       5,280 ns/iter (+/- 13) = 1893 MB/s
+test iterator_reversed ... bench:       5,271 ns/iter (+/- 7) = 1897 MB/s
+test libc_memchr       ... bench:         202 ns/iter (+/- 0) = 49504 MB/s
+test libc_memrchr      ... bench:         197 ns/iter (+/- 1) = 50761 MB/s
+```
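
For reference, a minimal usage sketch of the API this README describes; the `memchr`/`memrchr`/`memchr2`/`memchr3` signatures are the ones vendored in `src/lib.rs` later in this patch, while the example haystack and the expected offsets are only illustrative:

```rust
// Minimal sketch of the memchr crate's byte-search API. The function
// signatures match the vendored src/lib.rs; inputs/offsets are illustrative.
extern crate memchr;

fn main() {
    let haystack = b"the quick brown fox";

    // First and last occurrence of a single byte.
    assert_eq!(memchr::memchr(b'o', haystack), Some(12));
    assert_eq!(memchr::memrchr(b'o', haystack), Some(17));

    // Multi-needle variants return the first position matching any needle.
    assert_eq!(memchr::memchr2(b'q', b'z', haystack), Some(4));
    assert_eq!(memchr::memchr3(b'x', b'y', b'z', haystack), Some(18));

    // No match.
    assert_eq!(memchr::memchr(b'!', haystack), None);
}
```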
new file mode 100644
--- /dev/null
+++ b/third_party/rust/memchr-1.0.2/UNLICENSE
@@ -0,0 +1,24 @@
+This is free and unencumbered software released into the public domain.
+
+Anyone is free to copy, modify, publish, use, compile, sell, or
+distribute this software, either in source code form or as a compiled
+binary, for any purpose, commercial or non-commercial, and by any
+means.
+
+In jurisdictions that recognize copyright laws, the author or authors
+of this software dedicate any and all copyright interest in the
+software to the public domain. We make this dedication for the benefit
+of the public at large and to the detriment of our heirs and
+successors. We intend this dedication to be an overt act of
+relinquishment in perpetuity of all present and future rights to this
+software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+
+For more information, please refer to <http://unlicense.org/>
new file mode 100644
--- /dev/null
+++ b/third_party/rust/memchr-1.0.2/appveyor.yml
@@ -0,0 +1,19 @@
+environment:
+  matrix:
+  - TARGET: x86_64-pc-windows-msvc
+  - TARGET: i686-pc-windows-msvc
+  - TARGET: i686-pc-windows-gnu
+install:
+  - ps: Start-FileDownload "https://static.rust-lang.org/dist/rust-nightly-${env:TARGET}.exe"
+  - rust-nightly-%TARGET%.exe /VERYSILENT /NORESTART /DIR="C:\Program Files (x86)\Rust"
+  - SET PATH=%PATH%;C:\Program Files (x86)\Rust\bin
+  - SET PATH=%PATH%;C:\MinGW\bin
+  - rustc -V
+  - cargo -V
+
+build: false
+
+test_script:
+  - cargo build --verbose
+  - cargo test --verbose
+  - cargo bench --verbose
new file mode 100644
--- /dev/null
+++ b/third_party/rust/memchr-1.0.2/benches/bench.rs
@@ -0,0 +1,117 @@
+#![feature(test)]
+
+extern crate memchr;
+extern crate test;
+
+use std::iter;
+
+fn bench_data() -> Vec<u8> { iter::repeat(b'z').take(10000).collect() }
+
+#[bench]
+fn iterator_memchr(b: &mut test::Bencher) {
+    let haystack = bench_data();
+    let needle = b'a';
+    b.iter(|| {
+        assert!(haystack.iter().position(|&b| b == needle).is_none());
+    });
+    b.bytes = haystack.len() as u64;
+}
+
+#[bench]
+fn optimized_memchr(b: &mut test::Bencher) {
+    let haystack = bench_data();
+    let needle = b'a';
+    b.iter(|| {
+        assert!(memchr::memchr(needle, &haystack).is_none());
+    });
+    b.bytes = haystack.len() as u64;
+}
+
+#[bench]
+fn iterator_memrchr(b: &mut test::Bencher) {
+    let haystack = bench_data();
+    let needle = b'a';
+    b.iter(|| {
+        assert!(haystack.iter().rposition(|&b| b == needle).is_none());
+    });
+    b.bytes = haystack.len() as u64;
+}
+
+#[bench]
+fn optimized_memrchr(b: &mut test::Bencher) {
+    let haystack = bench_data();
+    let needle = b'a';
+    b.iter(|| {
+        assert!(memchr::memrchr(needle, &haystack).is_none());
+    });
+    b.bytes = haystack.len() as u64;
+}
+
+#[bench]
+fn iterator_memchr2(b: &mut test::Bencher) {
+    let haystack = bench_data();
+    let (needle1, needle2) = (b'a', b'b');
+    b.iter(|| {
+        assert!(haystack.iter().position(|&b| {
+            b == needle1 || b == needle2
+        }).is_none());
+    });
+    b.bytes = haystack.len() as u64;
+}
+
+#[bench]
+fn manual_memchr2(b: &mut test::Bencher) {
+    fn find_singles(
+        sparse: &[bool],
+        text: &[u8],
+    ) -> Option<(usize, usize)> {
+        for (hi, &b) in text.iter().enumerate() {
+            if sparse[b as usize] {
+                return Some((hi, hi+1));
+            }
+        }
+        None
+    }
+
+    let haystack = bench_data();
+    let mut sparse = vec![false; 256];
+    sparse[b'a' as usize] = true;
+    sparse[b'b' as usize] = true;
+    b.iter(|| {
+        assert!(find_singles(&sparse, &haystack).is_none());
+    });
+    b.bytes = haystack.len() as u64;
+}
+
+#[bench]
+fn optimized_memchr2(b: &mut test::Bencher) {
+    let haystack = bench_data();
+    let (needle1, needle2) = (b'a', b'b');
+    b.iter(|| {
+        assert!(memchr::memchr2(needle1, needle2, &haystack).is_none());
+    });
+    b.bytes = haystack.len() as u64;
+}
+
+#[bench]
+fn iterator_memchr3(b: &mut test::Bencher) {
+    let haystack = bench_data();
+    let (needle1, needle2, needle3) = (b'a', b'b', b'c');
+    b.iter(|| {
+        assert!(haystack.iter().position(|&b| {
+            b == needle1 || b == needle2 || b == needle3
+        }).is_none());
+    });
+    b.bytes = haystack.len() as u64;
+}
+
+#[bench]
+fn optimized_memchr3(b: &mut test::Bencher) {
+    let haystack = bench_data();
+    let (needle1, needle2, needle3) = (b'a', b'b', b'c');
+    b.iter(|| {
+        assert!(memchr::memchr3(
+            needle1, needle2, needle3, &haystack).is_none());
+    });
+    b.bytes = haystack.len() as u64;
+}
new file mode 100644
--- /dev/null
+++ b/third_party/rust/memchr-1.0.2/ctags.rust
@@ -0,0 +1,11 @@
+--langdef=Rust
+--langmap=Rust:.rs
+--regex-Rust=/^[ \t]*(#\[[^\]]\][ \t]*)*(pub[ \t]+)?(extern[ \t]+)?("[^"]+"[ \t]+)?(unsafe[ \t]+)?fn[ \t]+([a-zA-Z0-9_]+)/\6/f,functions,function definitions/
+--regex-Rust=/^[ \t]*(pub[ \t]+)?type[ \t]+([a-zA-Z0-9_]+)/\2/T,types,type definitions/
+--regex-Rust=/^[ \t]*(pub[ \t]+)?enum[ \t]+([a-zA-Z0-9_]+)/\2/g,enum,enumeration names/
+--regex-Rust=/^[ \t]*(pub[ \t]+)?struct[ \t]+([a-zA-Z0-9_]+)/\2/s,structure names/
+--regex-Rust=/^[ \t]*(pub[ \t]+)?mod[ \t]+([a-zA-Z0-9_]+)/\2/m,modules,module names/
+--regex-Rust=/^[ \t]*(pub[ \t]+)?static[ \t]+([a-zA-Z0-9_]+)/\2/c,consts,static constants/
+--regex-Rust=/^[ \t]*(pub[ \t]+)?trait[ \t]+([a-zA-Z0-9_]+)/\2/t,traits,traits/
+--regex-Rust=/^[ \t]*(pub[ \t]+)?impl([ \t\n]+<.*>)?[ \t]+([a-zA-Z0-9_]+)/\3/i,impls,trait implementations/
+--regex-Rust=/^[ \t]*macro_rules![ \t]+([a-zA-Z0-9_]+)/\1/d,macros,macro definitions/
new file mode 100644
--- /dev/null
+++ b/third_party/rust/memchr-1.0.2/session.vim
@@ -0,0 +1,1 @@
+au BufWritePost *.rs silent!make ctags > /dev/null 2>&1
copy from third_party/rust/memchr/src/lib.rs
copy to third_party/rust/memchr-1.0.2/src/lib.rs
--- a/third_party/rust/memchr/.cargo-checksum.json
+++ b/third_party/rust/memchr/.cargo-checksum.json
@@ -1,1 +1,1 @@
-{"files":{".travis.yml":"7c44542bc6662b5af4f15a98b2bcf052a57190922eb280bce93688785c31e323","COPYING":"01c266bced4a434da0051174d6bee16a4c82cf634e2679b6155d40d75012390f","Cargo.toml":"38694643a2d98dc66c1ea8583b9ee49210e8d92205835e43d9a8cc3216e680fe","LICENSE-MIT":"0f96a83840e146e43c0ec96a22ec1f392e0680e6c1226e6f3ba87e0740af850f","Makefile":"a45a128685a2ae7d4fa39d310786674417ee113055ef290a11f88002285865fc","README.md":"74e385c51a2402527a61a500d66e509fea97961f15bfffab85040064e576fe31","UNLICENSE":"7e12e5df4bae12cb21581ba157ced20e1986a0508dd10d0e8a4ab9a4cf94e85c","appveyor.yml":"b5c1a28f805854370f24e530df912764a9520f4581b33da090f44cec0eef181c","benches/bench.rs":"87cfb76154c3c322691201c6f5649b37665ed8bf1cf303bca971309a4eef6b61","ctags.rust":"3d128d3cc59f702e68953ba2fe6c3f46bc6991fc575308db060482d5da0c79f3","session.vim":"95cb1d7caf0ff7fbe76ec911988d908ddd883381c925ba64b537695bc9f021c4","src/lib.rs":"98c86c86fd996455d7ec94bdfdcedd3ded3b2a7016480d3474808cfe36d00a63"},"package":"148fab2e51b4f1cfc66da2a7c32981d1d3c083a803978268bb11fe4b86925e7a"}
\ No newline at end of file
+{"files":{".travis.yml":"7c44542bc6662b5af4f15a98b2bcf052a57190922eb280bce93688785c31e323","COPYING":"01c266bced4a434da0051174d6bee16a4c82cf634e2679b6155d40d75012390f","Cargo.toml":"b37fbdbb466bbb057f4d715381c45ab26e37cdc469b97d901abdcb4b44733fc1","LICENSE-MIT":"0f96a83840e146e43c0ec96a22ec1f392e0680e6c1226e6f3ba87e0740af850f","Makefile":"a45a128685a2ae7d4fa39d310786674417ee113055ef290a11f88002285865fc","README.md":"74e385c51a2402527a61a500d66e509fea97961f15bfffab85040064e576fe31","UNLICENSE":"7e12e5df4bae12cb21581ba157ced20e1986a0508dd10d0e8a4ab9a4cf94e85c","appveyor.yml":"b5c1a28f805854370f24e530df912764a9520f4581b33da090f44cec0eef181c","benches/bench.rs":"87cfb76154c3c322691201c6f5649b37665ed8bf1cf303bca971309a4eef6b61","ctags.rust":"3d128d3cc59f702e68953ba2fe6c3f46bc6991fc575308db060482d5da0c79f3","session.vim":"95cb1d7caf0ff7fbe76ec911988d908ddd883381c925ba64b537695bc9f021c4","src/lib.rs":"bd483dd7732610710f592861a77c733a321600267cf0a8237b5ac1b05d5e3c20"},"package":"796fba70e76612589ed2ce7f45282f5af869e0fdd7cc6199fa1aa1f1d591ba9d"}
\ No newline at end of file
--- a/third_party/rust/memchr/Cargo.toml
+++ b/third_party/rust/memchr/Cargo.toml
@@ -7,17 +7,17 @@
 #
 # If you believe there's an error in this file please file an
 # issue against the rust-lang/cargo repository. If you're
 # editing this file be aware that the upstream Cargo.toml
 # will likely look very different (and much more reasonable)
 
 [package]
 name = "memchr"
-version = "1.0.2"
+version = "2.0.1"
 authors = ["Andrew Gallant <jamslam@gmail.com>", "bluss"]
 description = "Safe interface to memchr."
 homepage = "https://github.com/BurntSushi/rust-memchr"
 documentation = "https://docs.rs/memchr/"
 readme = "README.md"
 keywords = ["memchr", "char", "scan", "strchr", "string"]
 license = "Unlicense/MIT"
 repository = "https://github.com/BurntSushi/rust-memchr"
@@ -27,17 +27,18 @@ opt-level = 3
 [lib]
 name = "memchr"
 bench = false
 [dependencies.libc]
 version = "0.2.18"
 optional = true
 default-features = false
 [dev-dependencies.quickcheck]
-version = "0.4.1"
+version = "0.5"
+default-features = false
 
 [features]
 default = ["use_std", "libc"]
 use_std = ["libc", "libc/use_std"]
 [badges.appveyor]
 repository = "BurntSushi/rust-memchr"
 
 [badges.travis-ci]
--- a/third_party/rust/memchr/src/lib.rs
+++ b/third_party/rust/memchr/src/lib.rs
@@ -1,28 +1,33 @@
 /*!
-This crate defines two functions, `memchr` and `memrchr`, which expose a safe interface
-to the corresponding functions in `libc`.
+This crate defines two functions, `memchr` and `memrchr`, which expose a safe
+interface to the corresponding functions in `libc`.
 */
 
 #![deny(missing_docs)]
 #![allow(unused_imports)]
+#![doc(html_root_url = "https://docs.rs/memchr/2.0.0")]
 
 #![cfg_attr(not(feature = "use_std"), no_std)]
 
 #[cfg(all(test, not(feature = "use_std")))]
 #[macro_use]
 extern crate std;
 
-#[cfg(feature = "libc")]
+#[cfg(all(feature = "libc", not(target_arch = "wasm32")))]
 extern crate libc;
 
-#[cfg(feature = "libc")]
+#[macro_use]
+#[cfg(test)]
+extern crate quickcheck;
+
+#[cfg(all(feature = "libc", not(target_arch = "wasm32")))]
 use libc::c_void;
-#[cfg(feature = "libc")]
+#[cfg(all(feature = "libc", not(target_arch = "wasm32")))]
 use libc::{c_int, size_t};
 
 #[cfg(feature = "use_std")]
 use std::cmp;
 #[cfg(not(feature = "use_std"))]
 use core::cmp;
 
 const LO_U64: u64 = 0x0101010101010101;
@@ -61,16 +66,46 @@ fn repeat_byte(b: u8) -> usize {
 #[inline]
 fn repeat_byte(b: u8) -> usize {
     let mut rep = (b as usize) << 8 | b as usize;
     rep = rep << 16 | rep;
     rep = rep << 32 | rep;
     rep
 }
 
+macro_rules! iter_next {
+    // Common code for the memchr iterators:
+    // update haystack and position and produce the index
+    //
+    // self: &mut Self where Self is the iterator
+    // search_result: Option<usize> which is the result of the corresponding
+    // memchr function.
+    //
+    // Returns Option<usize> (the next iterator element)
+    ($self_:expr, $search_result:expr) => {
+        $search_result.map(move |index| {
+            // split and take the remaining back half
+            $self_.haystack = $self_.haystack.split_at(index + 1).1;
+            let found_position = $self_.position + index;
+            $self_.position = found_position + 1;
+            found_position
+        })
+    }
+}
+
+macro_rules! iter_next_back {
+    ($self_:expr, $search_result:expr) => {
+        $search_result.map(move |index| {
+            // split and take the remaining front half
+            $self_.haystack = $self_.haystack.split_at(index).0;
+            $self_.position + index
+        })
+    }
+}
+
 /// An iterator for memchr
 pub struct Memchr<'a> {
     needle: u8,
     // The haystack to iterate over
     haystack: &'a [u8],
     // The index
     position: usize,
 }
@@ -85,40 +120,27 @@ impl<'a> Memchr<'a> {
         }
     }
 }
 
 impl<'a> Iterator for Memchr<'a> {
     type Item = usize;
 
     fn next(&mut self) -> Option<usize> {
-        let search_result = memchr(self.needle, &self.haystack);
-        match search_result {
-            Some(index) => {
-                // Move our internal position
-                self.haystack = self.haystack.split_at(index + 1).1;
-                self.position = self.position + index + 1;
-                Some(self.position)
-            }
-            None => None,
-        }
+        iter_next!(self, memchr(self.needle, &self.haystack))
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        (0, Some(self.haystack.len()))
     }
 }
 
 impl<'a> DoubleEndedIterator for Memchr<'a> {
     fn next_back(&mut self) -> Option<Self::Item> {
-        let search_result = memrchr(self.needle, &self.haystack);
-        match search_result {
-            Some(index) => {
-                // Move our internal position
-                self.haystack = self.haystack.split_at(index).0;
-                Some(self.position + index + 1)
-            }
-            None => None,
-        }
+        iter_next_back!(self, memrchr(self.needle, &self.haystack))
     }
 }
 
 /// A safe interface to `memchr`.
 ///
 /// Returns the index corresponding to the first occurrence of `needle` in
 /// `haystack`, or `None` if one is not found.
 ///
@@ -135,16 +157,17 @@ impl<'a> DoubleEndedIterator for Memchr<
 ///
 /// let haystack = b"the quick brown fox";
 /// assert_eq!(memchr(b'k', haystack), Some(8));
 /// ```
 #[inline(always)] // reduces constant overhead
 pub fn memchr(needle: u8, haystack: &[u8]) -> Option<usize> {
     // libc memchr
     #[cfg(all(feature = "libc",
+              not(target_arch = "wasm32"),
               any(not(target_os = "windows"),
                   not(any(target_pointer_width = "32",
                           target_pointer_width = "64")))))]
     #[inline(always)] // reduces constant overhead
     fn memchr_specific(needle: u8, haystack: &[u8]) -> Option<usize> {
         use libc::memchr as libc_memchr;
 
         let p = unsafe {
@@ -155,17 +178,18 @@ pub fn memchr(needle: u8, haystack: &[u8
         if p.is_null() {
             None
         } else {
             Some(p as usize - (haystack.as_ptr() as usize))
         }
     }
 
     // use fallback on windows, since it's faster
-    #[cfg(all(any(not(feature = "libc"), target_os = "windows"),
+    // use fallback on wasm32, since it doesn't have libc
+    #[cfg(all(any(not(feature = "libc"), target_os = "windows", target_arch = "wasm32"),
               any(target_pointer_width = "32",
                   target_pointer_width = "64")))]
     fn memchr_specific(needle: u8, haystack: &[u8]) -> Option<usize> {
         fallback::memchr(needle, haystack)
     }
 
     // For the rare case of neither 32 bit nor 64-bit platform.
     #[cfg(all(any(not(feature = "libc"), target_os = "windows"),
@@ -253,26 +277,21 @@ impl<'a> Memchr2<'a> {
         }
     }
 }
 
 impl<'a> Iterator for Memchr2<'a> {
     type Item = usize;
 
     fn next(&mut self) -> Option<usize> {
-        let search_result = memchr2(self.needle1, self.needle2, &self.haystack);
-        match search_result {
-            Some(index) => {
-                // Move our internal position
-                self.haystack = self.haystack.split_at(index + 1).1;
-                self.position = self.position + index + 1;
-                Some(self.position)
-            }
-            None => None,
-        }
+        iter_next!(self, memchr2(self.needle1, self.needle2, &self.haystack))
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        (0, Some(self.haystack.len()))
     }
 }
 
 
 /// Like `memchr`, but searches for two bytes instead of one.
 pub fn memchr2(needle1: u8, needle2: u8, haystack: &[u8]) -> Option<usize> {
     fn slow(b1: u8, b2: u8, haystack: &[u8]) -> Option<usize> {
         haystack.iter().position(|&b| b == b1 || b == b2)
@@ -314,46 +333,54 @@ pub struct Memchr3<'a> {
     // The haystack to iterate over
     haystack: &'a [u8],
     // The index
     position: usize,
 }
 
 impl<'a> Memchr3<'a> {
     /// Create a new Memchr2 that's initalized to zero with a haystack
-    pub fn new(needle1: u8, needle2: u8, needle3: u8, haystack: &[u8]) -> Memchr3 {
+    pub fn new(
+        needle1: u8,
+        needle2: u8,
+        needle3: u8,
+        haystack: &[u8],
+    ) -> Memchr3 {
         Memchr3 {
             needle1: needle1,
             needle2: needle2,
             needle3: needle3,
             haystack: haystack,
             position: 0,
         }
     }
 }
 
 impl<'a> Iterator for Memchr3<'a> {
     type Item = usize;
 
     fn next(&mut self) -> Option<usize> {
-        let search_result = memchr3(self.needle1, self.needle2, self.needle3, &self.haystack);
-        match search_result {
-            Some(index) => {
-                // Move our internal position
-                self.haystack = self.haystack.split_at(index + 1).1;
-                self.position = self.position + index + 1;
-                Some(self.position)
-            }
-            None => None,
-        }
+        iter_next!(
+            self,
+            memchr3(self.needle1, self.needle2, self.needle3, &self.haystack)
+        )
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        (0, Some(self.haystack.len()))
     }
 }
 
 /// Like `memchr`, but searches for three bytes instead of one.
-pub fn memchr3(needle1: u8, needle2: u8, needle3: u8, haystack: &[u8]) -> Option<usize> {
+pub fn memchr3(
+    needle1: u8,
+    needle2: u8,
+    needle3: u8,
+    haystack: &[u8],
+) -> Option<usize> {
     fn slow(b1: u8, b2: u8, b3: u8, haystack: &[u8]) -> Option<usize> {
         haystack.iter().position(|&b| b == b1 || b == b2 || b == b3)
     }
 
     let len = haystack.len();
     let ptr = haystack.as_ptr();
     let align = (ptr as usize) & (USIZE_BYTES - 1);
     let mut i = 0;
@@ -387,35 +414,39 @@ pub fn memchr3(needle1: u8, needle2: u8,
 #[cfg(any(test, not(feature = "libc"), all(not(target_os = "linux"),
           any(target_pointer_width = "32", target_pointer_width = "64"))))]
 mod fallback {
     #[cfg(feature = "use_std")]
     use std::cmp;
     #[cfg(not(feature = "use_std"))]
     use core::cmp;
 
-    use super::{LO_U64, HI_U64, LO_USIZE, HI_USIZE, USIZE_BYTES, contains_zero_byte, repeat_byte};
+    use super::{
+        LO_U64, HI_U64, LO_USIZE, HI_USIZE, USIZE_BYTES,
+        contains_zero_byte, repeat_byte,
+    };
 
     /// Return the first index matching the byte `a` in `text`.
     pub fn memchr(x: u8, text: &[u8]) -> Option<usize> {
         // Scan for a single byte value by reading two `usize` words at a time.
         //
         // Split `text` in three parts
-        // - unaligned inital part, before the first word aligned address in text
+        // - unaligned inital part, before first word aligned address in text
         // - body, scan by 2 words at a time
         // - the last remaining part, < 2 word size
         let len = text.len();
         let ptr = text.as_ptr();
 
         // search up to an aligned boundary
         let align = (ptr as usize) & (USIZE_BYTES - 1);
         let mut offset;
         if align > 0 {
             offset = cmp::min(USIZE_BYTES - align, len);
-            if let Some(index) = text[..offset].iter().position(|elt| *elt == x) {
+            let pos = text[..offset].iter().position(|elt| *elt == x);
+            if let Some(index) = pos {
                 return Some(index);
             }
         } else {
             offset = 0;
         }
 
         // search the body of the text
         let repeated_x = repeat_byte(x);
@@ -453,17 +484,18 @@ mod fallback {
         let len = text.len();
         let ptr = text.as_ptr();
 
         // search to an aligned boundary
         let end_align = (ptr as usize + len) & (USIZE_BYTES - 1);
         let mut offset;
         if end_align > 0 {
             offset = if end_align >= len { 0 } else { len - end_align };
-            if let Some(index) = text[offset..].iter().rposition(|elt| *elt == x) {
+            let pos = text[offset..].iter().rposition(|elt| *elt == x);
+            if let Some(index) = pos {
                 return Some(offset + index);
             }
         } else {
             offset = len;
         }
 
         // search the body of the text
         let repeated_x = repeat_byte(x);
@@ -486,26 +518,26 @@ mod fallback {
 
         // find the byte before the point the body loop stopped
         text[..offset].iter().rposition(|elt| *elt == x)
     }
 }
 
 #[cfg(test)]
 mod tests {
-    extern crate quickcheck;
     use std::prelude::v1::*;
+    use quickcheck;
 
     use super::{memchr, memrchr, memchr2, memchr3, Memchr, Memchr2, Memchr3};
     // Use a macro to test both native and fallback impls on all configurations
     macro_rules! memchr_tests {
         ($mod_name:ident, $memchr:path, $memrchr:path) => {
             mod $mod_name {
             use std::prelude::v1::*;
-            use super::quickcheck;
+            use quickcheck;
             #[test]
             fn matches_one() {
                 assert_eq!(Some(0), $memchr(b'a', b"a"));
             }
 
             #[test]
             fn matches_begin() {
                 assert_eq!(Some(0), $memchr(b'a', b"aaaa"));
@@ -594,17 +626,18 @@ mod tests {
                     let uoffset = (offset & 0xF) as usize;
                     let data = if uoffset <= v.len() {
                         &v[uoffset..]
                     } else {
                         &v[..]
                     };
                     for byte in 0..256u32 {
                         let byte = byte as u8;
-                        if $memchr(byte, &data) != data.iter().position(|elt| *elt == byte) {
+                        let pos = data.iter().position(|elt| *elt == byte);
+                        if $memchr(byte, &data) != pos {
                             return false;
                         }
                     }
                     true
                 }
                 quickcheck::quickcheck(prop as fn(Vec<u8>, u8) -> bool);
             }
 
@@ -615,17 +648,18 @@ mod tests {
                     let uoffset = (offset & 0xF) as usize;
                     let data = if uoffset <= v.len() {
                         &v[uoffset..]
                     } else {
                         &v[..]
                     };
                     for byte in 0..256u32 {
                         let byte = byte as u8;
-                        if $memrchr(byte, &data) != data.iter().rposition(|elt| *elt == byte) {
+                        let pos = data.iter().rposition(|elt| *elt == byte);
+                        if $memrchr(byte, &data) != pos {
                             return false;
                         }
                     }
                     true
                 }
                 quickcheck::quickcheck(prop as fn(Vec<u8>, u8) -> bool);
             }
             }
@@ -729,88 +763,137 @@ mod tests {
         assert_eq!(None, memchr3(b'c', b'b', b'a', b""));
     }
 
     #[test]
     fn memchr3_no_match() {
         assert_eq!(None, memchr3(b'a', b'b', b'c', b"xyz"));
     }
 
+    // return an iterator of the 0-based indices of haystack that match the
+    // needle
+    fn positions1<'a>(needle: u8, haystack: &'a [u8])
+        -> Box<DoubleEndedIterator<Item=usize> + 'a>
+    {
+        Box::new(haystack.iter()
+                         .enumerate()
+                         .filter(move |&(_, &elt)| elt == needle)
+                         .map(|t| t.0))
+    }
+
+    fn positions2<'a>(needle1: u8, needle2: u8, haystack: &'a [u8])
+        -> Box<DoubleEndedIterator<Item=usize> + 'a>
+    {
+        Box::new(haystack
+            .iter()
+            .enumerate()
+            .filter(move |&(_, &elt)| elt == needle1 || elt == needle2)
+            .map(|t| t.0))
+    }
+
+    fn positions3<'a>(
+        needle1: u8,
+        needle2: u8,
+        needle3: u8,
+        haystack: &'a [u8],
+    ) -> Box<DoubleEndedIterator<Item=usize> + 'a> {
+        Box::new(haystack
+            .iter()
+            .enumerate()
+            .filter(move |&(_, &elt)| {
+                elt == needle1 || elt == needle2 || elt == needle3
+            })
+            .map(|t| t.0))
+    }
+
     #[test]
     fn memchr_iter() {
         let haystack = b"aaaabaaaab";
         let mut memchr_iter = Memchr::new(b'b', haystack);
         let first = memchr_iter.next();
         let second = memchr_iter.next();
         let third = memchr_iter.next();
-        assert_eq!(Some(5), first);
-        assert_eq!(Some(10), second);
-        assert_eq!(None, third);
+
+        let mut answer_iter = positions1(b'b', haystack);
+        assert_eq!(answer_iter.next(), first);
+        assert_eq!(answer_iter.next(), second);
+        assert_eq!(answer_iter.next(), third);
     }
 
     #[test]
     fn memchr2_iter() {
-        let haystack = b"ab";
+        let haystack = b"axxb";
         let mut memchr_iter = Memchr2::new(b'a', b'b', haystack);
         let first = memchr_iter.next();
         let second = memchr_iter.next();
         let third = memchr_iter.next();
-        assert_eq!(Some(1), first);
-        assert_eq!(Some(2), second);
-        assert_eq!(None, third);
+
+        let mut answer_iter = positions2(b'a', b'b', haystack);
+        assert_eq!(answer_iter.next(), first);
+        assert_eq!(answer_iter.next(), second);
+        assert_eq!(answer_iter.next(), third);
     }
 
     #[test]
     fn memchr3_iter() {
-        let haystack = b"abc";
+        let haystack = b"axxbc";
         let mut memchr_iter = Memchr3::new(b'a', b'b', b'c', haystack);
         let first = memchr_iter.next();
         let second = memchr_iter.next();
         let third = memchr_iter.next();
         let fourth = memchr_iter.next();
-        assert_eq!(Some(1), first);
-        assert_eq!(Some(2), second);
-        assert_eq!(Some(3), third);
-        assert_eq!(None, fourth);
+
+        let mut answer_iter = positions3(b'a', b'b', b'c', haystack);
+        assert_eq!(answer_iter.next(), first);
+        assert_eq!(answer_iter.next(), second);
+        assert_eq!(answer_iter.next(), third);
+        assert_eq!(answer_iter.next(), fourth);
     }
 
     #[test]
     fn memchr_reverse_iter() {
         let haystack = b"aaaabaaaabaaaab";
         let mut memchr_iter = Memchr::new(b'b', haystack);
         let first = memchr_iter.next();
         let second = memchr_iter.next_back();
         let third = memchr_iter.next();
         let fourth = memchr_iter.next_back();
 
-        assert_eq!(Some(5), first);
-        assert_eq!(Some(15), second);
-        assert_eq!(Some(10), third);
-        assert_eq!(None, fourth);
+        let mut answer_iter = positions1(b'b', haystack);
+        assert_eq!(answer_iter.next(), first);
+        assert_eq!(answer_iter.next_back(), second);
+        assert_eq!(answer_iter.next(), third);
+        assert_eq!(answer_iter.next_back(), fourth);
     }
 
     #[test]
     fn memrchr_iter(){
         let haystack = b"aaaabaaaabaaaab";
         let mut memchr_iter = Memchr::new(b'b', haystack);
         let first = memchr_iter.next_back();
         let second = memchr_iter.next_back();
         let third = memchr_iter.next_back();
         let fourth = memchr_iter.next_back();
 
-        assert_eq!(Some(15), first);
-        assert_eq!(Some(10), second);
-        assert_eq!(Some(5), third);
-        assert_eq!(None, fourth);
+        let mut answer_iter = positions1(b'b', haystack);
+        assert_eq!(answer_iter.next_back(), first);
+        assert_eq!(answer_iter.next_back(), second);
+        assert_eq!(answer_iter.next_back(), third);
+        assert_eq!(answer_iter.next_back(), fourth);
 
     }
 
     #[test]
     fn qc_never_fail_memchr3() {
-        fn prop(needle1: u8, needle2: u8, needle3: u8, haystack: Vec<u8>) -> bool {
+        fn prop(
+            needle1: u8,
+            needle2: u8,
+            needle3: u8,
+            haystack: Vec<u8>,
+        ) -> bool {
             memchr3(needle1, needle2, needle3, &haystack);
             true
         }
         quickcheck::quickcheck(prop as fn(u8, u8, u8, Vec<u8>) -> bool);
     }
 
     #[test]
     fn qc_correct_memchr() {
@@ -819,17 +902,18 @@ mod tests {
             let uoffset = (offset & 0xF) as usize;
             let data = if uoffset <= v.len() {
                 &v[uoffset..]
             } else {
                 &v[..]
             };
             for byte in 0..256u32 {
                 let byte = byte as u8;
-                if memchr(byte, &data) != data.iter().position(|elt| *elt == byte) {
+                let pos = data.iter().position(|elt| *elt == byte);
+                if memchr(byte, &data) != pos {
                     return false;
                 }
             }
             true
         }
         quickcheck::quickcheck(prop as fn(Vec<u8>, u8) -> bool);
     }
 
@@ -840,17 +924,18 @@ mod tests {
             let uoffset = (offset & 0xF) as usize;
             let data = if uoffset <= v.len() {
                 &v[uoffset..]
             } else {
                 &v[..]
             };
             for byte in 0..256u32 {
                 let byte = byte as u8;
-                if memrchr(byte, &data) != data.iter().rposition(|elt| *elt == byte) {
+                let pos = data.iter().rposition(|elt| *elt == byte);
+                if memrchr(byte, &data) != pos {
                     return false;
                 }
             }
             true
         }
         quickcheck::quickcheck(prop as fn(Vec<u8>, u8) -> bool);
     }
 
@@ -862,20 +947,116 @@ mod tests {
             let data = if uoffset <= v.len() {
                 &v[uoffset..]
             } else {
                 &v[..]
             };
             for b1 in 0..256u32 {
                 for b2 in 0..256u32 {
                     let (b1, b2) = (b1 as u8, b2 as u8);
-                    let expected = data.iter().position(|&b| b == b1 || b == b2);
+                    let expected = data
+                        .iter()
+                        .position(|&b| b == b1 || b == b2);
                     let got = memchr2(b1, b2, &data);
                     if expected != got {
                         return false;
                     }
                 }
             }
             true
         }
         quickcheck::quickcheck(prop as fn(Vec<u8>, u8) -> bool);
     }
+
+    // take items from a DEI, taking front for each true and back for each
+    // false. Return a vector with the concatenation of the fronts and the
+    // reverse of the backs.
+    fn double_ended_take<I, J>(mut iter: I, take_side: J) -> Vec<I::Item>
+        where I: DoubleEndedIterator,
+              J: Iterator<Item=bool>,
+    {
+        let mut found_front = Vec::new();
+        let mut found_back = Vec::new();
+
+        for take_front in take_side {
+            if take_front {
+                if let Some(pos) = iter.next() {
+                    found_front.push(pos);
+                } else {
+                    break;
+                }
+            } else {
+                if let Some(pos) = iter.next_back() {
+                    found_back.push(pos);
+                } else {
+                    break;
+                }
+            };
+        }
+
+        let mut all_found = found_front;
+        all_found.extend(found_back.into_iter().rev());
+        all_found
+    }
+
+
+    quickcheck! {
+        fn qc_memchr_double_ended_iter(needle: u8, data: Vec<u8>,
+                                       take_side: Vec<bool>) -> bool
+        {
+            // make nonempty
+            let mut take_side = take_side;
+            if take_side.is_empty() { take_side.push(true) };
+
+            let iter = Memchr::new(needle, &data);
+            let all_found = double_ended_take(
+                iter, take_side.iter().cycle().cloned());
+
+            all_found.iter().cloned().eq(positions1(needle, &data))
+        }
+
+        fn qc_memchr1_iter(data: Vec<u8>) -> bool {
+            let needle = 0;
+            let answer = positions1(needle, &data);
+            answer.eq(Memchr::new(needle, &data))
+        }
+
+        fn qc_memchr1_rev_iter(data: Vec<u8>) -> bool {
+            let needle = 0;
+            let answer = positions1(needle, &data);
+            answer.rev().eq(Memchr::new(needle, &data).rev())
+        }
+
+        fn qc_memchr2_iter(data: Vec<u8>) -> bool {
+            let needle1 = 0;
+            let needle2 = 1;
+            let answer = positions2(needle1, needle2, &data);
+            answer.eq(Memchr2::new(needle1, needle2, &data))
+        }
+
+        fn qc_memchr3_iter(data: Vec<u8>) -> bool {
+            let needle1 = 0;
+            let needle2 = 1;
+            let needle3 = 2;
+            let answer = positions3(needle1, needle2, needle3, &data);
+            answer.eq(Memchr3::new(needle1, needle2, needle3, &data))
+        }
+
+        fn qc_memchr1_iter_size_hint(data: Vec<u8>) -> bool {
+            // test that the size hint is within reasonable bounds
+            let needle = 0;
+            let mut iter = Memchr::new(needle, &data);
+            let mut real_count = data
+                .iter()
+                .filter(|&&elt| elt == needle)
+                .count();
+
+            while let Some(index) = iter.next() {
+                real_count -= 1;
+                let (lower, upper) = iter.size_hint();
+                assert!(lower <= real_count);
+                assert!(upper.unwrap() >= real_count);
+                assert!(upper.unwrap() <= data.len() - index);
+            }
+            true
+        }
+    }
 }
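
As context for the iterator changes in the hunk above, a minimal sketch of the `Memchr` iterator as exercised by the rewritten tests: after this update it yields 0-based match positions and is double-ended (`next_back` walks matches from the end). The haystack and expected positions below are illustrative.

```rust
// Sketch of the Memchr iterator API from the hunk above: 0-based match
// positions, iterable from both ends via DoubleEndedIterator.
extern crate memchr;

use memchr::Memchr;

fn main() {
    let haystack = b"aaaabaaaab";
    let mut it = Memchr::new(b'b', haystack);

    assert_eq!(it.next(), Some(4));      // first 'b'
    assert_eq!(it.next_back(), Some(9)); // last 'b', taken from the back
    assert_eq!(it.next(), None);         // nothing left in between

    // Collecting all match positions at once.
    let all: Vec<usize> = Memchr::new(b'b', haystack).collect();
    assert_eq!(all, vec![4, 9]);
}
```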
copy from third_party/rust/regex/.cargo-checksum.json
copy to third_party/rust/regex-0.2.2/.cargo-checksum.json
copy from third_party/rust/regex/.travis.yml
copy to third_party/rust/regex-0.2.2/.travis.yml
copy from third_party/rust/regex/CHANGELOG.md
copy to third_party/rust/regex-0.2.2/CHANGELOG.md
copy from third_party/rust/regex/Cargo.toml
copy to third_party/rust/regex-0.2.2/Cargo.toml
copy from third_party/rust/regex/HACKING.md
copy to third_party/rust/regex-0.2.2/HACKING.md
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-0.2.2/LICENSE-APACHE
@@ -0,0 +1,201 @@
+                              Apache License
+                        Version 2.0, January 2004
+                     http://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+   "License" shall mean the terms and conditions for use, reproduction,
+   and distribution as defined by Sections 1 through 9 of this document.
+
+   "Licensor" shall mean the copyright owner or entity authorized by
+   the copyright owner that is granting the License.
+
+   "Legal Entity" shall mean the union of the acting entity and all
+   other entities that control, are controlled by, or are under common
+   control with that entity. For the purposes of this definition,
+   "control" means (i) the power, direct or indirect, to cause the
+   direction or management of such entity, whether by contract or
+   otherwise, or (ii) ownership of fifty percent (50%) or more of the
+   outstanding shares, or (iii) beneficial ownership of such entity.
+
+   "You" (or "Your") shall mean an individual or Legal Entity
+   exercising permissions granted by this License.
+
+   "Source" form shall mean the preferred form for making modifications,
+   including but not limited to software source code, documentation
+   source, and configuration files.
+
+   "Object" form shall mean any form resulting from mechanical
+   transformation or translation of a Source form, including but
+   not limited to compiled object code, generated documentation,
+   and conversions to other media types.
+
+   "Work" shall mean the work of authorship, whether in Source or
+   Object form, made available under the License, as indicated by a
+   copyright notice that is included in or attached to the work
+   (an example is provided in the Appendix below).
+
+   "Derivative Works" shall mean any work, whether in Source or Object
+   form, that is based on (or derived from) the Work and for which the
+   editorial revisions, annotations, elaborations, or other modifications
+   represent, as a whole, an original work of authorship. For the purposes
+   of this License, Derivative Works shall not include works that remain
+   separable from, or merely link (or bind by name) to the interfaces of,
+   the Work and Derivative Works thereof.
+
+   "Contribution" shall mean any work of authorship, including
+   the original version of the Work and any modifications or additions
+   to that Work or Derivative Works thereof, that is intentionally
+   submitted to Licensor for inclusion in the Work by the copyright owner
+   or by an individual or Legal Entity authorized to submit on behalf of
+   the copyright owner. For the purposes of this definition, "submitted"
+   means any form of electronic, verbal, or written communication sent
+   to the Licensor or its representatives, including but not limited to
+   communication on electronic mailing lists, source code control systems,
+   and issue tracking systems that are managed by, or on behalf of, the
+   Licensor for the purpose of discussing and improving the Work, but
+   excluding communication that is conspicuously marked or otherwise
+   designated in writing by the copyright owner as "Not a Contribution."
+
+   "Contributor" shall mean Licensor and any individual or Legal Entity
+   on behalf of whom a Contribution has been received by Licensor and
+   subsequently incorporated within the Work.
+
+2. Grant of Copyright License. Subject to the terms and conditions of
+   this License, each Contributor hereby grants to You a perpetual,
+   worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+   copyright license to reproduce, prepare Derivative Works of,
+   publicly display, publicly perform, sublicense, and distribute the
+   Work and such Derivative Works in Source or Object form.
+
+3. Grant of Patent License. Subject to the terms and conditions of
+   this License, each Contributor hereby grants to You a perpetual,
+   worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+   (except as stated in this section) patent license to make, have made,
+   use, offer to sell, sell, import, and otherwise transfer the Work,
+   where such license applies only to those patent claims licensable
+   by such Contributor that are necessarily infringed by their
+   Contribution(s) alone or by combination of their Contribution(s)
+   with the Work to which such Contribution(s) was submitted. If You
+   institute patent litigation against any entity (including a
+   cross-claim or counterclaim in a lawsuit) alleging that the Work
+   or a Contribution incorporated within the Work constitutes direct
+   or contributory patent infringement, then any patent licenses
+   granted to You under this License for that Work shall terminate
+   as of the date such litigation is filed.
+
+4. Redistribution. You may reproduce and distribute copies of the
+   Work or Derivative Works thereof in any medium, with or without
+   modifications, and in Source or Object form, provided that You
+   meet the following conditions:
+
+   (a) You must give any other recipients of the Work or
+       Derivative Works a copy of this License; and
+
+   (b) You must cause any modified files to carry prominent notices
+       stating that You changed the files; and
+
+   (c) You must retain, in the Source form of any Derivative Works
+       that You distribute, all copyright, patent, trademark, and
+       attribution notices from the Source form of the Work,
+       excluding those notices that do not pertain to any part of
+       the Derivative Works; and
+
+   (d) If the Work includes a "NOTICE" text file as part of its
+       distribution, then any Derivative Works that You distribute must
+       include a readable copy of the attribution notices contained
+       within such NOTICE file, excluding those notices that do not
+       pertain to any part of the Derivative Works, in at least one
+       of the following places: within a NOTICE text file distributed
+       as part of the Derivative Works; within the Source form or
+       documentation, if provided along with the Derivative Works; or,
+       within a display generated by the Derivative Works, if and
+       wherever such third-party notices normally appear. The contents
+       of the NOTICE file are for informational purposes only and
+       do not modify the License. You may add Your own attribution
+       notices within Derivative Works that You distribute, alongside
+       or as an addendum to the NOTICE text from the Work, provided
+       that such additional attribution notices cannot be construed
+       as modifying the License.
+
+   You may add Your own copyright statement to Your modifications and
+   may provide additional or different license terms and conditions
+   for use, reproduction, or distribution of Your modifications, or
+   for any such Derivative Works as a whole, provided Your use,
+   reproduction, and distribution of the Work otherwise complies with
+   the conditions stated in this License.
+
+5. Submission of Contributions. Unless You explicitly state otherwise,
+   any Contribution intentionally submitted for inclusion in the Work
+   by You to the Licensor shall be under the terms and conditions of
+   this License, without any additional terms or conditions.
+   Notwithstanding the above, nothing herein shall supersede or modify
+   the terms of any separate license agreement you may have executed
+   with Licensor regarding such Contributions.
+
+6. Trademarks. This License does not grant permission to use the trade
+   names, trademarks, service marks, or product names of the Licensor,
+   except as required for reasonable and customary use in describing the
+   origin of the Work and reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty. Unless required by applicable law or
+   agreed to in writing, Licensor provides the Work (and each
+   Contributor provides its Contributions) on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+   implied, including, without limitation, any warranties or conditions
+   of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+   PARTICULAR PURPOSE. You are solely responsible for determining the
+   appropriateness of using or redistributing the Work and assume any
+   risks associated with Your exercise of permissions under this License.
+
+8. Limitation of Liability. In no event and under no legal theory,
+   whether in tort (including negligence), contract, or otherwise,
+   unless required by applicable law (such as deliberate and grossly
+   negligent acts) or agreed to in writing, shall any Contributor be
+   liable to You for damages, including any direct, indirect, special,
+   incidental, or consequential damages of any character arising as a
+   result of this License or out of the use or inability to use the
+   Work (including but not limited to damages for loss of goodwill,
+   work stoppage, computer failure or malfunction, or any and all
+   other commercial damages or losses), even if such Contributor
+   has been advised of the possibility of such damages.
+
+9. Accepting Warranty or Additional Liability. While redistributing
+   the Work or Derivative Works thereof, You may choose to offer,
+   and charge a fee for, acceptance of support, warranty, indemnity,
+   or other liability obligations and/or rights consistent with this
+   License. However, in accepting such obligations, You may act only
+   on Your own behalf and on Your sole responsibility, not on behalf
+   of any other Contributor, and only if You agree to indemnify,
+   defend, and hold each Contributor harmless for any liability
+   incurred by, or claims asserted against, such Contributor by reason
+   of your accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
+
+APPENDIX: How to apply the Apache License to your work.
+
+   To apply the Apache License to your work, attach the following
+   boilerplate notice, with the fields enclosed by brackets "[]"
+   replaced with your own identifying information. (Don't include
+   the brackets!)  The text should be enclosed in the appropriate
+   comment syntax for the file format. We also recommend that a
+   file or class name and description of purpose be included on the
+   same "printed page" as the copyright notice for easier
+   identification within third-party archives.
+
+Copyright [yyyy] [name of copyright owner]
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+	http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-0.2.2/LICENSE-MIT
@@ -0,0 +1,25 @@
+Copyright (c) 2014 The Rust Project Developers
+
+Permission is hereby granted, free of charge, to any
+person obtaining a copy of this software and associated
+documentation files (the "Software"), to deal in the
+Software without restriction, including without
+limitation the rights to use, copy, modify, merge,
+publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software
+is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice
+shall be included in all copies or substantial portions
+of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
+ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
copy from third_party/rust/regex/PERFORMANCE.md
copy to third_party/rust/regex-0.2.2/PERFORMANCE.md
copy from third_party/rust/regex/README.md
copy to third_party/rust/regex-0.2.2/README.md
copy from third_party/rust/regex/appveyor.yml
copy to third_party/rust/regex-0.2.2/appveyor.yml
copy from third_party/rust/regex/ci/after_success.sh
copy to third_party/rust/regex-0.2.2/ci/after_success.sh
copy from third_party/rust/regex/ci/run-kcov
copy to third_party/rust/regex-0.2.2/ci/run-kcov
new file mode 100755
--- /dev/null
+++ b/third_party/rust/regex-0.2.2/ci/run-shootout-test
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+set -e
+
+cargo build --example shootout-regex-dna
+diff \
+  ./examples/regexdna-output.txt \
+  <(./target/debug/examples/shootout-regex-dna < ./examples/regexdna-input.txt)
+
+cargo build --example shootout-regex-dna-single
+diff \
+  ./examples/regexdna-output.txt \
+  <(./target/debug/examples/shootout-regex-dna-single < ./examples/regexdna-input.txt)
+
+cargo build --example shootout-regex-dna-cheat
+diff \
+  ./examples/regexdna-output.txt \
+  <(./target/debug/examples/shootout-regex-dna-cheat < ./examples/regexdna-input.txt)
copy from third_party/rust/regex/ci/script.sh
copy to third_party/rust/regex-0.2.2/ci/script.sh
rename from third_party/rust/regex/examples/bug347.rs
rename to third_party/rust/regex-0.2.2/examples/bug347.rs
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-0.2.2/examples/regexdna-input.txt
@@ -0,0 +1,1671 @@
+>ONE Homo sapiens alu
+GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGA
+TCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACT
+AAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAG
+GCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCG
+CCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGT
+GGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCA
+GGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAA
+TTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAG
+AATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCA
+GCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGT
+AATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACC
+AGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTG
+GTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACC
+CGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAG
+AGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTT
+TGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACA
+TGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCT
+GTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGG
+TTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGT
+CTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGG
+CGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCG
+TCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTA
+CTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCG
+AGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCG
+GGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACC
+TGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAA
+TACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGA
+GGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACT
+GCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTC
+ACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGT
+TCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGC
+CGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCG
+CTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTG
+GGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCC
+CAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCT
+GGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGC
+GCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGA
+GGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGA
+GACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGA
+GGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTG
+AAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAAT
+CCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCA
+GTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAA
+AAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGC
+GGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCT
+ACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGG
+GAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATC
+GCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGC
+GGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGG
+TCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAA
+AAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAG
+GAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACT
+CCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCC
+TGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAG
+ACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGC
+GTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGA
+ACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGA
+CAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCA
+CTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCA
+ACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCG
+CCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGG
+AGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTC
+CGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCG
+AGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACC
+CCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAG
+CTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAG
+CCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGG
+CCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATC
+ACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAA
+AAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGC
+TGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCC
+ACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGG
+CTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGG
+AGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATT
+AGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAA
+TCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGC
+CTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAA
+TCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAG
+CCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGT
+GGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCG
+GGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAG
+CGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTG
+GGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATG
+GTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGT
+AATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTT
+GCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCT
+CAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCG
+GGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTC
+TCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACT
+CGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAG
+ATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGG
+CGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTG
+AGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATA
+CAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGG
+CAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGC
+ACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCAC
+GCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTC
+GAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCG
+GGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCT
+TGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGG
+CGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCA
+GCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGG
+CCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGC
+GCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGG
+CGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGA
+CTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGG
+CCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAA
+ACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCC
+CAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGT
+GAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAA
+AGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGG
+ATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTAC
+TAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGA
+GGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGC
+GCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGG
+TGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTC
+AGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAA
+ATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGA
+GAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC
+AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTG
+TAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGAC
+CAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGT
+GGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAAC
+CCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACA
+GAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACT
+TTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAAC
+ATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCC
+TGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAG
+GTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCG
+TCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAG
+GCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCC
+GTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCT
+ACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCC
+GAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCC
+GGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCAC
+CTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAA
+ATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTG
+AGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCAC
+TGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCT
+CACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAG
+TTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAG
+CCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATC
+GCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCT
+GGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATC
+CCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCC
+TGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGG
+CGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG
+AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCG
+AGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGG
+AGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGT
+GAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAA
+TCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGC
+AGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCA
+AAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGG
+CGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTC
+TACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCG
+GGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGAT
+CGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCG
+CGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAG
+GTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACA
+AAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCA
+GGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCAC
+TCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGC
+CTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGA
+GACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGG
+CGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTG
+AACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCG
+ACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGC
+ACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCC
+AACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGC
+GCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCG
+GAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACT
+CCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCC
+GAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAAC
+CCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA
+GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGA
+GCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAG
+GCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGAT
+CACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTA
+AAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGG
+CTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGC
+CACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTG
+GCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAG
+GAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAAT
+TAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGA
+ATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAG
+CCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTA
+ATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCA
+GCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGG
+TGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCC
+GGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGA
+GCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTT
+GGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACAT
+GGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTG
+TAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGT
+TGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTC
+TCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGC
+GGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGT
+CTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTAC
+TCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGA
+GATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGG
+GCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCT
+GAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT
+ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAG
+GCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTG
+CACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCA
+CGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTT
+CGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCC
+GGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGC
+TTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGG
+GCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCC
+AGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTG
+GCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCG
+CGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAG
+GCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAG
+ACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAG
+GCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGA
+AACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATC
+CCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAG
+TGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAA
+AAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCG
+GATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTA
+CTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGG
+AGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCG
+CGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCG
+GTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGT
+CAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAA
+AATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGG
+AGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTC
+CAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCT
+GTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA
+CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCG
+TGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAA
+CCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGAC
+AGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCAC
+TTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAA
+CATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGC
+CTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGA
+GGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCC
+GTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGA
+GGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCC
+CGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGC
+TACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGC
+CGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGC
+CGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCA
+CCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAA
+AATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCT
+GAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCA
+CTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGC
+TCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGA
+GTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTA
+GCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAAT
+CGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCC
+TGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAAT
+CCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGC
+CTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTG
+GCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGG
+GAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGC
+GAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG
+GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGG
+TGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTA
+ATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTG
+CAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTC
+AAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGG
+GCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCT
+CTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTC
+GGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGA
+TCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGC
+GCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGA
+GGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATAC
+AAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGC
+AGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCA
+CTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACG
+CCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCG
+AGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGG
+GCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTT
+GAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGC
+GACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAG
+CACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGC
+CAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCG
+CGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGC
+GGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGAC
+TCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGC
+CGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAA
+CCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCC
+AGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTG
+AGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA
+GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGA
+TCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACT
+AAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAG
+GCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCG
+CCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGT
+GGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCA
+GGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAA
+TTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAG
+AATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCA
+GCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGT
+AATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACC
+AGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTG
+GTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACC
+CGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAG
+AGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTT
+TGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACA
+TGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCT
+GTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGG
+TTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGT
+CTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGG
+CGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCG
+TCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTA
+CTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCG
+AGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCG
+GGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACC
+TGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAA
+TACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGA
+GGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACT
+GCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTC
+ACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGT
+TCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGC
+CGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCG
+CTTGAACCCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTG
+GGCGACAGAGCGAGACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCC
+CAGCACTTTGGGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCT
+GGCCAACATGGTGAAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGC
+GCGCGCCTGTAATCCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGA
+GGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGA
+GACTCCGTCTCAAAAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGA
+GGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTG
+AAACCCCGTCTCTACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAAT
+CCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGGAGGCGGAGGTTGCA
+GTGAGCCGAGATCGCGCCACTGCACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAA
+AAAGGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGC
+GGATCACCTGAGGTCAGGAGTTCGAGACCAGCCTGGCCAACATGGTGAAACCCCGTCTCT
+ACTAAAAATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCAGCTACTCGG
+GAGGCTGAGGCAGGAGAATC
+>TWO IUB ambiguity codes
+cttBtatcatatgctaKggNcataaaSatgtaaaDcDRtBggDtctttataattcBgtcg
+tactDtDagcctatttSVHtHttKtgtHMaSattgWaHKHttttagacatWatgtRgaaa
+NtactMcSMtYtcMgRtacttctWBacgaaatatagScDtttgaagacacatagtVgYgt
+cattHWtMMWcStgttaggKtSgaYaaccWStcgBttgcgaMttBYatcWtgacaYcaga
+gtaBDtRacttttcWatMttDBcatWtatcttactaBgaYtcttgttttttttYaaScYa
+HgtgttNtSatcMtcVaaaStccRcctDaataataStcYtRDSaMtDttgttSagtRRca
+tttHatSttMtWgtcgtatSSagactYaaattcaMtWatttaSgYttaRgKaRtccactt
+tattRggaMcDaWaWagttttgacatgttctacaaaRaatataataaMttcgDacgaSSt
+acaStYRctVaNMtMgtaggcKatcttttattaaaaagVWaHKYagtttttatttaacct
+tacgtVtcVaattVMBcttaMtttaStgacttagattWWacVtgWYagWVRctDattBYt
+gtttaagaagattattgacVatMaacattVctgtBSgaVtgWWggaKHaatKWcBScSWa
+accRVacacaaactaccScattRatatKVtactatatttHttaagtttSKtRtacaaagt
+RDttcaaaaWgcacatWaDgtDKacgaacaattacaRNWaatHtttStgttattaaMtgt
+tgDcgtMgcatBtgcttcgcgaDWgagctgcgaggggVtaaScNatttacttaatgacag
+cccccacatYScaMgtaggtYaNgttctgaMaacNaMRaacaaacaKctacatagYWctg
+ttWaaataaaataRattagHacacaagcgKatacBttRttaagtatttccgatctHSaat
+actcNttMaagtattMtgRtgaMgcataatHcMtaBSaRattagttgatHtMttaaKagg
+YtaaBataSaVatactWtataVWgKgttaaaacagtgcgRatatacatVtHRtVYataSa
+KtWaStVcNKHKttactatccctcatgWHatWaRcttactaggatctataDtDHBttata
+aaaHgtacVtagaYttYaKcctattcttcttaataNDaaggaaaDYgcggctaaWSctBa
+aNtgctggMBaKctaMVKagBaactaWaDaMaccYVtNtaHtVWtKgRtcaaNtYaNacg
+gtttNattgVtttctgtBaWgtaattcaagtcaVWtactNggattctttaYtaaagccgc
+tcttagHVggaYtgtNcDaVagctctctKgacgtatagYcctRYHDtgBattDaaDgccK
+tcHaaStttMcctagtattgcRgWBaVatHaaaataYtgtttagMDMRtaataaggatMt
+ttctWgtNtgtgaaaaMaatatRtttMtDgHHtgtcattttcWattRSHcVagaagtacg
+ggtaKVattKYagactNaatgtttgKMMgYNtcccgSKttctaStatatNVataYHgtNa
+BKRgNacaactgatttcctttaNcgatttctctataScaHtataRagtcRVttacDSDtt
+aRtSatacHgtSKacYagttMHtWataggatgactNtatSaNctataVtttRNKtgRacc
+tttYtatgttactttttcctttaaacatacaHactMacacggtWataMtBVacRaSaatc
+cgtaBVttccagccBcttaRKtgtgcctttttRtgtcagcRttKtaaacKtaaatctcac
+aattgcaNtSBaaccgggttattaaBcKatDagttactcttcattVtttHaaggctKKga
+tacatcBggScagtVcacattttgaHaDSgHatRMaHWggtatatRgccDttcgtatcga
+aacaHtaagttaRatgaVacttagattVKtaaYttaaatcaNatccRttRRaMScNaaaD
+gttVHWgtcHaaHgacVaWtgttScactaagSgttatcttagggDtaccagWattWtRtg
+ttHWHacgattBtgVcaYatcggttgagKcWtKKcaVtgaYgWctgYggVctgtHgaNcV
+taBtWaaYatcDRaaRtSctgaHaYRttagatMatgcatttNattaDttaattgttctaa
+ccctcccctagaWBtttHtBccttagaVaatMcBHagaVcWcagBVttcBtaYMccagat
+gaaaaHctctaacgttagNWRtcggattNatcRaNHttcagtKttttgWatWttcSaNgg
+gaWtactKKMaacatKatacNattgctWtatctaVgagctatgtRaHtYcWcttagccaa
+tYttWttaWSSttaHcaaaaagVacVgtaVaRMgattaVcDactttcHHggHRtgNcctt
+tYatcatKgctcctctatVcaaaaKaaaagtatatctgMtWtaaaacaStttMtcgactt
+taSatcgDataaactaaacaagtaaVctaggaSccaatMVtaaSKNVattttgHccatca
+cBVctgcaVatVttRtactgtVcaattHgtaaattaaattttYtatattaaRSgYtgBag
+aHSBDgtagcacRHtYcBgtcacttacactaYcgctWtattgSHtSatcataaatataHt
+cgtYaaMNgBaatttaRgaMaatatttBtttaaaHHKaatctgatWatYaacttMctctt
+ttVctagctDaaagtaVaKaKRtaacBgtatccaaccactHHaagaagaaggaNaaatBW
+attccgStaMSaMatBttgcatgRSacgttVVtaaDMtcSgVatWcaSatcttttVatag
+ttactttacgatcaccNtaDVgSRcgVcgtgaacgaNtaNatatagtHtMgtHcMtagaa
+attBgtataRaaaacaYKgtRccYtatgaagtaataKgtaaMttgaaRVatgcagaKStc
+tHNaaatctBBtcttaYaBWHgtVtgacagcaRcataWctcaBcYacYgatDgtDHccta
+aagacYRcaggattHaYgtKtaatgcVcaataMYacccatatcacgWDBtgaatcBaata
+cKcttRaRtgatgaBDacggtaattaaYtataStgVHDtDctgactcaaatKtacaatgc
+gYatBtRaDatHaactgtttatatDttttaaaKVccYcaaccNcBcgHaaVcattHctcg
+attaaatBtatgcaaaaatYMctSactHatacgaWacattacMBgHttcgaatVaaaaca
+BatatVtctgaaaaWtctRacgBMaatSgRgtgtcgactatcRtattaScctaStagKga
+DcWgtYtDDWKRgRtHatRtggtcgaHgggcgtattaMgtcagccaBggWVcWctVaaat
+tcgNaatcKWagcNaHtgaaaSaaagctcYctttRVtaaaatNtataaccKtaRgtttaM
+tgtKaBtRtNaggaSattHatatWactcagtgtactaKctatttgRYYatKatgtccgtR
+tttttatttaatatVgKtttgtatgtNtataRatWYNgtRtHggtaaKaYtKSDcatcKg
+taaYatcSRctaVtSMWtVtRWHatttagataDtVggacagVcgKWagBgatBtaaagNc
+aRtagcataBggactaacacRctKgttaatcctHgDgttKHHagttgttaatgHBtatHc
+DaagtVaBaRccctVgtgDtacRHSctaagagcggWYaBtSaKtHBtaaactYacgNKBa
+VYgtaacttagtVttcttaatgtBtatMtMtttaattaatBWccatRtttcatagVgMMt
+agctStKctaMactacDNYgKYHgaWcgaHgagattacVgtttgtRaSttaWaVgataat
+gtgtYtaStattattMtNgWtgttKaccaatagNYttattcgtatHcWtctaaaNVYKKt
+tWtggcDtcgaagtNcagatacgcattaagaccWctgcagcttggNSgaNcHggatgtVt
+catNtRaaBNcHVagagaaBtaaSggDaatWaatRccaVgggStctDaacataKttKatt
+tggacYtattcSatcttagcaatgaVBMcttDattctYaaRgatgcattttNgVHtKcYR
+aatRKctgtaaacRatVSagctgtWacBtKVatctgttttKcgtctaaDcaagtatcSat
+aWVgcKKataWaYttcccSaatgaaaacccWgcRctWatNcWtBRttYaattataaNgac
+acaatagtttVNtataNaYtaatRaVWKtBatKagtaatataDaNaaaaataMtaagaaS
+tccBcaatNgaataWtHaNactgtcDtRcYaaVaaaaaDgtttRatctatgHtgttKtga
+aNSgatactttcgagWaaatctKaaDaRttgtggKKagcDgataaattgSaacWaVtaNM
+acKtcaDaaatttctRaaVcagNacaScRBatatctRatcctaNatWgRtcDcSaWSgtt
+RtKaRtMtKaatgttBHcYaaBtgatSgaSWaScMgatNtctcctatttctYtatMatMt
+RRtSaattaMtagaaaaStcgVgRttSVaScagtgDtttatcatcatacRcatatDctta
+tcatVRtttataaHtattcYtcaaaatactttgVctagtaaYttagatagtSYacKaaac
+gaaKtaaatagataatSatatgaaatSgKtaatVtttatcctgKHaatHattagaaccgt
+YaaHactRcggSBNgtgctaaBagBttgtRttaaattYtVRaaaattgtaatVatttctc
+ttcatgBcVgtgKgaHaaatattYatagWacNctgaaMcgaattStagWaSgtaaKagtt
+ttaagaDgatKcctgtaHtcatggKttVDatcaaggtYcgccagNgtgcVttttagagat
+gctaccacggggtNttttaSHaNtatNcctcatSaaVgtactgBHtagcaYggYVKNgta
+KBcRttgaWatgaatVtagtcgattYgatgtaatttacDacSctgctaaaStttaWMagD
+aaatcaVYctccgggcgaVtaaWtStaKMgDtttcaaMtVgBaatccagNaaatcYRMBg
+gttWtaaScKttMWtYataRaDBMaDataatHBcacDaaKDactaMgagttDattaHatH
+taYatDtattDcRNStgaatattSDttggtattaaNSYacttcDMgYgBatWtaMagact
+VWttctttgYMaYaacRgHWaattgRtaagcattctMKVStatactacHVtatgatcBtV
+NataaBttYtSttacKgggWgYDtgaVtYgatDaacattYgatggtRDaVDttNactaSa
+MtgNttaacaaSaBStcDctaccacagacgcaHatMataWKYtaYattMcaMtgSttDag
+cHacgatcaHttYaKHggagttccgatYcaatgatRaVRcaagatcagtatggScctata
+ttaNtagcgacgtgKaaWaactSgagtMYtcttccaKtStaacggMtaagNttattatcg
+tctaRcactctctDtaacWYtgaYaSaagaWtNtatttRacatgNaatgttattgWDDcN
+aHcctgaaHacSgaataaRaataMHttatMtgaSDSKatatHHaNtacagtccaYatWtc
+actaactatKDacSaStcggataHgYatagKtaatKagStaNgtatactatggRHacttg
+tattatgtDVagDVaRctacMYattDgtttYgtctatggtKaRSttRccRtaaccttaga
+gRatagSaaMaacgcaNtatgaaatcaRaagataatagatactcHaaYKBctccaagaRa
+BaStNagataggcgaatgaMtagaatgtcaKttaaatgtaWcaBttaatRcggtgNcaca
+aKtttScRtWtgcatagtttWYaagBttDKgcctttatMggNttattBtctagVtacata
+aaYttacacaaRttcYtWttgHcaYYtaMgBaBatctNgcDtNttacgacDcgataaSat
+YaSttWtcctatKaatgcagHaVaacgctgcatDtgttaSataaaaYSNttatagtaNYt
+aDaaaNtggggacttaBggcHgcgtNtaaMcctggtVtaKcgNacNtatVaSWctWtgaW
+cggNaBagctctgaYataMgaagatBSttctatacttgtgtKtaattttRagtDtacata
+tatatgatNHVgBMtKtaKaNttDHaagatactHaccHtcatttaaagttVaMcNgHata
+tKtaNtgYMccttatcaaNagctggacStttcNtggcaVtattactHaSttatgNMVatt
+MMDtMactattattgWMSgtHBttStStgatatRaDaagattttctatMtaaaaaggtac
+taaVttaSacNaatactgMttgacHaHRttgMacaaaatagttaatatWKRgacDgaRta
+tatttattatcYttaWtgtBRtWatgHaaattHataagtVaDtWaVaWtgStcgtMSgaS
+RgMKtaaataVacataatgtaSaatttagtcgaaHtaKaatgcacatcggRaggSKctDc
+agtcSttcccStYtccRtctctYtcaaKcgagtaMttttcRaYDttgttatctaatcata
+NctctgctatcaMatactataggDaHaaSttMtaDtcNatataattctMcStaaBYtaNa
+gatgtaatHagagSttgWHVcttatKaYgDctcttggtgttMcRaVgSgggtagacaata
+aDtaattSaDaNaHaBctattgNtaccaaRgaVtKNtaaYggHtaKKgHcatctWtctDt
+ttctttggSDtNtaStagttataaacaattgcaBaBWggHgcaaaBtYgctaatgaaatW
+cDcttHtcMtWWattBHatcatcaaatctKMagtDNatttWaBtHaaaNgMttaaStagt
+tctctaatDtcRVaYttgttMtRtgtcaSaaYVgSWDRtaatagctcagDgcWWaaaBaa
+RaBctgVgggNgDWStNaNBKcBctaaKtttDcttBaaggBttgaccatgaaaNgttttt
+tttatctatgttataccaaDRaaSagtaVtDtcaWatBtacattaWacttaSgtattggD
+gKaaatScaattacgWcagKHaaccaYcRcaRttaDttRtttHgaHVggcttBaRgtccc
+tDatKaVtKtcRgYtaKttacgtatBtStaagcaattaagaRgBagSaattccSWYttta
+ttVaataNctgHgttaaNBgcVYgtRtcccagWNaaaacaDNaBcaaaaRVtcWMgBagM
+tttattacgDacttBtactatcattggaaatVccggttRttcatagttVYcatYaSHaHc
+ttaaagcNWaHataaaRWtctVtRYtagHtaaaYMataHYtNBctNtKaatattStgaMc
+BtRgctaKtgcScSttDgYatcVtggaaKtaagatWccHccgKYctaNNctacaWctttt
+gcRtgtVcgaKttcMRHgctaHtVaataaDtatgKDcttatBtDttggNtacttttMtga
+acRattaaNagaactcaaaBBVtcDtcgaStaDctgaaaSgttMaDtcgttcaccaaaag
+gWtcKcgSMtcDtatgtttStaaBtatagDcatYatWtaaaBacaKgcaDatgRggaaYc
+taRtccagattDaWtttggacBaVcHtHtaacDacYgtaatataMagaatgHMatcttat
+acgtatttttatattacHactgttataMgStYaattYaccaattgagtcaaattaYtgta
+tcatgMcaDcgggtcttDtKgcatgWRtataatatRacacNRBttcHtBgcRttgtgcgt
+catacMtttBctatctBaatcattMttMYgattaaVYatgDaatVagtattDacaacDMa
+tcMtHcccataagatgBggaccattVWtRtSacatgctcaaggggYtttDtaaNgNtaaB
+atggaatgtctRtaBgBtcNYatatNRtagaacMgagSaSDDSaDcctRagtVWSHtVSR
+ggaacaBVaccgtttaStagaacaMtactccagtttVctaaRaaHttNcttagcaattta
+ttaatRtaaaatctaacDaBttggSagagctacHtaaRWgattcaaBtctRtSHaNtgta
+cattVcaHaNaagtataccacaWtaRtaaVKgMYaWgttaKggKMtKcgWatcaDatYtK
+SttgtacgaccNctSaattcDcatcttcaaaDKttacHtggttHggRRaRcaWacaMtBW
+VHSHgaaMcKattgtaRWttScNattBBatYtaNRgcggaagacHSaattRtttcYgacc
+BRccMacccKgatgaacttcgDgHcaaaaaRtatatDtatYVtttttHgSHaSaatagct
+NYtaHYaVYttattNtttgaaaYtaKttWtctaNtgagaaaNctNDctaaHgttagDcRt
+tatagccBaacgcaRBtRctRtggtaMYYttWtgataatcgaataattattataVaaaaa
+ttacNRVYcaaMacNatRttcKatMctgaagactaattataaYgcKcaSYaatMNctcaa
+cgtgatttttBacNtgatDccaattattKWWcattttatatatgatBcDtaaaagttgaa
+VtaHtaHHtBtataRBgtgDtaataMttRtDgDcttattNtggtctatctaaBcatctaR
+atgNacWtaatgaagtcMNaacNgHttatactaWgcNtaStaRgttaaHacccgaYStac
+aaaatWggaYaWgaattattcMaactcBKaaaRVNcaNRDcYcgaBctKaacaaaaaSgc
+tccYBBHYaVagaatagaaaacagYtctVccaMtcgtttVatcaatttDRtgWctagtac
+RttMctgtDctttcKtWttttataaatgVttgBKtgtKWDaWagMtaaagaaattDVtag
+gttacatcatttatgtcgMHaVcttaBtVRtcgtaYgBRHatttHgaBcKaYWaatcNSc
+tagtaaaaatttacaatcactSWacgtaatgKttWattagttttNaggtctcaagtcact
+attcttctaagKggaataMgtttcataagataaaaatagattatDgcBVHWgaBKttDgc
+atRHaagcaYcRaattattatgtMatatattgHDtcaDtcaaaHctStattaatHaccga
+cNattgatatattttgtgtDtRatagSacaMtcRtcattcccgacacSattgttKaWatt
+NHcaacttccgtttSRtgtctgDcgctcaaMagVtBctBMcMcWtgtaacgactctcttR
+ggRKSttgYtYatDccagttDgaKccacgVatWcataVaaagaataMgtgataaKYaaat
+cHDaacgataYctRtcYatcgcaMgtNttaBttttgatttaRtStgcaacaaaataccVg
+aaDgtVgDcStctatatttattaaaaRKDatagaaagaKaaYYcaYSgKStctccSttac
+agtcNactttDVttagaaagMHttRaNcSaRaMgBttattggtttaRMggatggcKDgWR
+tNaataataWKKacttcKWaaagNaBttaBatMHtccattaacttccccYtcBcYRtaga
+ttaagctaaYBDttaNtgaaaccHcaRMtKtaaHMcNBttaNaNcVcgVttWNtDaBatg
+ataaVtcWKcttRggWatcattgaRagHgaattNtatttctctattaattaatgaDaaMa
+tacgttgggcHaYVaaNaDDttHtcaaHtcVVDgBVagcMacgtgttaaBRNtatRtcag
+taagaggtttaagacaVaaggttaWatctccgtVtaDtcDatttccVatgtacNtttccg
+tHttatKgScBatgtVgHtYcWagcaKtaMYaaHgtaattaSaHcgcagtWNaatNccNN
+YcacgVaagaRacttctcattcccRtgtgtaattagcSttaaStWaMtctNNcSMacatt
+ataaactaDgtatWgtagtttaagaaaattgtagtNagtcaataaatttgatMMYactaa
+tatcggBWDtVcYttcDHtVttatacYaRgaMaacaStaatcRttttVtagaDtcacWat
+ttWtgaaaagaaagNRacDtttStVatBaDNtaactatatcBSMcccaSttccggaMatg
+attaaWatKMaBaBatttgataNctgttKtVaagtcagScgaaaDggaWgtgttttKtWt
+atttHaatgtagttcactaaKMagttSYBtKtaYgaactcagagRtatagtVtatcaaaW
+YagcgNtaDagtacNSaaYDgatBgtcgataacYDtaaactacagWDcYKaagtttatta
+gcatcgagttKcatDaattgattatDtcagRtWSKtcgNtMaaaaacaMttKcaWcaaSV
+MaaaccagMVtaMaDtMaHaBgaacataBBVtaatVYaNSWcSgNtDNaaKacacBttta
+tKtgtttcaaHaMctcagtaacgtcgYtactDcgcctaNgagagcYgatattttaaattt
+ccattttacatttDaaRctattttWctttacgtDatYtttcagacgcaaVttagtaaKaa
+aRtgVtccataBggacttatttgtttaWNtgttVWtaWNVDaattgtatttBaagcBtaa
+BttaaVatcHcaVgacattccNggtcgacKttaaaRtagRtctWagaYggtgMtataatM
+tgaaRttattttgWcttNtDRRgMDKacagaaaaggaaaRStcccagtYccVattaNaaK
+StNWtgacaVtagaagcttSaaDtcacaacgDYacWDYtgtttKatcVtgcMaDaSKStV
+cgtagaaWaKaagtttcHaHgMgMtctataagBtKaaaKKcactggagRRttaagaBaaN
+atVVcgRcKSttDaactagtSttSattgttgaaRYatggttVttaataaHttccaagDtg
+atNWtaagHtgcYtaactRgcaatgMgtgtRaatRaNaacHKtagactactggaatttcg
+ccataacgMctRgatgttaccctaHgtgWaYcactcacYaattcttaBtgacttaaacct
+gYgaWatgBttcttVttcgttWttMcNYgtaaaatctYgMgaaattacNgaHgaacDVVM
+tttggtHtctaaRgtacagacgHtVtaBMNBgattagcttaRcttacaHcRctgttcaaD
+BggttKaacatgKtttYataVaNattccgMcgcgtagtRaVVaattaKaatggttRgaMc
+agtatcWBttNtHagctaatctagaaNaaacaYBctatcgcVctBtgcaaagDgttVtga
+HtactSNYtaaNccatgtgDacgaVtDcgKaRtacDcttgctaagggcagMDagggtBWR
+tttSgccttttttaacgtcHctaVtVDtagatcaNMaVtcVacatHctDWNaataRgcgt
+aVHaggtaaaaSgtttMtattDgBtctgatSgtRagagYtctSaKWaataMgattRKtaa
+catttYcgtaacacattRWtBtcggtaaatMtaaacBatttctKagtcDtttgcBtKYYB
+aKttctVttgttaDtgattttcttccacttgSaaacggaaaNDaattcYNNaWcgaaYat
+tttMgcBtcatRtgtaaagatgaWtgaccaYBHgaatagataVVtHtttVgYBtMctaMt
+cctgaDcYttgtccaaaRNtacagcMctKaaaggatttacatgtttaaWSaYaKttBtag
+DacactagctMtttNaKtctttcNcSattNacttggaacaatDagtattRtgSHaataat
+gccVgacccgatactatccctgtRctttgagaSgatcatatcgDcagWaaHSgctYYWta
+tHttggttctttatVattatcgactaagtgtagcatVgtgHMtttgtttcgttaKattcM
+atttgtttWcaaStNatgtHcaaaDtaagBaKBtRgaBgDtSagtatMtaacYaatYtVc
+KatgtgcaacVaaaatactKcRgtaYtgtNgBBNcKtcttaccttKgaRaYcaNKtactt
+tgagSBtgtRagaNgcaaaNcacagtVtttHWatgttaNatBgtttaatNgVtctgaata
+tcaRtattcttttttttRaaKcRStctcggDgKagattaMaaaKtcaHacttaataataK
+taRgDtKVBttttcgtKaggHHcatgttagHggttNctcgtatKKagVagRaaaggaaBt
+NatttVKcRttaHctaHtcaaatgtaggHccaBataNaNaggttgcWaatctgatYcaaa
+HaatWtaVgaaBttagtaagaKKtaaaKtRHatMaDBtBctagcatWtatttgWttVaaa
+ScMNattRactttgtYtttaaaagtaagtMtaMaSttMBtatgaBtttaKtgaatgagYg
+tNNacMtcNRacMMHcttWtgtRtctttaacaacattattcYaMagBaacYttMatcttK
+cRMtgMNccattaRttNatHaHNaSaaHMacacaVaatacaKaSttHatattMtVatWga
+ttttttaYctttKttHgScWaacgHtttcaVaaMgaacagNatcgttaacaaaaagtaca
+HBNaattgttKtcttVttaaBtctgctacgBgcWtttcaggacacatMgacatcccagcg
+gMgaVKaBattgacttaatgacacacaaaaaatRKaaBctacgtRaDcgtagcVBaacDS
+BHaaaaSacatatacagacRNatcttNaaVtaaaataHattagtaaaaSWccgtatWatg
+gDttaactattgcccatcttHaSgYataBttBaactattBtcHtgatcaataSttaBtat
+KSHYttWggtcYtttBttaataccRgVatStaHaKagaatNtagRMNgtcttYaaSaact
+cagDSgagaaYtMttDtMRVgWKWtgMaKtKaDttttgactatacataatcNtatNaHat
+tVagacgYgatatatttttgtStWaaatctWaMgagaRttRatacgStgattcttaagaD
+taWccaaatRcagcagaaNKagtaaDggcgccBtYtagSBMtactaaataMataBSacRM
+gDgattMMgtcHtcaYDtRaDaacggttDaggcMtttatgttaNctaattaVacgaaMMt
+aatDccSgtattgaRtWWaccaccgagtactMcgVNgctDctaMScatagcgtcaactat
+acRacgHRttgctatttaatgaattataYKttgtaagWgtYttgcHgMtaMattWaWVta
+RgcttgYgttBHtYataSccStBtgtagMgtDtggcVaaSBaatagDttgBgtctttctc
+attttaNagtHKtaMWcYactVcgcgtatMVtttRacVagDaatcttgctBBcRDgcaac
+KttgatSKtYtagBMagaRtcgBattHcBWcaactgatttaatttWDccatttatcgagS
+KaWttataHactaHMttaatHtggaHtHagaatgtKtaaRactgtttMatacgatcaagD
+gatKaDctataMggtHDtggHacctttRtatcttYattttgacttgaaSaataaatYcgB
+aaaaccgNatVBttMacHaKaataagtatKgtcaagactcttaHttcggaattgttDtct
+aaccHttttWaaatgaaatataaaWattccYDtKtaaaacggtgaggWVtctattagtga
+ctattaagtMgtttaagcatttgSgaaatatccHaaggMaaaattttcWtatKctagDtY
+tMcctagagHcactttactatacaaacattaacttaHatcVMYattYgVgtMttaaRtga
+aataaDatcaHgtHHatKcDYaatcttMtNcgatYatgSaMaNtcttKcWataScKggta
+tcttacgcttWaaagNatgMgHtctttNtaacVtgttcMaaRatccggggactcMtttaY
+MtcWRgNctgNccKatcttgYDcMgattNYaRagatHaaHgKctcataRDttacatBatc
+cattgDWttatttaWgtcggagaaaaatacaatacSNtgggtttccttacSMaagBatta
+caMaNcactMttatgaRBacYcYtcaaaWtagctSaacttWgDMHgaggatgBVgcHaDt
+ggaactttggtcNatNgtaKaBcccaNtaagttBaacagtatacDYttcctNgWgcgSMc
+acatStctHatgRcNcgtacacaatRttMggaNKKggataaaSaYcMVcMgtaMaHtgat
+tYMatYcggtcttcctHtcDccgtgRatcattgcgccgatatMaaYaataaYSggatagc
+gcBtNtaaaScaKgttBgagVagttaKagagtatVaactaSacWactSaKatWccaKaaa
+atBKgaaKtDMattttgtaaatcRctMatcaaMagMttDgVatggMaaWgttcgaWatga
+aatttgRtYtattaWHKcRgctacatKttctaccaaHttRatctaYattaaWatVNccat
+NgagtcKttKataStRaatatattcctRWatDctVagttYDgSBaatYgttttgtVaatt
+taatagcagMatRaacttBctattgtMagagattaaactaMatVtHtaaatctRgaaaaa
+aaatttWacaacaYccYDSaattMatgaccKtaBKWBattgtcaagcHKaagttMMtaat
+ttcKcMagNaaKagattggMagaggtaatttYacatcWaaDgatMgKHacMacgcVaaca
+DtaDatatYggttBcgtatgWgaSatttgtagaHYRVacaRtctHaaRtatgaactaata
+tctSSBgggaaHMWtcaagatKgagtDaSatagttgattVRatNtctMtcSaagaSHaat
+aNataataRaaRgattctttaataaagWaRHcYgcatgtWRcttgaaggaMcaataBRaa
+ccagStaaacNtttcaatataYtaatatgHaDgcStcWttaacctaRgtYaRtataKtgM
+ttttatgactaaaatttacYatcccRWtttHRtattaaatgtttatatttgttYaatMca
+RcSVaaDatcgtaYMcatgtagacatgaaattgRtcaaYaaYtRBatKacttataccaNa
+aattVaBtctggacaagKaaYaaatatWtMtatcYaaVNtcgHaactBaagKcHgtctac
+aatWtaDtSgtaHcataHtactgataNctRgttMtDcDttatHtcgtacatcccaggStt
+aBgtcacacWtccNMcNatMVaVgtccDYStatMaccDatggYaRKaaagataRatttHK
+tSaaatDgataaacttaHgttgVBtcttVttHgDacgaKatgtatatNYataactctSat
+atatattgcHRRYttStggaactHgttttYtttaWtatMcttttctatctDtagVHYgMR
+BgtHttcctaatYRttKtaagatggaVRataKDctaMtKBNtMtHNtWtttYcVtattMc
+gRaacMcctNSctcatttaaagDcaHtYccSgatgcaatYaaaaDcttcgtaWtaattct
+cgttttScttggtaatctttYgtctaactKataHacctMctcttacHtKataacacagcN
+RatgKatttttSaaatRYcgDttaMRcgaaattactMtgcgtaagcgttatBtttttaat
+taagtNacatHgttcRgacKcBBtVgatKttcgaBaatactDRgtRtgaNacWtcacYtt
+aaKcgttctHaKttaNaMgWgWaggtctRgaKgWttSttBtDcNtgtttacaaatYcDRt
+gVtgcctattcNtctaaaDMNttttNtggctgagaVctDaacVtWccaagtaacacaNct
+gaScattccDHcVBatcgatgtMtaatBgHaatDctMYgagaatgYWKcctaatNaStHa
+aaKccgHgcgtYaaYtattgtStgtgcaaRtattaKatattagaWVtcaMtBagttatta
+gNaWHcVgcaattttDcMtgtaRHVYtHtctgtaaaaHVtMKacatcgNaatttMatatg
+ttgttactagWYtaRacgataKagYNKcattataNaRtgaacKaYgcaaYYacaNccHat
+MatDcNgtHttRaWttagaaDcaaaaaatagggtKDtStaDaRtaVtHWKNtgtattVct
+SVgRgataDaRaWataBgaagaaKtaataaYgDcaStaNgtaDaaggtattHaRaWMYaY
+aWtggttHYgagVtgtgcttttcaaDKcagVcgttagacNaaWtagtaataDttctggtt
+VcatcataaagtgKaaaNaMtaBBaattaatWaattgctHaVKaSgDaaVKaHtatatat
+HatcatSBagNgHtatcHYMHgttDgtaHtBttWatcgtttaRaattgStKgSKNWKatc
+agDtctcagatttctRtYtBatBgHHtKaWtgYBgacVVWaKtacKcDttKMaKaVcggt
+gttataagaataaHaatattagtataatMHgttYgaRttagtaRtcaaVatacggtcMcg
+agtaaRttacWgactKRYataaaagSattYaWgagatYagKagatgSaagKgttaatMgg
+tataatgttWYttatgagaaacctNVataatHcccKtDctcctaatactggctHggaSag
+gRtKHaWaattcgSatMatttagaggcYtctaMcgctcataSatatgRagacNaaDagga
+VBagaYttKtacNaKgtSYtagttggaWcatcWttaatctatgaVtcgtgtMtatcaYcg
+tRccaaYgDctgcMgtgtWgacWtgataacacgcgctBtgttaKtYDtatDcatcagKaV
+MctaatcttgVcaaRgcRMtDcgattaHttcaNatgaatMtactacVgtRgatggaWttt
+actaaKatgagSaaKggtaNtactVaYtaaKRagaacccacaMtaaMtKtatBcttgtaa
+WBtMctaataaVcDaaYtcRHBtcgttNtaaHatttBNgRStVDattBatVtaagttaYa
+tVattaagaBcacggtSgtVtatttaRattgatgtaHDKgcaatattKtggcctatgaWD
+KRYcggattgRctatNgatacaatMNttctgtcRBYRaaaHctNYattcHtaWcaattct
+BtMKtVgYataatMgYtcagcttMDataVtggRtKtgaatgccNcRttcaMtRgattaac
+attRcagcctHtWMtgtDRagaKaBtgDttYaaaaKatKgatctVaaYaacWcgcatagB
+VtaNtRtYRaggBaaBtgKgttacataagagcatgtRattccacttaccatRaaatgWgD
+aMHaYVgVtaSctatcgKaatatattaDgacccYagtgtaYNaaatKcagtBRgagtcca
+tgKgaaaccBgaagBtgSttWtacgatWHaYatcgatttRaaNRgcaNaKVacaNtDgat
+tgHVaatcDaagcgtatgcNttaDataatcSataaKcaataaHWataBtttatBtcaKtK
+tatagttaDgSaYctacaRatNtaWctSaatatttYaKaKtaccWtatcRagacttaYtt
+VcKgSDcgagaagatccHtaattctSttatggtKYgtMaHagVaBRatttctgtRgtcta
+tgggtaHKgtHacHtSYacgtacacHatacKaaBaVaccaDtatcSaataaHaagagaat
+ScagactataaRttagcaaVcaHataKgDacatWccccaagcaBgagWatctaYttgaaa
+tctVNcYtttWagHcgcgcDcVaaatgttKcHtNtcaatagtgtNRaactttttcaatgg
+WgBcgDtgVgtttctacMtaaataaaRggaaacWaHttaRtNtgctaaRRtVBctYtVta
+tDcattDtgaccYatagatYRKatNYKttNgcctagtaWtgaactaMVaacctgaStttc
+tgaKVtaaVaRKDttVtVctaDNtataaaDtccccaagtWtcgatcactDgYaBcatcct
+MtVtacDaaBtYtMaKNatNtcaNacgDatYcatcgcaRatWBgaacWttKttagYtaat
+tcggttgSWttttDWctttacYtatatWtcatDtMgtBttgRtVDggttaacYtacgtac
+atgaattgaaWcttMStaDgtatattgaDtcRBcattSgaaVBRgagccaaKtttcDgcg
+aSMtatgWattaKttWtgDBMaggBBttBaatWttRtgcNtHcgttttHtKtcWtagHSt
+aacagttgatatBtaWSaWggtaataaMttaKacDaatactcBttcaatatHttcBaaSa
+aatYggtaRtatNtHcaatcaHtagVtgtattataNggaMtcttHtNagctaaaggtaga
+YctMattNaMVNtcKtactBKcaHHcBttaSagaKacataYgctaKaYgttYcgacWVtt
+WtSagcaacatcccHaccKtcttaacgaKttcacKtNtacHtatatRtaaatacactaBt
+ttgaHaRttggttWtatYagcatYDatcggagagcWBataagRtacctataRKgtBgatg
+aDatataSttagBaHtaatNtaDWcWtgtaattacagKttcNtMagtattaNgtctcgtc
+ctcttBaHaKcKccgtRcaaYagSattaagtKataDatatatagtcDtaacaWHcaKttD
+gaaRcgtgYttgtcatatNtatttttatggccHtgDtYHtWgttatYaacaattcaWtat
+NgctcaaaSttRgctaatcaaatNatcgtttaBtNNVtgttataagcaaagattBacgtD
+atttNatttaaaDcBgtaSKgacgtagataatttcHMVNttgttBtDtgtaWKaaRMcKM
+tHtaVtagataWctccNNaSWtVaHatctcMgggDgtNHtDaDttatatVWttgttattt
+aacctttcacaaggaSaDcggttttttatatVtctgVtaacaStDVaKactaMtttaSNa
+gtgaaattaNacttSKctattcctctaSagKcaVttaagNaVcttaVaaRNaHaaHttat
+gtHttgtgatMccaggtaDcgaccgtWgtWMtttaHcRtattgScctatttKtaaccaag
+tYagaHgtWcHaatgccKNRtttagtMYSgaDatctgtgaWDtccMNcgHgcaaacNDaa
+aRaStDWtcaaaaHKtaNBctagBtgtattaactaattttVctagaatggcWSatMaccc
+ttHttaSgSgtgMRcatRVKtatctgaaaccDNatYgaaVHNgatMgHRtacttaaaRta
+tStRtDtatDttYatattHggaBcttHgcgattgaKcKtttcRataMtcgaVttWacatN
+catacctRataDDatVaWNcggttgaHtgtMacVtttaBHtgagVttMaataattatgtt
+cttagtttgtgcDtSatttgBtcaacHattaaBagVWcgcaSYttMgcttacYKtVtatc
+aYaKctgBatgcgggcYcaaaaacgNtctagKBtattatctttKtaVttatagtaYtRag
+NtaYataaVtgaatatcHgcaaRataHtacacatgtaNtgtcgYatWMatttgaactacR
+ctaWtWtatacaatctBatatgYtaagtatgtgtatSttactVatcttYtaBcKgRaSgg
+RaaaaatgcagtaaaWgtaRgcgataatcBaataccgtatttttccatcNHtatWYgatH
+SaaaDHttgctgtccHtggggcctaataatttttctatattYWtcattBtgBRcVttaVM
+RSgctaatMagtYtttaaaaatBRtcBttcaaVtaacagctccSaaSttKNtHtKYcagc
+agaaaccccRtttttaaDcDtaStatccaagcgctHtatcttaDRYgatDHtWcaaaBcW
+gKWHttHataagHacgMNKttMKHccaYcatMVaacgttaKgYcaVaaBtacgcaacttt
+MctaaHaatgtBatgagaSatgtatgSRgHgWaVWgataaatatttccKagVgataattW
+aHNcYggaaatgctHtKtaDtctaaagtMaatVDVactWtSaaWaaMtaHtaSKtcBRaN
+cttStggtBttacNagcatagRgtKtgcgaacaacBcgKaatgataagatgaaaattgta
+ctgcgggtccHHWHaaNacaBttNKtKtcaaBatatgctaHNgtKcDWgtttatNgVDHg
+accaacWctKaaggHttgaRgYaatHcaBacaatgagcaaattactgtaVaaYaDtagat
+tgagNKggtggtgKtWKaatacagDRtatRaMRtgattDggtcaaYRtatttNtagaDtc
+acaaSDctDtataatcgtactaHttatacaatYaacaaHttHatHtgcgatRRttNgcat
+SVtacWWgaaggagtatVMaVaaattScDDKNcaYBYaDatHgtctatBagcaacaagaa
+tgagaaRcataaKNaRtBDatcaaacgcattttttaaBtcSgtacaRggatgtMNaattg
+gatatWtgagtattaaaVctgcaYMtatgatttttYgaHtgtcttaagWBttHttgtctt
+attDtcgtatWtataataSgctaHagcDVcNtaatcaagtaBDaWaDgtttagYctaNcc
+DtaKtaHcttaataacccaRKtacaVaatNgcWRaMgaattatgaBaaagattVYaHMDc
+aDHtcRcgYtcttaaaWaaaVKgatacRtttRRKYgaatacaWVacVcRtatMacaBtac
+tggMataaattttHggNagSctacHgtBagcgtcgtgattNtttgatSaaggMttctttc
+ttNtYNagBtaaacaaatttMgaccttacataattgYtcgacBtVMctgStgMDtagtaR
+ctHtatgttcatatVRNWataDKatWcgaaaaagttaaaagcacgHNacgtaatctttMR
+tgacttttDacctataaacgaaatatgattagaactccSYtaBctttaataacWgaaaYa
+tagatgWttcatKtNgatttttcaagHtaYgaaRaDaagtaggagcttatVtagtctttc
+attaaaatcgKtattaRttacagVaDatgcatVgattgggtctttHVtagKaaRBtaHta
+aggccccaaaaKatggtttaMWgtBtaaacttcactttKHtcgatctccctaYaBacMgt
+cttBaBaNgcgaaacaatctagtHccHtKttcRtRVttccVctttcatacYagMVtMcag
+aMaaacaataBctgYtaatRaaagattaaccatVRatHtaRagcgcaBcgDttStttttc
+VtttaDtKgcaaWaaaaatSccMcVatgtKgtaKgcgatatgtagtSaaaDttatacaaa
+catYaRRcVRHctKtcgacKttaaVctaDaatgttMggRcWaacttttHaDaKaDaBctg
+taggcgtttaHBccatccattcNHtDaYtaataMttacggctNVaacDattgatatttta
+cVttSaattacaaRtataNDgacVtgaacataVRttttaDtcaaacataYDBtttaatBa
+DtttYDaDaMccMttNBttatatgagaaMgaNtattHccNataattcaHagtgaaggDga
+tgtatatatgYatgaStcataaBStWacgtcccataRMaaDattggttaaattcMKtctM
+acaBSactcggaatDDgatDgcWctaacaccgggaVcacWKVacggtaNatatacctMta
+tgatagtgcaKagggVaDtgtaacttggagtcKatatcgMcttRaMagcattaBRaStct
+YSggaHYtacaactMBaagDcaBDRaaacMYacaHaattagcattaaaHgcgctaaggSc
+cKtgaaKtNaBtatDDcKBSaVtgatVYaagVtctSgMctacgttaacWaaattctSgtD
+actaaStaaattgcagBBRVctaatatacctNttMcRggctttMttagacRaHcaBaacV
+KgaataHttttMgYgattcYaNRgttMgcVaaacaVVcDHaatttgKtMYgtatBtVVct
+WgVtatHtacaaHttcacgatagcagtaaNattBatatatttcVgaDagcggttMaagtc
+ScHagaaatgcYNggcgtttttMtStggtRatctacttaaatVVtBacttHNttttaRca
+aatcacagHgagagtMgatcSWaNRacagDtatactaaDKaSRtgattctccatSaaRtt
+aaYctacacNtaRtaactggatgaccYtacactttaattaattgattYgttcagDtNKtt
+agDttaaaaaaaBtttaaNaYWKMBaaaacVcBMtatWtgBatatgaacVtattMtYatM
+NYDKNcKgDttDaVtaaaatgggatttctgtaaatWtctcWgtVVagtcgRgacttcccc
+taDcacagcRcagagtgtWSatgtacatgttaaSttgtaaHcgatgggMagtgaacttat
+RtttaVcaccaWaMgtactaatSSaHtcMgaaYtatcgaaggYgggcgtgaNDtgttMNg
+aNDMtaattcgVttttaacatgVatgtWVMatatcaKgaaattcaBcctccWcttgaaWH
+tWgHtcgNWgaRgctcBgSgaattgcaaHtgattgtgNagtDttHHgBttaaWcaaWagc
+aSaHHtaaaVctRaaMagtaDaatHtDMtcVaWMtagSagcttHSattaacaaagtRacM
+tRtctgttagcMtcaBatVKtKtKacgagaSNatSactgtatatcBctgagVtYactgta
+aattaaaggcYgDHgtaacatSRDatMMccHatKgttaacgactKtgKagtcttcaaHRV
+tccttKgtSataatttacaactggatDNgaacttcaRtVaagDcaWatcBctctHYatHa
+DaaatttagYatSatccaWtttagaaatVaacBatHcatcgtacaatatcgcNYRcaata
+YaRaYtgattVttgaatgaVaactcRcaNStgtgtattMtgaggtNttBaDRcgaaaagc
+tNgBcWaWgtSaDcVtgVaatMKBtttcgtttctaaHctaaagYactgMtatBDtcStga
+ccgtSDattYaataHctgggaYYttcggttaWaatctggtRagWMaDagtaacBccacta
+cgHWMKaatgatWatcctgHcaBaSctVtcMtgtDttacctaVgatYcWaDRaaaaRtag
+atcgaMagtggaRaWctctgMgcWttaagKBRtaaDaaWtctgtaagYMttactaHtaat
+cttcataacggcacBtSgcgttNHtgtHccatgttttaaagtatcgaKtMttVcataYBB
+aKtaMVaVgtattNDSataHcagtWMtaggtaSaaKgttgBtVtttgttatcatKcgHac
+acRtctHatNVagSBgatgHtgaRaSgttRcctaacaaattDNttgacctaaYtBgaaaa
+tagttattactcttttgatgtNNtVtgtatMgtcttRttcatttgatgacacttcHSaaa
+ccaWWDtWagtaRDDVNacVaRatgttBccttaatHtgtaaacStcVNtcacaSRttcYa
+gacagaMMttttgMcNttBcgWBtactgVtaRttctccaaYHBtaaagaBattaYacgat
+ttacatctgtaaMKaRYtttttactaaVatWgctBtttDVttctggcDaHaggDaagtcg
+aWcaagtagtWttHtgKtVataStccaMcWcaagataagatcactctHatgtcYgaKcat
+cagatactaagNSStHcctRRNtattgtccttagttagMVgtatagactaactctVcaat
+MctgtttgtgttgccttatWgtaBVtttctggMcaaKgDWtcgtaaYStgSactatttHg
+atctgKagtagBtVacRaagRtMctatgggcaaaKaaaatacttcHctaRtgtDcttDat
+taggaaatttcYHaRaaBttaatggcacKtgctHVcaDcaaaVDaaaVcgMttgtNagcg
+taDWgtcgttaatDgKgagcSatatcSHtagtagttggtgtHaWtaHKtatagctgtVga
+ttaBVaatgaataagtaatVatSttaHctttKtttgtagttaccttaatcgtagtcctgB
+cgactatttVcMacHaaaggaatgDatggKtaHtgStatattaaSagctWcctccRtata
+BaDYcgttgcNaagaggatRaaaYtaWgNtSMcaatttactaacatttaaWttHtatBat
+tgtcgacaatNgattgcNgtMaaaKaBDattHacttggtRtttaYaacgVactBtaBaKt
+gBttatgVttgtVttcaatcWcNctDBaaBgaDHacBttattNtgtDtatttVSaaacag
+gatgcRatSgtaSaNtgBatagttcHBgcBBaaattaHgtDattatDaKaatBaaYaaMa
+ataaataKtttYtagtBgMatNcatgtttgaNagtgttgtgKaNaSagtttgaSMaYBca
+aaacDStagttVacaaaaactaaWttBaagtctgtgcgtMgtaattctcctacctcaNtt
+taaccaaaaVtBcacataacaccccBcWMtatVtggaatgaWtcaaWaaaaaaaaWtDta
+atatRcctDWtcctaccMtVVatKttaWaaKaaatataaagScHBagaggBaSMtaWaVt
+atattactSaaaKNaactatNatccttgaYctattcaaaVgatttYHcRagattttaSat
+aggttattcVtaaagaKgtattattKtRttNcggcRgtgtgtWYtaacHgKatKgatYta
+cYagDtWcHBDctctgRaYKaYagcactKcacSaRtBttttBHKcMtNtcBatttatttt
+tgSatVgaaagaWtcDtagDatatgMacaacRgatatatgtttgtKtNRaatatNatgYc
+aHtgHataacKtgagtagtaacYttaNccaaatHcacaacaVDtagtaYtccagcattNt
+acKtBtactaaagaBatVtKaaHBctgStgtBgtatgaSNtgDataaccctgtagcaBgt
+gatcttaDataStgaMaccaSBBgWagtacKcgattgaDgNNaaaacacagtSatBacKD
+gcgtataBKcatacactaSaatYtYcDaactHttcatRtttaatcaattataRtttgtaa
+gMcgNttcatcBtYBagtNWNMtSHcattcRctttttRWgaKacKttgggagBcgttcgc
+MaWHtaatactgtctctatttataVgtttaBScttttaBMaNaatMacactYtBMggtHa
+cMagtaRtctgcatttaHtcaaaatttgagKtgNtactBacaHtcgtatttctMaSRagc
+agttaatgtNtaaattgagagWcKtaNttagVtacgatttgaatttcgRtgtWcVatcgt
+taaDVctgtttBWgaccagaaagtcSgtVtatagaBccttttcctaaattgHtatcggRa
+ttttcaaggcYSKaagWaWtRactaaaacccBatMtttBaatYtaagaactSttcgaaSc
+aatagtattgaccaagtgttttctaacatgtttNVaatcaaagagaaaNattaaRtttta
+VaaaccgcaggNMtatattVctcaagaggaacgBgtttaacaagttcKcYaatatactaa
+ccBaaaSggttcNtattctagttRtBacgScVctcaatttaatYtaaaaaaatgSaatga
+tagaMBRatgRcMcgttgaWHtcaVYgaatYtaatctttYttatRaWtctgBtDcgatNa
+tcKaBaDgatgtaNatWKctccgatattaacattNaaacDatgBgttctgtDtaaaMggt
+gaBaSHataacgccSctaBtttaRBtcNHcDatcDcctagagtcRtaBgWttDRVHagat
+tYatgtatcWtaHtttYcattWtaaagtctNgtStggRNcgcggagSSaaagaaaatYcH
+DtcgctttaatgYcKBVSgtattRaYBaDaaatBgtatgaHtaaRaRgcaSWNtagatHa
+acttNctBtcaccatctMcatattccaSatttgcgaDagDgtatYtaaaVDtaagtttWV
+aagtagYatRttaagDcNgacKBcScagHtattatcDaDactaaaaaYgHttBcgaDttg
+gataaaKSRcBMaBcgaBSttcWtgNBatRaccgattcatttataacggHVtaattcaca
+agagVttaaRaatVVRKcgWtVgacctgDgYaaHaWtctttcacMagggatVgactagMa
+aataKaaNWagKatagNaaWtaaaatttgaattttatttgctaaVgaHatBatcaaBWcB
+gttcMatcgBaaNgttcgSNaggSaRtttgHtRtattaNttcDcatSaVttttcgaaaaa
+ttgHatctaRaggSaNatMDaaatDcacgattttagaHgHaWtYgattaatHNSttatMS
+gggNtcKtYatRggtttgtMWVtttaYtagcagBagHaYagttatatggtBacYcattaR
+SataBatMtttaaatctHcaaaSaaaagttNSaaWcWRccRtKaagtBWtcaaattSttM
+tattggaaaccttaacgttBtWatttatatWcDaatagattcctScacctaagggRaaYt
+aNaatgVtBcttaaBaacaMVaaattatStYgRcctgtactatcMcVKatttcgSgatRH
+MaaaHtagtaaHtVgcaaataatatcgKKtgccaatBNgaaWcVttgagttaKatagttc
+aggKDatDtattgaKaVcaKtaataDataataHSaHcattagttaatRVYcNaHtaRcaa
+ggtNHcgtcaaccaBaaagYtHWaaaRcKgaYaaDttgcWYtataRgaatatgtYtgcKt
+aNttWacatYHctRaDtYtattcBttttatcSataYaYgttWaRagcacHMgtttHtYtt
+YaatcggtatStttcgtRSattaaDaKMaatatactaNBaWgctacacYtgaYVgtgHta
+aaRaaRgHtagtWattataaaSDaaWtgMattatcgaaaagtaYRSaWtSgNtBgagcRY
+aMDtactaacttaWgtatctagacaagNtattHggataatYttYatcataDcgHgttBtt
+ctttVttgccgaaWtaaaacgKgtatctaaaaaNtccDtaDatBMaMggaatNKtatBaa
+atVtccRaHtaSacataHattgtttKVYattcataVaattWtcgtgMttcttKtgtctaa
+cVtatctatatBRataactcgKatStatattcatHHRttKtccaacgtgggtgRgtgaMt
+attattggctatcgtgacMtRcBDtcttgtactaatRHttttaagatcgVMDStattatY
+BtttDttgtBtNttgRcMtYtgBacHaWaBaatDKctaagtgaaactaatgRaaKgatcc
+aagNaaaatattaggWNtaagtatacttttKcgtcggSYtcttgRctataYcttatataa
+agtatattaatttataVaacacaDHatctatttttKYVatHRactttaBHccaWagtact
+BtcacgaVgcgttRtttttttSVgtSagtBaaattctgaHgactcttgMcattttagVta
+agaattHctHtcaDaaNtaacRggWatagttcgtSttgaDatcNgNagctagDgatcNtt
+KgttgtaDtctttRaaYStRatDtgMggactSttaDtagSaVtBDttgtDgccatcacaM
+attaaaMtNacaVcgSWcVaaDatcaHaatgaattaMtatccVtctBtaattgtWattat
+BRcWcaatgNNtactWYtDaKttaaatcactcagtRaaRgatggtKgcgccaaHgaggat
+StattYcaNMtcaBttacttatgagDaNtaMgaaWtgtttcttctaHtMNgttatctaWW
+atMtBtaaatagDVatgtBYtatcggcttaagacMRtaHScgatatYgRDtcattatSDa
+HggaaataNgaWSRRaaaBaatagBattaDctttgHWNttacaataaaaaaatacggttt
+gHgVtaHtWMttNtBtctagtMcgKMgHgYtataHaNagWtcaacYattaataYRgtaWK
+gaBctataaccgatttaHaNBRaRaMtccggtNgacMtctcatttgcaattcWgMactta
+caaDaaNtactWatVtttagccttMaatcagVaagtctVaaDaBtattaattaYtNaYtg
+gattaKtaKctYaMtattYgatattataatKtVgDcttatatNBtcgttgtStttttMag
+aggttaHYSttcKgtcKtDNtataagttataagSgttatDtRttattgttttSNggRtca
+aKMNatgaatattgtBWtaMacctgggYgaSgaagYataagattacgagaatBtggtRcV
+HtgYggaDgaYaKagWagctatagacgaaHgtWaNgacttHRatVaWacKYtgRVNgVcS
+gRWctacatcKSactctgWYtBggtataagcttNRttVtgRcaWaaatDMatYattaact
+ttcgaagRatSctgccttgcRKaccHtttSNVagtagHagBagttagaccaRtataBcca
+taatSHatRtcHagacBWatagcaMtacaRtgtgaaBatctKRtScttccaNaatcNgta
+atatWtcaMgactctBtWtaaNactHaaaaRctcgcatggctMcaaNtcagaaaaacaca
+gtggggWttRttagtaagaVctVMtcgaatcttcMaaaHcaHBttcgattatgtcaDagc
+YRtBtYcgacMgtDcagcgaNgttaataatagcagKYYtcgtaBtYctMaRtaRtDagaa
+aacacatgYaBttgattattcgaaNttBctSataaMataWRgaHtttccgtDgaYtatgg
+tDgHKgMtatttVtMtVagttaRatMattRagataaccctKctMtSttgaHagtcStcta
+tttccSagatgttccacgaggYNttHRacgattcDatatDcataaaatBBttatcgaHtN
+HaaatatDNaggctgaNcaaggagttBttMgRagVatBcRtaWgatgBtSgaKtcgHttt
+gaatcaaDaHttcSBgHcagtVaaSttDcagccgttNBtgttHagYtattctttRWaaVt
+SttcatatKaaRaaaNacaVtVctMtSDtDtRHRcgtaatgctcttaaatSacacaatcg
+HattcaWcttaaaatHaaatcNctWttaNMcMtaKctVtcctaagYgatgatcYaaaRac
+tctaRDaYagtaacgtDgaggaaatctcaaacatcaScttcKttNtaccatNtaNataca
+tttHaaDHgcaDatMWaaBttcRggctMaagctVYcacgatcaDttatYtaatcKatWat
+caatVYtNagatttgattgaYttttYgacttVtcKaRagaaaHVgDtaMatKYagagttN
+atWttaccNtYtcDWgSatgaRgtMatgKtcgacaagWtacttaagtcgKtgatccttNc
+ttatagMatHVggtagcgHctatagccctYttggtaattKNaacgaaYatatVctaataM
+aaaYtgVtcKaYtaataacagaatHcacVagatYWHttagaaSMaatWtYtgtaaagNaa
+acaVgaWtcacNWgataNttcaSagctMDaRttgNactaccgataMaaatgtttattDtc
+aagacgctDHYYatggttcaagccNctccttcMctttagacBtaaWtaWVHggaaaaNat
+ttaDtDtgctaaHHtMtatNtMtagtcatttgcaaaRatacagRHtatDNtgtDgaatVg
+tVNtcaaatYBMaaaagcaKgtgatgatMgWWMaHttttMgMagatDtataaattaacca
+actMtacataaattgRataatacgBtKtaataattRgtatDagDtcRDacctatRcagag
+cSHatNtcaScNtttggacNtaaggaccgtgKNttgttNcttgaaRgYgRtNtcagttBc
+ttttcHtKtgcttYaaNgYagtaaatgaatggWaMattBHtatctatSgtcYtgcHtaat
+tHgaaMtHcagaaSatggtatgccaHBtYtcNattWtgtNgctttaggtttgtWatNtgH
+tgcDttactttttttgcNtactKtWRaVcttcatagtgSNKaNccgaataaBttataata
+YtSagctttaaatSttggctaaKSaatRccgWHgagDttaaatcatgagMtcgagtVtaD
+ggaBtatttgDacataaacgtagYRagBWtgDStKDgatgaagttcattatttaKWcata
+aatWRgatataRgttRacaaNKttNtKagaaYaStaactScattattaacgatttaaatg
+DtaattagatHgaYataaactatggggatVHtgccgtNgatNYcaStRtagaccacWcaM
+tatRagHgVactYtWHtcttcatgatWgagaKggagtatgaWtDtVtNaNtcgYYgtaaa
+ctttaDtBactagtaDctatagtaatatttatatataacgHaaaRagKattSagttYtSt
+>THREE Homo sapiens frequency
+agagagacgatgaaaattaatcgtcaatacgctggcgaacactgagggggacccaatgct
+cttctcggtctaaaaaggaatgtgtcagaaattggtcagttcaaaagtagaccggatctt
+tgcggagaacaattcacggaacgtagcgttgggaaatatcctttctaccacacatcggat
+tttcgccctctcccattatttattgtgttctcacatagaattattgtttagacatccctc
+gttgtatggagagttgcccgagcgtaaaggcataatccatataccgccgggtgagtgacc
+tgaaattgtttttagttgggatttcgctatggattagcttacacgaagagattctaatgg
+tactataggataattataatgctgcgtggcgcagtacaccgttacaaacgtcgttcgcat
+atgtggctaacacggtgaaaatacctacatcgtatttgcaatttcggtcgtttcatagag
+cgcattgaattactcaaaaattatatatgttgattatttgattagactgcgtggaaagaa
+ggggtactcaagccatttgtaaaagctgcatctcgcttaagtttgagagcttacattagt
+ctatttcagtcttctaggaaatgtctgtgtgagtggttgtcgtccataggtcactggcat
+atgcgattcatgacatgctaaactaagaaagtagattactattaccggcatgcctaatgc
+gattgcactgctatgaaggtgcggacgtcgcgcccatgtagccctgataataccaatact
+tacatttggtcagcaattctgacattatacctagcacccataaatttactcagacttgag
+gacaggctcttggagtcgatcttctgtttgtatgcatgtgatcatatagatgaataagcg
+atgcgactagttagggcatagtatagatctgtgtatacagttcagctgaacgtccgcgag
+tggaagtacagctgagatctatcctaaaatgcaaccatatcgttcacacatgatatgaac
+ccagggggaaacattgagttcagttaaattggcagcgaatcccccaagaagaaggcggag
+tgacgttgaacgggcttatggtttttcagtacttcctccgtataagttgagcgaaatgta
+aacagaataatcgttgtgttaacaacattaaaatcgcggaatatgatgagaatacacagt
+gtgagcatttcacttgtaaaatatctttggtagaacttactttgctttaaatatgttaaa
+ccgatctaataatctacaaaacggtagattttgcctagcacattgcgtccttctctattc
+agatagaggcaatactcagaaggttttatccaaagcactgtgttgactaacctaagtttt
+agtctaataatcatgattgattataggtgccgtggactacatgactcgtccacaaataat
+acttagcagatcagcaattggccaagcacccgacttttatttaatggttgtgcaatagtc
+cagattcgtattcgggactctttcaaataatagtttcctggcatctaagtaagaaaagct
+cataaggaagcgatattatgacacgctcttccgccgctgttttgaaacttgagtattgct
+cgtccgaaattgagggtcacttcaaaatttactgagaagacgaagatcgactaaagttaa
+aatgctagtccacagttggtcaagttgaattcatccacgagttatatagctattttaatt
+tatagtcgagtgtacaaaaaacatccacaataagatttatcttagaataacaacccccgt
+atcatcgaaatcctccgttatggcctgactcctcgagcttatagcatttgtgctggcgct
+cttgccaggaacttgctcgcgaggtggtgacgagtgagatgatcagtttcattatgatga
+tacgattttatcgcgactagttaatcatcatagcaagtaaaatttgaattatgtcattat
+catgctccattaacaggttatttaattgatactgacgaaattttttcacaatgggttttc
+tagaatttaatatcagtaattgaagccttcataggggtcctactagtatcctacacgacg
+caggtccgcagtatcctggagggacgtgttactgattaaaagggtcaaaggaatgaaggc
+tcacaatgttacctgcttcaccatagtgagccgatgagttttacattagtactaaatccc
+aaatcatactttacgatgaggcttgctagcgctaaagagaatacatacaccaccacatag
+aattgttagcgatgatatcaaatagactcctggaagtgtcagggggaaactgttcaatat
+ttcgtccacaggactgaccaggcatggaaaagactgacgttggaaactataccatctcac
+gcccgacgcttcactaattgatgatccaaaaaatatagcccggattcctgattagcaaag
+ggttcacagagaaagatattatcgacgtatatcccaaaaaacagacgtaatgtgcatctt
+cgaatcgggatgaatacttgtatcataaaaatgtgacctctagtatacaggttaatgtta
+gtgatacacaatactcgtgggccatgggttctcaaataaaatgtaatattgcgtcgatca
+ctcacccacgtatttggtctaattatgttttatttagtgacaatccaatagataaccggt
+cctattaagggctatatttttagcgaccacgcgtttaaacaaaggattgtatgtagatgg
+taccagtttaattgccagtgggcaatcctaagcaaaatgagattctatcctaaagtttgg
+gcttgatataagatttcggatgtatgggttttataatcgttggagagctcaatcatgagc
+taatacatggatttcgctacctcaccgagagaccttgcatgaagaattctaaccaaaagt
+ttaataggccggattggattgagttaattaagaccttgttcagtcatagtaaaaaccctt
+aaattttaccgattgacaaagtgagcagtcgcaataccctatgcgaaacgcctcgatagt
+gactaggtatacaaggtttttgagttcctttgaaatagttaactaatttaaaattaatta
+acgacatggaaatcacagaacctaatgctttgtaggagttatttatgctgtttactgcct
+ctacaaccctaataaagcagtcctaagaatgaaacgcatcttttagttcagaaagtggta
+tccagggtggtcaatttaataaattcaacatcgggtctcaggatattcggtcatataatt
+tattaagggctcttcgagtcttactctgagtgaaattggaaacagtcatccttttcgttg
+tgaggcatcttacaccgctatcgatatacaatgcattccaccgcggtgtcccgtacacaa
+ggaaacttgttaccttggggatataagaaaactcacacgtctcattattaaactgagtac
+aatttttgcacgagaaagtaatgcaatacaatatgatgaaagccagctaatgaaaaggga
+tggaacgcacctcggatctgttgcactggattaaaatccgattatttttaaaaatattca
+gtgctagagcatatcaggtctacttttttatctggtatgtaaagcccacggagcgatagt
+gagatccttacgactcaacgaaaagttataacataactcccgttagccaaagcccaatcc
+cgattactgccctaccctaacgtctgccatctaaatatcgaacttgttatgatcaatgtg
+actacctcccaccctttccccttcatttgttccactggggataagctagcgttttcagaa
+tcaatgcaataagaatagccaattgtctcacttcatcagagctcttggcaattccaggcg
+ctacgtggttctggaatatattcatttttcaaatagtaatacgtttagtgttgctattgt
+ctacacgtttggatattacgttatgtgagcggacatcaatagttgtctaactctttagta
+agccagagatagcactcttagcgaatggataccatcttccataagtttagttaatagtcc
+gaaacaactgcttcgagcatatttgaacctccttgtaggcaaatagcctcttcaaagcaa
+tcttactaatagatagagtttgttttaagggactactagaaatgggacaatcttaatagt
+atgacctaaactgacatttaaagatatatccaggtggcaagcataaagatcattgcgcca
+cctccaccgtgggattacttatcagtcgatatcctatatgctaagtttgcgacggcagaa
+tacaaactaagctgagttgatgctaaccttacctatgataccccattggaccggttaaca
+gccctacttattccaaataaaagaacttttatgctgtagaagctattatagtgatgcctg
+gtaacttcagtatattaaaatgacacacatacgccatatagagctcctggaactttgaat
+aatgagcgaacttcgaagttgaagagcaagaaaccatatgtcacggttgcctaaagcccg
+gtaaccagacatgtgctatcattgatcattatcgaggttttcataaccttgacccattat
+cggctgtgcgcggacaagtacttaaatcactagtttcttcacctgcttatcggtaagaaa
+taaggttggcaaagaatcgcataagacggacgtagagccgcagcgttgtgcgagtccagg
+tgcatgcgcagcaataggattttaaattttgttccatttttaatttagccgtaaggatgt
+ccgtaaatgattgaaaattggattcaatctttgggcctatgctactggaacctgatcgac
+aaaatttcaaacatacgttaactccgaaagaccgtatttttgcggctagaatagtcagtc
+gcttggagccatataccttaccacttaaacgacgtgctcctgtagttgaaatataaacag
+aacacaaagactaccgatcatatcaactgaagatctttgtaactttgaggcgaagcaccc
+tcttcgagacaactaagagtaaagtaccgggcgccgcaaggagtcgattgggaccctaaa
+tcttgacgaattgctaagaggctcagagctaccactgtaatttctctagagcccataata
+aatgaacgatacatccgtaggtagcacctaagggattataatggaagccaaatgcagtta
+ataatattatatactggcgtacacgattcgacggatctctcacatagtgattcacgaccc
+ccccctttgattgacacagcgtcagcattttgcaagaacgatcttctgcatagggtgcgc
+caccgtaaggatgacgtcgaagctacaactgggtataatttaccatgcttccctgatgct
+gagtgcaatacactaagaatgagtttttaccccatatcaccagtatttgttctgttattg
+cgaagaaatggctatgctgagttggcgactaaagtcacccatcctttttattaggtaacc
+ccctcccttaaactaactgatttgctggagctgccctgcatacatatactttatcattta
+tggacgtccgtgacgcttattatccaccatagtcgatatgctacacggattcattaatgg
+atcgtaggagtttaagttatatttactaagatcggtctcggctactatcccgccttaccc
+ggcgctatttacggccatttttaatatattgacggtaattattcctatggtttcgaccgc
+acgtccttggacaagaaagaatggcaaaaaaaatgtaaaagaaaaaaaatattgagtccc
+taccatcatataaaaaatatgtgatgagtaacttgacgaaatgttagtggttattaaaga
+ctatctattacaccttttgttttctgtcgtagtatattaaagtctagaagccttacagga
+aaatcagggttatacagccgatactccgcagcatgaatcatcgaggaggtgtcctaccat
+cgcgccttgtaatcttgtctgtgtatactgtatttagaccttttatacaaagtaaatatc
+tcggctttatgtgattgggaggggcctactcaaacatgatgacttgacctaataatcact
+gtgcgggcgtcttatgactagctattccttgaaatccaccaccaaatggttaatatgtaa
+aaactttgacgatgaaacaaggtgaatgtgtagttactttgtgtaattagctgcgtcgag
+cattgcttgtaaaaccgtcaatcgcacacgttacttccataaaatttctacgaatacacc
+cttcttaaaaaaaacgtaggaattcacgagtttaacaaacgataactgtataaagtggaa
+gtccgaagaaagcagatgcccgaactactcgaagatgtttcgttttcttaaccatagggg
+cttcttaatggcccactacgcacattttgttcaagcccgagagggacatccccattacgg
+gagtattactaaaactgttccgtaatacgttcagcaagggatgaaaaaggccactgctca
+agttattgacgtgggagtattacatcggaagcctgaatcccacactatgatggtctgtac
+aggcctagggactgcgtctagacggtattaccggcttctaatcatacgatcgtgagtctt
+aacgggaagtaaggctcacacctaccccaaaccatttatctatgtaagtataaaattgtg
+cgtaagtgttcaaagtggacaataaagacgtggcaaaaacccccgcacataagccgcttt
+agatttcacaaataccaatgcggttaaaaacatccttgagtcgtacatacaccatactcg
+cgttaaacggatataacagaagataataaatccggatgtggagtcggtgtaactatagaa
+agccaagtgaaataatgcttaccagtcatttagctatacggctttcatttcatgtcaaga
+gggtggagtttgacctgtacagttgatatatcaccgatacttagaactcacctaaagcta
+aaattgctcgcagcgtgtaatccgcatattacaaacaatagatgggattcattatacata
+agacacgatgatctgctttttcaggttgcgagatgttgcctatcgtcaatcgagtcctgc
+cttacaccacttaaacaaaagtattgacagggaacctattttcgaggtattatatagtcc
+agcttgaatatcaatttgacagttaacctagtgaaaatcagtaagaggaaatacgccaca
+ttctccagtgaaattctacgggttatcgtctagtccaactatcaattataactcacgaga
+tataagtaaattctcgtacttggcctgatttttattatactttggatccttagtaaacag
+gaagggagaaaccttcaacgaaaaacactggattttgttttactctcaaagctcttatat
+gacggaaataccctgtcaagtcttaactttattactagactaatgaaatgggcttggggt
+ggccagaatcatagtacaatttagcggatacactattcggactttcctatcggctgtctg
+gttggataagtatggggactaataggctagacatacctatacttaaactatacaggcgtc
+atctatctctgcaactttggagttccctgatgttctcccgccctttgggttcacatcttc
+tataccgacacccctaataacgattagtttgtgggttagagtaaattaatacggttaata
+ttaatgtatcgttgaaaagctggtgtcgccaataaggtaaccggctaggcagagtatatg
+tcacgaagtataactaccctaatgataagctgtaggaataaaattaatgctgtctctaag
+cgaagagatatttccgactctgttttaatgacgaatctcattacttctgacttgcaaatg
+ttcaatatggcacggtttcacggcacctttgtgacgcatataatgaacttagaagattat
+aacgacggaactttatatgataatccgttacgattaaagaatctgttaaatatcataatg
+gcattcagttctagaccgtgcatcatggtaaacttactttctctgcatggcgacatacat
+ttcgctattcaaattcgcgtgtggttacacccactcgcacctttggaatattaagagaag
+atgatcagaaaatccattcgctcaatttttctgacgtacgtctaatttatcctaggagac
+aaatcgttttatgtctctcacatttttgaagaaaggttcgagagacaatactcaggtcct
+gaactgctagaagatactcggtggagcgtggcaacaatgaaaaactcgtgacataaatga
+atgatacttttccaagttcagttaagtgaatatgtttaacatacccggcttttcgatctt
+aagctgacgctggacgtgcgagtaatgtcagtctcttacatacactagtgactccaagtt
+tcgtcaaaaacgccccctcccttctcgagcccactcacgctatgtattgacgcgaacttg
+ttcgggatcagacttttcaggagttcggtcgcgtgtccctatgtgctaatatataagtta
+gatcgcattagatgctaatctgaatacttatagacgaccttcaacgagaacgggtaccac
+cttgaggctagagttaggtgtgaaacgacaggtagggacatataaaatttgagtgcggct
+ttagttaagggtttaattacctactcaaacatcacgctcgcgcccttcgtacgtaatcga
+ccatctagaggctaaggggactgtactaggtagtgattaatgatatcctagacgcacgtg
+ccttagatcttcagactctgatggtccgcgatcaccgtaattgtagtcctccaactcgat
+cactttgttggcgtcaaagaaattacgatatctaaatacttataatacaataaccaagga
+tgagaatgactcatcgcgttggagttatattgcttgaagttctatggaatgaaagcacgt
+tatctgccgtcccaatatctccagtgagctaattcattggacggtccactttgatcaatc
+cccgaggagatgttcggacactttagtctgtaacacttagcgttgagaccacgaacaatt
+gattactcagtcttgaaggtgttttccaaagttcattttaaataagactacgataggcct
+ttcctattgatataaactacccggctctgttgttcgtgtgagtcgtacttctctgtgttt
+ttctgattatagcaagattcgattcttagtgtaaacagcgatttttatttgacccgtcaa
+tgagaagcgcataggatctaagcaaaattatcaagttgtgccacaaggtaagatctttcc
+agttattgcaggtaggatgtatcccacgttgatagtatgaggtctgacgtcaactgtcta
+ggagagttgaccgcgtgcgggtacaccggatttgcatcgatgttgagaacgcagaactcc
+cactgtcgtggcggcgttcctgatatttagcaagaggcgttgataaagccctcatcatct
+agatctcgacctcatctgccctcttgctccatcattttctacacagactactttcctatc
+tacgttagtataattgctttctatcttagtatcatttagagcttctccgtcaacaggttc
+gtgctattaaagttagtacgaaagggacaacttgtagcaacgcatttaatcggttttcga
+ctacttcgcacaaaatcagataaagaagtttgtcattctattagacattgaattgcgcaa
+ttgacttgtaccacttatgatcgaacactgaatcaagactgtgattaactaaaatagaca
+agccactatatcaactaataaaaacgcccctggtggtcgaacatagttgactacaggata
+attaattggactggagccattacattctctacaatcgtatcacttcccaagtagacaact
+ttgaccttgtagtttcatgtacaaaaaaatgctttcgcaggagcacattggtagttcaat
+agtttcatgggaacctcttgagccgtcttctgtgggtgtgttcggatagtaggtactgat
+aaagtcgtgtcgctttcgatgagagggaattcaccggaaaacaccttggttaacaggata
+gtctatgtaaacttcgagacatgtttaagagttaccagcttaatccacggtgctctacta
+gtatcatcagctgtcttgcctcgcctagaaatatgcattctatcgttatcctatcaacgg
+ttgccgtactgagcagccttattgtggaagagtaatatataaatgtagtcttgtctttac
+gaagcagacgtaagtaataatgacttggaataccaaaactaaacatagtggattatcata
+ctcaagaactctccagataaataacagtttttacgatacgtcaccaatgagcttaaagat
+taggatcctcaaaactgatacaaacgctaattcatttgttattggatccagtatcagtta
+aactgaatggagtgaagattgtagaatgttgttctggcctcgcatggggtctaggtgata
+tacaatttctcatacttacacggtagtggaaatctgattctagcttcgtagctgactata
+ctcaaggaaccactgctcaaggtaggagactagttccgaccctacagtcaaagtggccga
+agcttaaactatagactagttgttaaatgctgatttcaagatatcatctatatacagttt
+ggacaattatgtgtgcgaaactaaaattcatgctattcagatggatttcacttatgcctt
+agaaacagatattgcccgagctcaatcaacagttttagccggaaacaatcgaagcatagg
+gacaatgtatcttttcctaaattgccatgtgcagatttctgagtgtcacgaagcgcataa
+tagaatcttgtgttgcctcaactcgttgaaaagtttaaaacaatcgcagcagtctttttg
+gggtctactgtgtgtttgcaaaataactgaaagaaacgcttgaacaactctgaagtagct
+cgagtactcattaaagtgtaacacattagtgaatatcggccaatgaaccaaacgcttccc
+ggtacgctatctctctcatcgggaggcgatgtgcaggttatctacgaaagcatcccttta
+cgttgagagtgtcgatgcatgaacctcattgtaacaatagcccagcaaattctcatacgt
+gcctcagggtccgggcgtactcctccatggaagggcgcgcatctagtgttataccaactc
+gctttttaactactatgctgtagttctacaggcatagtggccagtattttctaacttctc
+tggatagatgctctcactcctcatccatcacggcttcagtttacgtcttacttgcttgtt
+cagcaacggatggaggcattaagtatcttcactgttccctaaaattgctgttcaatatca
+aagtaaggacgatacagggaaagctcaagcacactcattgaatactgccccagttgcaac
+ctcacttaatctgacaaaaataatgactactctaagtgttgcggaagcagtctcttccac
+gagcttgtctgtatcacttcgtataggcatgtaactcgatagacacgaacaccgagtgag
+aaactatattcttgcttccgtgtgtgtgacaccaggtaattgatgcggatataagctgga
+gatcactcacgcccacacaaggcgctgctacctctttattccaatgtgtaagaatttgct
+aacttcatttctagaccgcagctttgcggtcataatttcacggtacggacccttgggtta
+gagacttgataacacacttcgcagtttccaccgcgcacatgttttagtggcttctaacat
+agaatttttgttgtgacataaagagtgcgtgggagacttgcccgaccgttaagccataat
+caattgaaagccccgtgagtcacatctaattggttgtactgcgcatttagctatccttta
+gctgactcgaagagattcgattcctaatataggttaattagatggctgccgcgcgaagta
+aaacgtgaaaaacgtagtgcgcagatctgcataactcgcgcttaattacttatgagtagt
+tccaagttcgctacgttatgagagagattggaattaagcaaatatgttttatggtgattt
+tgggatgagaaggactgctaagtacggctactaaacaaatttctaaaaccgccatctacc
+ttatcttggagacatttaagttgtatatgtcactagtctagcttttgtctgtgggacgcg
+ttctcggaatgagggaaatgcaagagccgattcatcaaatgcttatctaagaaagtagtg
+gactattacaccaagcacgaatgccagggaactgctttcttgctcaggacctcgcgacaa
+ggtaccccgcataagtcctagaattacatttggtcagcaatgctgacatttgaccgtgaa
+aacataattttaatcagaaggcagctcacccgcttgctctagatcttatctttgtatgaa
+tgtcagaatttactgcaatatccgttccgaatagtgagggcttagtatagttctctgtat
+acaggtcacatcaaactccccctgtcctagtacagctctgagctttaattaattgcatac
+atttccttcaatcatcagatgaaaacaccgcgaatcatgctcttctcgtatagggcaaga
+gaagcaacaaacaactagcccgactcacgttcatccgccgtatccttgttcagttcttac
+tccgtattaggtcagcgaaatctaatcagaataatcggtcgcgtatcaaaattaaaatcc
+cgcttgaggttgacaattaaaacgctgagcagttatcggctattagatagtggggtgaaa
+gtaattggctggaattatgttaaaacgtgatattaagctaaaatacgctacttgttgccg
+acctaattcagtcattcgatattcagttagagccaagaataacaagcttgtataaattga
+acggggtgcactaaacgatgtgttactctaatattcagcttggagtatacctgaaggcga
+attcatgtatcggccaataataagacgttgaagatcacaatttggactagcaaaagaagg
+tgatttatgcgtggggattgagtccactgtacgagtacggtctctggaaaattataggtt
+cagggaatataaggaagtaaagataattaccaagagatttttggtatcgctatgacccag
+aggtgttctaacgtctgttttgatccgcagaatttctgcctcaatgcatatttgacggac
+ttgaactagagcctctaaagttaaatggcgacgcaactgttcctaaacttcaattattac
+tactctttttttcctagggtattgtagaggccagtggacaaaataaatcaaatttaagat
+gtttcggacattaacatcccccgtagcatagaaatcatcagttatccaatctctcatcga
+gcttttacaatttctgctggcgctatggacagcatatgccgcgagacctccgcaagactc
+acttgatcactgtaagtatcttcattagaggttagagcctatagttaagctgctgaccta
+gtaaaattggtattttctaattttattgctcaagttaaaggttagtgaagggataatgac
+gttatttttgaacaatgggttgtattcaattttatatcacgaatggaacccttcattccc
+ggcataatactagacgacacgaacaagctccgatctatcagccaggcacgtgttaaggtt
+taattccggcaaaccaatgaagcatcaaaaggtgacctgatgcaacttagggtcacgatg
+agtttttcaggactacttattacctattaataagttaacatgagccttcataccccgtaa
+gacaatacatactccaccaattagaattctgagccatcttatctttttgtatcatcgaag
+ggtatggccgaataggttaattagttactcctaacgtctctacaggcatgcatttgacgc
+accttcgaaaatagtcaatctctcgccacacgcgtctagtatgcagcatcaaaaatatag
+tccacggtttccggattaccaaacgcggcaaagagaaacattgtatcgacggagataact
+taatacagaaggaaggggcatcttcgaatacggatgaataattctatctgtttattctga
+catcttgttttcaggttaatcttacgcattcaaatgacgcctgccccatgcgtgcgcaat
+tattttctaatattgacgagagcaatctcactccttttgggtctatttatgttttattga
+ggcacaagcctatacagaacaggtactattaaggccgtgagtgtgagactcaaaccgtgg
+aaacaaaggatgggttgttcttggtacaagttttagtgcatgtgggcaatccttaccaaa
+atcagatgctatccttaactttgggctgcatttaagatggcggttggaggcctgtgagaa
+tcctgcgtgtcatctttaatgaccgaattcatccatgtagattcagatcacacactcatt
+ccttgatgttgtctaaacaaaagttgttgtggacgcattggagggagttaagtaacaact
+tgggatcgcatacttataaaaattatatgttaaactttcacaaacgctgaagtccaaagt
+aactagcccaaacgcctcgagagtcactaggtattaatggtgtttgagttcctgtgaaat
+agtgttcgaaggtaaaatttatgtaccaaatcgaaagaacacttaataaggcttgcttgc
+acggaggtatgatgtttactgactctacaaccctaattttccagtacgtacattcattcc
+aataggttagttctcaaagtgctatacaggctcctcaattgatgatatgcttcagccgct
+ctatggatattagctcattttatttaggaagcccgcttagaggcttactatgagggaaat
+gccaaaatgtcatacttttcggtgtgtcccatatgacaccgctttacatagaatttgaat
+taaaacgcgctctcccgttcactaccatacttggtaccgtgcgcatattacatatagata
+taggatcattttttaaagctgtactaggtttgatcgacaatcttatgctatactatatga
+tgtaaccctcataatcaataccgatcgtacgatcctagcataggtggcaagcgattttat
+gccgattattgtgttaaatagtctgtgagtgtgattatcagggctacgttggtagagggg
+ttgtatagacctcgcacacattgtgacatacttaacaatatacgaaaactgatataataa
+atccccttacccaaacaccaatcccgttgaatcaactaccataacgtctcccatataaat
+tgcctacttgtttgcataaatctgaatacataacaccattgcaccttcttgtgttccaat
+cccgttaagattgccttgtcagatgatatgcaagaacaatagcatttgctagcaattatt
+aacagctcttcgaattgcctccacataacgcgggagggtatattttaatttggcaaatac
+taagtactgttggcgtcatatgctattaacggttggatattaagttatgtcagccgtaag
+caagagtgggcgaaatattttgttacccagtgagagcactcttagagtttggatacaata
+ggccatatgttgacttaagaggacgtaactacgccgtacaccattgttcaaccgacttct
+tggcaaatagaatcgtattagcaatcttaagaatagagacacgttcgtgttagggtatac
+tacaaatccgaaaatcttaagaggatcacctaaactgaaatttatacatatttcaacgtg
+gatagatttaacataattcagccacctccaacctgggagtaattttcagtagatttacta
+gatgattagtggcccaacgcacttgactatataagatctggggatcctaacctgacctat
+gagacaaaattggaaacgttaacagcccttatgtgtacaaagaaaagtaagttgttgctg
+ttcaacagatgatagtcatgacgcgtaacttcactatagtaaattgaaacaaatacgcaa
+tttagacagaatggtacggtcatgaatgacagtaattcgaagtgctagaccaacttaaaa
+taggtaaacgtgcccgaaaccccccttaacagaaagctgctatcatggtgcagtatcgac
+gtgttcagaaacttgtaacttttgagcaggtccgagcacatggaagtatatcacgtgttt
+ctgaaccggcttatccctaagatatatccgtcgcaaactttcgatttagtcccacgtaga
+gcccaagcgttgtgcgactccacgtgcatgcccagaaatacgagtttaaatttggttaca
+tggttaattttgaccgaagcatcgcactttatgattgataattggattcaatatgtcgcc
+ctatgcgaatgcaacatgatccacaatttggctataagacgtttaatccgtatcacactt
+tgtttgcggctagtatagtaacgcccgtgcaccaagagtcagtaacaattataagtactc
+cgcaggtacttcaaatataaaaactaatcaaacacgacccatatgatcatctgaagatat
+ttggaactttctcgacaaccaccctcgtactcaatacttacactaatcgacaggcacacg
+caacgtgtacagtcgcaccatattgagtcaagatttgcttagtggcgatgagcgtacacg
+cttatttctctagtcacaattagttatctacgagacatcacgagggagcaaataagcgat
+gttatggctacacataggcacgtatgaatatgatataagccagttaaacagtcgaaccat
+cgagcaaattctcatgcaccaacccacacgttgaggcacaaagagtaagctgtttgaatg
+taacttcttctgctgagcgggccccaacgtaaggatcaactagaagagaaaactcggtat
+tagtttaaatgcgtcacggagcatgagtgcatttcactaagaatgtctgtgtaaccaata
+taacatctatttgttatctgattgcctacttatggctttgcggtcgtggcgactaatgtc
+tccaatccttttgaggtcggtaccaactccctttaaattacgctgtgcaggctcatgcac
+tgcatacatatacggtagcaggtagggacctcacgcacccttattataatcaatagtagt
+tatcagtcaacgaggcaggaatgctgaggtcgaggtgttggtatattttctatgtgccgt
+ctaggcgactatcacgcattaccaggcgagatttaagccaattttgaatatagtcaacgt
+aatttttactatgggttccaccgaaacgccttgcacaactaagaatcccataaaatatcg
+atatcaaataaaagattgtgtcaataccttcatatatattttttcggttgactaacgtga
+actaaggttaggggttttgtatgtctatataggaaacagtttcttttctgtcctacttta
+gtaaagtcttcaagccttactccaaaatcacggtgattaagccgttactcagcagcatga
+ttctgcctgctcgggtcctaaaatccagccttgtaagagtcgctgtgtattagctaggga
+gacctttgttaaaaaggatatatcgcggcgggatgtgagtgcgtggcgcatactcaatct
+tcagctcgtgtcattataatatctctcccccacgcttttcactagatatgccgtgtaagc
+aaacaccttatgcttaatttcgaaaatattggtacttgaaaaaagctgtaggggtactta
+atgtctggtaggagatcaggagagaattgagtgtaaaaccgtaaagccctcacctgactt
+catgtaaatggcttagaagactccatgatttaataaatactacgaaggaaagactggatc
+taaagataactctagtaaggccaactcccttcaatgctgttgccagttataatccaagag
+ctgtccttttctgaaccatagcggcttctgaagcgaactagaagcaaagttggttctagc
+cagacagccacataccctgtacgggtgtattactaaaactggtccggtattagttcacca
+agggaggaattaggcaaaggatctaggtatgcaagtcggagtattacatccctaccctga
+atccatcaataggttcctctgtactggccttcgcaatgagtattcaaggttgtacagccg
+tataataataagatagtgactatgaacgggaagtaacccgctcaccttccccaaaacatt
+gttatatctaagtattaaagtctgccgtagtgttaatactcgaaaataaacaactggcaa
+attacaccgcacttaagccgcttttgatttatatttttccaatgcgcttttaaaaataat
+tcagtcctacatactaattaagacccttaaacggagatatcacaagttaagttttaacca
+tctcgactaggtggaactatagatacccaactcaatttatcattacctgtaatgttccta
+gaaggattgcatttcatgtcaagacggtggagtttcacagcgaaacttcagtgtgaacag
+attctgagaaatcacctaaacctattagtcagagcacccggttagaaccagttgtcaaaa
+aatagagcggttgcatgagacagaagtaacgatgagatccgttgtaacgttgagacatct
+ggcctatcgtcaatacagtcctcccttaaaaatatttttaaatactaggcaaacccaaca
+taggttagtcctatgtgatacgccacatggtatatcattttgtaacgttacctagggata
+atcaggaagtggaattacgcaaaagtagacagtgaaatgcttagggttatagtctagtcc
+aaagataaaggataaagcacgtcagagaactatattagccgaatgggaatcattgttagg
+agactgtggatcatgtctaaaaagcaacgcagaaacagtcatcgaaaaaatctcgttttt
+gtttgaatctaaaagagctttgatgaccgatagtacctgtatactagttactgtattacg
+tgtctaatgatttcggattggggtccccagaatcagacgtcattgtagacgattcaagtt
+taccaatttaatttcccagctctccttggagaactatcgccaataattgcagtcactttc
+cttttctgaaacgataaagccgtcagagttctctgcaacgttggacttacctgaggttct
+aacccactttcggttctaatagtagttaacgacacaacgaataacctttactgtggggct
+ttcacgatattttttcgcttattattaatggttacgtcataagctggtgtccaaattaag
+gttaccggcttcgcagagtagttgtatccaagtataacttccctaatcataagatcgagg
+tagaaaattaatgctgtctctaaccgaacagatatgtcccactatgtggtatggacgttg
+ctaattacttctgaagggaaattggtcattatggatacgtgtctaccatcaggtcggacg
+cagatatggttctgtcttcagttgatccaccgttctttataggataataactgacgatta
+aagattatggtaaatagattaagccaattctcttcttgtcagtgaagcatccttaactga
+cttgctctgcagcccctcatacatttagctattcaaagtaccggctcgtttcaaactctc
+ccacctttggaagaggttgtcaacttgataagtatatcatttacagcattttttcggacg
+tacctctaatgtttcattgcagaaaattagttttttctatcgcacattttgcaagtaacg
+ttagagacacaattatctgcgaatgaactgctagatctgacgaccgggagcctcgcaaat
+atcaaaaaagactgacatatatcaaggagtcgttgacaagtgctggtaagtcaattggtt
+tatctgtcccggcgtttcgatcttaagctgaccatgcacggcagagtaatgtcactctcg
+ttcttacaagtctgtctccaagggtcggcaaaaaagacccctccattctcgagcccactc
+acgatatgtagggacgacaacttgtgcggcttatgaattgtctggactgcgggcgagggt
+ccatatctccgaagttagaagggacatacctttagatgataagatcaattcttattgacg
+aaattcatccacaacggggaacaacttcaccctagacttacgtctgaaaagacacctagc
+gtcttataaaaggtcagtgccccgtttcgtaaggctggaattacctacgcaaacttaaac
+ctcgcgcccttccttacgtatcgacaagatagaggctatcgcgaatgtactacggaggca
+tgaatcatatactagaaccaagtgcctgtgatattaacaagatgatccgacgcgagcacc
+gtaattctaggcataaaactccagcaatttgggggccgaaaacaaatgacgttagctaat
+taattatatgacatgatcaaaggaggtcaatcacgcatcgagttcgacgtatattcattg
+aacttcgtgcgtttgaaagaaacttttatgaaggcaaaattgatcctgtctcctatttca
+tgcgtacctcctagttgataattccccgagcagtggttaggacacttttgtcggtatcaa
+gttccggtctcaaaacgtaaaattctgtaatctgtatggatggtctgtgaattagttaat
+ttttatgaagtcgtcgagacgcagttcctattgatttattctaaacggagatgtgcttcg
+tgggactcggaagtagatctgtgtttatgattattgctactttagatgctgactgttaac
+tccgtgttgtttttcaaccgtatatcacaaccgaattggatagaacctatagtttcaagt
+tctgccacaaggtatcatatttacagttagtgctggttgcttctttcaaacgtggtgagt
+ttgtgctatcacgtcaacggtagagctcagtggaccgagtgcgcgttcaaccctgttcca
+gagagggtgtgatagcacatataccacgctcgtcgaggcgttcatgatagtttgcaagag
+ccggtgttaaacacatattattattgttatccaactaatcggacctatgcataaagcatt
+gtctaaacagaataattgcctatatacggtagttttagtgatttatatcttagtatcagt
+tagagcttcgaactcttcaggttcctcatatttaacgttcttcgaaagcgaaaacttcta
+caaacgaatgtaagcggttttccaagtagtacctataaatcacagaaagatctgtctcag
+tatagttgaaatggtattcagctagtgacgtgtaccaattatcatagttcactcaagcaa
+gacgctcattaacgaatatagacaagacactatatcatataataaaaaagaacatggtgc
+tcgaacatagttgaattcaccatattgaaggggaatgctgacatgtaattcgctactaga
+cgatcaattccctacttgtcaaagttgaactggtacgttcttggaattaaatatgattgc
+gctggaccaaattgcgacttcttgagtttcagggcaaacgattgagccggaggatgtccg
+tctcttacctttcttgcttatgataaacgacggtccctgtacatcactgggaattctcag
+caaaaataattgggtaaatcgagactcgatgtattcggccacaaaggtgttagacgttaa
+agattattcaacggggcgataataggatcataaccggtatgcaagcgcattgaaagagcc
+atgagatccttatccgataaacgctgcacggtatgtgcagccttattgtcgatcacgaat
+ttataaatgtagtctgggctgtaagttgaagacctaagttataatgaagtgcaataccaa
+atcgattcatagtggattatcagactcaagatatctcctgataaattacagttgttaaga
+tacggataaaatgagatttaagattagcagcctctaatctgtttcaatcccgttggaatg
+tggtatgcgatcaaggttaagttaaaatcaagcctgtcttcagtcttgattcttgttctg
+ccatcgcatgcggtctacgtgagttaatatgtagcttacgttctagcttgtgctaatctg
+agtatagattcgtagaggaatattatcaagcttccacgcctcaacgtacgtgtattggtc
+acacaagacactaaaagtggaagtagcgtaaactatagtctagttgttaaatgctcagtt
+cttgttatattcgatatactcttggctaatttatgtctgagtatataaaattaatgatat
+taacttgcatttcacggatcccttagaaaaagattttgaccgagcgcattataaacggtt
+acaccgaatcaatagaagcatacccaatagctttctttgaatttattgcctgcgcaactt
+ggctgactctctagatccgaataattctatatggtcgtgacgaaactagttcattactgt
+ttaaaatgccaacatgtcttttgggccgataatggctctttgcaaaattactcaatgata
+cgattgatcaaagcggtagttgctagtggtagcatgtaagtctatcaaatgtctgattat
+ccgaaaatcttccaaaagagtccacgtaccatatctatctcatagcgacgcgaggggaac
+cttatctaactatcattccatttaccgggtgactctcgatgcaggatccgattgggataa
+attgcccagaaatggctcattcctgactaagggtaaggccgttctcagcaagggaacccc
+gcgaatctaggcttataccatctagattgttaactacttgcctgtagttctacagccata
+ctggacagttgtttctaaatgatcgggattcatgctagcactcctctgaatgcaccgcgt
+aagtttaactattacgtccgtgggcagataaggatggaggctgtatgtatcttaactgtt
+acctaatatggctggtaattatcaaagtaaggaccttaatgccatagcgctagcaatcgc
+tttgtatactgaccatgtgccaacctctcttaatctgtaaaatataatgtcttagctaac
+tgtggacgatcatgtctctgcctagagcttcgctgtatcaattcctatagccagcgtact
+agtgacacaacaacaccgtgtgagaaaagatattagtccttacgtctgtctctctacagc
+ttattgatgaggattgaacatggacatatagctccccctcaaaagcagatgctacctctt
+tattccattctcgaacatttgccgaacttaatttcgacaaacctgaggtcacgtcttaat
+ttatcggtaacgtcacgtccctttgagactggataaatatattaccaggggccaacgagc
+aattgttggaggcgcttctataatacaaggtgtcttgtcaaagaaagacggcgtgcgtct
+cgtgcaactcacttaaccaatattaatgtgaaacccccctctctcacatcttatgcggtg
+tactgccctggtacatttcctgtacaggactccaacagtgtagattcctaagatagctgt
+tggagttgcctcacgccagatcgaaaaactgaataaactagtgagctgagctgcagaaat
+accgcttaattacttatgactagttcaaagggacctacgtgatgtcagacattgcaagga
+agaaattaggtttgtgcgtcattttggctggactagcactccttacttcccctactattc
+aaatgtcgtaaacagcatgagacaggatcgtgctgacatttaaggtctattgggaacgag
+gctacctttggtcgcgcgctcgcgttctccgaatgaccgaaatgcatgagcacagtatgc
+aattgcttatagatctaaggtctggtcgttgaaaccaagcacgtaggcctgggaaatcag
+ttcttcctcagcaactacacaaaagcgtccaagcattagtacttgtagtaaatgtccgaa
+cctatgcgctcatttgaaagtcaaaaaatatttttaagcagtaggcacctaacccgattc
+ctctacttagtagctttctttgattctcagaattgactgcaatatcactgcacaattctg
+tgccattactagacttctctgtattaacgtctcatcttactaacactcgcctaggacaca
+tctgagagtgaagtatttcaatacatttactgaaatcttcagttctaaaatccccgaata
+aggctcttatcggtttggccaacacaagaaaaaaacttcttgcaccactcaccttcatac
+gcaggagcctggggaacttagtaataactatttcggcagacaaagcttataacaagttgc
+cggcgcgtataatatttaaaagaccccttgagctgctcaattaaaacgctcacctggtat
+aggctattagatagtgccgtcttagtaaggggcgggaattatcggataaactgatatttt
+gataaaataaccgacttgttcacgacataagtcactaaggagattttatctttctccaaa
+gtatatcttccttggataatttcaaagcgctgcaatttaagttctgttactagtttatgc
+tgctgggaggtgaccggaaggcgtagtaatctagaggcaaattataagaagttcatcata
+tcattttcgactacaaaaacaaggtgttgtatgccggcgcattgtgtaaactggacgagt
+accctagatggaaaattatacgttaagccaagatttcgatgtaatgataattacctacac
+atttttgctatccataggaacaagagctgttctataggctcgtggcatacgaacatttgc
+tgccgctatgaatattggaagctcttcaactacagactctattcttaattgccgtcgaaa
+atgggccgaatcggctattattaatactcggtttttccgaggggattgttgtcgacagtc
+gtaattattattaatattgatgttggtgaggtcatttaaatacaaccttgcagacaatga
+ataagggatccaatctctcatactccttttacaattgctcatgcccctatgcaaacctta
+tgccgccacacctccgcaactctctcttctgaactgtaagtagcttcattactggtttga
+gactatactgaagctgatgacattctaaaatggctattttcgaatgtgattcataatgtt
+tatcgtttgggatggcagaatcacgttatttttgatatagcccgggtattctattgtata
+gaacgtatgctacaagtcattccccgaagaagactagaagtaaacaacatgcgaccatcg
+ttaagccacgcaaggctgtagctttatttcccgataacctatcttccataaatagcggac
+agcaggatactgacgctcaacatcagtggttatggtctaatttttaacttttaataaggt
+aacttcagcaggcatacacagtaactctttaatttataatcaaattagaagtctgacact
+tcttatatttttctatcatccaacgcgatcgcccattagcttattgtgttactaataacg
+tatctaaaccaatccttttcaagctactgcctatattgtcaatatatacaaacaacagga
+tagtaggctgcttaaaaaatattgtcaaccgtgtacgctttacaatacccggaaatcaca
+aactttgtagacaacgagtgaaatttatacactacgaagggccagcgtacaagacccatg
+aattaggcgatatgtttattctgacatattggtttatccttaatctgtcgctgtaaaatg
+aagccgcccccatccctgcgaattttttttcgaagattcacgactgaaatataaatacgt
+ttggctatatttatgttggagggaggcaatagcctttactgttaaccgaagatttagcca
+gtgagtgtgacactaaaacactggaataaatgcaggcgttcttctgggtaaaaggtttag
+tcaatctcgcctataagttcatatagctctggatataattatctggcccatgcatttatc
+atggcgcttggtgccctgtgtgaagccggcctctcatattgaaggtccgaagtattccat
+gtacattaagatcactctctcattcatgcatcttggcttaacaaatctggttgtccaagc
+tttccaggcacgtatggtacaaattcggatcgaatacttataaaaatgatatgttaaact
+gtctaaaacgctcatctacaaagtaaagtgcactaaccaatagagtctcaagaccgtgta
+atgctggtgcactgaatgtgtaatacggttagaagggattagttatgttacaaatccatt
+gaaaacttaagaagcattgcgtgctcggagggtgcatcttttatcaagagactaacatta
+ttttcaacgacgtacatgctttacaatagggtacttatcaaacgccgagaaacgcgccta
+tagtgatgttatgattatgacccgatatccattggaccgaattttatgtaggttcccagc
+gtactcgcgtaatatctcggtattgccataatgtaatacttgtcggtctctcccagatga
+aaaagcgttacagagtatttcaatgaaaaacagcgcgcaacgtcaatacctttaggggta
+acggccgctgatttcatatagatatacgataagttggtatagctctactaggtggcatcc
+acaatcgttgcatttactatagctggttacaatcataatctataccgttccttacatact
+accatagcgggatagcgtttttttgccgttgattgggtttaagaggatgtcagtctcatt
+atatccgattcggtgggagagccgttgttttcaaatcgcacactttgtgacataatgtac
+aagataacaaaactgatataagatataaactgtcaatatcaccttgacacttgaatcaaa
+gtaaattaactcgcaaatataatttgactaattgggtgcagatttctcaattaataaaaa
+aatggcaccggatgggcttacaagccccttatcattcacttgtatcatgatttccaagaa
+caatagaatttgctagcaagtatgaacagagattcgaattgcatccacagtacgccggag
+cgtttattttaatgtggatatgacgatgtactgttggcggcatttgctagtaaccggtcc
+ttatttacgtagcgcacacgtaagcatgtctgggagaaatatggtggtacaatctcagag
+aaagattacagtttggtttaaataggacttatcgggtcggaagtggaacttaataagcag
+tacacaattgggcaacagacgtcttgcctattacaataggattacaatgcgttagatttc
+agacacgttcgtgtttggctattcgtcaattccctaaatagttagacgatcaactattat
+caaagtgattctttgttcatcctccattcatgtaacagatggcacactacgcataacgcc
+gaggaattttaacgagatttaagagagcagttcgggcacaacccacttgactttataaca
+gctcggcagcataaacggtaatatgtgacaaatttccaaacgttataagaacgtatgtgt
+acttagaaaactaagtggttcatgttcaacagatgtgacgcagcaagcctaacttatcta
+ttggttttgctataaaagaacaaagttacacagaatcctaagggcttgtttcacacttat
+gcctagtgcttcaccatcttaaaatagcgaaaccggcacgaatcaaaccttaaaacaatg
+cgcagatattggtgatggtgactccgggtatgataatggtaactgttgaccagcgcccac
+ctcatcgaagtatagaaagtggttaggataaggatgagaccgaacttatttccggccata
+actttagattttctacctagtacacaacatcagggcggacacgaaaccgccatcacatca
+tataccaggtttaatttgcttaatgggggaagtgtcaacgaaccttcgaactttagcagg
+catatggccattatatatggccccagagcagaatgctacagcagacaaaatttggattta
+tgtagtttaatacctatcaaacttggtgtgaccatacttgtctaacgacagtgcacaaag
+tgtaagttacaattattactactcagcagcttctgcaatgataaaatcttatcatacacg
+tcacatatgataatatctacttagggggaacgggctccacaacctacatagtactcaata
+cttacactattcgacaggcacaccaaacctgtacagtcccaaaagattgagtcaactttg
+cagtactgcagatcacagtaatagcttagttagcgagtcaaaattagttttctacgagac
+tgcacgaccgtgcaaatttccgatgtgttggctacaaatagcaacgtatgaatttgtttg
+aagccacgtaaactgtacaaccttagagataagtctcaggctactaaaaacacgttgtgg
+cactaacaggatcatggttgattcttacttattcggctgaccggcccaataagtaacctt
+caactagaacagaataatcgggagtagtttaattcagtcaaggtgcaggtctcattgtaa
+ctaacaagctctgtgtaaccaagttaaaatcgttttcttagcggattccctacttatgga
+tttgagctcgtccacaatattcgatacaagaagtttgtggtccgtaacaacgaaatttta
+attacgctgtgcagcctcatccaaggaattaatagaaggttgatggtaggctccgaacgc
+tccatgattataatcaagtggactgtgcagtaaacgaggaaggtatcctgacgtcgtggt
+gttcgtttttgttatttgtgccctatacgagtagataaaccatgaacagcacagtgtgaa
+cccatggttgattttaggctaccttatttttaatttccgttacacagaaacgaattccac
+aactaacatgccattaatttttcgatatcttataaaagatggtcgaaattcattcattta
+ttttttttcggttctcgaaagtcaactaagctgtcgcgttttgtttctctttagaggtaa
+aagtggctttgatctcctacgtttggatactagtcaaccattactccatttgatccgtga
+gtatcacctgtctaacatccagcattatgactcctcggcgaagaaaagacacacttctta
+gagtcgatgtgtattagctagggacacagttgtttaatacgatagtgagcccagggaggg
+cagtgcgtcccccagtagatttattcagctagtgtaagtataagatatctcacccacgag
+gttcaagtgatatgcagtcttagaataatacttatcctgaatttcgatattatgggtact
+tcaataatccgctagcgctactttatgtctcgttggacagcaggacacatggcagtctta
+aacactaaagacatcacctgaatgaatgtaatgggattacaagaatcaatgaggtattat
+atacgacgtaggaaactctggatatatacagtaatctagttacgccatcgcacttcattc
+ctctggaaacttagaagacatcagctgtacgtggaggaaccagacccccgtatgtagcca
+aatagaaccaaagttgcttatacaaacacacccaatgacaatggaccgctggagttcgta
+aactcggaacgtagtactgcacaaacccagcatttagcaataggagctacgtatgcaact
+cccacgtggtaataccttcaagctatcaatatataggtgcctagctaatcgcattcgcaa
+gcagtattcaagcttgtaaaccagtataataattacagaggctctatgaaacccaacttt
+ccagctaaaagtcccaattaaatggttatttcgtacttttaaagtcgcccgttctgttat
+tacgcgaattgattctactccaaaattaaacacaaattatcaaccgtttcatttatattt
+gtcaatgcagctgtttaaaataaggctctactaaattataattaagacacttattaccag
+atttctctagttaagtttgaaccagctcgactaccgcgaaagatacattcccttctctat
+ttttcagttcatctatgggtcagagaagcattgaatttattctattcaccctcgtcgttc
+acagcgaatcgtcagtgtgatcagtgtatgagaaatatcctaaaccgtttagtcagacca
+cacgcttagaacaagtggtctaaaaagactgccctggaaggagtaagaagtatacagctg
+atccggtgtatccttcagtcatctgccctatactaattacacgacgcaaggaaaaatagg
+tttattttctaggcaaacccttcataggtgactccgatgtgttacgaatcatgcttgaga
+atgtgctatcgttaccgacggataataacgatctccaatgaaccaaatgtagaatgtcta
+ttgattacccttttactattcgacttagagataggagatagaacctcagtgtactttttt
+agccgaatgggaatctttgggaggtgaatggccataaggtcgtaaatccaaccctcttaa
+agtcttccatattatatcgttgttcgtggaatcgataacagatttgttgacccatagtaa
+atgtatactagtttatgttgtaagtgtagattgttttccgattgccgtccaaactttatg
+tcgtaattgtagaccagtaaagttgaccaaggtaagtgcccagcgatcctgcgagatcga
+tcgccaatttttccagtcactgtaagtgtaggtttagataaagccgtatgagttatatca
+taagggcctcggaaagcagcttcgaaccaaagttcccttataatagtagtttaactataa
+aagtatatactggtctgtcgccctttcacgatttgttttaccggtttatgaagcgttacg
+tcattagagcggctccaatttaaggttaacggcttccatgtgtagttgtatacaaggata
+acttaaagtatctgttcagcgagctagttaagttatcctcgatagaacacaactcagagg
+tcccaagatcgggtttgcaacttgctaatttattctcaaggcaaattgggaattatcgat
+acctgtataccataaggtcgctcgatgtgatgcttatgtcttctggtgatcctaccttag
+ttagtgctgattaacggaacattaatgtttatcgttttgagatttagccaattctctgat
+tctaactcaagatgccttatctgacgtgctatgcagcccctaagtattttacattgtaat
+aggacacgctcctttaaaactcgccaaaaggtcgttgtggttctctactggttaactata
+taatttacagctttgttgagctagttcctctttggtttaagtcctcaatattagttggtt
+cgagcgataagttggctagttaccttagtcactatattagatccgaatgttatgcttcat
+ctgaagaccgccaccctccaaaatttcttttaagactcacttattgcaaggtgtaggtga
+attcggctcgtttctcaagtggtgtatctgtacacgagtttccatattttcatcaacagc
+caccgcacacttatgtcactctaggtattaaaagtcgctctacaaggggacgcaattaag
+aaacagacatgctagtcaaaaataaacatagcgaggcaccactaattcggccgcttatca
+atgggatgctctgcgcgagacgcgccagagctcagtagttagttcggacatacatttact
+tcagatgatcaattagttttctacaaatgcttactctaccccgaaaaaagtcaccagact
+cttacgtctctttagtatccttccgtcttatataaggtcagtcccccgtttcggtaccct
+ggaatttactaagaataatgaaacagcccccaaggacgtacgtttacaaatgatagacca
+gatcgcctagcttattccgacgcatgttgcatagaattgaaccaacggaatgtgagagta
+actagatgagccgaccacagcacccgtttgcgtcgcagaatacgcctgatagttcggcca
+cgaaatcatatgtcctttgagtattaagtatttgtaatgatcaatcgagctcaagcaagc
+ttacacttcctcggatattcagggaacttagtgcctttgaaagatacgttgatcaacgaa
+aaattgataatggctcatatggaatgcctacctcatagtgctgaattaacacagcactgc
+ggacctaacttttcgaggtttcaagttcacgtctcaaaacctaataggctggaatatgta
+gggatcctcggtgaatttgtgattgggtttgttgtagtactgaccaagtgaatattcttt
+ttttctaaaagcagatctgctgccgggcactacgaaggagatctctgtgtatcattattg
+cttcttgacatgatgactcttaaatcactgtgggtgtgcaaaacgatagcacaacccaat
+tcgatagtacatattgttgatacttcgcactaaaccgttcatatttaaaggttgtgctcc
+ttccttcgttaaatactggtgacttggtcctatctactattagctagacctctggggaac
+cacgcccccgtaaaacctgtgcaagagagggggtcatacatcttagacatcgcgcctcca
+ccagggaagcattgggtgattgaccaggtgtgtaacaaatatgattattcttatactaat
+attagcaaagatgcataatgatttgtattaaatgtataattgaattgataagggtctttt
+agtcagtgatagagtagtataaggtagacattagaactcttaaccggacgcagatttttc
+ggtcttagtaagccaattagtcgacaaaacaaggtaagagcggttactagtagtacctat
+aatgcactgaatcttcggtcgaagtatagttctaatgctatgcagattgtgacggcgaca
+aatgttcagacttatatcatgaaacaagctcttgtaagtattgacaaatgaaaagattga
+atatttttaaatacaaaatgcgcctacttattaggggaattaaccagattgaaggccaat
+cctcacatgtaatgagataatagacgataaatgaaattcttgtaatagttgaactgctac
+gtgatgggtattatatatgattgagatcctccaattgccgacgtcttgtcttgatgccca
+aaagattgtcaacgaggagctccctcgcgtacctgtcgtccgtatcataaacgacgcgac
+atgtacagcactccgaagtataagcaataataatgcgggtaatccagactagatcttttc
+ggactcaatgcggtttcacggtaaacatgattaataccggagagtagtcgagcttatcag
+cgatgcaagcgaattcattgtgccaggagatacgttgcagataaaaccggcaacgtatgt
+caacaagttttggcgatctcgttgtttgtattcgacgaggcgcgggaacttcaagaacta
+tcgtatattcaagtccattaccttttagtttcagactggtggagctgactaaagttatat
+catcattttgtacactggtttagttaacgataatttcagatttaacatgaccagacgata
+atcgctgtatatccagttggaatgtggtttgccagaaaggttaacttataatcaagcctc
+tcttcagtcttgattcgtcgtatcccatccattgcgctatacctcagtgtatttggagct
+gtagttataccgtgtgctaagatcagtagacatgacgagagcaatattatctaccttaca
+agcatcaacggacgtctagtcggaacaaaagactctaaaactcgaacttcaggttaatat
+actatagttctgtattcagcagttattcttatattcgatattatcttgcctattggatgt
+ctgactttagtatattaatcatagtatctgccatgtaaaggtgccagtactaaatctgtt
+tcacagtgcgaattataaacggttacaaccattaaagacaacaagaccctatagctttat
+ttgaattttgtcaatgcgcaacttggagctcgcgatacatcccaattagtctatagggtc
+gggacgattctacggcatttctggttataatgacaacatggattgtggcccgagaatcgc
+tctttcattaattaagcaatcattacagtcttataagcgctacttccgagtggtagcagg
+taactcgatataaggtcgcatgagccgaatagcttaaaaaacaggccaccgaacattgat
+agagaataccgaccacagcgcaacctttgattactttcattaaattgtacggctcactcg
+acatcaagcttaagattgcgataatgtgaactcaaatggatcagtactgaagaaccgtaa
+cccacttcgcagaaagcgtacccagagaagatacgctgttacaatatacagggtgaaatt
+attgcctgttcttcgtaaccatttcgccaaacttggttagaaatgatagccattcatgat
+agaaataagctgaatgataccagtatctttaactatgtagtcagggggaagataacgatg
+gtccatgtatgtttctgatatgtgacagtattggccgcgtaatttgctaacgaagctact
+taatgcctttgagcttcatatagatttctttaatcaaaatcggcaaaaagatagtatgag
+ctataatatatgctagtagagaactctggaccatcatctatatgaatactgattcgagcg
+tgcaattactttagcctgcgtactactgactctacaaaacactctgagataagtttgtag
+tcagtaagtcgctctctataaaccttttggatgaccattgtacagccacttatagatccc
+aataaatagcacaggagacagagtttttcaatgctcgatcatttgccgatagtattttcg
+tctaacctcagggcacctattatttgatacctaacctaacggccctttcacaatggagaa
+atatatgacatcgggacaaacacaaatggtgggtggccaggagatatgacatggtggcgt
+ctctaagaaacacggactccctctaggcaaactcacgtaaccaattttaatgtcaaacaa
+aacgctcgaaaagattttgccgtgtaatgacctggtacattgactggtcaggaatacatc
+actgtagttgccgtagtgtcctgttggtgttccatcaagacacatcgtataacgcaattt
+acgacggacatcagatcaagttatacagattatttaagtatcacgtgtgcattgggacat
+aagggatctcacacatgccttggaacatttttgctttgtgccgctttttcgctgcactac
+caatccttacttaccagtatattcaaaggtcgttaacagaatgagaaaggttagggctct
+aagttatcgtcgattgggatagacgagacatttgcgagcgccctccacggatacgaatct
+cccatatcaatgtgaactggatgctatgcagtttagttcttacgtctcctagtggtaaaa
+atcaaagtagcactcgcatagcagttattcagaacctaatacacaaaaccgtcaaacatt
+ttctaattctaggtatgggccgatcataggagctaaggtgaaactcataaatgttttgtt
+agatctagcatcctaaaaagatgcatatactgagtagctggcgtgcattctctcaattgt
+atcctttttaactgaactagtcggtcccatttcgtgactgagatctattaaccgataaga
+ttaataacactcgcattcgtatcagctcagagtgaagtttttcaataatttgactgatat
+attaacttctaaaataaccctttaagcctcggatccgtttcccaatcacatcaaaaattc
+ttattccaactatctacggattaacaacgtgcatggggatcgtagtaagaacttgttccg
+atcactttgagtatatcaagttgacggcccggttattattgaatagaaacattcacctgc
+taaattaaataccgcacatcggatacccgatttcagagggccgtcttactaagggcaggc
+tttgttcggtttaactgagatgttcattattttacagtatgcttcaactaatatgtaacg
+aaggacagtggatctgtctccatagtagatcttcagtcgtgaatttcataccgctcctat
+ttaagttcgcgttcgagttgttgatcatggcacgtgaaagcaacccctagtattctagac
+gaaaattttttctagttcatctgataatttgccaattcaaaaacaaccgctggtttcccg
+gcgcattctctaaaatggaagtcgaacctagagccattatttgtcggtaacccatgagtt
+ccttcttttcagaagttaatacactgtggtcctatacagaggaaaaacagcggttatata
+cgatcgtggcataacaacattggatcaagatagcaatttggctacctattctaattctca
+ctagattcggtattccactacaatatcggcagattaggattggatgaataatcggtgttt
+aagtccggttgcgtctccaatctcctaatttttattaatattgatcttggtgacctattg
+taaataaaaacttcaagactttgaataacggtgaaaagatagaagactcatttgaaaatg
+gatcatccacagatccaaacattagcaagacactaatccccaactagctattctgatcgc
+gatcgtgctgcagtactcctgtcacaatagtctgttcatgatctaattctttttgggctt
+tgttcgatggtgattcagaatctttatccggtcgcttccctgtagctactttgtggggat
+attgcccggggattatagggttgagatcgtttcctaaaagtatttaaaccaagtagactt
+caactaaactacatcagaacatcgtgaagacaccatacgcggtacctttatttaccgata
+acatttcttcaagaaataccggtaagcagcataatgaccctaaacagctcggggtatcgt
+cgtagttttaaattttatttaggttactgctcaaggaataaaaactaactatttaattta
+taataatattacaaggctcacactgattagatttgtctataagacttcgcgatcccccat
+taccggattgtcttaagaataaactagataaaccatgcattttctagataaggcctttag
+tctaattagatacaaaaaacacgatagttgcatccttaatttattgtgtcaaacctggaa
+ccttttaattacccgcaaatcactttatgtcgagactacctctgaaatttattatctacc
+taccgcatgaggacttgaaccatcttgtaggagttatgtttattagctaagattcgttta
+tcctgtagcggtccatgtatattcaacaagcaaaaagcactcagaattgtttttagttga
+gtcaagactgatatataaataagtttccctagttttttcgtggtgggacgatattgaatt
+gaatcttaaccgaagagtttcccactctgtcgcacaataatacacgccaatatttccagc
+cctgcttatgccttaatcggttactcaatctcccattgaagttcattttgatctgcatag
+aagtttcgggcccagccttttttctgccaccttcctccaagctctgtagacgcactctaa
+gattgatgctcacatgtattaattctacattaacataaatatataagtcatgcatcttcg
+agtaaaatatctggttctccaacatgtcctggcacgtatcgttataatgcccatacatgt
+agtattaaaatgattgggttaactggatattaagatcatcgaaattgtaaagtcaaatta
+acaatactgtctcaagaccgtgtattcctcgtgctcggaagggctattacgcttacttcc
+gttttggtatcttaatatgactttcaaaaattaagttgcagtgagtcctacctgcgtgca
+tcggttagcaagagtataaaagttgtttaaacgaactacttgctttacaataccggtcgt
+atatatcgccgtgaatccagaagattgtcttctttggattatcaaccgagatcctgtgga
+ccgatgttttgggaccttcacagaggactccaggtagagctcgcttttgcattaatctaa
+gaattgtacctctctaaaagatctaaaacagtgaatgtgtatttcatggaaaaacacaga
+gaaacgtaaattactttaggccgaaaggcacatgagttattatacatatacgagatggtg
+gtatacatcgaattcggggcatacactatagttgcattgtatttagctgctttaaataat
+atgatattaccttccttacataagacattaccggcataccctggttttcaacttgtgggg
+ctttttgacgatcgcactctcatttgatccgagtagggcggtgacccctgcttttcaaat
+acaaaaatttcgctatgaaggtaatagattacttttcgctgttatgatagaaacggtaaa
+tttaaaattgaaacttctagaaaagtaaagtaacgagaaatgattttgtgaataatgcgg
+tcatgattgcgcaagtaagaaaaaaaggcaaaaggatgcgcggaatagaaacttatcagt
+cacgggtatcttgatttcattcttcttgtcaattgccgacataggatgaaatcagattcc
+aatgcaatacacagtaacccccacccttgattgtaatgtcgatttgaagttgtacgcgtc
+gacgaagtggatagtatacgggccttttgtacggtgcgatcaactatgaatctcggcgag
+ttagatggtcgtacaatctcacacatagaggtcacttgcctgtaatgacgaattttcggc
+taggtactcgaactttattagaagtaaaaatgtgggcaaaagaaggattccattttacaa
+gacgattacaatgagttacatgtctctcaacgtagtctttccctagtagtctttgaacta
+tttaggtactccagaaaattttagcaaagggtttctgtgtgaatccgccattcatgttta
+tgatggaacaataagaataacgccctcgtatgttatcgacagtgaagtcagcagttcggc
+caaaaacatattcaatttagtacagatccccagaagttaagctaagtgctctaaaatggc
+ctaaacggttatcaaagtaggtctaattactatactaacgggtgcatcgtaataactgct
+gtcgatgcaacactatatgatagtgtcgttttgctatatatgtacaatgtgacaaagaag
+ccttagcgattcttgcaaacttaggacttcggattctcaatcttaaatgtccgaaaacgc
+aaagattcaaaaatttaatctatgagcagatatgcctgatggtgactacgcgtatgttaa
+ggctaaatgttgacaaccgcacacataatcgaactattgatagtcgggagcataaccagg
+tgaacgtactttgttcacgacatttattgacatgttctaaatacgtctcaaaatcacggc
+gcactagaaaacgcaatcaaatcattgtcctggtttaagggccgtaatgccggtagtgtc
+aaacttcatgagaactttagctggcttttggccagtatttagggaccaagagcactagcc
+ttaagctgaatattttgccatttatctactgttataactttaaaacttggtggcaccaga
+cttgtcgatacacacgcatcaatctgtaacgtaaaaggtttactaagaacaagcgtagga
+attgagtttatattatatttaaactaaaagatgatattagcttctgagggcgatagggct
+ccaaatcataaagaggaatatattattacacgattagaaacccacaacatacctcgaatc
+gcccaaaagtttgacgaaacttggcagtactccacatctcagtaatacagttgggagagt
+ctcaaatgttgttttattactcaatgaaccaccctcataatttcactgctgttccattaa
+atttgcaaacgatcatttgctttgaagaaacgtaaaatcgacaaaattacagataagtag
+atgcataataaaaaaaactgctcgctataacacgatcatcgtgcattcttacttaggagc
+atcacccgcacaataacgtaccttaaactacaacactattagaccgagtactgtaattca
+cgaaagctcaagctcgcattgtaaagaacttgctctctcgtaaaatgtgataatagtttg
+cggagaggattcaattattttccattgcacctactccactagattcgataaaagaaggtg
+gtcctcccttaaaaagaaatgttaagtaacatcggaaccataagcaaagcatgtaagtga
+accgtcatccttccctaagaaacataaaggtttttaataatgtcgactgtgaactataac
+tgcatcctttcctgacctactccggttccttgttgttatttctgaacgagaccagtagat
+aaacaatgtaaaccacagtgggtaccaatggtgcatgtgacgctaccgttgttttaagtg
+cccgtacaaacataagaagtcataatcttacttgaaattaattttgccttttattttttt
+tcaggctcgaaattaatgatttgttttttttgaccttctagttacgctaatatgcggtcg
+cctgtggtttctattgagtcctataacgggatgggatctaatacgtttggttactagtaa
+acaaggtataaatttgataccggagtatcaactgtataacatcaagctttatgactcata
+cgcgaagtaatgacacaaggctttcaggagatcgcgagtacagagccactaaggggtgta
+ttacgatagtgacaccaccgagcgcactcactccccaagtagatttatgatcctacgcta
+agtattagatatataaccaaagaggttctagtcagtgcaactcttagaataataattagc
+cggttttgcctttttaggcctaatgcaatattcagctagcccttatgtatctcgcgttcc
+acagcaccactcatggcacgcgtttaaactaatcaaatataatctatgaatgttatgcca
+gtacttgaataaatcaggttttttataagtccttgcatactctcgttatatactgttaga
+gtcttaccccatagaaattctttcatctgcaaacttagaagaattctcagctacggggag
+cataaagtccccaggatgttgacaaatacaacaaatgtggcttatacaaacactccatat
+gaaaatcgaaccctcgtggtagttttagccgaaccttgtacggataaatccctccatttt
+ccaatagcagatacctatcctactacctcgtggtattaaattaaagcttgaaatatagag
+ctgcatagcttatccaattcccaagcacgagtctaccgtcgtaaccacgatttgatttac
+agacgctagagcaaacccatctttaaacatataagtaaaaattaaagggtgagtgcgtac
+gtgtttactagcaacttcgcttattaagacaattgtttataagccataattaaaaacata
+tgttcaacaggttcattgatatttgtaattgcacaggtttttaataaggatctacgtaag
+tataatgaacaaactttttaccagagttatattctgtactttgaaaatgctcctctaccg
+ccttagagactttcaattagattttttgcagttaatctatgcgtaagtgaaccatgcaag
+ggatgcgattcaaccgcctcgtgctaaccctatcgtctgtctcataactgtaggtctaat
+ataattttcagttttcgaacacataaccctttgaaaatctgctatttaatgtctcacctg
+catgcactatcttctatactgctcagaacggctatacgtcactatgctccaagtgacgat
+ttaaacgaagcaaggaataataggtttattttagtgcaaaacaattaagtgcggactacg
+tgctctttacaataagccttgtgattgggctataggttaagtcccatattaacgatctcc
+aatgtacaaaatcgacaatcgctttgcattacccggttactagtcgaattacagatagct
+gttagatactcactctaattttggacaacaatcccaatcttggggtcgtctatcgcctga
+agctcgtaaatccttccatcttaaacgattacatattatagacttgttcggggtagagat
+atcacagttgtgcaaacattgtaaatcgatactagtttatgttggtagtctagttgcttt
+taccattccccgaaaaacttgatctactatttcgacaacagtaaacttgaactaggtaag
+tgaaaacagagaatgcctcatagtgccactatttgtccactatatgtaagtgtagcttta
+cataatccactatgactgagatcattacggcctaggaaagcagcgtagaaaaaaagggcc
+cggatattacgactgtaactataaaactagttactggtagcgcgccatgtatagatttgt
+tttaccggttgtggttgcgttaacgaatttcagccgcgaaaattgatccgttaaccagtc
+catctcgacttctataaaacgataaagtaaagttgatgttcagcctccttcttatggttg
+catcgagagtacactactcagtgggaaatagatcggggttcctacttcagattgtattat
+ctaggcaattgccgattgtgccatacctggataaaataagctacctacatgtgatgctta
+tctattatcgtcatactaccttagggtgtcctgttgaacgctacattaatctttagccgt
+ttgagatgttccaatggataggagtctaacgcatgatgaagtttaggaaggcagagcatc
+ccactaagtatgtgacagtgtatttcgaaacgagacgttataaatagaaaaaaggtcctt
+ctggttctattctgctgaactattgaatggaaagattggttgacctacgtactatttgct
+tgaagtcatcaatttgacggggtgagagacatatggtgcatactttacggactctatatt
+ttagatcagaagcttagcagtcttctctacaccccctcacgacataattgcttttaagaa
+tctatgtttgattcctctacgggaattcggatccgttcgcatgtgcggtttatctaaacc
+aggggacatatgttcagctaaagcatacgaacactttgctaactagacgtatgtatagta
+gctataaatcccgacgatatttacaaaaagaaatgagactcaaatatatacatagcgacc
+ctacacttattcgcaccctgatctaggcgatcctagcacccacacccgaaagtgagcact
+agtgtcttccgtattaaatttactgcagttgagattttagttgtctactaaggattactc
+taacccgtaataaggatcaagactcggtactagctttactatcattccctatgtgttttc
+ctaactcacaagggtacgtaccagcctatgtaattacaataatgataaagacacaaagga
+agtaactttacaaatgagtctccagttacactagcttagtccctcccatcttgctttgaa
+gtctaaatacgcaatctctgaggatatacagcagaagaacactcataacgttggagtcca
+agaattagactcatagggcccccaacatttaatatgtactgtgagtttgaaggtgttcta
+ttgttaattcctgctcttgatacatgacacgtactccgtgtttaaggcttcggactgact
+ttctttcataagttgagcaacgaaaatttcagaatcgataagttggattcactaactaat
+acggctgattgaaaactccactccggacctatatggtcgacctttatacgtaaccgatat
+aaaacttataggctggtatatcgagccttcctagcgcaatttcggatggggtttcttcta
+ctactcaacaacggaatagtctttgtttagtaaaccagagctcaggacgcccaatacgta
+ggagagcgctgtggagcatgtgtcattatggactggagcactcttaaatcactctgcgtg
+tgctaaacgatagatcataacatgtcctgagtaaattttcttgatacgtcgcaatatacc
+gttattagttaaacgttctcatccgtcatgcgtgaaatacggctgtcgtgctcagatata
+ctattagcgactcatctcgcctaacacgcacacgtataaactcggaatgactgccgctct
+tacatattagaaatacagactacaccacggaagcattgggtcattctcaaccgctgtata
+aaagatgattagtcttataataagattaccaaagaggcagaatcatgggtagtaaatcta
+ttattcaagtgattaccgtcgtgtaggcagggagtgaggacgagatggtactcaggacaa
+atattaaccggacgaagtggtttacgtcgtactttcactattagtagtaaatacaaggta
+acaccggggaatagtactaaatataatgatatctatcttcgggagaacgagtcgtctatt
+gctttgaacattctcaaggcgtaaaatgtgctgacttatagcatgatacaaccgattgtt
+acttttgtctattcaaaagattgaatagttttttatacaaaagccgcatacttatgacgg
+ctagtatacagtttcatcccctagcatcaatgctatggacagtattgaacttataggaaa
+ttcttctaatagggcaaatccgtcgtgatgcctattttttttcagtcacatcctcaaatg
+gcactagtattgtcgggatcccattaacaggctcaaccacgagctcacgcgaggacatgt
+agtccgtatctttaacgaagcgacagcgacagaactcccatggataaccaattataaggc
+ccgtaatcctctagacatcgtttaccaataaatccgctttctccgtaatcatgttgaata
+ccccagagtagtccagatgataaccgatgaaacacaagtctttctcaatgcacttacggt
+gaacttattaccgccaacgtagctcatcaaggttgcgacatctagttgtgtgtttgcgac
+gagcccagcgaacttcatcaactttcgtatattcaacgccttgtaattttactttaagac
+gcctggtgatgtagattcttagataatcagtttgttatcggctgtactttaccataattt
+cacaggtttcaggtcaagaagattatagctgtatatacagttccatgctcggtgcacaga
+aacgtgatcggataataatcaatcgcttatgtcgtctttaggcgtatccaatacatgccc
+cgataccgcagtgtatttcgacatgtaggtataccgtcgcatttgagctcgagtcaggac
+gtcagctagattagattccttaatagaatataccgacctctagtccgaactaaactatag
+ataacgccaacttcaggttaattgtctagtcgtctgtttgcagatgggattcttagatga
+gtgagtatcggccatattggttcgagcactttagtttttgatgcataggatatgcaatgt
+atagctgaaagtactttatctgtttcaaactcacattgattaaaccggtaaacctttaaa
+gactacaagaaaatattcagtgagggcaattttgtcaatcacaatcttccagctagagat
+acttcacaatttgtcttgaggctacgcaacattagacggattttcgcgttttattgaaat
+aatcgaggggcccaagagtatccatagttcattttgtaagatttctttacaggcttatta
+cagcttcttcagactcctacatgcttacgagttatatgctagcatgtgaacaatagatta
+atatacaggaaaacgtacattgagagagatgaccctacacagcgcaaccgttgagtactt
+tcattaaagggtaacgctctcgagacagcatccttaagatggccttattgtcaaatcatt
+tgcagaagtacgcaagatccctaaccaacgtagaagaatccctacaaacacatgagacgc
+ggtgaaaatagacagggtgttagtattcaatcttcggagtatcaatttcgccaatcttgg
+tgagaaagcataccctttcttcagagaaagaagatcaatcataacactatctttaacgag
+gtacgcacgcgcatcattacctgcctccatggatctttaggatagcggaaagtattggca
+gcgtattgtgatttcgttcctactttatcaatttcacattcatatacatgtcttttatca
+aaatcgccaataagataggatgagctatattagatgctagtagagttcgcgccaacatca
+tcgataggaatactcaggacagcgtgataggacttttcaatccctaatactctctataat
+tataactctctcttaagtttggaggcagtaacgcgctctatataatcagtttgctgcacc
+attcttcagcctctgatacatacaaataaattccacagcagtaagagggtttaattgaga
+catcttgggaacttaggattttactctaacatcaccgaaacgattattggataccgtacc
+taaacgaactttctcaaggcagtaatataggacatccgcaataacacaaatgctgcctcc
+ccaggagttatgtcttcctggaggctatatcttacacccactcactataggcaaactaaa
+gtttaaatgttgattgtctaaaaaaaagatagataagagttggccggcgtagcacatgcg
+aaagtgaatcgtaagctataattctctggacttgaagttctgtcctgttcctctgcaaga
+aacaaacttcctttaaagctatttacgacgcacatctcagcaagttataaacatgttgga
+agtttctagtcggaattcccaaagaacggatctatctaatgcattcctacatttttcctg
+tctgccgatggtgccatcctattcaaagaatttcttaaaagtagattaaatgggactttt
+aacaatgagtaaccttacgcctctaagggttcctcgagtgccatacaccagtcaggtccg
+agccacatacacggagaacattctaacatagcattctcaactcgatcatttgcaggttac
+ttctttcctatcctagtgctaaaaatcatacttgcaatcccatagcacggattaagaacc
+taagaaacaattcagtaaaacatgttcgaattcttggtatgggaacatcattgcagctat
+ggtctaacgcattaatgtttgggtacatcttccatcatataaacaggaagagtctgacga
+cagggagtgcttgcgatcatgtctatcattgtgaaatcaaattgtagctcacatgtcgtc
+tatgagagcgtgtatccgataagatttagaaaaatagaagtcgtataagatctcactgaa
+cttttgaatgaatgtgaagcatatatgatctgctttaataaaactttatccataggatac
+gtttccaaatcaattcaataattattagtcaaaatagataaggatgaacaacctgaaggc
+cgatcggacgtagaaagtggtcccatcactttgagttgatattgttgaaccacacgttat
+tatggttttcaaacagtctcaggatattgtatatacagataatccgataccagttgtctg
+acgcccctcttacgtaccccaccctttgtgacgtttaaagcagttgttcagtattttaaa
+ctaggcggcaactaatttggaaagaagcacagtggatatgtctaaattcttgttattcag
+gcctgaatttaatacaccgcatagttaacttcgcggtagagttgttcatcatgcctcctc
+taagctaccacttctatgatacaccaatagttgttctacggaatctgataattggccaag
+tcataaacttccgctgcgttcaacccccttgctcgaatatccaactcgaaaagacagcct
+tttggtgtccggaacaaatcagttacttcttttctgatgttaattctctgtggtcagata
+cagaccaaaaactccgcggatttaccatcctccaagaacaaatttgcatcaacatagcat
+tttggctacatattctaagtctcaatagtttaggttttcaactacattatcccaacatta
+ggattggaggaataatagctgggtaagtccccttgcgtctacaatcgactattttttatg
+aatatgcttctgccgcacctatggttattaaaaaagtcatgactttgaagaaccctgaaa
+agatagatgaatcaggtgtaatggcagcagccaaagagcatataattagcaacactctaa
+gaacattatagatatgatgatagcgatcgtcatgatgttatccggtcacaatagtagctt
+catcagctaattcgttttgccagtggtgacttgcgctggaagaatcgttatacggtccct
+tccctcttgatacggtgggggcttattcaaccgcgtggattgggttgtcatacttgcatt
+aaacgatgtaaaccatctagtagtcaactatactaaatcacaaaatagtgatcaatacat
+acccgcttcatggttttaaccatttaattgattaaagatattccgctaagaaccattatc
+tacctaaactgatcgccgtatcctagtagtttgaaatttgatgtaccgtaatgatcaacg
+aagtaaaacgttatattgtatgtagaataataggtcttggagctaaatgatgtgattggt
+agtgaagacttacccttacaactttaccggtttctcggaagaatatactagagaatcaat
+gcatgggctacataagcactttagtctaatgagataaaaaatacacgagtcttccatcat
+gaattttttgtcgaaaaactcgaacctggtaatttaaaccatatatctttatgtcgtcaa
+taactctcatatgttttatataacttcccaatcacgacttgtaactgcttgttcgactga
+gctgtttgagctatgaggccgggatccggttgagctacatctatttgctacaagaaaaat
+gaaagcacatttgttgggagttctggctacactcatagagaaataagtggcccgagtggg
+tgcggcctgcctccatattcaagtgtatcttaaaccaagtggttccaacgctcgcgctaa
+agaattaaagcctttatttcctccacggagtagcccgtaatccggttcgaaagagaccat
+tgaagttaattttcatatccagtgaagtttaggcacaagcatgtgttctgccacatgcct
+caaagcgctcttcaaccaagatatgattcatcctaacttcgatgaatgcgtctgtaacat
+aaatatagaaggaatgattcggcgagttaattttcgccttctccaacatggcatccctac
+gttcgttataaggaccatacatgtaggttttaaaggtttgcggttaatcgatatttacat
+catagaaattctatagtcaaatttacaagactctagatactcactcgttgcagccggcta
+ggaagcgctttgtaccttacttcccttttcgttgcgtaatatgaatttcatatagtaagt
+tcaaggcactcatacctccgtgaagagggtagatagactattaaagttgtttaatagtac
+gtattgatggaaatgacccgtaggagatttaccactcaatccacaagattcgctgctgtg
+cattatcaaaacagtgcatgtcgaaacatgggttgggtccttcaaacacgaatccaggta
+gagatacctttgcaattttt
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-0.2.2/examples/regexdna-output.txt
@@ -0,0 +1,13 @@
+agggtaaa|tttaccct 0
+[cgt]gggtaaa|tttaccc[acg] 3
+a[act]ggtaaa|tttacc[agt]t 9
+ag[act]gtaaa|tttac[agt]ct 8
+agg[act]taaa|ttta[agt]cct 10
+aggg[acg]aaa|ttt[cgt]ccct 3
+agggt[cgt]aa|tt[acg]accct 4
+agggta[cgt]a|t[acg]taccct 3
+agggtaa[cgt]|[acg]ttaccct 5
+
+101745
+100000
+133640
copy from third_party/rust/regex/examples/shootout-regex-dna-bytes.rs
copy to third_party/rust/regex-0.2.2/examples/shootout-regex-dna-bytes.rs
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-0.2.2/examples/shootout-regex-dna-cheat.rs
@@ -0,0 +1,88 @@
+// The Computer Language Benchmarks Game
+// http://benchmarksgame.alioth.debian.org/
+//
+// contributed by the Rust Project Developers
+// contributed by TeXitoi
+// contributed by BurntSushi
+
+// This technically solves the problem posed in the `regex-dna` benchmark, but
+// it cheats by combining all of the replacements into a single regex and
+// replacing them with a single linear scan. That is, it re-implements
+// `replace_all`. As a result, this is around 25% faster. ---AG
+
+extern crate regex;
+
+use std::io::{self, Read};
+use std::sync::Arc;
+use std::thread;
+
+macro_rules! regex { ($re:expr) => { ::regex::Regex::new($re).unwrap() } }
+
+fn main() {
+    let mut seq = String::with_capacity(50 * (1 << 20));
+    io::stdin().read_to_string(&mut seq).unwrap();
+    let ilen = seq.len();
+
+    seq = regex!(">[^\n]*\n|\n").replace_all(&seq, "").into_owned();
+    let clen = seq.len();
+    let seq_arc = Arc::new(seq.clone());
+
+    let variants = vec![
+        regex!("agggtaaa|tttaccct"),
+        regex!("[cgt]gggtaaa|tttaccc[acg]"),
+        regex!("a[act]ggtaaa|tttacc[agt]t"),
+        regex!("ag[act]gtaaa|tttac[agt]ct"),
+        regex!("agg[act]taaa|ttta[agt]cct"),
+        regex!("aggg[acg]aaa|ttt[cgt]ccct"),
+        regex!("agggt[cgt]aa|tt[acg]accct"),
+        regex!("agggta[cgt]a|t[acg]taccct"),
+        regex!("agggtaa[cgt]|[acg]ttaccct"),
+    ];
+    let mut counts = vec![];
+    for variant in variants {
+        let seq = seq_arc.clone();
+        let restr = variant.to_string();
+        let future = thread::spawn(move || variant.find_iter(&seq).count());
+        counts.push((restr, future));
+    }
+
+    let substs = vec![
+        (b'B', "(c|g|t)"),
+        (b'D', "(a|g|t)"),
+        (b'H', "(a|c|t)"),
+        (b'K', "(g|t)"),
+        (b'M', "(a|c)"),
+        (b'N', "(a|c|g|t)"),
+        (b'R', "(a|g)"),
+        (b'S', "(c|g)"),
+        (b'V', "(a|c|g)"),
+        (b'W', "(a|t)"),
+        (b'Y', "(c|t)"),
+    ]; // combined into one regex in `replace_all`
+    let seq = replace_all(&seq, substs);
+
+    for (variant, count) in counts {
+        println!("{} {}", variant, count.join().unwrap());
+    }
+    println!("\n{}\n{}\n{}", ilen, clen, seq.len());
+}
+
+fn replace_all(text: &str, substs: Vec<(u8, &str)>) -> String {
+    let mut replacements = vec![""; 256];
+    let mut alternates = vec![];
+    for (re, replacement) in substs {
+        replacements[re as usize] = replacement;
+        alternates.push((re as char).to_string());
+    }
+
+    let re = regex!(&alternates.join("|"));
+    let mut new = String::with_capacity(text.len());
+    let mut last_match = 0;
+    for m in re.find_iter(text) {
+        new.push_str(&text[last_match..m.start()]);
+        new.push_str(replacements[text.as_bytes()[m.start()] as usize]);
+        last_match = m.end();
+    }
+    new.push_str(&text[last_match..]);
+    new
+}
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-0.2.2/examples/shootout-regex-dna-replace.rs
@@ -0,0 +1,19 @@
+extern crate regex;
+
+use std::io::{self, Read};
+
+macro_rules! regex {
+    ($re:expr) => {{
+        use regex::internal::ExecBuilder;
+        ExecBuilder::new($re).build().unwrap().into_regex()
+    }}
+}
+
+fn main() {
+    let mut seq = String::with_capacity(50 * (1 << 20));
+    io::stdin().read_to_string(&mut seq).unwrap();
+    let ilen = seq.len();
+
+    seq = regex!(">[^\n]*\n|\n").replace_all(&seq, "").into_owned();
+    println!("original: {}, replaced: {}", ilen, seq.len());
+}
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-0.2.2/examples/shootout-regex-dna-single-cheat.rs
@@ -0,0 +1,73 @@
+// The Computer Language Benchmarks Game
+// http://benchmarksgame.alioth.debian.org/
+//
+// contributed by the Rust Project Developers
+// contributed by TeXitoi
+// contributed by BurntSushi
+
+extern crate regex;
+
+use std::io::{self, Read};
+
+macro_rules! regex { ($re:expr) => { ::regex::Regex::new($re).unwrap() } }
+
+fn main() {
+    let mut seq = String::with_capacity(50 * (1 << 20));
+    io::stdin().read_to_string(&mut seq).unwrap();
+    let ilen = seq.len();
+
+    seq = regex!(">[^\n]*\n|\n").replace_all(&seq, "").into_owned();
+    let clen = seq.len();
+
+    let variants = vec![
+        regex!("agggtaaa|tttaccct"),
+        regex!("[cgt]gggtaaa|tttaccc[acg]"),
+        regex!("a[act]ggtaaa|tttacc[agt]t"),
+        regex!("ag[act]gtaaa|tttac[agt]ct"),
+        regex!("agg[act]taaa|ttta[agt]cct"),
+        regex!("aggg[acg]aaa|ttt[cgt]ccct"),
+        regex!("agggt[cgt]aa|tt[acg]accct"),
+        regex!("agggta[cgt]a|t[acg]taccct"),
+        regex!("agggtaa[cgt]|[acg]ttaccct"),
+    ];
+    for re in variants {
+        println!("{} {}", re.to_string(), re.find_iter(&seq).count());
+    }
+
+    let substs = vec![
+        (b'B', "(c|g|t)"),
+        (b'D', "(a|g|t)"),
+        (b'H', "(a|c|t)"),
+        (b'K', "(g|t)"),
+        (b'M', "(a|c)"),
+        (b'N', "(a|c|g|t)"),
+        (b'R', "(a|g)"),
+        (b'S', "(c|g)"),
+        (b'V', "(a|c|g)"),
+        (b'W', "(a|t)"),
+        (b'Y', "(c|t)"),
+    ]; // combined into one regex in `replace_all`
+    let seq = replace_all(&seq, substs);
+
+    println!("\n{}\n{}\n{}", ilen, clen, seq.len());
+}
+
+fn replace_all(text: &str, substs: Vec<(u8, &str)>) -> String {
+    let mut replacements = vec![""; 256];
+    let mut alternates = vec![];
+    for (re, replacement) in substs {
+        replacements[re as usize] = replacement;
+        alternates.push((re as char).to_string());
+    }
+
+    let re = regex!(&alternates.join("|"));
+    let mut new = String::with_capacity(text.len());
+    let mut last_match = 0;
+    for m in re.find_iter(text) {
+        new.push_str(&text[last_match..m.start()]);
+        new.push_str(replacements[text.as_bytes()[m.start()] as usize]);
+        last_match = m.end();
+    }
+    new.push_str(&text[last_match..]);
+    new
+}
copy from third_party/rust/regex/examples/shootout-regex-dna-single.rs
copy to third_party/rust/regex-0.2.2/examples/shootout-regex-dna-single.rs
copy from third_party/rust/regex/examples/shootout-regex-dna.rs
copy to third_party/rust/regex-0.2.2/examples/shootout-regex-dna.rs
rename from third_party/rust/regex/examples/shootout-regex-redux-1.rs
rename to third_party/rust/regex-0.2.2/examples/shootout-regex-redux-1.rs
rename from third_party/rust/regex/examples/shootout-regex-redux-chunked.rs
rename to third_party/rust/regex-0.2.2/examples/shootout-regex-redux-chunked.rs
rename from third_party/rust/regex/examples/shootout-regex-redux.rs
rename to third_party/rust/regex-0.2.2/examples/shootout-regex-redux.rs
new file mode 100755
--- /dev/null
+++ b/third_party/rust/regex-0.2.2/scripts/frequencies.py
@@ -0,0 +1,82 @@
+#!/usr/bin/env python
+
+# This does simple normalized frequency analysis on UTF-8 encoded text. The
+# result of the analysis is translated to a ranked list, where every byte is
+# assigned a rank. This list is written to src/freqs.rs.
+#
+# Currently, the frequencies are generated from the following corpora:
+#
+#   * The CIA world fact book
+#   * The source code of rustc
+#   * Septuaginta
+
+from __future__ import absolute_import, division, print_function
+
+import argparse
+from collections import Counter
+import sys
+
+preamble = '''// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// NOTE: The following code was generated by "scripts/frequencies.py", do not
+// edit directly
+'''
+
+
+def eprint(*args, **kwargs):
+    kwargs['file'] = sys.stderr
+    print(*args, **kwargs)
+
+
+def main():
+    p = argparse.ArgumentParser()
+    p.add_argument('corpus', metavar='FILE', nargs='+')
+    args = p.parse_args()
+
+    # Get frequency counts of each byte.
+    freqs = Counter()
+    for i in range(0, 256):
+        freqs[i] = 0
+
+    eprint('reading entire corpus into memory')
+    corpus = []
+    for fpath in args.corpus:
+        corpus.append(open(fpath, 'rb').read())
+
+    eprint('computing byte frequencies')
+    for c in corpus:
+        for byte in c:
+            freqs[byte] += 1.0 / float(len(c))
+
+    eprint('writing Rust code')
+    # Get the rank of each byte. A lower rank => lower relative frequency.
+    rank = [0] * 256
+    for i, (byte, _) in enumerate(freqs.most_common()):
+        # print(byte)
+        rank[byte] = 255 - i
+
+    # Forcefully set the highest rank possible for bytes that start multi-byte
+    # UTF-8 sequences. The idea here is that a continuation byte will be more
+    # discerning in a homogeneous haystack.
+    for byte in range(0xC0, 0xFF + 1):
+        rank[byte] = 255
+
+    # Now write Rust.
+    olines = ['pub const BYTE_FREQUENCIES: [u8; 256] = [']
+    for byte in range(256):
+        olines.append('    %3d, // %r' % (rank[byte], chr(byte)))
+    olines.append('];')
+
+    print(preamble)
+    print('\n'.join(olines))
+
+if __name__ == '__main__':
+    main()
new file mode 100755
--- /dev/null
+++ b/third_party/rust/regex-0.2.2/scripts/regex-match-tests.py
@@ -0,0 +1,107 @@
+#!/usr/bin/env python2
+
+# Copyright 2014 The Rust Project Developers. See the COPYRIGHT
+# file at the top-level directory of this distribution and at
+# http://rust-lang.org/COPYRIGHT.
+#
+# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+# option. This file may not be copied, modified, or distributed
+# except according to those terms.
+
+from __future__ import absolute_import, division, print_function
+import argparse
+import datetime
+import os.path as path
+
+
+def print_tests(tests):
+    print('\n'.join([test_tostr(t) for t in tests]))
+
+
+def read_tests(f):
+    basename, _ = path.splitext(path.basename(f))
+    tests = []
+    for lineno, line in enumerate(open(f), 1):
+        fields = filter(None, map(str.strip, line.split('\t')))
+        if not (4 <= len(fields) <= 5) \
+           or 'E' not in fields[0] or fields[0][0] == '#':
+            continue
+
+        opts, pat, text, sgroups = fields[0:4]
+        groups = []  # groups as integer ranges
+        if sgroups == 'NOMATCH':
+            groups = [None]
+        elif ',' in sgroups:
+            noparen = map(lambda s: s.strip('()'), sgroups.split(')('))
+            for g in noparen:
+                s, e = map(str.strip, g.split(','))
+                if s == '?' and e == '?':
+                    groups.append(None)
+                else:
+                    groups.append((int(s), int(e)))
+        else:
+            # This skips tests that should result in an error.
+            # There aren't many, so I think we can just capture those
+            # manually. Possibly fix this in the future.
+            continue
+
+        if pat == 'SAME':
+            pat = tests[-1][1]
+        if '$' in opts:
+            pat = pat.decode('string_escape')
+            text = text.decode('string_escape')
+        if 'i' in opts:
+            pat = '(?i)%s' % pat
+
+        name = '%s_%d' % (basename, lineno)
+        tests.append((name, pat, text, groups))
+    return tests
+
+
+def test_tostr(t):
+    lineno, pat, text, groups = t
+    options = map(group_tostr, groups)
+    return 'mat!(match_%s, r"%s", r"%s", %s);' \
+           % (lineno, pat, '' if text == "NULL" else text, ', '.join(options))
+
+
+def group_tostr(g):
+    if g is None:
+        return 'None'
+    else:
+        return 'Some((%d, %d))' % (g[0], g[1])
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(
+        description='Generate match tests from an AT&T POSIX test file.')
+    aa = parser.add_argument
+    aa('files', nargs='+',
+       help='A list of dat AT&T POSIX test files. See src/testdata')
+    args = parser.parse_args()
+
+    tests = []
+    for f in args.files:
+        tests += read_tests(f)
+
+    tpl = '''// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// DO NOT EDIT. Automatically generated by 'scripts/regex-match-tests.py'
+// on {date}.
+'''
+    print(tpl.format(date=str(datetime.datetime.now())))
+
+    for f in args.files:
+        print('// Tests from %s' % path.basename(f))
+        print_tests(read_tests(f))
+        print('')
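
To make the translation concrete: a tab-separated entry in basic.dat, such as the abracadabra line, is emitted by `test_tostr` above as a single `mat!` invocation (`mat!` is the test harness's helper macro; both the input line and the generated line appear verbatim later in this patch, in src/testdata/basic.dat and tests/fowler.rs):

    // Input line (src/testdata/basic.dat, fields separated by tabs):
    //   BE    abracadabra$    abracadabracadabra    (7,18)
    // Generated output (tests/fowler.rs):
    mat!(match_basic_3, r"abracadabra$", r"abracadabracadabra", Some((7, 18)));
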
rename from third_party/rust/regex/scripts/unicode.py
rename to third_party/rust/regex-0.2.2/scripts/unicode.py
copy from third_party/rust/regex/src/backtrack.rs
copy to third_party/rust/regex-0.2.2/src/backtrack.rs
copy from third_party/rust/regex/src/compile.rs
copy to third_party/rust/regex-0.2.2/src/compile.rs
copy from third_party/rust/regex/src/dfa.rs
copy to third_party/rust/regex-0.2.2/src/dfa.rs
copy from third_party/rust/regex/src/error.rs
copy to third_party/rust/regex-0.2.2/src/error.rs
copy from third_party/rust/regex/src/exec.rs
copy to third_party/rust/regex-0.2.2/src/exec.rs
copy from third_party/rust/regex/src/expand.rs
copy to third_party/rust/regex-0.2.2/src/expand.rs
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-0.2.2/src/freqs.rs
@@ -0,0 +1,271 @@
+// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// NOTE: The following code was generated by "scripts/frequencies.py", do not
+// edit directly
+
+pub const BYTE_FREQUENCIES: [u8; 256] = [
+     55, // '\x00'
+     52, // '\x01'
+     51, // '\x02'
+     50, // '\x03'
+     49, // '\x04'
+     48, // '\x05'
+     47, // '\x06'
+     46, // '\x07'
+     45, // '\x08'
+    103, // '\t'
+    242, // '\n'
+     66, // '\x0b'
+     67, // '\x0c'
+    229, // '\r'
+     44, // '\x0e'
+     43, // '\x0f'
+     42, // '\x10'
+     41, // '\x11'
+     40, // '\x12'
+     39, // '\x13'
+     38, // '\x14'
+     37, // '\x15'
+     36, // '\x16'
+     35, // '\x17'
+     34, // '\x18'
+     33, // '\x19'
+     56, // '\x1a'
+     32, // '\x1b'
+     31, // '\x1c'
+     30, // '\x1d'
+     29, // '\x1e'
+     28, // '\x1f'
+    255, // ' '
+    148, // '!'
+    164, // '"'
+    149, // '#'
+    136, // '$'
+    160, // '%'
+    155, // '&'
+    173, // "'"
+    221, // '('
+    222, // ')'
+    134, // '*'
+    122, // '+'
+    232, // ','
+    202, // '-'
+    215, // '.'
+    224, // '/'
+    208, // '0'
+    220, // '1'
+    204, // '2'
+    187, // '3'
+    183, // '4'
+    179, // '5'
+    177, // '6'
+    168, // '7'
+    178, // '8'
+    200, // '9'
+    226, // ':'
+    195, // ';'
+    154, // '<'
+    184, // '='
+    174, // '>'
+    126, // '?'
+    120, // '@'
+    191, // 'A'
+    157, // 'B'
+    194, // 'C'
+    170, // 'D'
+    189, // 'E'
+    162, // 'F'
+    161, // 'G'
+    150, // 'H'
+    193, // 'I'
+    142, // 'J'
+    137, // 'K'
+    171, // 'L'
+    176, // 'M'
+    185, // 'N'
+    167, // 'O'
+    186, // 'P'
+    112, // 'Q'
+    175, // 'R'
+    192, // 'S'
+    188, // 'T'
+    156, // 'U'
+    140, // 'V'
+    143, // 'W'
+    123, // 'X'
+    133, // 'Y'
+    128, // 'Z'
+    147, // '['
+    138, // '\\'
+    146, // ']'
+    114, // '^'
+    223, // '_'
+    151, // '`'
+    249, // 'a'
+    216, // 'b'
+    238, // 'c'
+    236, // 'd'
+    253, // 'e'
+    227, // 'f'
+    218, // 'g'
+    230, // 'h'
+    247, // 'i'
+    135, // 'j'
+    180, // 'k'
+    241, // 'l'
+    233, // 'm'
+    246, // 'n'
+    244, // 'o'
+    231, // 'p'
+    139, // 'q'
+    245, // 'r'
+    243, // 's'
+    251, // 't'
+    235, // 'u'
+    201, // 'v'
+    196, // 'w'
+    240, // 'x'
+    214, // 'y'
+    152, // 'z'
+    182, // '{'
+    205, // '|'
+    181, // '}'
+    127, // '~'
+     27, // '\x7f'
+    212, // '\x80'
+    211, // '\x81'
+    210, // '\x82'
+    213, // '\x83'
+    228, // '\x84'
+    197, // '\x85'
+    169, // '\x86'
+    159, // '\x87'
+    131, // '\x88'
+    172, // '\x89'
+    105, // '\x8a'
+     80, // '\x8b'
+     98, // '\x8c'
+     96, // '\x8d'
+     97, // '\x8e'
+     81, // '\x8f'
+    207, // '\x90'
+    145, // '\x91'
+    116, // '\x92'
+    115, // '\x93'
+    144, // '\x94'
+    130, // '\x95'
+    153, // '\x96'
+    121, // '\x97'
+    107, // '\x98'
+    132, // '\x99'
+    109, // '\x9a'
+    110, // '\x9b'
+    124, // '\x9c'
+    111, // '\x9d'
+     82, // '\x9e'
+    108, // '\x9f'
+    118, // '\xa0'
+    141, // '¡'
+    113, // '¢'
+    129, // '£'
+    119, // '¤'
+    125, // '¥'
+    165, // '¦'
+    117, // '§'
+     92, // '¨'
+    106, // '©'
+     83, // 'ª'
+     72, // '«'
+     99, // '¬'
+     93, // '\xad'
+     65, // '®'
+     79, // '¯'
+    166, // '°'
+    237, // '±'
+    163, // '²'
+    199, // '³'
+    190, // '´'
+    225, // 'µ'
+    209, // '¶'
+    203, // '·'
+    198, // '¸'
+    217, // '¹'
+    219, // 'º'
+    206, // '»'
+    234, // '¼'
+    248, // '½'
+    158, // '¾'
+    239, // '¿'
+    255, // 'À'
+    255, // 'Á'
+    255, // 'Â'
+    255, // 'Ã'
+    255, // 'Ä'
+    255, // 'Å'
+    255, // 'Æ'
+    255, // 'Ç'
+    255, // 'È'
+    255, // 'É'
+    255, // 'Ê'
+    255, // 'Ë'
+    255, // 'Ì'
+    255, // 'Í'
+    255, // 'Î'
+    255, // 'Ï'
+    255, // 'Ð'
+    255, // 'Ñ'
+    255, // 'Ò'
+    255, // 'Ó'
+    255, // 'Ô'
+    255, // 'Õ'
+    255, // 'Ö'
+    255, // '×'
+    255, // 'Ø'
+    255, // 'Ù'
+    255, // 'Ú'
+    255, // 'Û'
+    255, // 'Ü'
+    255, // 'Ý'
+    255, // 'Þ'
+    255, // 'ß'
+    255, // 'à'
+    255, // 'á'
+    255, // 'â'
+    255, // 'ã'
+    255, // 'ä'
+    255, // 'å'
+    255, // 'æ'
+    255, // 'ç'
+    255, // 'è'
+    255, // 'é'
+    255, // 'ê'
+    255, // 'ë'
+    255, // 'ì'
+    255, // 'í'
+    255, // 'î'
+    255, // 'ï'
+    255, // 'ð'
+    255, // 'ñ'
+    255, // 'ò'
+    255, // 'ó'
+    255, // 'ô'
+    255, // 'õ'
+    255, // 'ö'
+    255, // '÷'
+    255, // 'ø'
+    255, // 'ù'
+    255, // 'ú'
+    255, // 'û'
+    255, // 'ü'
+    255, // 'ý'
+    255, // 'þ'
+    255, // 'ÿ'
+];
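
The ranks only matter relative to one another: a lower value means the byte occurred less often in the sample corpora. A hedged sketch of the kind of lookup this table supports elsewhere in the crate (e.g. when choosing which byte of a required literal to scan for; the helper name `rarest_byte` is illustrative, not an actual API):

    /// Return the offset and value of the byte in `lit` with the lowest
    /// frequency rank, i.e. the byte least likely to occur in ordinary text,
    /// so that a memchr-style scan for it hits few false positives.
    /// Assumes the BYTE_FREQUENCIES table above is in scope.
    fn rarest_byte(lit: &[u8]) -> Option<(usize, u8)> {
        lit.iter()
           .cloned()
           .enumerate()
           .min_by_key(|&(_, b)| BYTE_FREQUENCIES[b as usize])
    }
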
copy from third_party/rust/regex/src/input.rs
copy to third_party/rust/regex-0.2.2/src/input.rs
copy from third_party/rust/regex/src/lib.rs
copy to third_party/rust/regex-0.2.2/src/lib.rs
rename from third_party/rust/regex/src/literals.rs
rename to third_party/rust/regex-0.2.2/src/literals.rs
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-0.2.2/src/pattern.rs
@@ -0,0 +1,62 @@
+use std::str::pattern::{Pattern, Searcher, SearchStep};
+
+use re_unicode::{Regex, Matches};
+
+pub struct RegexSearcher<'r, 't> {
+    haystack: &'t str,
+    it: Matches<'r, 't>,
+    last_step_end: usize,
+    next_match: Option<(usize, usize)>,
+}
+
+impl<'r, 't> Pattern<'t> for &'r Regex {
+    type Searcher = RegexSearcher<'r, 't>;
+
+    fn into_searcher(self, haystack: &'t str) -> RegexSearcher<'r, 't> {
+        RegexSearcher {
+            haystack: haystack,
+            it: self.find_iter(haystack),
+            last_step_end: 0,
+            next_match: None,
+        }
+    }
+}
+
+unsafe impl<'r, 't> Searcher<'t> for RegexSearcher<'r, 't> {
+    #[inline]
+    fn haystack(&self) -> &'t str {
+        self.haystack
+    }
+
+    #[inline]
+    fn next(&mut self) -> SearchStep {
+        if let Some((s, e)) = self.next_match {
+            self.next_match = None;
+            self.last_step_end = e;
+            return SearchStep::Match(s, e);
+        }
+        match self.it.next() {
+            None => {
+                if self.last_step_end < self.haystack().len() {
+                    let last = self.last_step_end;
+                    self.last_step_end = self.haystack().len();
+                    SearchStep::Reject(last, self.haystack().len())
+                } else {
+                    SearchStep::Done
+                }
+            }
+            Some(m) => {
+                let (s, e) = (m.start(), m.end());
+                if s == self.last_step_end {
+                    self.last_step_end = e;
+                    SearchStep::Match(s, e)
+                } else {
+                    self.next_match = Some((s, e));
+                    let last = self.last_step_end;
+                    self.last_step_end = s;
+                    SearchStep::Reject(last, s)
+                }
+            }
+        }
+    }
+}
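
This impl lets a `&Regex` be used anywhere the standard library accepts a string pattern. The `Pattern` API is unstable, so this only builds on nightly with the crate's unstable pattern feature enabled; a minimal sketch under those assumptions:

    #![feature(pattern)]

    extern crate regex;

    use regex::Regex;

    fn main() {
        let re = Regex::new(r"[0-9]+").unwrap();
        // With `&Regex: Pattern`, the ordinary str search methods accept it.
        assert_eq!("abc123def".find(&re), Some(3));
        let fields: Vec<&str> = "a1b22c333d".split(&re).collect();
        assert_eq!(fields, vec!["a", "b", "c", "d"]);
    }
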
copy from third_party/rust/regex/src/pikevm.rs
copy to third_party/rust/regex-0.2.2/src/pikevm.rs
copy from third_party/rust/regex/src/prog.rs
copy to third_party/rust/regex-0.2.2/src/prog.rs
copy from third_party/rust/regex/src/re_builder.rs
copy to third_party/rust/regex-0.2.2/src/re_builder.rs
copy from third_party/rust/regex/src/re_bytes.rs
copy to third_party/rust/regex-0.2.2/src/re_bytes.rs
rename from third_party/rust/regex/src/re_plugin.rs
rename to third_party/rust/regex-0.2.2/src/re_plugin.rs
copy from third_party/rust/regex/src/re_set.rs
copy to third_party/rust/regex-0.2.2/src/re_set.rs
copy from third_party/rust/regex/src/re_trait.rs
copy to third_party/rust/regex-0.2.2/src/re_trait.rs
copy from third_party/rust/regex/src/re_unicode.rs
copy to third_party/rust/regex-0.2.2/src/re_unicode.rs
rename from third_party/rust/regex/src/simd_accel/mod.rs
rename to third_party/rust/regex-0.2.2/src/simd_accel/mod.rs
rename from third_party/rust/regex/src/simd_accel/teddy128.rs
rename to third_party/rust/regex-0.2.2/src/simd_accel/teddy128.rs
rename from third_party/rust/regex/src/simd_fallback/mod.rs
rename to third_party/rust/regex-0.2.2/src/simd_fallback/mod.rs
rename from third_party/rust/regex/src/simd_fallback/teddy128.rs
rename to third_party/rust/regex-0.2.2/src/simd_fallback/teddy128.rs
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-0.2.2/src/sparse.rs
@@ -0,0 +1,78 @@
+use std::ops::Deref;
+use std::slice;
+
+/// A sparse set used for representing ordered NFA states.
+///
+/// This supports constant time addition and membership testing. Clearing an
+/// entire set can also be done in constant time. Iteration yields elements
+/// in the order in which they were inserted.
+///
+/// The data structure is based on: http://research.swtch.com/sparse
+/// Note though that we don't actually use uninitialized memory. We generally
+/// reuse allocations, so the initial allocation cost is bearable. However,
+/// its other properties listed above are extremely useful.
+#[derive(Clone, Debug)]
+pub struct SparseSet {
+    /// Dense contains the instruction pointers in the order in which they
+    /// were inserted. Accessing elements >= self.size is illegal.
+    dense: Vec<usize>,
+    /// Sparse maps instruction pointers to their location in dense.
+    ///
+    /// An instruction pointer is in the set if and only if
+    /// sparse[ip] < size && ip == dense[sparse[ip]].
+    sparse: Vec<usize>,
+    /// The number of elements in the set.
+    size: usize,
+}
+
+impl SparseSet {
+    pub fn new(size: usize) -> SparseSet {
+        SparseSet {
+            dense: vec![0; size],
+            sparse: vec![0; size],
+            size: 0,
+        }
+    }
+
+    pub fn len(&self) -> usize {
+        self.size
+    }
+
+    pub fn is_empty(&self) -> bool {
+        self.size == 0
+    }
+
+    pub fn capacity(&self) -> usize {
+        self.dense.len()
+    }
+
+    pub fn insert(&mut self, value: usize) {
+        let i = self.size;
+        self.dense[i] = value;
+        self.sparse[value] = i;
+        self.size += 1;
+    }
+
+    pub fn contains(&self, value: usize) -> bool {
+        let i = self.sparse[value];
+        i < self.size && self.dense[i] == value
+    }
+
+    pub fn clear(&mut self) {
+        self.size = 0;
+    }
+}
+
+impl Deref for SparseSet {
+    type Target = [usize];
+
+    fn deref(&self) -> &Self::Target {
+        &self.dense[0..self.size]
+    }
+}
+
+impl<'a> IntoIterator for &'a SparseSet {
+    type Item = &'a usize;
+    type IntoIter = slice::Iter<'a, usize>;
+    fn into_iter(self) -> Self::IntoIter { self.iter() }
+}
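
A short illustration of the intended usage, written as the kind of unit test that could sit at the bottom of this file (SparseSet is crate-internal, so this is a sketch rather than public API): inserted values must be below the capacity passed to `new`, and `clear` is constant time because it only resets `size`.

    #[cfg(test)]
    mod tests {
        use super::SparseSet;

        #[test]
        fn insertion_order_membership_and_cheap_clear() {
            let mut set = SparseSet::new(10); // capacity is fixed up front
            set.insert(3);
            set.insert(7);
            assert!(set.contains(3));
            assert!(!set.contains(5));
            // Deref exposes the dense half, i.e. values in insertion order.
            assert_eq!(&set[..], &[3, 7][..]);
            set.clear(); // constant time: only `size` is reset
            assert!(set.is_empty());
        }
    }
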
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-0.2.2/src/testdata/LICENSE
@@ -0,0 +1,19 @@
+The following license covers testregex.c and all associated test data.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of THIS SOFTWARE FILE (the "Software"), to deal in the Software
+without restriction, including without limitation the rights to use,
+copy, modify, merge, publish, distribute, and/or sell copies of the
+Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following disclaimer:
+
+THIS SOFTWARE IS PROVIDED BY AT&T ``AS IS'' AND ANY EXPRESS OR IMPLIED
+WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+IN NO EVENT SHALL AT&T BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-0.2.2/src/testdata/README
@@ -0,0 +1,17 @@
+Test data was taken from the Go distribution, which was in turn taken from the
+testregex test suite:
+
+  http://www2.research.att.com/~astopen/testregex/testregex.html
+
+The LICENSE in this directory corresponds to the LICENSE that the data was
+released under.
+
+The tests themselves were modified for RE2/Go. A couple were modified further
+by me (Andrew Gallant), only in repetition.dat, so that RE2/Go would pass them.
+(Yes, it seems like RE2/Go includes failing test cases.) This may or may not
+have been a bad idea, but I think being consistent with an established regex
+library is worth something.
+
+Note that these files are read by 'scripts/regex-match-tests.py' and turned
+into Rust tests found in 'regex_macros/tests/matches.rs'.
+
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-0.2.2/src/testdata/basic.dat
@@ -0,0 +1,221 @@
+NOTE	all standard compliant implementations should pass these : 2002-05-31
+
+BE	abracadabra$	abracadabracadabra	(7,18)
+BE	a...b		abababbb		(2,7)
+BE	XXXXXX		..XXXXXX		(2,8)
+E	\)		()	(1,2)
+BE	a]		a]a	(0,2)
+B	}		}	(0,1)
+E	\}		}	(0,1)
+BE	\]		]	(0,1)
+B	]		]	(0,1)
+E	]		]	(0,1)
+B	{		{	(0,1)
+B	}		}	(0,1)
+BE	^a		ax	(0,1)
+BE	\^a		a^a	(1,3)
+BE	a\^		a^	(0,2)
+BE	a$		aa	(1,2)
+BE	a\$		a$	(0,2)
+BE	^$		NULL	(0,0)
+E	$^		NULL	(0,0)
+E	a($)		aa	(1,2)(2,2)
+E	a*(^a)		aa	(0,1)(0,1)
+E	(..)*(...)*		a	(0,0)
+E	(..)*(...)*		abcd	(0,4)(2,4)
+E	(ab|a)(bc|c)		abc	(0,3)(0,2)(2,3)
+E	(ab)c|abc		abc	(0,3)(0,2)
+E	a{0}b		ab			(1,2)
+E	(a*)(b?)(b+)b{3}	aaabbbbbbb	(0,10)(0,3)(3,4)(4,7)
+E	(a*)(b{0,1})(b{1,})b{3}	aaabbbbbbb	(0,10)(0,3)(3,4)(4,7)
+E	a{9876543210}	NULL	BADBR
+E	((a|a)|a)			a	(0,1)(0,1)(0,1)
+E	(a*)(a|aa)			aaaa	(0,4)(0,3)(3,4)
+E	a*(a.|aa)			aaaa	(0,4)(2,4)
+E	a(b)|c(d)|a(e)f			aef	(0,3)(?,?)(?,?)(1,2)
+E	(a|b)?.*			b	(0,1)(0,1)
+E	(a|b)c|a(b|c)			ac	(0,2)(0,1)
+E	(a|b)c|a(b|c)			ab	(0,2)(?,?)(1,2)
+E	(a|b)*c|(a|ab)*c		abc	(0,3)(1,2)
+E	(a|b)*c|(a|ab)*c		xc	(1,2)
+E	(.a|.b).*|.*(.a|.b)		xa	(0,2)(0,2)
+E	a?(ab|ba)ab			abab	(0,4)(0,2)
+E	a?(ac{0}b|ba)ab			abab	(0,4)(0,2)
+E	ab|abab				abbabab	(0,2)
+E	aba|bab|bba			baaabbbaba	(5,8)
+E	aba|bab				baaabbbaba	(6,9)
+E	(aa|aaa)*|(a|aaaaa)		aa	(0,2)(0,2)
+E	(a.|.a.)*|(a|.a...)		aa	(0,2)(0,2)
+E	ab|a				xabc	(1,3)
+E	ab|a				xxabc	(2,4)
+Ei	(Ab|cD)*			aBcD	(0,4)(2,4)
+BE	[^-]			--a		(2,3)
+BE	[a-]*			--a		(0,3)
+BE	[a-m-]*			--amoma--	(0,4)
+E	:::1:::0:|:::1:1:0:	:::0:::1:::1:::0:	(8,17)
+E	:::1:::0:|:::1:1:1:	:::0:::1:::1:::0:	(8,17)
+{E	[[:upper:]]		A		(0,1)	[[<element>]] not supported
+E	[[:lower:]]+		`az{		(1,3)
+E	[[:upper:]]+		@AZ[		(1,3)
+# No collation in Go
+#BE	[[-]]			[[-]]		(2,4)
+#BE	[[.NIL.]]	NULL	ECOLLATE
+#BE	[[=aleph=]]	NULL	ECOLLATE
+}
+BE$	\n		\n	(0,1)
+BEn$	\n		\n	(0,1)
+BE$	[^a]		\n	(0,1)
+BE$	\na		\na	(0,2)
+E	(a)(b)(c)	abc	(0,3)(0,1)(1,2)(2,3)
+BE	xxx		xxx	(0,3)
+E1	(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)	feb 6,	(0,6)
+E1	(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)	2/7	(0,3)
+E1	(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)	feb 1,Feb 6	(5,11)
+E3	((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))	x	(0,1)(0,1)(0,1)
+E3	((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))*	xx	(0,2)(1,2)(1,2)
+E	a?(ab|ba)*	ababababababababababababababababababababababababababababababababababababababababa	(0,81)(79,81)
+E	abaa|abbaa|abbbaa|abbbbaa	ababbabbbabbbabbbbabbbbaa	(18,25)
+E	abaa|abbaa|abbbaa|abbbbaa	ababbabbbabbbabbbbabaa	(18,22)
+E	aaac|aabc|abac|abbc|baac|babc|bbac|bbbc	baaabbbabac	(7,11)
+BE$	.*			\x01\x7f	(0,2)
+E	aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll		XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa	(53,57)
+L	aaaa\nbbbb\ncccc\nddddd\neeeeee\nfffffff\ngggg\nhhhh\niiiii\njjjjj\nkkkkk\nllll		XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa	NOMATCH
+E	a*a*a*a*a*b		aaaaaaaaab	(0,10)
+BE	^			NULL		(0,0)
+BE	$			NULL		(0,0)
+BE	^$			NULL		(0,0)
+BE	^a$			a		(0,1)
+BE	abc			abc		(0,3)
+BE	abc			xabcy		(1,4)
+BE	abc			ababc		(2,5)
+BE	ab*c			abc		(0,3)
+BE	ab*bc			abc		(0,3)
+BE	ab*bc			abbc		(0,4)
+BE	ab*bc			abbbbc		(0,6)
+E	ab+bc			abbc		(0,4)
+E	ab+bc			abbbbc		(0,6)
+E	ab?bc			abbc		(0,4)
+E	ab?bc			abc		(0,3)
+E	ab?c			abc		(0,3)
+BE	^abc$			abc		(0,3)
+BE	^abc			abcc		(0,3)
+BE	abc$			aabc		(1,4)
+BE	^			abc		(0,0)
+BE	$			abc		(3,3)
+BE	a.c			abc		(0,3)
+BE	a.c			axc		(0,3)
+BE	a.*c			axyzc		(0,5)
+BE	a[bc]d			abd		(0,3)
+BE	a[b-d]e			ace		(0,3)
+BE	a[b-d]			aac		(1,3)
+BE	a[-b]			a-		(0,2)
+BE	a[b-]			a-		(0,2)
+BE	a]			a]		(0,2)
+BE	a[]]b			a]b		(0,3)
+BE	a[^bc]d			aed		(0,3)
+BE	a[^-b]c			adc		(0,3)
+BE	a[^]b]c			adc		(0,3)
+E	ab|cd			abc		(0,2)
+E	ab|cd			abcd		(0,2)
+E	a\(b			a(b		(0,3)
+E	a\(*b			ab		(0,2)
+E	a\(*b			a((b		(0,4)
+E	((a))			abc		(0,1)(0,1)(0,1)
+E	(a)b(c)			abc		(0,3)(0,1)(2,3)
+E	a+b+c			aabbabc		(4,7)
+E	a*			aaa		(0,3)
+#E	(a*)*			-		(0,0)(0,0)
+E	(a*)*			-		(0,0)(?,?)	RE2/Go
+E	(a*)+			-		(0,0)(0,0)
+#E	(a*|b)*			-		(0,0)(0,0)
+E	(a*|b)*			-		(0,0)(?,?)	RE2/Go
+E	(a+|b)*			ab		(0,2)(1,2)
+E	(a+|b)+			ab		(0,2)(1,2)
+E	(a+|b)?			ab		(0,1)(0,1)
+BE	[^ab]*			cde		(0,3)
+#E	(^)*			-		(0,0)(0,0)
+E	(^)*			-		(0,0)(?,?)	RE2/Go
+BE	a*			NULL		(0,0)
+E	([abc])*d		abbbcd		(0,6)(4,5)
+E	([abc])*bcd		abcd		(0,4)(0,1)
+E	a|b|c|d|e		e		(0,1)
+E	(a|b|c|d|e)f		ef		(0,2)(0,1)
+#E	((a*|b))*		-		(0,0)(0,0)(0,0)
+E	((a*|b))*		-		(0,0)(?,?)(?,?)	RE2/Go
+BE	abcd*efg		abcdefg		(0,7)
+BE	ab*			xabyabbbz	(1,3)
+BE	ab*			xayabbbz	(1,2)
+E	(ab|cd)e		abcde		(2,5)(2,4)
+BE	[abhgefdc]ij		hij		(0,3)
+E	(a|b)c*d		abcd		(1,4)(1,2)
+E	(ab|ab*)bc		abc		(0,3)(0,1)
+E	a([bc]*)c*		abc		(0,3)(1,3)
+E	a([bc]*)(c*d)		abcd		(0,4)(1,3)(3,4)
+E	a([bc]+)(c*d)		abcd		(0,4)(1,3)(3,4)
+E	a([bc]*)(c+d)		abcd		(0,4)(1,2)(2,4)
+E	a[bcd]*dcdcde		adcdcde		(0,7)
+E	(ab|a)b*c		abc		(0,3)(0,2)
+E	((a)(b)c)(d)		abcd		(0,4)(0,3)(0,1)(1,2)(3,4)
+BE	[A-Za-z_][A-Za-z0-9_]*	alpha		(0,5)
+E	^a(bc+|b[eh])g|.h$	abh		(1,3)
+E	(bc+d$|ef*g.|h?i(j|k))	effgz		(0,5)(0,5)
+E	(bc+d$|ef*g.|h?i(j|k))	ij		(0,2)(0,2)(1,2)
+E	(bc+d$|ef*g.|h?i(j|k))	reffgz		(1,6)(1,6)
+E	(((((((((a)))))))))	a		(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)
+BE	multiple words		multiple words yeah	(0,14)
+E	(.*)c(.*)		abcde		(0,5)(0,2)(3,5)
+BE	abcd			abcd		(0,4)
+E	a(bc)d			abcd		(0,4)(1,3)
+E	a[-]?c		ac		(0,3)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Qaddafi	(0,15)(?,?)(10,12)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Mo'ammar Gadhafi	(0,16)(?,?)(11,13)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Kaddafi	(0,15)(?,?)(10,12)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Qadhafi	(0,15)(?,?)(10,12)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Gadafi	(0,14)(?,?)(10,11)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Mu'ammar Qadafi	(0,15)(?,?)(11,12)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Moamar Gaddafi	(0,14)(?,?)(9,11)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Mu'ammar Qadhdhafi	(0,18)(?,?)(13,15)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Khaddafi	(0,16)(?,?)(11,13)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Ghaddafy	(0,16)(?,?)(11,13)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Ghadafi	(0,15)(?,?)(11,12)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Ghaddafi	(0,16)(?,?)(11,13)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muamar Kaddafi	(0,14)(?,?)(9,11)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Quathafi	(0,16)(?,?)(11,13)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Muammar Gheddafi	(0,16)(?,?)(11,13)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Moammar Khadafy	(0,15)(?,?)(11,12)
+E	M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]	Moammar Qudhafi	(0,15)(?,?)(10,12)
+E	a+(b|c)*d+		aabcdd			(0,6)(3,4)
+E	^.+$			vivi			(0,4)
+E	^(.+)$			vivi			(0,4)(0,4)
+E	^([^!.]+).att.com!(.+)$	gryphon.att.com!eby	(0,19)(0,7)(16,19)
+E	^([^!]+!)?([^!]+)$	bas			(0,3)(?,?)(0,3)
+E	^([^!]+!)?([^!]+)$	bar!bas			(0,7)(0,4)(4,7)
+E	^([^!]+!)?([^!]+)$	foo!bas			(0,7)(0,4)(4,7)
+E	^.+!([^!]+!)([^!]+)$	foo!bar!bas		(0,11)(4,8)(8,11)
+E	((foo)|(bar))!bas	bar!bas			(0,7)(0,3)(?,?)(0,3)
+E	((foo)|(bar))!bas	foo!bar!bas		(4,11)(4,7)(?,?)(4,7)
+E	((foo)|(bar))!bas	foo!bas			(0,7)(0,3)(0,3)
+E	((foo)|bar)!bas		bar!bas			(0,7)(0,3)
+E	((foo)|bar)!bas		foo!bar!bas		(4,11)(4,7)
+E	((foo)|bar)!bas		foo!bas			(0,7)(0,3)(0,3)
+E	(foo|(bar))!bas		bar!bas			(0,7)(0,3)(0,3)
+E	(foo|(bar))!bas		foo!bar!bas		(4,11)(4,7)(4,7)
+E	(foo|(bar))!bas		foo!bas			(0,7)(0,3)
+E	(foo|bar)!bas		bar!bas			(0,7)(0,3)
+E	(foo|bar)!bas		foo!bar!bas		(4,11)(4,7)
+E	(foo|bar)!bas		foo!bas			(0,7)(0,3)
+E	^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$	foo!bar!bas	(0,11)(0,11)(?,?)(?,?)(4,8)(8,11)
+E	^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$	bas		(0,3)(?,?)(0,3)
+E	^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$	bar!bas		(0,7)(0,4)(4,7)
+E	^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$	foo!bar!bas	(0,11)(?,?)(?,?)(4,8)(8,11)
+E	^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$	foo!bas		(0,7)(0,4)(4,7)
+E	^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$	bas		(0,3)(0,3)(?,?)(0,3)
+E	^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$	bar!bas		(0,7)(0,7)(0,4)(4,7)
+E	^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$	foo!bar!bas	(0,11)(0,11)(?,?)(?,?)(4,8)(8,11)
+E	^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$	foo!bas		(0,7)(0,7)(0,4)(4,7)
+E	.*(/XXX).*			/XXX			(0,4)(0,4)
+E	.*(\\XXX).*			\XXX			(0,4)(0,4)
+E	\\XXX				\XXX			(0,4)
+E	.*(/000).*			/000			(0,4)(0,4)
+E	.*(\\000).*			\000			(0,4)(0,4)
+E	\\000				\000			(0,4)
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-0.2.2/src/testdata/nullsubexpr.dat
@@ -0,0 +1,79 @@
+NOTE	null subexpression matches : 2002-06-06
+
+E	(a*)*		a		(0,1)(0,1)
+#E	SAME		x		(0,0)(0,0)
+E	SAME		x		(0,0)(?,?)	RE2/Go
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		aaaaaax		(0,6)(0,6)
+E	(a*)+		a		(0,1)(0,1)
+E	SAME		x		(0,0)(0,0)
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		aaaaaax		(0,6)(0,6)
+E	(a+)*		a		(0,1)(0,1)
+E	SAME		x		(0,0)
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		aaaaaax		(0,6)(0,6)
+E	(a+)+		a		(0,1)(0,1)
+E	SAME		x		NOMATCH
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		aaaaaax		(0,6)(0,6)
+
+E	([a]*)*		a		(0,1)(0,1)
+#E	SAME		x		(0,0)(0,0)
+E	SAME		x		(0,0)(?,?)	RE2/Go
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		aaaaaax		(0,6)(0,6)
+E	([a]*)+		a		(0,1)(0,1)
+E	SAME		x		(0,0)(0,0)
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		aaaaaax		(0,6)(0,6)
+E	([^b]*)*	a		(0,1)(0,1)
+#E	SAME		b		(0,0)(0,0)
+E	SAME		b		(0,0)(?,?)	RE2/Go
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		aaaaaab		(0,6)(0,6)
+E	([ab]*)*	a		(0,1)(0,1)
+E	SAME		aaaaaa		(0,6)(0,6)
+E	SAME		ababab		(0,6)(0,6)
+E	SAME		bababa		(0,6)(0,6)
+E	SAME		b		(0,1)(0,1)
+E	SAME		bbbbbb		(0,6)(0,6)
+E	SAME		aaaabcde	(0,5)(0,5)
+E	([^a]*)*	b		(0,1)(0,1)
+E	SAME		bbbbbb		(0,6)(0,6)
+#E	SAME		aaaaaa		(0,0)(0,0)
+E	SAME		aaaaaa		(0,0)(?,?)	RE2/Go
+E	([^ab]*)*	ccccxx		(0,6)(0,6)
+#E	SAME		ababab		(0,0)(0,0)
+E	SAME		ababab		(0,0)(?,?)	RE2/Go
+
+E	((z)+|a)*	zabcde		(0,2)(1,2)
+
+#{E	a+?		aaaaaa		(0,1)	no *? +? mimimal match ops
+#E	(a)		aaa		(0,1)(0,1)
+#E	(a*?)		aaa		(0,0)(0,0)
+#E	(a)*?		aaa		(0,0)
+#E	(a*?)*?		aaa		(0,0)
+#}
+
+B	\(a*\)*\(x\)		x	(0,1)(0,0)(0,1)
+B	\(a*\)*\(x\)		ax	(0,2)(0,1)(1,2)
+B	\(a*\)*\(x\)		axa	(0,2)(0,1)(1,2)
+B	\(a*\)*\(x\)\(\1\)	x	(0,1)(0,0)(0,1)(1,1)
+B	\(a*\)*\(x\)\(\1\)	ax	(0,2)(1,1)(1,2)(2,2)
+B	\(a*\)*\(x\)\(\1\)	axa	(0,3)(0,1)(1,2)(2,3)
+B	\(a*\)*\(x\)\(\1\)\(x\)	axax	(0,4)(0,1)(1,2)(2,3)(3,4)
+B	\(a*\)*\(x\)\(\1\)\(x\)	axxa	(0,3)(1,1)(1,2)(2,2)(2,3)
+
+#E	(a*)*(x)		x	(0,1)(0,0)(0,1)
+E	(a*)*(x)		x	(0,1)(?,?)(0,1)	RE2/Go
+E	(a*)*(x)		ax	(0,2)(0,1)(1,2)
+E	(a*)*(x)		axa	(0,2)(0,1)(1,2)
+
+E	(a*)+(x)		x	(0,1)(0,0)(0,1)
+E	(a*)+(x)		ax	(0,2)(0,1)(1,2)
+E	(a*)+(x)		axa	(0,2)(0,1)(1,2)
+
+E	(a*){2}(x)		x	(0,1)(0,0)(0,1)
+E	(a*){2}(x)		ax	(0,2)(1,1)(1,2)
+E	(a*){2}(x)		axa	(0,2)(1,1)(1,2)
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-0.2.2/src/testdata/repetition.dat
@@ -0,0 +1,163 @@
+NOTE	implicit vs. explicit repetitions : 2009-02-02
+
+# Glenn Fowler <gsf@research.att.com>
+# conforming matches (column 4) must match one of the following BREs
+#	NOMATCH
+#	(0,.)\((\(.\),\(.\))(?,?)(\2,\3)\)*
+#	(0,.)\((\(.\),\(.\))(\2,\3)(?,?)\)*
+# i.e., each 3-tuple has two identical elements and one (?,?)
+
+E	((..)|(.))				NULL		NOMATCH
+E	((..)|(.))((..)|(.))			NULL		NOMATCH
+E	((..)|(.))((..)|(.))((..)|(.))		NULL		NOMATCH
+
+E	((..)|(.)){1}				NULL		NOMATCH
+E	((..)|(.)){2}				NULL		NOMATCH
+E	((..)|(.)){3}				NULL		NOMATCH
+
+E	((..)|(.))*				NULL		(0,0)
+
+E	((..)|(.))				a		(0,1)(0,1)(?,?)(0,1)
+E	((..)|(.))((..)|(.))			a		NOMATCH
+E	((..)|(.))((..)|(.))((..)|(.))		a		NOMATCH
+
+E	((..)|(.)){1}				a		(0,1)(0,1)(?,?)(0,1)
+E	((..)|(.)){2}				a		NOMATCH
+E	((..)|(.)){3}				a		NOMATCH
+
+E	((..)|(.))*				a		(0,1)(0,1)(?,?)(0,1)
+
+E	((..)|(.))				aa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.))((..)|(.))			aa		(0,2)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2)
+E	((..)|(.))((..)|(.))((..)|(.))		aa		NOMATCH
+
+E	((..)|(.)){1}				aa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.)){2}				aa		(0,2)(1,2)(?,?)(1,2)
+E	((..)|(.)){3}				aa		NOMATCH
+
+E	((..)|(.))*				aa		(0,2)(0,2)(0,2)(?,?)
+
+E	((..)|(.))				aaa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.))((..)|(.))			aaa		(0,3)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3)
+E	((..)|(.))((..)|(.))((..)|(.))		aaa		(0,3)(0,1)(?,?)(0,1)(1,2)(?,?)(1,2)(2,3)(?,?)(2,3)
+
+E	((..)|(.)){1}				aaa		(0,2)(0,2)(0,2)(?,?)
+#E	((..)|(.)){2}				aaa		(0,3)(2,3)(?,?)(2,3)
+E	((..)|(.)){2}				aaa		(0,3)(2,3)(0,2)(2,3)	RE2/Go
+E	((..)|(.)){3}				aaa		(0,3)(2,3)(?,?)(2,3)
+
+#E	((..)|(.))*				aaa		(0,3)(2,3)(?,?)(2,3)
+E	((..)|(.))*				aaa		(0,3)(2,3)(0,2)(2,3)	RE2/Go
+
+E	((..)|(.))				aaaa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.))((..)|(.))			aaaa		(0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
+E	((..)|(.))((..)|(.))((..)|(.))		aaaa		(0,4)(0,2)(0,2)(?,?)(2,3)(?,?)(2,3)(3,4)(?,?)(3,4)
+
+E	((..)|(.)){1}				aaaa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.)){2}				aaaa		(0,4)(2,4)(2,4)(?,?)
+#E	((..)|(.)){3}				aaaa		(0,4)(3,4)(?,?)(3,4)
+E	((..)|(.)){3}				aaaa		(0,4)(3,4)(0,2)(3,4)	RE2/Go
+
+E	((..)|(.))*				aaaa		(0,4)(2,4)(2,4)(?,?)
+
+E	((..)|(.))				aaaaa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.))((..)|(.))			aaaaa		(0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
+E	((..)|(.))((..)|(.))((..)|(.))		aaaaa		(0,5)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,5)(?,?)(4,5)
+
+E	((..)|(.)){1}				aaaaa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.)){2}				aaaaa		(0,4)(2,4)(2,4)(?,?)
+#E	((..)|(.)){3}				aaaaa		(0,5)(4,5)(?,?)(4,5)
+E	((..)|(.)){3}				aaaaa		(0,5)(4,5)(2,4)(4,5)	RE2/Go
+
+#E	((..)|(.))*				aaaaa		(0,5)(4,5)(?,?)(4,5)
+E	((..)|(.))*				aaaaa		(0,5)(4,5)(2,4)(4,5)	RE2/Go
+
+E	((..)|(.))				aaaaaa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.))((..)|(.))			aaaaaa		(0,4)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)
+E	((..)|(.))((..)|(.))((..)|(.))		aaaaaa		(0,6)(0,2)(0,2)(?,?)(2,4)(2,4)(?,?)(4,6)(4,6)(?,?)
+
+E	((..)|(.)){1}				aaaaaa		(0,2)(0,2)(0,2)(?,?)
+E	((..)|(.)){2}				aaaaaa		(0,4)(2,4)(2,4)(?,?)
+E	((..)|(.)){3}				aaaaaa		(0,6)(4,6)(4,6)(?,?)
+
+E	((..)|(.))*				aaaaaa		(0,6)(4,6)(4,6)(?,?)
+
+NOTE	additional repetition tests graciously provided by Chris Kuklewicz www.haskell.org 2009-02-02
+
+# These test a bug in OS X / FreeBSD / NetBSD, and libtree. 
+# Linux/GLIBC gets the {8,} and {8,8} wrong.
+
+:HA#100:E	X(.?){0,}Y	X1234567Y	(0,9)(7,8)
+:HA#101:E	X(.?){1,}Y	X1234567Y	(0,9)(7,8)
+:HA#102:E	X(.?){2,}Y	X1234567Y	(0,9)(7,8)
+:HA#103:E	X(.?){3,}Y	X1234567Y	(0,9)(7,8)
+:HA#104:E	X(.?){4,}Y	X1234567Y	(0,9)(7,8)
+:HA#105:E	X(.?){5,}Y	X1234567Y	(0,9)(7,8)
+:HA#106:E	X(.?){6,}Y	X1234567Y	(0,9)(7,8)
+:HA#107:E	X(.?){7,}Y	X1234567Y	(0,9)(7,8)
+:HA#108:E	X(.?){8,}Y	X1234567Y	(0,9)(8,8)
+#:HA#110:E	X(.?){0,8}Y	X1234567Y	(0,9)(7,8)
+:HA#110:E	X(.?){0,8}Y	X1234567Y	(0,9)(8,8)	RE2/Go
+#:HA#111:E	X(.?){1,8}Y	X1234567Y	(0,9)(7,8)
+:HA#111:E	X(.?){1,8}Y	X1234567Y	(0,9)(8,8)	RE2/Go
+#:HA#112:E	X(.?){2,8}Y	X1234567Y	(0,9)(7,8)
+:HA#112:E	X(.?){2,8}Y	X1234567Y	(0,9)(8,8)	RE2/Go
+#:HA#113:E	X(.?){3,8}Y	X1234567Y	(0,9)(7,8)
+:HA#113:E	X(.?){3,8}Y	X1234567Y	(0,9)(8,8)	RE2/Go
+#:HA#114:E	X(.?){4,8}Y	X1234567Y	(0,9)(7,8)
+:HA#114:E	X(.?){4,8}Y	X1234567Y	(0,9)(8,8)	RE2/Go
+#:HA#115:E	X(.?){5,8}Y	X1234567Y	(0,9)(7,8)
+:HA#115:E	X(.?){5,8}Y	X1234567Y	(0,9)(8,8)	RE2/Go
+#:HA#116:E	X(.?){6,8}Y	X1234567Y	(0,9)(7,8)
+:HA#116:E	X(.?){6,8}Y	X1234567Y	(0,9)(8,8)	RE2/Go
+#:HA#117:E	X(.?){7,8}Y	X1234567Y	(0,9)(7,8)
+:HA#117:E	X(.?){7,8}Y	X1234567Y	(0,9)(8,8)	RE2/Go
+:HA#118:E	X(.?){8,8}Y	X1234567Y	(0,9)(8,8)
+
+# These test a fixed bug in my regex-tdfa that did not keep the expanded
+# form properly grouped, so right association did the wrong thing with
+# these ambiguous patterns (crafted just to test my code when I became
+# suspicious of my implementation).  The first subexpression should use
+# "ab" then "a" then "bcd".
+
+# OS X / FreeBSD / NetBSD badly fail many of these, with impossible
+# results like (0,6)(4,5)(6,6).
+
+:HA#260:E	(a|ab|c|bcd){0,}(d*)	ababcd	(0,1)(0,1)(1,1)
+:HA#261:E	(a|ab|c|bcd){1,}(d*)	ababcd	(0,1)(0,1)(1,1)
+:HA#262:E	(a|ab|c|bcd){2,}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#263:E	(a|ab|c|bcd){3,}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#264:E	(a|ab|c|bcd){4,}(d*)	ababcd	NOMATCH
+:HA#265:E	(a|ab|c|bcd){0,10}(d*)	ababcd	(0,1)(0,1)(1,1)
+:HA#266:E	(a|ab|c|bcd){1,10}(d*)	ababcd	(0,1)(0,1)(1,1)
+:HA#267:E	(a|ab|c|bcd){2,10}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#268:E	(a|ab|c|bcd){3,10}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#269:E	(a|ab|c|bcd){4,10}(d*)	ababcd	NOMATCH
+:HA#270:E	(a|ab|c|bcd)*(d*)	ababcd	(0,1)(0,1)(1,1)
+:HA#271:E	(a|ab|c|bcd)+(d*)	ababcd	(0,1)(0,1)(1,1)
+
+# The above worked on Linux/GLIBC but the following often fail.
+# They also trip up OS X / FreeBSD / NetBSD:
+
+#:HA#280:E	(ab|a|c|bcd){0,}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#280:E	(ab|a|c|bcd){0,}(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
+#:HA#281:E	(ab|a|c|bcd){1,}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#281:E	(ab|a|c|bcd){1,}(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
+#:HA#282:E	(ab|a|c|bcd){2,}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#282:E	(ab|a|c|bcd){2,}(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
+#:HA#283:E	(ab|a|c|bcd){3,}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#283:E	(ab|a|c|bcd){3,}(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
+:HA#284:E	(ab|a|c|bcd){4,}(d*)	ababcd	NOMATCH
+#:HA#285:E	(ab|a|c|bcd){0,10}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#285:E	(ab|a|c|bcd){0,10}(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
+#:HA#286:E	(ab|a|c|bcd){1,10}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#286:E	(ab|a|c|bcd){1,10}(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
+#:HA#287:E	(ab|a|c|bcd){2,10}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#287:E	(ab|a|c|bcd){2,10}(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
+#:HA#288:E	(ab|a|c|bcd){3,10}(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#288:E	(ab|a|c|bcd){3,10}(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
+:HA#289:E	(ab|a|c|bcd){4,10}(d*)	ababcd	NOMATCH
+#:HA#290:E	(ab|a|c|bcd)*(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#290:E	(ab|a|c|bcd)*(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
+#:HA#291:E	(ab|a|c|bcd)+(d*)	ababcd	(0,6)(3,6)(6,6)
+:HA#291:E	(ab|a|c|bcd)+(d*)	ababcd	(0,6)(4,5)(5,6)	RE2/Go
copy from third_party/rust/regex/src/utf8.rs
copy to third_party/rust/regex-0.2.2/src/utf8.rs
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-0.2.2/tests/api.rs
@@ -0,0 +1,182 @@
+#[test]
+fn empty_regex_empty_match() {
+    let re = regex!("");
+    assert_eq!(vec![(0, 0)], findall!(re, ""));
+}
+
+#[test]
+fn empty_regex_nonempty_match() {
+    let re = regex!("");
+    assert_eq!(vec![(0, 0), (1, 1), (2, 2), (3, 3)], findall!(re, "abc"));
+}
+
+#[test]
+fn one_zero_length_match() {
+    let re = regex!(r"\d*");
+    assert_eq!(vec![(0, 0), (1, 2), (3, 4)], findall!(re, "a1b2"));
+}
+
+#[test]
+fn many_zero_length_match() {
+    let re = regex!(r"\d*");
+    assert_eq!(vec![(0, 0), (1, 2), (3, 3), (4, 4), (5, 6)],
+               findall!(re, "a1bbb2"));
+}
+
+#[test]
+fn many_sequential_zero_length_match() {
+    let re = regex!(r"\d?");
+    assert_eq!(vec![(0, 0), (1, 2), (2, 3), (4, 5), (6, 6)],
+               findall!(re, "a12b3c"));
+}
+
+#[test]
+fn quoted_bracket_set() {
+    let re = regex!(r"([\x{5b}\x{5d}])");
+    assert_eq!(vec![(0, 1), (1, 2)], findall!(re, "[]"));
+    let re = regex!(r"([\[\]])");
+    assert_eq!(vec![(0, 1), (1, 2)], findall!(re, "[]"));
+}
+
+#[test]
+fn first_range_starts_with_left_bracket() {
+    let re = regex!(r"([\[-z])");
+    assert_eq!(vec![(0, 1), (1, 2)], findall!(re, "[]"));
+}
+
+#[test]
+fn range_ends_with_escape() {
+    let re = regex!(r"([\[-\x{5d}])");
+    assert_eq!(vec![(0, 1), (1, 2)], findall!(re, "[]"));
+}
+
+#[test]
+fn empty_match_find_iter() {
+    let re = regex!(r".*?");
+    assert_eq!(vec![(0, 0), (1, 1), (2, 2), (3, 3)], findall!(re, "abc"));
+}
+
+#[test]
+fn empty_match_captures_iter() {
+    let re = regex!(r".*?");
+    let ms: Vec<_> = re.captures_iter(text!("abc"))
+                       .map(|c| c.get(0).unwrap())
+                       .map(|m| (m.start(), m.end()))
+                       .collect();
+    assert_eq!(ms, vec![(0, 0), (1, 1), (2, 2), (3, 3)]);
+}
+
+#[test]
+fn capture_names() {
+    let re = regex!(r"(.)(?P<a>.)");
+    assert_eq!(3, re.captures_len());
+    assert_eq!((3, Some(3)), re.capture_names().size_hint());
+    assert_eq!(vec![None, None, Some("a")],
+               re.capture_names().collect::<Vec<_>>());
+}
+
+#[test]
+fn regex_string() {
+    assert_eq!(r"[a-zA-Z0-9]+", regex!(r"[a-zA-Z0-9]+").as_str());
+    assert_eq!(r"[a-zA-Z0-9]+", &format!("{}", regex!(r"[a-zA-Z0-9]+")));
+    assert_eq!(r"[a-zA-Z0-9]+", &format!("{:?}", regex!(r"[a-zA-Z0-9]+")));
+}
+
+#[test]
+fn capture_index() {
+    let re = regex!(r"^(?P<name>.+)$");
+    let cap = re.captures(t!("abc")).unwrap();
+    assert_eq!(&cap[0], t!("abc"));
+    assert_eq!(&cap[1], t!("abc"));
+    assert_eq!(&cap["name"], t!("abc"));
+}
+
+#[test]
+#[should_panic]
+#[cfg_attr(all(target_env = "msvc", target_pointer_width = "32"), ignore)]
+fn capture_index_panic_usize() {
+    let re = regex!(r"^(?P<name>.+)$");
+    let cap = re.captures(t!("abc")).unwrap();
+    let _ = cap[2];
+}
+
+#[test]
+#[should_panic]
+#[cfg_attr(all(target_env = "msvc", target_pointer_width = "32"), ignore)]
+fn capture_index_panic_name() {
+    let re = regex!(r"^(?P<name>.+)$");
+    let cap = re.captures(t!("abc")).unwrap();
+    let _ = cap["bad name"];
+}
+
+#[test]
+fn capture_index_lifetime() {
+    // This is a test of whether the types on `caps["..."]` are general
+    // enough. If not, this will fail to typecheck.
+    fn inner(s: &str) -> usize {
+        let re = regex!(r"(?P<number>\d+)");
+        let caps = re.captures(t!(s)).unwrap();
+        caps["number"].len()
+    }
+    assert_eq!(3, inner("123"));
+}
+
+#[test]
+fn capture_misc() {
+    let re = regex!(r"(.)(?P<a>a)?(.)(?P<b>.)");
+    let cap = re.captures(t!("abc")).unwrap();
+
+    assert_eq!(5, cap.len());
+
+    assert_eq!((0, 3), { let m = cap.get(0).unwrap(); (m.start(), m.end()) });
+    assert_eq!(None, cap.get(2));
+    assert_eq!((2, 3), { let m = cap.get(4).unwrap(); (m.start(), m.end()) });
+
+    assert_eq!(t!("abc"), match_text!(cap.get(0).unwrap()));
+    assert_eq!(None, cap.get(2));
+    assert_eq!(t!("c"), match_text!(cap.get(4).unwrap()));
+
+    assert_eq!(None, cap.name("a"));
+    assert_eq!(t!("c"), match_text!(cap.name("b").unwrap()));
+}
+
+#[test]
+fn sub_capture_matches() {
+    let re = regex!(r"([a-z])(([a-z])|([0-9]))");
+    let cap = re.captures(t!("a5")).unwrap();
+    let subs: Vec<_> = cap.iter().collect();
+
+    assert_eq!(5, subs.len());
+    assert!(subs[0].is_some());
+    assert!(subs[1].is_some());
+    assert!(subs[2].is_some());
+    assert!(subs[3].is_none());
+    assert!(subs[4].is_some());
+
+    assert_eq!(t!("a5"), match_text!(subs[0].unwrap()));
+    assert_eq!(t!("a"), match_text!(subs[1].unwrap()));
+    assert_eq!(t!("5"), match_text!(subs[2].unwrap()));
+    assert_eq!(t!("5"), match_text!(subs[4].unwrap()));
+}
+
+expand!(expand1, r"(?P<foo>\w+)", "abc", "$foo", "abc");
+expand!(expand2, r"(?P<foo>\w+)", "abc", "$0", "abc");
+expand!(expand3, r"(?P<foo>\w+)", "abc", "$1", "abc");
+expand!(expand4, r"(?P<foo>\w+)", "abc", "$$1", "$1");
+expand!(expand5, r"(?P<foo>\w+)", "abc", "$$foo", "$foo");
+expand!(expand6, r"(?P<a>\w+)\s+(?P<b>\d+)",
+        "abc 123", "$b$a", "123abc");
+expand!(expand7, r"(?P<a>\w+)\s+(?P<b>\d+)",
+        "abc 123", "z$bz$az", "z");
+expand!(expand8, r"(?P<a>\w+)\s+(?P<b>\d+)",
+        "abc 123", ".$b.$a.", ".123.abc.");
+expand!(expand9, r"(?P<a>\w+)\s+(?P<b>\d+)",
+        "abc 123", " $b $a ", " 123 abc ");
+expand!(expand10, r"(?P<a>\w+)\s+(?P<b>\d+)",
+        "abc 123", "$bz$az", "");
+
+split!(split1, r"\s+", "a b\nc\td\n\t e",
+       &[t!("a"), t!("b"), t!("c"), t!("d"), t!("e")]);
+split!(split2, r"\b", "a b c",
+       &[t!(""), t!("a"), t!(" "), t!("b"), t!(" "), t!("c")]);
+split!(split3, r"a$", "a", &[t!("")]);
copy from third_party/rust/regex/tests/api_str.rs
copy to third_party/rust/regex-0.2.2/tests/api_str.rs
copy from third_party/rust/regex/tests/bytes.rs
copy to third_party/rust/regex-0.2.2/tests/bytes.rs
copy from third_party/rust/regex/tests/crazy.rs
copy to third_party/rust/regex-0.2.2/tests/crazy.rs
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-0.2.2/tests/flags.rs
@@ -0,0 +1,11 @@
+mat!(match_flag_case, "(?i)abc", "ABC", Some((0, 3)));
+mat!(match_flag_weird_case, "(?i)a(?-i)bc", "Abc", Some((0, 3)));
+mat!(match_flag_weird_case_not, "(?i)a(?-i)bc", "ABC", None);
+mat!(match_flag_case_dotnl, "(?is)a.", "A\n", Some((0, 2)));
+mat!(match_flag_case_dotnl_toggle, "(?is)a.(?-is)a.", "A\nab", Some((0, 4)));
+mat!(match_flag_case_dotnl_toggle_not, "(?is)a.(?-is)a.", "A\na\n", None);
+mat!(match_flag_case_dotnl_toggle_ok, "(?is)a.(?-is:a.)?", "A\na\n", Some((0, 2)));
+mat!(match_flag_multi, "(?m)(?:^\\d+$\n?)+", "123\n456\n789", Some((0, 11)));
+mat!(match_flag_ungreedy, "(?U)a+", "aa", Some((0, 1)));
+mat!(match_flag_ungreedy_greedy, "(?U)a+?", "aa", Some((0, 2)));
+mat!(match_flag_ungreedy_noop, "(?U)(?-U)a+", "aa", Some((0, 2)));
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-0.2.2/tests/fowler.rs
@@ -0,0 +1,371 @@
+// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// DO NOT EDIT. Automatically generated by 'scripts/regex-match-tests.py'
+// on 2015-02-28 11:00:00.161706.
+
+// Tests from basic.dat
+mat!(match_basic_3, r"abracadabra$", r"abracadabracadabra", Some((7, 18)));
+mat!(match_basic_4, r"a...b", r"abababbb", Some((2, 7)));
+mat!(match_basic_5, r"XXXXXX", r"..XXXXXX", Some((2, 8)));
+mat!(match_basic_6, r"\)", r"()", Some((1, 2)));
+mat!(match_basic_7, r"a]", r"a]a", Some((0, 2)));
+mat!(match_basic_9, r"\}", r"}", Some((0, 1)));
+mat!(match_basic_10, r"\]", r"]", Some((0, 1)));
+mat!(match_basic_12, r"]", r"]", Some((0, 1)));
+mat!(match_basic_15, r"^a", r"ax", Some((0, 1)));
+mat!(match_basic_16, r"\^a", r"a^a", Some((1, 3)));
+mat!(match_basic_17, r"a\^", r"a^", Some((0, 2)));
+mat!(match_basic_18, r"a$", r"aa", Some((1, 2)));
+mat!(match_basic_19, r"a\$", r"a$", Some((0, 2)));
+mat!(match_basic_20, r"^$", r"", Some((0, 0)));
+mat!(match_basic_21, r"$^", r"", Some((0, 0)));
+mat!(match_basic_22, r"a($)", r"aa", Some((1, 2)), Some((2, 2)));
+mat!(match_basic_23, r"a*(^a)", r"aa", Some((0, 1)), Some((0, 1)));
+mat!(match_basic_24, r"(..)*(...)*", r"a", Some((0, 0)));
+mat!(match_basic_25, r"(..)*(...)*", r"abcd", Some((0, 4)), Some((2, 4)));
+mat!(match_basic_26, r"(ab|a)(bc|c)", r"abc", Some((0, 3)), Some((0, 2)), Some((2, 3)));
+mat!(match_basic_27, r"(ab)c|abc", r"abc", Some((0, 3)), Some((0, 2)));
+mat!(match_basic_28, r"a{0}b", r"ab", Some((1, 2)));
+mat!(match_basic_29, r"(a*)(b?)(b+)b{3}", r"aaabbbbbbb", Some((0, 10)), Some((0, 3)), Some((3, 4)), Some((4, 7)));
+mat!(match_basic_30, r"(a*)(b{0,1})(b{1,})b{3}", r"aaabbbbbbb", Some((0, 10)), Some((0, 3)), Some((3, 4)), Some((4, 7)));
+mat!(match_basic_32, r"((a|a)|a)", r"a", Some((0, 1)), Some((0, 1)), Some((0, 1)));
+mat!(match_basic_33, r"(a*)(a|aa)", r"aaaa", Some((0, 4)), Some((0, 3)), Some((3, 4)));
+mat!(match_basic_34, r"a*(a.|aa)", r"aaaa", Some((0, 4)), Some((2, 4)));
+mat!(match_basic_35, r"a(b)|c(d)|a(e)f", r"aef", Some((0, 3)), None, None, Some((1, 2)));
+mat!(match_basic_36, r"(a|b)?.*", r"b", Some((0, 1)), Some((0, 1)));
+mat!(match_basic_37, r"(a|b)c|a(b|c)", r"ac", Some((0, 2)), Some((0, 1)));
+mat!(match_basic_38, r"(a|b)c|a(b|c)", r"ab", Some((0, 2)), None, Some((1, 2)));
+mat!(match_basic_39, r"(a|b)*c|(a|ab)*c", r"abc", Some((0, 3)), Some((1, 2)));
+mat!(match_basic_40, r"(a|b)*c|(a|ab)*c", r"xc", Some((1, 2)));
+mat!(match_basic_41, r"(.a|.b).*|.*(.a|.b)", r"xa", Some((0, 2)), Some((0, 2)));
+mat!(match_basic_42, r"a?(ab|ba)ab", r"abab", Some((0, 4)), Some((0, 2)));
+mat!(match_basic_43, r"a?(ac{0}b|ba)ab", r"abab", Some((0, 4)), Some((0, 2)));
+mat!(match_basic_44, r"ab|abab", r"abbabab", Some((0, 2)));
+mat!(match_basic_45, r"aba|bab|bba", r"baaabbbaba", Some((5, 8)));
+mat!(match_basic_46, r"aba|bab", r"baaabbbaba", Some((6, 9)));
+mat!(match_basic_47, r"(aa|aaa)*|(a|aaaaa)", r"aa", Some((0, 2)), Some((0, 2)));
+mat!(match_basic_48, r"(a.|.a.)*|(a|.a...)", r"aa", Some((0, 2)), Some((0, 2)));
+mat!(match_basic_49, r"ab|a", r"xabc", Some((1, 3)));
+mat!(match_basic_50, r"ab|a", r"xxabc", Some((2, 4)));
+mat!(match_basic_51, r"(?i)(Ab|cD)*", r"aBcD", Some((0, 4)), Some((2, 4)));
+mat!(match_basic_52, r"[^-]", r"--a", Some((2, 3)));
+mat!(match_basic_53, r"[a-]*", r"--a", Some((0, 3)));
+mat!(match_basic_54, r"[a-m-]*", r"--amoma--", Some((0, 4)));
+mat!(match_basic_55, r":::1:::0:|:::1:1:0:", r":::0:::1:::1:::0:", Some((8, 17)));
+mat!(match_basic_56, r":::1:::0:|:::1:1:1:", r":::0:::1:::1:::0:", Some((8, 17)));
+mat!(match_basic_57, r"[[:upper:]]", r"A", Some((0, 1)));
+mat!(match_basic_58, r"[[:lower:]]+", r"`az{", Some((1, 3)));
+mat!(match_basic_59, r"[[:upper:]]+", r"@AZ[", Some((1, 3)));
+mat!(match_basic_65, r"
+", r"
+", Some((0, 1)));
+mat!(match_basic_66, r"
+", r"
+", Some((0, 1)));
+mat!(match_basic_67, r"[^a]", r"
+", Some((0, 1)));
+mat!(match_basic_68, r"
+a", r"
+a", Some((0, 2)));
+mat!(match_basic_69, r"(a)(b)(c)", r"abc", Some((0, 3)), Some((0, 1)), Some((1, 2)), Some((2, 3)));
+mat!(match_basic_70, r"xxx", r"xxx", Some((0, 3)));
+mat!(match_basic_71, r"(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)", r"feb 6,", Some((0, 6)));
+mat!(match_basic_72, r"(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)", r"2/7", Some((0, 3)));
+mat!(match_basic_73, r"(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)", r"feb 1,Feb 6", Some((5, 11)));
+mat!(match_basic_74, r"((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))", r"x", Some((0, 1)), Some((0, 1)), Some((0, 1)));
+mat!(match_basic_75, r"((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))*", r"xx", Some((0, 2)), Some((1, 2)), Some((1, 2)));
+mat!(match_basic_76, r"a?(ab|ba)*", r"ababababababababababababababababababababababababababababababababababababababababa", Some((0, 81)), Some((79, 81)));
+mat!(match_basic_77, r"abaa|abbaa|abbbaa|abbbbaa", r"ababbabbbabbbabbbbabbbbaa", Some((18, 25)));
+mat!(match_basic_78, r"abaa|abbaa|abbbaa|abbbbaa", r"ababbabbbabbbabbbbabaa", Some((18, 22)));
+mat!(match_basic_79, r"aaac|aabc|abac|abbc|baac|babc|bbac|bbbc", r"baaabbbabac", Some((7, 11)));
+mat!(match_basic_80, r".*", r"", Some((0, 2)));
+mat!(match_basic_81, r"aaaa|bbbb|cccc|ddddd|eeeeee|fffffff|gggg|hhhh|iiiii|jjjjj|kkkkk|llll", r"XaaaXbbbXcccXdddXeeeXfffXgggXhhhXiiiXjjjXkkkXlllXcbaXaaaa", Some((53, 57)));
+mat!(match_basic_83, r"a*a*a*a*a*b", r"aaaaaaaaab", Some((0, 10)));
+mat!(match_basic_84, r"^", r"", Some((0, 0)));
+mat!(match_basic_85, r"$", r"", Some((0, 0)));
+mat!(match_basic_86, r"^$", r"", Some((0, 0)));
+mat!(match_basic_87, r"^a$", r"a", Some((0, 1)));
+mat!(match_basic_88, r"abc", r"abc", Some((0, 3)));
+mat!(match_basic_89, r"abc", r"xabcy", Some((1, 4)));
+mat!(match_basic_90, r"abc", r"ababc", Some((2, 5)));
+mat!(match_basic_91, r"ab*c", r"abc", Some((0, 3)));
+mat!(match_basic_92, r"ab*bc", r"abc", Some((0, 3)));
+mat!(match_basic_93, r"ab*bc", r"abbc", Some((0, 4)));
+mat!(match_basic_94, r"ab*bc", r"abbbbc", Some((0, 6)));
+mat!(match_basic_95, r"ab+bc", r"abbc", Some((0, 4)));
+mat!(match_basic_96, r"ab+bc", r"abbbbc", Some((0, 6)));
+mat!(match_basic_97, r"ab?bc", r"abbc", Some((0, 4)));
+mat!(match_basic_98, r"ab?bc", r"abc", Some((0, 3)));
+mat!(match_basic_99, r"ab?c", r"abc", Some((0, 3)));
+mat!(match_basic_100, r"^abc$", r"abc", Some((0, 3)));
+mat!(match_basic_101, r"^abc", r"abcc", Some((0, 3)));
+mat!(match_basic_102, r"abc$", r"aabc", Some((1, 4)));
+mat!(match_basic_103, r"^", r"abc", Some((0, 0)));
+mat!(match_basic_104, r"$", r"abc", Some((3, 3)));
+mat!(match_basic_105, r"a.c", r"abc", Some((0, 3)));
+mat!(match_basic_106, r"a.c", r"axc", Some((0, 3)));
+mat!(match_basic_107, r"a.*c", r"axyzc", Some((0, 5)));
+mat!(match_basic_108, r"a[bc]d", r"abd", Some((0, 3)));
+mat!(match_basic_109, r"a[b-d]e", r"ace", Some((0, 3)));
+mat!(match_basic_110, r"a[b-d]", r"aac", Some((1, 3)));
+mat!(match_basic_111, r"a[-b]", r"a-", Some((0, 2)));
+mat!(match_basic_112, r"a[b-]", r"a-", Some((0, 2)));
+mat!(match_basic_113, r"a]", r"a]", Some((0, 2)));
+mat!(match_basic_114, r"a[]]b", r"a]b", Some((0, 3)));
+mat!(match_basic_115, r"a[^bc]d", r"aed", Some((0, 3)));
+mat!(match_basic_116, r"a[^-b]c", r"adc", Some((0, 3)));
+mat!(match_basic_117, r"a[^]b]c", r"adc", Some((0, 3)));
+mat!(match_basic_118, r"ab|cd", r"abc", Some((0, 2)));
+mat!(match_basic_119, r"ab|cd", r"abcd", Some((0, 2)));
+mat!(match_basic_120, r"a\(b", r"a(b", Some((0, 3)));
+mat!(match_basic_121, r"a\(*b", r"ab", Some((0, 2)));
+mat!(match_basic_122, r"a\(*b", r"a((b", Some((0, 4)));
+mat!(match_basic_123, r"((a))", r"abc", Some((0, 1)), Some((0, 1)), Some((0, 1)));
+mat!(match_basic_124, r"(a)b(c)", r"abc", Some((0, 3)), Some((0, 1)), Some((2, 3)));
+mat!(match_basic_125, r"a+b+c", r"aabbabc", Some((4, 7)));
+mat!(match_basic_126, r"a*", r"aaa", Some((0, 3)));
+mat!(match_basic_128, r"(a*)*", r"-", Some((0, 0)), None);
+mat!(match_basic_129, r"(a*)+", r"-", Some((0, 0)), Some((0, 0)));
+mat!(match_basic_131, r"(a*|b)*", r"-", Some((0, 0)), None);
+mat!(match_basic_132, r"(a+|b)*", r"ab", Some((0, 2)), Some((1, 2)));
+mat!(match_basic_133, r"(a+|b)+", r"ab", Some((0, 2)), Some((1, 2)));
+mat!(match_basic_134, r"(a+|b)?", r"ab", Some((0, 1)), Some((0, 1)));
+mat!(match_basic_135, r"[^ab]*", r"cde", Some((0, 3)));
+mat!(match_basic_137, r"(^)*", r"-", Some((0, 0)), None);
+mat!(match_basic_138, r"a*", r"", Some((0, 0)));
+mat!(match_basic_139, r"([abc])*d", r"abbbcd", Some((0, 6)), Some((4, 5)));
+mat!(match_basic_140, r"([abc])*bcd", r"abcd", Some((0, 4)), Some((0, 1)));
+mat!(match_basic_141, r"a|b|c|d|e", r"e", Some((0, 1)));
+mat!(match_basic_142, r"(a|b|c|d|e)f", r"ef", Some((0, 2)), Some((0, 1)));
+mat!(match_basic_144, r"((a*|b))*", r"-", Some((0, 0)), None, None);
+mat!(match_basic_145, r"abcd*efg", r"abcdefg", Some((0, 7)));
+mat!(match_basic_146, r"ab*", r"xabyabbbz", Some((1, 3)));
+mat!(match_basic_147, r"ab*", r"xayabbbz", Some((1, 2)));
+mat!(match_basic_148, r"(ab|cd)e", r"abcde", Some((2, 5)), Some((2, 4)));
+mat!(match_basic_149, r"[abhgefdc]ij", r"hij", Some((0, 3)));
+mat!(match_basic_150, r"(a|b)c*d", r"abcd", Some((1, 4)), Some((1, 2)));
+mat!(match_basic_151, r"(ab|ab*)bc", r"abc", Some((0, 3)), Some((0, 1)));
+mat!(match_basic_152, r"a([bc]*)c*", r"abc", Some((0, 3)), Some((1, 3)));
+mat!(match_basic_153, r"a([bc]*)(c*d)", r"abcd", Some((0, 4)), Some((1, 3)), Some((3, 4)));
+mat!(match_basic_154, r"a([bc]+)(c*d)", r"abcd", Some((0, 4)), Some((1, 3)), Some((3, 4)));
+mat!(match_basic_155, r"a([bc]*)(c+d)", r"abcd", Some((0, 4)), Some((1, 2)), Some((2, 4)));
+mat!(match_basic_156, r"a[bcd]*dcdcde", r"adcdcde", Some((0, 7)));
+mat!(match_basic_157, r"(ab|a)b*c", r"abc", Some((0, 3)), Some((0, 2)));
+mat!(match_basic_158, r"((a)(b)c)(d)", r"abcd", Some((0, 4)), Some((0, 3)), Some((0, 1)), Some((1, 2)), Some((3, 4)));
+mat!(match_basic_159, r"[A-Za-z_][A-Za-z0-9_]*", r"alpha", Some((0, 5)));
+mat!(match_basic_160, r"^a(bc+|b[eh])g|.h$", r"abh", Some((1, 3)));
+mat!(match_basic_161, r"(bc+d$|ef*g.|h?i(j|k))", r"effgz", Some((0, 5)), Some((0, 5)));
+mat!(match_basic_162, r"(bc+d$|ef*g.|h?i(j|k))", r"ij", Some((0, 2)), Some((0, 2)), Some((1, 2)));
+mat!(match_basic_163, r"(bc+d$|ef*g.|h?i(j|k))", r"reffgz", Some((1, 6)), Some((1, 6)));
+mat!(match_basic_164, r"(((((((((a)))))))))", r"a", Some((0, 1)), Some((0, 1)), Some((0, 1)), Some((0, 1)), Some((0, 1)), Some((0, 1)), Some((0, 1)), Some((0, 1)), Some((0, 1)), Some((0, 1)));
+mat!(match_basic_165, r"multiple words", r"multiple words yeah", Some((0, 14)));
+mat!(match_basic_166, r"(.*)c(.*)", r"abcde", Some((0, 5)), Some((0, 2)), Some((3, 5)));
+mat!(match_basic_167, r"abcd", r"abcd", Some((0, 4)));
+mat!(match_basic_168, r"a(bc)d", r"abcd", Some((0, 4)), Some((1, 3)));
+mat!(match_basic_169, r"a[-]?c", r"ac", Some((0, 3)));
+mat!(match_basic_170, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Qaddafi", Some((0, 15)), None, Some((10, 12)));
+mat!(match_basic_171, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Mo'ammar Gadhafi", Some((0, 16)), None, Some((11, 13)));
+mat!(match_basic_172, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Kaddafi", Some((0, 15)), None, Some((10, 12)));
+mat!(match_basic_173, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Qadhafi", Some((0, 15)), None, Some((10, 12)));
+mat!(match_basic_174, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Gadafi", Some((0, 14)), None, Some((10, 11)));
+mat!(match_basic_175, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Mu'ammar Qadafi", Some((0, 15)), None, Some((11, 12)));
+mat!(match_basic_176, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Moamar Gaddafi", Some((0, 14)), None, Some((9, 11)));
+mat!(match_basic_177, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Mu'ammar Qadhdhafi", Some((0, 18)), None, Some((13, 15)));
+mat!(match_basic_178, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Khaddafi", Some((0, 16)), None, Some((11, 13)));
+mat!(match_basic_179, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Ghaddafy", Some((0, 16)), None, Some((11, 13)));
+mat!(match_basic_180, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Ghadafi", Some((0, 15)), None, Some((11, 12)));
+mat!(match_basic_181, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Ghaddafi", Some((0, 16)), None, Some((11, 13)));
+mat!(match_basic_182, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muamar Kaddafi", Some((0, 14)), None, Some((9, 11)));
+mat!(match_basic_183, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Quathafi", Some((0, 16)), None, Some((11, 13)));
+mat!(match_basic_184, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Muammar Gheddafi", Some((0, 16)), None, Some((11, 13)));
+mat!(match_basic_185, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Moammar Khadafy", Some((0, 15)), None, Some((11, 12)));
+mat!(match_basic_186, r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]", r"Moammar Qudhafi", Some((0, 15)), None, Some((10, 12)));
+mat!(match_basic_187, r"a+(b|c)*d+", r"aabcdd", Some((0, 6)), Some((3, 4)));
+mat!(match_basic_188, r"^.+$", r"vivi", Some((0, 4)));
+mat!(match_basic_189, r"^(.+)$", r"vivi", Some((0, 4)), Some((0, 4)));
+mat!(match_basic_190, r"^([^!.]+).att.com!(.+)$", r"gryphon.att.com!eby", Some((0, 19)), Some((0, 7)), Some((16, 19)));
+mat!(match_basic_191, r"^([^!]+!)?([^!]+)$", r"bas", Some((0, 3)), None, Some((0, 3)));
+mat!(match_basic_192, r"^([^!]+!)?([^!]+)$", r"bar!bas", Some((0, 7)), Some((0, 4)), Some((4, 7)));
+mat!(match_basic_193, r"^([^!]+!)?([^!]+)$", r"foo!bas", Some((0, 7)), Some((0, 4)), Some((4, 7)));
+mat!(match_basic_194, r"^.+!([^!]+!)([^!]+)$", r"foo!bar!bas", Some((0, 11)), Some((4, 8)), Some((8, 11)));
+mat!(match_basic_195, r"((foo)|(bar))!bas", r"bar!bas", Some((0, 7)), Some((0, 3)), None, Some((0, 3)));
+mat!(match_basic_196, r"((foo)|(bar))!bas", r"foo!bar!bas", Some((4, 11)), Some((4, 7)), None, Some((4, 7)));
+mat!(match_basic_197, r"((foo)|(bar))!bas", r"foo!bas", Some((0, 7)), Some((0, 3)), Some((0, 3)));
+mat!(match_basic_198, r"((foo)|bar)!bas", r"bar!bas", Some((0, 7)), Some((0, 3)));
+mat!(match_basic_199, r"((foo)|bar)!bas", r"foo!bar!bas", Some((4, 11)), Some((4, 7)));
+mat!(match_basic_200, r"((foo)|bar)!bas", r"foo!bas", Some((0, 7)), Some((0, 3)), Some((0, 3)));
+mat!(match_basic_201, r"(foo|(bar))!bas", r"bar!bas", Some((0, 7)), Some((0, 3)), Some((0, 3)));
+mat!(match_basic_202, r"(foo|(bar))!bas", r"foo!bar!bas", Some((4, 11)), Some((4, 7)), Some((4, 7)));
+mat!(match_basic_203, r"(foo|(bar))!bas", r"foo!bas", Some((0, 7)), Some((0, 3)));
+mat!(match_basic_204, r"(foo|bar)!bas", r"bar!bas", Some((0, 7)), Some((0, 3)));
+mat!(match_basic_205, r"(foo|bar)!bas", r"foo!bar!bas", Some((4, 11)), Some((4, 7)));
+mat!(match_basic_206, r"(foo|bar)!bas", r"foo!bas", Some((0, 7)), Some((0, 3)));
+mat!(match_basic_207, r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", r"foo!bar!bas", Some((0, 11)), Some((0, 11)), None, None, Some((4, 8)), Some((8, 11)));
+mat!(match_basic_208, r"^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", r"bas", Some((0, 3)), None, Some((0, 3)));
+mat!(match_basic_209, r"^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", r"bar!bas", Some((0, 7)), Some((0, 4)), Some((4, 7)));
+mat!(match_basic_210, r"^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", r"foo!bar!bas", Some((0, 11)), None, None, Some((4, 8)), Some((8, 11)));
+mat!(match_basic_211, r"^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$", r"foo!bas", Some((0, 7)), Some((0, 4)), Some((4, 7)));
+mat!(match_basic_212, r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", r"bas", Some((0, 3)), Some((0, 3)), None, Some((0, 3)));
+mat!(match_basic_213, r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", r"bar!bas", Some((0, 7)), Some((0, 7)), Some((0, 4)), Some((4, 7)));
+mat!(match_basic_214, r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", r"foo!bar!bas", Some((0, 11)), Some((0, 11)), None, None, Some((4, 8)), Some((8, 11)));
+mat!(match_basic_215, r"^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$", r"foo!bas", Some((0, 7)), Some((0, 7)), Some((0, 4)), Some((4, 7)));
+mat!(match_basic_216, r".*(/XXX).*", r"/XXX", Some((0, 4)), Some((0, 4)));
+mat!(match_basic_217, r".*(\\XXX).*", r"\XXX", Some((0, 4)), Some((0, 4)));
+mat!(match_basic_218, r"\\XXX", r"\XXX", Some((0, 4)));
+mat!(match_basic_219, r".*(/000).*", r"/000", Some((0, 4)), Some((0, 4)));
+mat!(match_basic_220, r".*(\\000).*", r"\000", Some((0, 4)), Some((0, 4)));
+mat!(match_basic_221, r"\\000", r"\000", Some((0, 4)));
+
+// Tests from nullsubexpr.dat
+mat!(match_nullsubexpr_3, r"(a*)*", r"a", Some((0, 1)), Some((0, 1)));
+mat!(match_nullsubexpr_5, r"(a*)*", r"x", Some((0, 0)), None);
+mat!(match_nullsubexpr_6, r"(a*)*", r"aaaaaa", Some((0, 6)), Some((0, 6)));
+mat!(match_nullsubexpr_7, r"(a*)*", r"aaaaaax", Some((0, 6)), Some((0, 6)));
+mat!(match_nullsubexpr_8, r"(a*)+", r"a", Some((0, 1)), Some((0, 1)));
+mat!(match_nullsubexpr_9, r"(a*)+", r"x", Some((0, 0)), Some((0, 0)));
+mat!(match_nullsubexpr_10, r"(a*)+", r"aaaaaa", Some((0, 6)), Some((0, 6)));
+mat!(match_nullsubexpr_11, r"(a*)+", r"aaaaaax", Some((0, 6)), Some((0, 6)));
+mat!(match_nullsubexpr_12, r"(a+)*", r"a", Some((0, 1)), Some((0, 1)));
+mat!(match_nullsubexpr_13, r"(a+)*", r"x", Some((0, 0)));
+mat!(match_nullsubexpr_14, r"(a+)*", r"aaaaaa", Some((0, 6)), Some((0, 6)));
+mat!(match_nullsubexpr_15, r"(a+)*", r"aaaaaax", Some((0, 6)), Some((0, 6)));
+mat!(match_nullsubexpr_16, r"(a+)+", r"a", Some((0, 1)), Some((0, 1)));
+mat!(match_nullsubexpr_17, r"(a+)+", r"x", None);
+mat!(match_nullsubexpr_18, r"(a+)+", r"aaaaaa", Some((0, 6)), Some((0, 6)));
+mat!(match_nullsubexpr_19, r"(a+)+", r"aaaaaax", Some((0, 6)), Some((0, 6)));
+mat!(match_nullsubexpr_21, r"([a]*)*", r"a", Some((0, 1)), Some((0, 1)));
+mat!(match_nullsubexpr_23, r"([a]*)*", r"x", Some((0, 0)), None);
+mat!(match_nullsubexpr_24, r"([a]*)*", r"aaaaaa", Some((0, 6)), Some((0, 6)));
+mat!(match_nullsubexpr_25, r"([a]*)*", r"aaaaaax", Some((0, 6)), Some((0, 6)));
+mat!(match_nullsubexpr_26, r"([a]*)+", r"a", Some((0, 1)), Some((0, 1)));
+mat!(match_nullsubexpr_27, r"([a]*)+", r"x", Some((0, 0)), Some((0, 0)));
+mat!(match_nullsubexpr_28, r"([a]*)+", r"aaaaaa", Some((0, 6)), Some((0, 6)));
+mat!(match_nullsubexpr_29, r"([a]*)+", r"aaaaaax", Some((0, 6)), Some((0, 6)));
+mat!(match_nullsubexpr_30, r"([^b]*)*", r"a", Some((0, 1)), Some((0, 1)));
+mat!(match_nullsubexpr_32, r"([^b]*)*", r"b", Some((0, 0)), None);
+mat!(match_nullsubexpr_33, r"([^b]*)*", r"aaaaaa", Some((0, 6)), Some((0, 6)));
+mat!(match_nullsubexpr_34, r"([^b]*)*", r"aaaaaab", Some((0, 6)), Some((0, 6)));
+mat!(match_nullsubexpr_35, r"([ab]*)*", r"a", Some((0, 1)), Some((0, 1)));
+mat!(match_nullsubexpr_36, r"([ab]*)*", r"aaaaaa", Some((0, 6)), Some((0, 6)));
+mat!(match_nullsubexpr_37, r"([ab]*)*", r"ababab", Some((0, 6)), Some((0, 6)));
+mat!(match_nullsubexpr_38, r"([ab]*)*", r"bababa", Some((0, 6)), Some((0, 6)));
+mat!(match_nullsubexpr_39, r"([ab]*)*", r"b", Some((0, 1)), Some((0, 1)));
+mat!(match_nullsubexpr_40, r"([ab]*)*", r"bbbbbb", Some((0, 6)), Some((0, 6)));
+mat!(match_nullsubexpr_41, r"([ab]*)*", r"aaaabcde", Some((0, 5)), Some((0, 5)));
+mat!(match_nullsubexpr_42, r"([^a]*)*", r"b", Some((0, 1)), Some((0, 1)));
+mat!(match_nullsubexpr_43, r"([^a]*)*", r"bbbbbb", Some((0, 6)), Some((0, 6)));
+mat!(match_nullsubexpr_45, r"([^a]*)*", r"aaaaaa", Some((0, 0)), None);
+mat!(match_nullsubexpr_46, r"([^ab]*)*", r"ccccxx", Some((0, 6)), Some((0, 6)));
+mat!(match_nullsubexpr_48, r"([^ab]*)*", r"ababab", Some((0, 0)), None);
+mat!(match_nullsubexpr_50, r"((z)+|a)*", r"zabcde", Some((0, 2)), Some((1, 2)));
+mat!(match_nullsubexpr_69, r"(a*)*(x)", r"x", Some((0, 1)), None, Some((0, 1)));
+mat!(match_nullsubexpr_70, r"(a*)*(x)", r"ax", Some((0, 2)), Some((0, 1)), Some((1, 2)));
+mat!(match_nullsubexpr_71, r"(a*)*(x)", r"axa", Some((0, 2)), Some((0, 1)), Some((1, 2)));
+mat!(match_nullsubexpr_73, r"(a*)+(x)", r"x", Some((0, 1)), Some((0, 0)), Some((0, 1)));
+mat!(match_nullsubexpr_74, r"(a*)+(x)", r"ax", Some((0, 2)), Some((0, 1)), Some((1, 2)));
+mat!(match_nullsubexpr_75, r"(a*)+(x)", r"axa", Some((0, 2)), Some((0, 1)), Some((1, 2)));
+mat!(match_nullsubexpr_77, r"(a*){2}(x)", r"x", Some((0, 1)), Some((0, 0)), Some((0, 1)));
+mat!(match_nullsubexpr_78, r"(a*){2}(x)", r"ax", Some((0, 2)), Some((1, 1)), Some((1, 2)));
+mat!(match_nullsubexpr_79, r"(a*){2}(x)", r"axa", Some((0, 2)), Some((1, 1)), Some((1, 2)));
+
+// Tests from repetition.dat
+mat!(match_repetition_10, r"((..)|(.))", r"", None);
+mat!(match_repetition_11, r"((..)|(.))((..)|(.))", r"", None);
+mat!(match_repetition_12, r"((..)|(.))((..)|(.))((..)|(.))", r"", None);
+mat!(match_repetition_14, r"((..)|(.)){1}", r"", None);
+mat!(match_repetition_15, r"((..)|(.)){2}", r"", None);
+mat!(match_repetition_16, r"((..)|(.)){3}", r"", None);
+mat!(match_repetition_18, r"((..)|(.))*", r"", Some((0, 0)));
+mat!(match_repetition_20, r"((..)|(.))", r"a", Some((0, 1)), Some((0, 1)), None, Some((0, 1)));
+mat!(match_repetition_21, r"((..)|(.))((..)|(.))", r"a", None);
+mat!(match_repetition_22, r"((..)|(.))((..)|(.))((..)|(.))", r"a", None);
+mat!(match_repetition_24, r"((..)|(.)){1}", r"a", Some((0, 1)), Some((0, 1)), None, Some((0, 1)));
+mat!(match_repetition_25, r"((..)|(.)){2}", r"a", None);
+mat!(match_repetition_26, r"((..)|(.)){3}", r"a", None);
+mat!(match_repetition_28, r"((..)|(.))*", r"a", Some((0, 1)), Some((0, 1)), None, Some((0, 1)));
+mat!(match_repetition_30, r"((..)|(.))", r"aa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None);
+mat!(match_repetition_31, r"((..)|(.))((..)|(.))", r"aa", Some((0, 2)), Some((0, 1)), None, Some((0, 1)), Some((1, 2)), None, Some((1, 2)));
+mat!(match_repetition_32, r"((..)|(.))((..)|(.))((..)|(.))", r"aa", None);
+mat!(match_repetition_34, r"((..)|(.)){1}", r"aa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None);
+mat!(match_repetition_35, r"((..)|(.)){2}", r"aa", Some((0, 2)), Some((1, 2)), None, Some((1, 2)));
+mat!(match_repetition_36, r"((..)|(.)){3}", r"aa", None);
+mat!(match_repetition_38, r"((..)|(.))*", r"aa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None);
+mat!(match_repetition_40, r"((..)|(.))", r"aaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None);
+mat!(match_repetition_41, r"((..)|(.))((..)|(.))", r"aaa", Some((0, 3)), Some((0, 2)), Some((0, 2)), None, Some((2, 3)), None, Some((2, 3)));
+mat!(match_repetition_42, r"((..)|(.))((..)|(.))((..)|(.))", r"aaa", Some((0, 3)), Some((0, 1)), None, Some((0, 1)), Some((1, 2)), None, Some((1, 2)), Some((2, 3)), None, Some((2, 3)));
+mat!(match_repetition_44, r"((..)|(.)){1}", r"aaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None);
+mat!(match_repetition_46, r"((..)|(.)){2}", r"aaa", Some((0, 3)), Some((2, 3)), Some((0, 2)), Some((2, 3)));
+mat!(match_repetition_47, r"((..)|(.)){3}", r"aaa", Some((0, 3)), Some((2, 3)), None, Some((2, 3)));
+mat!(match_repetition_50, r"((..)|(.))*", r"aaa", Some((0, 3)), Some((2, 3)), Some((0, 2)), Some((2, 3)));
+mat!(match_repetition_52, r"((..)|(.))", r"aaaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None);
+mat!(match_repetition_53, r"((..)|(.))((..)|(.))", r"aaaa", Some((0, 4)), Some((0, 2)), Some((0, 2)), None, Some((2, 4)), Some((2, 4)), None);
+mat!(match_repetition_54, r"((..)|(.))((..)|(.))((..)|(.))", r"aaaa", Some((0, 4)), Some((0, 2)), Some((0, 2)), None, Some((2, 3)), None, Some((2, 3)), Some((3, 4)), None, Some((3, 4)));
+mat!(match_repetition_56, r"((..)|(.)){1}", r"aaaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None);
+mat!(match_repetition_57, r"((..)|(.)){2}", r"aaaa", Some((0, 4)), Some((2, 4)), Some((2, 4)), None);
+mat!(match_repetition_59, r"((..)|(.)){3}", r"aaaa", Some((0, 4)), Some((3, 4)), Some((0, 2)), Some((3, 4)));
+mat!(match_repetition_61, r"((..)|(.))*", r"aaaa", Some((0, 4)), Some((2, 4)), Some((2, 4)), None);
+mat!(match_repetition_63, r"((..)|(.))", r"aaaaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None);
+mat!(match_repetition_64, r"((..)|(.))((..)|(.))", r"aaaaa", Some((0, 4)), Some((0, 2)), Some((0, 2)), None, Some((2, 4)), Some((2, 4)), None);
+mat!(match_repetition_65, r"((..)|(.))((..)|(.))((..)|(.))", r"aaaaa", Some((0, 5)), Some((0, 2)), Some((0, 2)), None, Some((2, 4)), Some((2, 4)), None, Some((4, 5)), None, Some((4, 5)));
+mat!(match_repetition_67, r"((..)|(.)){1}", r"aaaaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None);
+mat!(match_repetition_68, r"((..)|(.)){2}", r"aaaaa", Some((0, 4)), Some((2, 4)), Some((2, 4)), None);
+mat!(match_repetition_70, r"((..)|(.)){3}", r"aaaaa", Some((0, 5)), Some((4, 5)), Some((2, 4)), Some((4, 5)));
+mat!(match_repetition_73, r"((..)|(.))*", r"aaaaa", Some((0, 5)), Some((4, 5)), Some((2, 4)), Some((4, 5)));
+mat!(match_repetition_75, r"((..)|(.))", r"aaaaaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None);
+mat!(match_repetition_76, r"((..)|(.))((..)|(.))", r"aaaaaa", Some((0, 4)), Some((0, 2)), Some((0, 2)), None, Some((2, 4)), Some((2, 4)), None);
+mat!(match_repetition_77, r"((..)|(.))((..)|(.))((..)|(.))", r"aaaaaa", Some((0, 6)), Some((0, 2)), Some((0, 2)), None, Some((2, 4)), Some((2, 4)), None, Some((4, 6)), Some((4, 6)), None);
+mat!(match_repetition_79, r"((..)|(.)){1}", r"aaaaaa", Some((0, 2)), Some((0, 2)), Some((0, 2)), None);
+mat!(match_repetition_80, r"((..)|(.)){2}", r"aaaaaa", Some((0, 4)), Some((2, 4)), Some((2, 4)), None);
+mat!(match_repetition_81, r"((..)|(.)){3}", r"aaaaaa", Some((0, 6)), Some((4, 6)), Some((4, 6)), None);
+mat!(match_repetition_83, r"((..)|(.))*", r"aaaaaa", Some((0, 6)), Some((4, 6)), Some((4, 6)), None);
+mat!(match_repetition_90, r"X(.?){0,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8)));
+mat!(match_repetition_91, r"X(.?){1,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8)));
+mat!(match_repetition_92, r"X(.?){2,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8)));
+mat!(match_repetition_93, r"X(.?){3,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8)));
+mat!(match_repetition_94, r"X(.?){4,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8)));
+mat!(match_repetition_95, r"X(.?){5,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8)));
+mat!(match_repetition_96, r"X(.?){6,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8)));
+mat!(match_repetition_97, r"X(.?){7,}Y", r"X1234567Y", Some((0, 9)), Some((7, 8)));
+mat!(match_repetition_98, r"X(.?){8,}Y", r"X1234567Y", Some((0, 9)), Some((8, 8)));
+mat!(match_repetition_100, r"X(.?){0,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8)));
+mat!(match_repetition_102, r"X(.?){1,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8)));
+mat!(match_repetition_104, r"X(.?){2,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8)));
+mat!(match_repetition_106, r"X(.?){3,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8)));
+mat!(match_repetition_108, r"X(.?){4,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8)));
+mat!(match_repetition_110, r"X(.?){5,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8)));
+mat!(match_repetition_112, r"X(.?){6,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8)));
+mat!(match_repetition_114, r"X(.?){7,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8)));
+mat!(match_repetition_115, r"X(.?){8,8}Y", r"X1234567Y", Some((0, 9)), Some((8, 8)));
+mat!(match_repetition_126, r"(a|ab|c|bcd){0,}(d*)", r"ababcd", Some((0, 1)), Some((0, 1)), Some((1, 1)));
+mat!(match_repetition_127, r"(a|ab|c|bcd){1,}(d*)", r"ababcd", Some((0, 1)), Some((0, 1)), Some((1, 1)));
+mat!(match_repetition_128, r"(a|ab|c|bcd){2,}(d*)", r"ababcd", Some((0, 6)), Some((3, 6)), Some((6, 6)));
+mat!(match_repetition_129, r"(a|ab|c|bcd){3,}(d*)", r"ababcd", Some((0, 6)), Some((3, 6)), Some((6, 6)));
+mat!(match_repetition_130, r"(a|ab|c|bcd){4,}(d*)", r"ababcd", None);
+mat!(match_repetition_131, r"(a|ab|c|bcd){0,10}(d*)", r"ababcd", Some((0, 1)), Some((0, 1)), Some((1, 1)));
+mat!(match_repetition_132, r"(a|ab|c|bcd){1,10}(d*)", r"ababcd", Some((0, 1)), Some((0, 1)), Some((1, 1)));
+mat!(match_repetition_133, r"(a|ab|c|bcd){2,10}(d*)", r"ababcd", Some((0, 6)), Some((3, 6)), Some((6, 6)));
+mat!(match_repetition_134, r"(a|ab|c|bcd){3,10}(d*)", r"ababcd", Some((0, 6)), Some((3, 6)), Some((6, 6)));
+mat!(match_repetition_135, r"(a|ab|c|bcd){4,10}(d*)", r"ababcd", None);
+mat!(match_repetition_136, r"(a|ab|c|bcd)*(d*)", r"ababcd", Some((0, 1)), Some((0, 1)), Some((1, 1)));
+mat!(match_repetition_137, r"(a|ab|c|bcd)+(d*)", r"ababcd", Some((0, 1)), Some((0, 1)), Some((1, 1)));
+mat!(match_repetition_143, r"(ab|a|c|bcd){0,}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6)));
+mat!(match_repetition_145, r"(ab|a|c|bcd){1,}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6)));
+mat!(match_repetition_147, r"(ab|a|c|bcd){2,}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6)));
+mat!(match_repetition_149, r"(ab|a|c|bcd){3,}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6)));
+mat!(match_repetition_150, r"(ab|a|c|bcd){4,}(d*)", r"ababcd", None);
+mat!(match_repetition_152, r"(ab|a|c|bcd){0,10}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6)));
+mat!(match_repetition_154, r"(ab|a|c|bcd){1,10}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6)));
+mat!(match_repetition_156, r"(ab|a|c|bcd){2,10}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6)));
+mat!(match_repetition_158, r"(ab|a|c|bcd){3,10}(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6)));
+mat!(match_repetition_159, r"(ab|a|c|bcd){4,10}(d*)", r"ababcd", None);
+mat!(match_repetition_161, r"(ab|a|c|bcd)*(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6)));
+mat!(match_repetition_163, r"(ab|a|c|bcd)+(d*)", r"ababcd", Some((0, 6)), Some((4, 5)), Some((5, 6)));
+
copy from third_party/rust/regex/tests/macros.rs
copy to third_party/rust/regex-0.2.2/tests/macros.rs
copy from third_party/rust/regex/tests/macros_bytes.rs
copy to third_party/rust/regex-0.2.2/tests/macros_bytes.rs
copy from third_party/rust/regex/tests/macros_str.rs
copy to third_party/rust/regex-0.2.2/tests/macros_str.rs
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-0.2.2/tests/misc.rs
@@ -0,0 +1,14 @@
+// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+mat!(prefix_literal_match, r"^abc", r"abc", Some((0, 3)));
+mat!(prefix_literal_nomatch, r"^abc", r"zabc", None);
+mat!(one_literal_edge, r"abc", r"xxxxxab", None);
+matiter!(terminates, r"a$", r"a", (0, 1));
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-0.2.2/tests/multiline.rs
@@ -0,0 +1,49 @@
+matiter!(match_multi_1, r"(?m)^[a-z]+$", "abc\ndef\nxyz",
+         (0, 3), (4, 7), (8, 11));
+matiter!(match_multi_2, r"(?m)^$", "abc\ndef\nxyz");
+matiter!(match_multi_3, r"(?m)^", "abc\ndef\nxyz",
+         (0, 0), (4, 4), (8, 8));
+matiter!(match_multi_4, r"(?m)$", "abc\ndef\nxyz",
+         (3, 3), (7, 7), (11, 11));
+matiter!(match_multi_5, r"(?m)^[a-z]", "abc\ndef\nxyz",
+         (0, 1), (4, 5), (8, 9));
+matiter!(match_multi_6, r"(?m)[a-z]^", "abc\ndef\nxyz");
+matiter!(match_multi_7, r"(?m)[a-z]$", "abc\ndef\nxyz",
+         (2, 3), (6, 7), (10, 11));
+matiter!(match_multi_8, r"(?m)$[a-z]", "abc\ndef\nxyz");
+matiter!(match_multi_9, r"(?m)^$", "", (0, 0));
+
+matiter!(match_multi_rep_1, r"(?m)(?:^$)*", "a\nb\nc",
+         (0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5));
+matiter!(match_multi_rep_2, r"(?m)(?:^|a)+", "a\naaa\n",
+         (0, 0), (2, 2), (3, 5), (6, 6));
+matiter!(match_multi_rep_3, r"(?m)(?:^|a)*", "a\naaa\n",
+         (0, 1), (2, 5), (6, 6));
+matiter!(match_multi_rep_4, r"(?m)(?:^[a-z])+", "abc\ndef\nxyz",
+         (0, 1), (4, 5), (8, 9));
+matiter!(match_multi_rep_5, r"(?m)(?:^[a-z]{3}\n?)+", "abc\ndef\nxyz",
+         (0, 11));
+matiter!(match_multi_rep_6, r"(?m)(?:^[a-z]{3}\n?)*", "abc\ndef\nxyz",
+         (0, 11));
+matiter!(match_multi_rep_7, r"(?m)(?:\n?[a-z]{3}$)+", "abc\ndef\nxyz",
+         (0, 11));
+matiter!(match_multi_rep_8, r"(?m)(?:\n?[a-z]{3}$)*", "abc\ndef\nxyz",
+         (0, 11));
+matiter!(match_multi_rep_9, r"(?m)^*", "\naa\n",
+         (0, 0), (1, 1), (2, 2), (3, 3), (4, 4));
+matiter!(match_multi_rep_10, r"(?m)^+", "\naa\n",
+         (0, 0), (1, 1), (4, 4));
+matiter!(match_multi_rep_11, r"(?m)$*", "\naa\n",
+         (0, 0), (1, 1), (2, 2), (3, 3), (4, 4));
+matiter!(match_multi_rep_12, r"(?m)$+", "\naa\n",
+         (0, 0), (3, 3), (4, 4));
+matiter!(match_multi_rep_13, r"(?m)(?:$\n)+", "\n\naaa\n\n",
+         (0, 2), (5, 7));
+matiter!(match_multi_rep_14, r"(?m)(?:$\n)*", "\n\naaa\n\n",
+         (0, 2), (3, 3), (4, 4), (5, 7));
+matiter!(match_multi_rep_15, r"(?m)(?:$\n^)+", "\n\naaa\n\n",
+         (0, 2), (5, 7));
+matiter!(match_multi_rep_16, r"(?m)(?:^|$)+", "\n\naaa\n\n",
+         (0, 0), (1, 1), (2, 2), (5, 5), (6, 6), (7, 7));
+matiter!(match_multi_rep_17, r"(?m)(?:$\n)*", "\n\naaa\n\n",
+         (0, 2), (3, 3), (4, 4), (5, 7));
copy from third_party/rust/regex/tests/noparse.rs
copy to third_party/rust/regex-0.2.2/tests/noparse.rs
rename from third_party/rust/regex/tests/plugin.rs
rename to third_party/rust/regex-0.2.2/tests/plugin.rs
copy from third_party/rust/regex/tests/regression.rs
copy to third_party/rust/regex-0.2.2/tests/regression.rs
copy from third_party/rust/regex/tests/replace.rs
copy to third_party/rust/regex-0.2.2/tests/replace.rs
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-0.2.2/tests/searcher.rs
@@ -0,0 +1,66 @@
+macro_rules! searcher {
+    ($name:ident, $re:expr, $haystack:expr) => (
+        searcher!($name, $re, $haystack, vec vec![]);
+    );
+    ($name:ident, $re:expr, $haystack:expr, $($steps:expr,)*) => (
+        searcher!($name, $re, $haystack, vec vec![$($steps),*]);
+    );
+    ($name:ident, $re:expr, $haystack:expr, $($steps:expr),*) => (
+        searcher!($name, $re, $haystack, vec vec![$($steps),*]);
+    );
+    ($name:ident, $re:expr, $haystack:expr, vec $expect_steps:expr) => (
+        #[test]
+        #[allow(unused_imports)]
+        fn $name() {
+            searcher_expr! {{
+                use std::str::pattern::{Pattern, Searcher};
+                use std::str::pattern::SearchStep::{Match, Reject, Done};
+                let re = regex!($re);
+                let mut se = re.into_searcher($haystack);
+                let mut got_steps = vec![];
+                loop {
+                    match se.next() {
+                        Done => break,
+                        step => { got_steps.push(step); }
+                    }
+                }
+                assert_eq!(got_steps, $expect_steps);
+            }}
+        }
+    );
+}
+
+searcher!(searcher_empty_regex_empty_haystack, r"", "", Match(0, 0));
+searcher!(searcher_empty_regex, r"", "ab",
+          Match(0, 0), Reject(0, 1), Match(1, 1), Reject(1, 2), Match(2, 2));
+searcher!(searcher_empty_haystack, r"\d", "");
+searcher!(searcher_one_match, r"\d", "5",
+          Match(0, 1));
+searcher!(searcher_no_match, r"\d", "a",
+          Reject(0, 1));
+searcher!(searcher_two_adjacent_matches, r"\d", "56",
+          Match(0, 1), Match(1, 2));
+searcher!(searcher_two_non_adjacent_matches, r"\d", "5a6",
+          Match(0, 1), Reject(1, 2), Match(2, 3));
+searcher!(searcher_reject_first, r"\d", "a6",
+          Reject(0, 1), Match(1, 2));
+searcher!(searcher_one_zero_length_matches, r"\d*", "a1b2",
+          Match(0, 0),  // ^
+          Reject(0, 1), // a
+          Match(1, 2),  // a1
+          Reject(2, 3), // a1b
+          Match(3, 4),  // a1b2
+);
+searcher!(searcher_many_zero_length_matches, r"\d*", "a1bbb2",
+          Match(0, 0),  // ^
+          Reject(0, 1), // a
+          Match(1, 2),  // a1
+          Reject(2, 3), // a1b
+          Match(3, 3),  // a1bb
+          Reject(3, 4), // a1bb
+          Match(4, 4),  // a1bbb
+          Reject(4, 5), // a1bbb
+          Match(5, 6),  // a1bbb2
+);
+searcher!(searcher_unicode, r".+?", "Ⅰ1Ⅱ2",
+          Match(0, 3), Match(3, 4), Match(4, 7), Match(7, 8));
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-0.2.2/tests/set.rs
@@ -0,0 +1,32 @@
+matset!(set1, &["a", "a"], "a", 0, 1);
+matset!(set2, &["a", "a"], "ba", 0, 1);
+matset!(set3, &["a", "b"], "a", 0);
+matset!(set4, &["a", "b"], "b", 1);
+matset!(set5, &["a|b", "b|a"], "b", 0, 1);
+matset!(set6, &["foo", "oo"], "foo", 0, 1);
+matset!(set7, &["^foo", "bar$"], "foo", 0);
+matset!(set8, &["^foo", "bar$"], "foo bar", 0, 1);
+matset!(set9, &["^foo", "bar$"], "bar", 1);
+matset!(set10, &[r"[a-z]+$", "foo"], "01234 foo", 0, 1);
+matset!(set11, &[r"[a-z]+$", "foo"], "foo 01234", 1);
+matset!(set12, &[r".*?", "a"], "zzzzzza", 0, 1);
+matset!(set13, &[r".*", "a"], "zzzzzza", 0, 1);
+matset!(set14, &[r".*", "a"], "zzzzzz", 0);
+matset!(set15, &[r"\ba\b"], "hello a bye", 0);
+matset!(set16, &["a"], "a", 0);
+matset!(set17, &[".*a"], "a", 0);
+matset!(set18, &["a", "β"], "β", 1);
+
+nomatset!(nset1, &["a", "a"], "b");
+nomatset!(nset2, &["^foo", "bar$"], "bar foo");
+nomatset!(nset3, { let xs: &[&str] = &[]; xs }, "a");
+nomatset!(nset4, &[r"^rooted$", r"\.log$"], "notrooted");
+
+// See: https://github.com/rust-lang/regex/issues/187
+#[test]
+fn regression_subsequent_matches() {
+    let set = regex_set!(&["ab", "b"]);
+    let text = text!("ba");
+    assert!(set.matches(text).matched(1));
+    assert!(set.matches(text).matched(1));
+}
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-0.2.2/tests/shortest_match.rs
@@ -0,0 +1,14 @@
+macro_rules! shortmat {
+    ($name:ident, $re:expr, $text:expr, $shortest_match:expr) => {
+        #[test]
+        fn $name() {
+            let text = text!($text);
+            let re = regex!($re);
+            assert_eq!($shortest_match, re.shortest_match(text));
+        }
+    }
+}
+
+shortmat!(t01, r"a+", r"aa", Some(1));
+// Test that the reverse suffix optimization gets it right.
+shortmat!(t02, r".*(?:abcd)+", r"abcdabcd", Some(4));
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-0.2.2/tests/suffix_reverse.rs
@@ -0,0 +1,16 @@
+// Copyright 2014 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+mat!(t01, r".*abcd", r"abcd", Some((0, 4)));
+mat!(t02, r".*(?:abcd)+", r"abcd", Some((0, 4)));
+mat!(t03, r".*(?:abcd)+", r"abcdabcd", Some((0, 8)));
+mat!(t04, r".*(?:abcd)+", r"abcdxabcd", Some((0, 9)));
+mat!(t05, r".*x(?:abcd)+", r"abcdxabcd", Some((0, 9)));
+mat!(t06, r"[^abcd]*x(?:abcd)+", r"abcdxabcd", Some((4, 9)));
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-0.2.2/tests/test_backtrack.rs
@@ -0,0 +1,64 @@
+// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#![cfg_attr(feature = "pattern", feature(pattern))]
+
+extern crate rand;
+extern crate regex;
+
+macro_rules! regex_new {
+    ($re:expr) => {{
+        use regex::internal::ExecBuilder;
+        ExecBuilder::new($re)
+            .bounded_backtracking().build().map(|e| e.into_regex())
+    }}
+}
+
+macro_rules! regex {
+    ($re:expr) => {
+        regex_new!($re).unwrap()
+    }
+}
+
+macro_rules! regex_set_new {
+    ($re:expr) => {{
+        use regex::internal::ExecBuilder;
+        ExecBuilder::new_many($re)
+            .bounded_backtracking()
+            .build()
+            .map(|e| e.into_regex_set())
+    }}
+}
+
+macro_rules! regex_set {
+    ($res:expr) => {
+        regex_set_new!($res).unwrap()
+    }
+}
+
+// Must come before other module definitions.
+include!("macros_str.rs");
+include!("macros.rs");
+
+mod api;
+mod api_str;
+mod crazy;
+mod flags;
+mod fowler;
+mod multiline;
+mod noparse;
+mod regression;
+mod replace;
+mod searcher;
+mod set;
+mod suffix_reverse;
+mod unicode;
+mod word_boundary;
+mod word_boundary_unicode;
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-0.2.2/tests/test_backtrack_bytes.rs
@@ -0,0 +1,65 @@
+// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+extern crate rand;
+extern crate regex;
+
+macro_rules! regex_new {
+    ($re:expr) => {{
+        use regex::internal::ExecBuilder;
+        ExecBuilder::new($re)
+            .bounded_backtracking()
+            .only_utf8(false)
+            .build()
+            .map(|e| e.into_byte_regex())
+    }}
+}
+
+macro_rules! regex {
+    ($re:expr) => {
+        regex_new!($re).unwrap()
+    }
+}
+
+macro_rules! regex_set_new {
+    ($re:expr) => {{
+        use regex::internal::ExecBuilder;
+        ExecBuilder::new_many($re)
+            .bounded_backtracking()
+            .only_utf8(false)
+            .build()
+            .map(|e| e.into_byte_regex_set())
+    }}
+}
+
+macro_rules! regex_set {
+    ($res:expr) => {
+        regex_set_new!($res).unwrap()
+    }
+}
+
+// Must come before other module definitions.
+include!("macros_bytes.rs");
+include!("macros.rs");
+
+mod api;
+mod bytes;
+mod crazy;
+mod flags;
+mod fowler;
+mod multiline;
+mod noparse;
+mod regression;
+mod replace;
+mod set;
+mod suffix_reverse;
+mod unicode;
+mod word_boundary;
+mod word_boundary_ascii;
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-0.2.2/tests/test_backtrack_utf8bytes.rs
@@ -0,0 +1,65 @@
+// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#![cfg_attr(feature = "pattern", feature(pattern))]
+
+extern crate rand;
+extern crate regex;
+
+macro_rules! regex_new {
+    ($re:expr) => {{
+        use regex::internal::ExecBuilder;
+        ExecBuilder::new($re)
+            .bounded_backtracking().bytes(true).build().map(|e| e.into_regex())
+    }}
+}
+
+macro_rules! regex {
+    ($re:expr) => {
+        regex_new!($re).unwrap()
+    }
+}
+
+macro_rules! regex_set_new {
+    ($re:expr) => {{
+        use regex::internal::ExecBuilder;
+        ExecBuilder::new_many($re)
+            .bounded_backtracking()
+            .bytes(true)
+            .build()
+            .map(|e| e.into_regex_set())
+    }}
+}
+
+macro_rules! regex_set {
+    ($res:expr) => {
+        regex_set_new!($res).unwrap()
+    }
+}
+
+// Must come before other module definitions.
+include!("macros_str.rs");
+include!("macros.rs");
+
+mod api;
+mod api_str;
+mod crazy;
+mod flags;
+mod fowler;
+mod multiline;
+mod noparse;
+mod regression;
+mod replace;
+mod searcher;
+mod set;
+mod suffix_reverse;
+mod unicode;
+mod word_boundary;
+mod word_boundary_unicode;
copy from third_party/rust/regex/tests/test_default.rs
copy to third_party/rust/regex-0.2.2/tests/test_default.rs
copy from third_party/rust/regex/tests/test_default_bytes.rs
copy to third_party/rust/regex-0.2.2/tests/test_default_bytes.rs
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-0.2.2/tests/test_nfa.rs
@@ -0,0 +1,60 @@
+// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#![cfg_attr(feature = "pattern", feature(pattern))]
+
+extern crate rand;
+extern crate regex;
+
+macro_rules! regex_new {
+    ($re:expr) => {{
+        use regex::internal::ExecBuilder;
+        ExecBuilder::new($re).nfa().build().map(|e| e.into_regex())
+    }}
+}
+
+macro_rules! regex {
+    ($re:expr) => {
+        regex_new!($re).unwrap()
+    }
+}
+
+macro_rules! regex_set_new {
+    ($re:expr) => {{
+        use regex::internal::ExecBuilder;
+        ExecBuilder::new_many($re).nfa().build().map(|e| e.into_regex_set())
+    }}
+}
+
+macro_rules! regex_set {
+    ($res:expr) => {
+        regex_set_new!($res).unwrap()
+    }
+}
+
+// Must come before other module definitions.
+include!("macros_str.rs");
+include!("macros.rs");
+
+mod api;
+mod api_str;
+mod crazy;
+mod flags;
+mod fowler;
+mod multiline;
+mod noparse;
+mod regression;
+mod replace;
+mod searcher;
+mod set;
+mod suffix_reverse;
+mod unicode;
+mod word_boundary;
+mod word_boundary_unicode;
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-0.2.2/tests/test_nfa_bytes.rs
@@ -0,0 +1,65 @@
+// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+extern crate rand;
+extern crate regex;
+
+macro_rules! regex_new {
+    ($re:expr) => {{
+        use regex::internal::ExecBuilder;
+        ExecBuilder::new($re)
+            .nfa()
+            .only_utf8(false)
+            .build()
+            .map(|e| e.into_byte_regex())
+    }}
+}
+
+macro_rules! regex {
+    ($re:expr) => {
+        regex_new!($re).unwrap()
+    }
+}
+
+macro_rules! regex_set_new {
+    ($re:expr) => {{
+        use regex::internal::ExecBuilder;
+        ExecBuilder::new_many($re)
+            .nfa()
+            .only_utf8(false)
+            .build()
+            .map(|e| e.into_byte_regex_set())
+    }}
+}
+
+macro_rules! regex_set {
+    ($res:expr) => {
+        regex_set_new!($res).unwrap()
+    }
+}
+
+// Must come before other module definitions.
+include!("macros_bytes.rs");
+include!("macros.rs");
+
+mod api;
+mod bytes;
+mod crazy;
+mod flags;
+mod fowler;
+mod multiline;
+mod noparse;
+mod regression;
+mod replace;
+mod set;
+mod suffix_reverse;
+mod unicode;
+mod word_boundary;
+mod word_boundary_ascii;
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-0.2.2/tests/test_nfa_utf8bytes.rs
@@ -0,0 +1,61 @@
+// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#![cfg_attr(feature = "pattern", feature(pattern))]
+
+extern crate rand;
+extern crate regex;
+
+macro_rules! regex_new {
+    ($re:expr) => {{
+        use regex::internal::ExecBuilder;
+        ExecBuilder::new($re).nfa().bytes(true).build().map(|e| e.into_regex())
+    }}
+}
+
+macro_rules! regex {
+    ($re:expr) => {
+        regex_new!($re).unwrap()
+    }
+}
+
+macro_rules! regex_set_new {
+    ($re:expr) => {{
+        use regex::internal::ExecBuilder;
+        ExecBuilder::new_many($re)
+            .nfa().bytes(true).build().map(|e| e.into_regex_set())
+    }}
+}
+
+macro_rules! regex_set {
+    ($res:expr) => {
+        regex_set_new!($res).unwrap()
+    }
+}
+
+// Must come before other module definitions.
+include!("macros_str.rs");
+include!("macros.rs");
+
+mod api;
+mod api_str;
+mod crazy;
+mod flags;
+mod fowler;
+mod multiline;
+mod noparse;
+mod regression;
+mod replace;
+mod searcher;
+mod set;
+mod suffix_reverse;
+mod unicode;
+mod word_boundary;
+mod word_boundary_unicode;
rename from third_party/rust/regex/tests/test_plugin.rs
rename to third_party/rust/regex-0.2.2/tests/test_plugin.rs
copy from third_party/rust/regex/tests/unicode.rs
copy to third_party/rust/regex-0.2.2/tests/unicode.rs
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-0.2.2/tests/word_boundary.rs
@@ -0,0 +1,89 @@
+// Many of these are cribbed from RE2's test suite.
+
+matiter!(wb1, r"\b", "");
+matiter!(wb2, r"\b", "a", (0, 0), (1, 1));
+matiter!(wb3, r"\b", "ab", (0, 0), (2, 2));
+matiter!(wb4, r"^\b", "ab", (0, 0));
+matiter!(wb5, r"\b$", "ab", (2, 2));
+matiter!(wb6, r"^\b$", "ab");
+matiter!(wb7, r"\bbar\b", "nobar bar foo bar", (6, 9), (14, 17));
+matiter!(wb8, r"a\b", "faoa x", (3, 4));
+matiter!(wb9, r"\bbar", "bar x", (0, 3));
+matiter!(wb10, r"\bbar", "foo\nbar x", (4, 7));
+matiter!(wb11, r"bar\b", "foobar", (3, 6));
+matiter!(wb12, r"bar\b", "foobar\nxxx", (3, 6));
+matiter!(wb13, r"(foo|bar|[A-Z])\b", "foo", (0, 3));
+matiter!(wb14, r"(foo|bar|[A-Z])\b", "foo\n", (0, 3));
+matiter!(wb15, r"\b(foo|bar|[A-Z])", "foo", (0, 3));
+matiter!(wb16, r"\b(foo|bar|[A-Z])\b", "X", (0, 1));
+matiter!(wb17, r"\b(foo|bar|[A-Z])\b", "XY");
+matiter!(wb18, r"\b(foo|bar|[A-Z])\b", "bar", (0, 3));
+matiter!(wb19, r"\b(foo|bar|[A-Z])\b", "foo", (0, 3));
+matiter!(wb20, r"\b(foo|bar|[A-Z])\b", "foo\n", (0, 3));
+matiter!(wb21, r"\b(foo|bar|[A-Z])\b", "ffoo bbar N x", (10, 11));
+matiter!(wb22, r"\b(fo|foo)\b", "fo", (0, 2));
+matiter!(wb23, r"\b(fo|foo)\b", "foo", (0, 3));
+matiter!(wb24, r"\b\b", "");
+matiter!(wb25, r"\b\b", "a", (0, 0), (1, 1));
+matiter!(wb26, r"\b$", "");
+matiter!(wb27, r"\b$", "x", (1, 1));
+matiter!(wb28, r"\b$", "y x", (3, 3));
+matiter!(wb29, r"\b.$", "x", (0, 1));
+matiter!(wb30, r"^\b(fo|foo)\b", "fo", (0, 2));
+matiter!(wb31, r"^\b(fo|foo)\b", "foo", (0, 3));
+matiter!(wb32, r"^\b$", "");
+matiter!(wb33, r"^\b$", "x");
+matiter!(wb34, r"^\b.$", "x", (0, 1));
+matiter!(wb35, r"^\b.\b$", "x", (0, 1));
+matiter!(wb36, r"^^^^^\b$$$$$", "");
+matiter!(wb37, r"^^^^^\b.$$$$$", "x", (0, 1));
+matiter!(wb38, r"^^^^^\b$$$$$", "x");
+matiter!(wb39, r"^^^^^\b\b\b.\b\b\b$$$$$", "x", (0, 1));
+matiter!(wb40, r"\b.+\b", "$$abc$$", (2, 5));
+matiter!(wb41, r"\b", "a b c", (0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5));
+
+matiter!(nb1, r"\Bfoo\B", "n foo xfoox that", (7, 10));
+matiter!(nb2, r"a\B", "faoa x", (1, 2));
+matiter!(nb3, r"\Bbar", "bar x");
+matiter!(nb4, r"\Bbar", "foo\nbar x");
+matiter!(nb5, r"bar\B", "foobar");
+matiter!(nb6, r"bar\B", "foobar\nxxx");
+matiter!(nb7, r"(foo|bar|[A-Z])\B", "foox", (0, 3));
+matiter!(nb8, r"(foo|bar|[A-Z])\B", "foo\n");
+matiter!(nb9, r"\B", "", (0, 0));
+matiter!(nb10, r"\B", "x");
+matiter!(nb11, r"\B(foo|bar|[A-Z])", "foo");
+matiter!(nb12, r"\B(foo|bar|[A-Z])\B", "xXy", (1, 2));
+matiter!(nb13, r"\B(foo|bar|[A-Z])\B", "XY");
+matiter!(nb14, r"\B(foo|bar|[A-Z])\B", "XYZ", (1, 2));
+matiter!(nb15, r"\B(foo|bar|[A-Z])\B", "abara", (1, 4));
+matiter!(nb16, r"\B(foo|bar|[A-Z])\B", "xfoo_", (1, 4));
+matiter!(nb17, r"\B(foo|bar|[A-Z])\B", "xfoo\n");
+matiter!(nb18, r"\B(foo|bar|[A-Z])\B", "foo bar vNX", (9, 10));
+matiter!(nb19, r"\B(fo|foo)\B", "xfoo", (1, 3));
+matiter!(nb20, r"\B(foo|fo)\B", "xfooo", (1, 4));
+matiter!(nb21, r"\B\B", "", (0, 0));
+matiter!(nb22, r"\B\B", "x");
+matiter!(nb23, r"\B$", "", (0, 0));
+matiter!(nb24, r"\B$", "x");
+matiter!(nb25, r"\B$", "y x");
+matiter!(nb26, r"\B.$", "x");
+matiter!(nb27, r"^\B(fo|foo)\B", "fo");
+matiter!(nb28, r"^\B(fo|foo)\B", "foo");
+matiter!(nb29, r"^\B", "", (0, 0));
+matiter!(nb30, r"^\B", "x");
+matiter!(nb31, r"^\B\B", "", (0, 0));
+matiter!(nb32, r"^\B\B", "x");
+matiter!(nb33, r"^\B$", "", (0, 0));
+matiter!(nb34, r"^\B$", "x");
+matiter!(nb35, r"^\B.$", "x");
+matiter!(nb36, r"^\B.\B$", "x");
+matiter!(nb37, r"^^^^^\B$$$$$", "", (0, 0));
+matiter!(nb38, r"^^^^^\B.$$$$$", "x");
+matiter!(nb39, r"^^^^^\B$$$$$", "x");
+
+// These work for both Unicode and ASCII because all matches are reported as
+// byte offsets, and « and » do not correspond to word boundaries at either
+// the character or byte level.
+matiter!(unicode1, r"\bx\b", "«x", (2, 3));
+matiter!(unicode2, r"\bx\b", "x»", (0, 1));
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-0.2.2/tests/word_boundary_ascii.rs
@@ -0,0 +1,9 @@
+// ASCII word boundaries are completely oblivious to Unicode characters.
+// For Unicode word boundaries, the tests are precisely inverted.
+matiter!(ascii1, r"(?-u:\b)x(?-u:\b)", "áxβ", (2, 3));
+matiter!(ascii2, r"(?-u:\B)x(?-u:\B)", "áxβ");
+matiter!(ascii3, r"(?-u:\B)", "0\u{7EF5E}", (2, 2), (3, 3), (4, 4), (5, 5));
+
+// We still get Unicode word boundaries by default in byte regexes.
+matiter!(unicode1, r"\bx\b", "áxβ");
+matiter!(unicode2, r"\Bx\B", "áxβ", (2, 3));
copy from third_party/rust/regex/tests/word_boundary_unicode.rs
copy to third_party/rust/regex-0.2.2/tests/word_boundary_unicode.rs
copy from third_party/rust/regex-syntax/.cargo-checksum.json
copy to third_party/rust/regex-syntax-0.4.1/.cargo-checksum.json
copy from third_party/rust/regex-syntax/Cargo.toml
copy to third_party/rust/regex-syntax-0.4.1/Cargo.toml
copy from third_party/rust/regex-syntax/src/lib.rs
copy to third_party/rust/regex-syntax-0.4.1/src/lib.rs
rename from third_party/rust/regex-syntax/src/literals.rs
rename to third_party/rust/regex-syntax-0.4.1/src/literals.rs
copy from third_party/rust/regex-syntax/src/parser.rs
copy to third_party/rust/regex-syntax-0.4.1/src/parser.rs
rename from third_party/rust/regex-syntax/src/properties.rs
rename to third_party/rust/regex-syntax-0.4.1/src/properties.rs
copy from third_party/rust/regex-syntax/src/unicode.rs
copy to third_party/rust/regex-syntax-0.4.1/src/unicode.rs
--- a/third_party/rust/regex-syntax/.cargo-checksum.json
+++ b/third_party/rust/regex-syntax/.cargo-checksum.json
@@ -1,1 +1,1 @@
-{"files":{"Cargo.toml":"264a95f5e516e60aecbe9f8a5f1c551bf71debb7249cf4bc3c102d68b9bb0dde","src/lib.rs":"e8eb3529e5bf62913439605d478b7385e43e5ef8ebb6c4800630a16f8ab3cb26","src/literals.rs":"4e7d4afa303ffe241bd17621728a1e0c018386f4f77b4d8f0f4403f74650bc6a","src/parser.rs":"975f570f86914c8bc8a132f5ced11484fa912ed0f7071f2bd2764600b611ec46","src/properties.rs":"ba19030e905d133039eb97b167dc82b4a9f78dd88c3b79b381ec7e5fbc6f6a8c","src/unicode.rs":"5bbab13485910b55a56e8f8437786e820f409902e96d07ba2018c28cecb033fc"},"package":"ad890a5eef7953f55427c50575c680c42841653abd2b028b68cd223d157f62db"}
\ No newline at end of file
+{"files":{"Cargo.toml":"41308d53904dc3a51c211e1d0263cae8b656f66f4e18c760b74719b2f0ec17b6","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"6485b8ed310d3f0340bf1ad1f47645069ce4069dcc6bb46c7d5c6faf41de1fdb","benches/bench.rs":"4d09c75f457459f850877ee334a924f331c701a22fa19b1f7942c3504c8981f4","src/ast/mod.rs":"31825eaa0fdc0ae9939cf118f07c9a3ce7c0485065e0646df353a2d310ae5cd8","src/ast/parse.rs":"d1b5a837df7f7b09e5f6b70350407eeef56dfa5e6d27519fd903c07d16968fd1","src/ast/print.rs":"c7281fdec95b796c4e32393d3f750ab58cb5bea1dd77e87d4284c0ea0a304f1e","src/ast/visitor.rs":"493be4ba270f8cfeffe096b981947a12a5992542c1df1ed470e015f1c41ed081","src/either.rs":"1758e3edd056884eccadd995708d1e374ba9aa65846bd0e13b1aae852607c560","src/error.rs":"ae638c6e6adcc7843dec98eb52b2dda2e5085a7c029a8d7f005ef40d35557617","src/hir/interval.rs":"3c2599c9c07149afb0770eca720ab66ed9599402ee58b34bf6709eac4c895239","src/hir/literal/mod.rs":"4fd26946232d056342a5052b4c84a28b6c0b956f57b4a4ff8fe64eb1b710aae5","src/hir/mod.rs":"07bdd0dc1c7c60194ab8a3242cb2fe58d5b51ba36d303d495dcd394726ea469e","src/hir/print.rs":"ac00ea33d17353ee418cfbc604a213683499870acf2db6de85c734aeb30a0550","src/hir/translate.rs":"035b9c55924a7813b1f61ae8c36e03624343b6eb5f5055909518ddeaa7a6dd75","src/hir/visitor.rs":"8e15e646c8c219fd1f4620fc3611dfbd0d63d598bc5c8bcbfb0106053134613c","src/lib.rs":"c01d0c7224a32a11f7a63d0840c9dfa5484b90632f80a801eac40d2904f19660","src/parser.rs":"0c5c5d539cf3b770b83b9097948226aa7c12c8b655864cb8e2f91831b9a44528","src/unicode.rs":"4eab97f8aad32dc0c671fc4c324c3310ff94e230d13f23819e5722fdefd60d6d","src/unicode_tables/age.rs":"29519a86a3fc9dc600e3b4d102e8708b45917e89c8f8418706c8f451781f795a","src/unicode_tables/case_folding_simple.rs":"426a08a04bcd250ef2825bd718245ef5e7d5ba0500b436ab86bff84507293955","src/unicode_tables/general_category.rs":"bc9781e1cbaafdaa14ad54942a19d75d6e7cb9d4cc39dce076f898f1da865fcd","src/unicode_tables/mod.rs":"c395afbe606c4a9ef8656cd315b1ce2d5c1729fd60c99da8c6d661c49c47adf4","src/unicode_tables/perl_word.rs":"e2accaa8ed515374c1b32e8b854a57b3e20b9fc8b9e293d972913de3cc4d4234","src/unicode_tables/property_bool.rs":"7babe7f5737f6418709072f68aa7cc5c5e807cee14c05b30bf735efb6b9fb36a","src/unicode_tables/property_names.rs":"1427ee7831f2ccd6504714474408907a5ae1c20d7f85c4e0733b72ef56b4e867","src/unicode_tables/property_values.rs":"596f6f6c4fba90552357dfa133d16a0055eef45da9b50121d91035dd0a6404c6","src/unicode_tables/script.rs":"013b630faa01389e096bd3428186206c616dd9a9591cb94e78cc16c19354e2c6","src/unicode_tables/script_extension.rs":"d43169d6351a884a0dbe67255a26007dcb01966994444e33ab24083dcf31f30c"},"package":"8f1ac0f60d675cc6cf13a20ec076568254472551051ad5dd050364d70671bf6b"}
\ No newline at end of file
--- a/third_party/rust/regex-syntax/Cargo.toml
+++ b/third_party/rust/regex-syntax/Cargo.toml
@@ -1,13 +1,23 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g. crates.io) dependencies
+#
+# If you believe there's an error in this file please file an
+# issue against the rust-lang/cargo repository. If you're
+# editing this file be aware that the upstream Cargo.toml
+# will likely look very different (and much more reasonable)
+
 [package]
 name = "regex-syntax"
-version = "0.4.1"  #:version
+version = "0.6.0"
 authors = ["The Rust Project Developers"]
+description = "A regular expression parser."
+homepage = "https://github.com/rust-lang/regex"
+documentation = "https://docs.rs/regex-syntax"
 license = "MIT/Apache-2.0"
 repository = "https://github.com/rust-lang/regex"
-documentation = "http://doc.rust-lang.org/regex/regex_syntax/index.html"
-homepage = "https://github.com/rust-lang/regex"
-description = "A regular expression parser."
-
-[dev-dependencies]
-quickcheck = { version = "0.4.1", default-features = false }
-rand = "0.3.15"
+[dependencies.ucd-util]
+version = "0.1.0"
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-syntax/LICENSE-APACHE
@@ -0,0 +1,201 @@
+                              Apache License
+                        Version 2.0, January 2004
+                     http://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+   "License" shall mean the terms and conditions for use, reproduction,
+   and distribution as defined by Sections 1 through 9 of this document.
+
+   "Licensor" shall mean the copyright owner or entity authorized by
+   the copyright owner that is granting the License.
+
+   "Legal Entity" shall mean the union of the acting entity and all
+   other entities that control, are controlled by, or are under common
+   control with that entity. For the purposes of this definition,
+   "control" means (i) the power, direct or indirect, to cause the
+   direction or management of such entity, whether by contract or
+   otherwise, or (ii) ownership of fifty percent (50%) or more of the
+   outstanding shares, or (iii) beneficial ownership of such entity.
+
+   "You" (or "Your") shall mean an individual or Legal Entity
+   exercising permissions granted by this License.
+
+   "Source" form shall mean the preferred form for making modifications,
+   including but not limited to software source code, documentation
+   source, and configuration files.
+
+   "Object" form shall mean any form resulting from mechanical
+   transformation or translation of a Source form, including but
+   not limited to compiled object code, generated documentation,
+   and conversions to other media types.
+
+   "Work" shall mean the work of authorship, whether in Source or
+   Object form, made available under the License, as indicated by a
+   copyright notice that is included in or attached to the work
+   (an example is provided in the Appendix below).
+
+   "Derivative Works" shall mean any work, whether in Source or Object
+   form, that is based on (or derived from) the Work and for which the
+   editorial revisions, annotations, elaborations, or other modifications
+   represent, as a whole, an original work of authorship. For the purposes
+   of this License, Derivative Works shall not include works that remain
+   separable from, or merely link (or bind by name) to the interfaces of,
+   the Work and Derivative Works thereof.
+
+   "Contribution" shall mean any work of authorship, including
+   the original version of the Work and any modifications or additions
+   to that Work or Derivative Works thereof, that is intentionally
+   submitted to Licensor for inclusion in the Work by the copyright owner
+   or by an individual or Legal Entity authorized to submit on behalf of
+   the copyright owner. For the purposes of this definition, "submitted"
+   means any form of electronic, verbal, or written communication sent
+   to the Licensor or its representatives, including but not limited to
+   communication on electronic mailing lists, source code control systems,
+   and issue tracking systems that are managed by, or on behalf of, the
+   Licensor for the purpose of discussing and improving the Work, but
+   excluding communication that is conspicuously marked or otherwise
+   designated in writing by the copyright owner as "Not a Contribution."
+
+   "Contributor" shall mean Licensor and any individual or Legal Entity
+   on behalf of whom a Contribution has been received by Licensor and
+   subsequently incorporated within the Work.
+
+2. Grant of Copyright License. Subject to the terms and conditions of
+   this License, each Contributor hereby grants to You a perpetual,
+   worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+   copyright license to reproduce, prepare Derivative Works of,
+   publicly display, publicly perform, sublicense, and distribute the
+   Work and such Derivative Works in Source or Object form.
+
+3. Grant of Patent License. Subject to the terms and conditions of
+   this License, each Contributor hereby grants to You a perpetual,
+   worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+   (except as stated in this section) patent license to make, have made,
+   use, offer to sell, sell, import, and otherwise transfer the Work,
+   where such license applies only to those patent claims licensable
+   by such Contributor that are necessarily infringed by their
+   Contribution(s) alone or by combination of their Contribution(s)
+   with the Work to which such Contribution(s) was submitted. If You
+   institute patent litigation against any entity (including a
+   cross-claim or counterclaim in a lawsuit) alleging that the Work
+   or a Contribution incorporated within the Work constitutes direct
+   or contributory patent infringement, then any patent licenses
+   granted to You under this License for that Work shall terminate
+   as of the date such litigation is filed.
+
+4. Redistribution. You may reproduce and distribute copies of the
+   Work or Derivative Works thereof in any medium, with or without
+   modifications, and in Source or Object form, provided that You
+   meet the following conditions:
+
+   (a) You must give any other recipients of the Work or
+       Derivative Works a copy of this License; and
+
+   (b) You must cause any modified files to carry prominent notices
+       stating that You changed the files; and
+
+   (c) You must retain, in the Source form of any Derivative Works
+       that You distribute, all copyright, patent, trademark, and
+       attribution notices from the Source form of the Work,
+       excluding those notices that do not pertain to any part of
+       the Derivative Works; and
+
+   (d) If the Work includes a "NOTICE" text file as part of its
+       distribution, then any Derivative Works that You distribute must
+       include a readable copy of the attribution notices contained
+       within such NOTICE file, excluding those notices that do not
+       pertain to any part of the Derivative Works, in at least one
+       of the following places: within a NOTICE text file distributed
+       as part of the Derivative Works; within the Source form or
+       documentation, if provided along with the Derivative Works; or,
+       within a display generated by the Derivative Works, if and
+       wherever such third-party notices normally appear. The contents
+       of the NOTICE file are for informational purposes only and
+       do not modify the License. You may add Your own attribution
+       notices within Derivative Works that You distribute, alongside
+       or as an addendum to the NOTICE text from the Work, provided
+       that such additional attribution notices cannot be construed
+       as modifying the License.
+
+   You may add Your own copyright statement to Your modifications and
+   may provide additional or different license terms and conditions
+   for use, reproduction, or distribution of Your modifications, or
+   for any such Derivative Works as a whole, provided Your use,
+   reproduction, and distribution of the Work otherwise complies with
+   the conditions stated in this License.
+
+5. Submission of Contributions. Unless You explicitly state otherwise,
+   any Contribution intentionally submitted for inclusion in the Work
+   by You to the Licensor shall be under the terms and conditions of
+   this License, without any additional terms or conditions.
+   Notwithstanding the above, nothing herein shall supersede or modify
+   the terms of any separate license agreement you may have executed
+   with Licensor regarding such Contributions.
+
+6. Trademarks. This License does not grant permission to use the trade
+   names, trademarks, service marks, or product names of the Licensor,
+   except as required for reasonable and customary use in describing the
+   origin of the Work and reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty. Unless required by applicable law or
+   agreed to in writing, Licensor provides the Work (and each
+   Contributor provides its Contributions) on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+   implied, including, without limitation, any warranties or conditions
+   of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+   PARTICULAR PURPOSE. You are solely responsible for determining the
+   appropriateness of using or redistributing the Work and assume any
+   risks associated with Your exercise of permissions under this License.
+
+8. Limitation of Liability. In no event and under no legal theory,
+   whether in tort (including negligence), contract, or otherwise,
+   unless required by applicable law (such as deliberate and grossly
+   negligent acts) or agreed to in writing, shall any Contributor be
+   liable to You for damages, including any direct, indirect, special,
+   incidental, or consequential damages of any character arising as a
+   result of this License or out of the use or inability to use the
+   Work (including but not limited to damages for loss of goodwill,
+   work stoppage, computer failure or malfunction, or any and all
+   other commercial damages or losses), even if such Contributor
+   has been advised of the possibility of such damages.
+
+9. Accepting Warranty or Additional Liability. While redistributing
+   the Work or Derivative Works thereof, You may choose to offer,
+   and charge a fee for, acceptance of support, warranty, indemnity,
+   or other liability obligations and/or rights consistent with this
+   License. However, in accepting such obligations, You may act only
+   on Your own behalf and on Your sole responsibility, not on behalf
+   of any other Contributor, and only if You agree to indemnify,
+   defend, and hold each Contributor harmless for any liability
+   incurred by, or claims asserted against, such Contributor by reason
+   of your accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
+
+APPENDIX: How to apply the Apache License to your work.
+
+   To apply the Apache License to your work, attach the following
+   boilerplate notice, with the fields enclosed by brackets "[]"
+   replaced with your own identifying information. (Don't include
+   the brackets!)  The text should be enclosed in the appropriate
+   comment syntax for the file format. We also recommend that a
+   file or class name and description of purpose be included on the
+   same "printed page" as the copyright notice for easier
+   identification within third-party archives.
+
+Copyright [yyyy] [name of copyright owner]
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+	http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-syntax/LICENSE-MIT
@@ -0,0 +1,25 @@
+Copyright (c) 2014 The Rust Project Developers
+
+Permission is hereby granted, free of charge, to any
+person obtaining a copy of this software and associated
+documentation files (the "Software"), to deal in the
+Software without restriction, including without
+limitation the rights to use, copy, modify, merge,
+publish, distribute, sublicense, and/or sell copies of
+the Software, and to permit persons to whom the Software
+is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice
+shall be included in all copies or substantial portions
+of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
+ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
+TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
+PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
+SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
+IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-syntax/benches/bench.rs
@@ -0,0 +1,73 @@
+// Copyright 2018 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#![feature(test)]
+
+extern crate regex_syntax;
+extern crate test;
+
+use regex_syntax::Parser;
+use test::Bencher;
+
+#[bench]
+fn parse_simple1(b: &mut Bencher) {
+    b.iter(|| {
+        let re = r"^bc(d|e)*$";
+        Parser::new().parse(re).unwrap()
+    });
+}
+
+#[bench]
+fn parse_simple2(b: &mut Bencher) {
+    b.iter(|| {
+        let re = r"'[a-zA-Z_][a-zA-Z0-9_]*(')\b";
+        Parser::new().parse(re).unwrap()
+    });
+}
+
+#[bench]
+fn parse_small1(b: &mut Bencher) {
+    b.iter(|| {
+        let re = r"\p{L}|\p{N}|\s|.|\d";
+        Parser::new().parse(re).unwrap()
+    });
+}
+
+#[bench]
+fn parse_medium1(b: &mut Bencher) {
+    b.iter(|| {
+        let re = r"\pL\p{Greek}\p{Hiragana}\p{Alphabetic}\p{Hebrew}\p{Arabic}";
+        Parser::new().parse(re).unwrap()
+    });
+}
+
+#[bench]
+fn parse_medium2(b: &mut Bencher) {
+    b.iter(|| {
+        let re = r"\s\S\w\W\d\D";
+        Parser::new().parse(re).unwrap()
+    });
+}
+
+#[bench]
+fn parse_medium3(b: &mut Bencher) {
+    b.iter(|| {
+        let re = r"\p{age:3.2}\p{hira}\p{scx:hira}\p{alphabetic}\p{sc:Greek}\pL";
+        Parser::new().parse(re).unwrap()
+    });
+}
+
+#[bench]
+fn parse_huge(b: &mut Bencher) {
+    b.iter(|| {
+        let re = r"\p{L}{100}";
+        Parser::new().parse(re).unwrap()
+    });
+}
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-syntax/src/ast/mod.rs
@@ -0,0 +1,1515 @@
+// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+/*!
+Defines an abstract syntax for regular expressions.
+*/
+
+use std::cmp::Ordering;
+use std::error;
+use std::fmt;
+
+pub use ast::visitor::{Visitor, visit};
+
+pub mod parse;
+pub mod print;
+mod visitor;
+
+/// An error that occurred while parsing a regular expression into an abstract
+/// syntax tree.
+///
+/// Note that not all ASTs represent a valid regular expression. For example,
+/// an AST is constructed without error for `\p{Quux}`, but `Quux` is not a
+/// valid Unicode property name. That particular error is reported when
+/// translating an AST to the high-level intermediate representation (`HIR`).
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Error {
+    /// The kind of error.
+    kind: ErrorKind,
+    /// The original pattern that the parser generated the error from. Every
+    /// span in an error is a valid range into this string.
+    pattern: String,
+    /// The span of this error.
+    span: Span,
+}
+
+impl Error {
+    /// Return the type of this error.
+    pub fn kind(&self) -> &ErrorKind {
+        &self.kind
+    }
+
+    /// The original pattern string in which this error occurred.
+    ///
+    /// Every span reported by this error is reported in terms of this string.
+    pub fn pattern(&self) -> &str {
+        &self.pattern
+    }
+
+    /// Return the span at which this error occurred.
+    pub fn span(&self) -> &Span {
+        &self.span
+    }
+
+    /// Return an auxiliary span. This span exists only for some errors that
+    /// benefit from being able to point to two locations in the original
+    /// regular expression. For example, "duplicate" errors will have the
+    /// main error position set to the duplicate occurrence, while the
+    /// auxiliary span points to the initial occurrence.
+    pub fn auxiliary_span(&self) -> Option<&Span> {
+        use self::ErrorKind::*;
+        match self.kind {
+            FlagDuplicate { ref original } => Some(original),
+            FlagRepeatedNegation { ref original, .. } => Some(original),
+            GroupNameDuplicate { ref original, .. } => Some(original),
+            _ => None,
+        }
+    }
+}
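+
+// NOTE (editorial sketch, not part of the upstream regex-syntax source): a
+// minimal illustration of `auxiliary_span`, assuming the parser reports a
+// `FlagDuplicate` error for the pattern `(?ii)`:
+//
+//     use regex_syntax::ast::parse::Parser;
+//
+//     let err = Parser::new().parse("(?ii)").unwrap_err();
+//     // The main span points at the duplicate flag...
+//     let dup = err.span();
+//     // ...and the auxiliary span points back at the first occurrence.
+//     let orig = err.auxiliary_span().unwrap();
+//     assert!(orig.start.offset < dup.start.offset);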
+
+/// The type of an error that occurred while building an AST.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum ErrorKind {
+    /// The capturing group limit was exceeded.
+    ///
+    /// Note that this represents a limit on the total number of capturing
+    /// groups in a regex and not necessarily the number of nested capturing
+    /// groups. That is, the nest limit can be low and it is still possible for
+    /// this error to occur.
+    CaptureLimitExceeded,
+    /// An invalid escape sequence was found in a character class set.
+    ClassEscapeInvalid,
+    /// An invalid character class range was found. An invalid range is any
+    /// range where the start is greater than the end.
+    ClassRangeInvalid,
+    /// An invalid range boundary was found in a character class. Range
+    /// boundaries must be single literal codepoints, but this error indicates
+    /// that something else was found, such as a nested class.
+    ClassRangeLiteral,
+    /// An opening `[` was found with no corresponding closing `]`.
+    ClassUnclosed,
+    /// An empty decimal number was given where one was expected.
+    DecimalEmpty,
+    /// An invalid decimal number was given where one was expected.
+    DecimalInvalid,
+    /// A bracketed hex literal was empty.
+    EscapeHexEmpty,
+    /// A bracketed hex literal did not correspond to a Unicode scalar value.
+    EscapeHexInvalid,
+    /// An invalid hexadecimal digit was found.
+    EscapeHexInvalidDigit,
+    /// EOF was found before an escape sequence was completed.
+    EscapeUnexpectedEof,
+    /// An unrecognized escape sequence.
+    EscapeUnrecognized,
+    /// A dangling negation was used when setting flags, e.g., `i-`.
+    FlagDanglingNegation,
+    /// A flag was used twice, e.g., `i-i`.
+    FlagDuplicate {
+        /// The position of the original flag. The error position
+        /// points to the duplicate flag.
+        original: Span,
+    },
+    /// The negation operator was used twice, e.g., `-i-s`.
+    FlagRepeatedNegation {
+        /// The position of the original negation operator. The error position
+        /// points to the duplicate negation operator.
+        original: Span,
+    },
+    /// Expected a flag but got EOF, e.g., `(?`.
+    FlagUnexpectedEof,
+    /// Unrecognized flag, e.g., `a`.
+    FlagUnrecognized,
+    /// A duplicate capture name was found.
+    GroupNameDuplicate {
+        /// The position of the initial occurrence of the capture name. The
+        /// error position itself points to the duplicate occurrence.
+        original: Span,
+    },
+    /// A capture group name is empty, e.g., `(?P<>abc)`.
+    GroupNameEmpty,
+    /// An invalid character was seen for a capture group name. This includes
+    /// errors where the first character is a digit (even though subsequent
+    /// characters are allowed to be digits).
+    GroupNameInvalid,
+    /// A closing `>` could not be found for a capture group name.
+    GroupNameUnexpectedEof,
+    /// An unclosed group, e.g., `(ab`.
+    ///
+    /// The span of this error corresponds to the unclosed parenthesis.
+    GroupUnclosed,
+    /// An unopened group, e.g., `ab)`.
+    GroupUnopened,
+    /// The nest limit was exceeded. The limit stored here is the limit
+    /// configured in the parser.
+    NestLimitExceeded(u32),
+    /// The range provided in a counted repetition operator is invalid. The
+    /// range is invalid if the start is greater than the end.
+    RepetitionCountInvalid,
+    /// An opening `{` was found with no corresponding closing `}`.
+    RepetitionCountUnclosed,
+    /// A repetition operator was applied to a missing sub-expression. This
+    /// occurs, for example, in the regex consisting of just a `*` or even
+    /// `(?i)*`. It is, however, possible to create a repetition operating on
+    /// an empty sub-expression. For example, `()*` is still considered valid.
+    RepetitionMissing,
+    /// When octal support is disabled, this error is produced when an octal
+    /// escape is used. The octal escape is assumed to be an invocation of
+    /// a backreference, which is the common case.
+    UnsupportedBackreference,
+    /// When syntax similar to PCRE's look-around is used, this error is
+    /// returned. Some example syntaxes that are rejected include, but are
+    /// not necessarily limited to, `(?=re)`, `(?!re)`, `(?<=re)` and
+    /// `(?<!re)`. Note that all of these syntaxes are otherwise invalid; this
+    /// error is used to improve the user experience.
+    UnsupportedLookAround,
+    /// Hints that destructuring should not be exhaustive.
+    ///
+    /// This enum may grow additional variants, so this makes sure clients
+    /// don't count on exhaustive matching. (Otherwise, adding a new variant
+    /// could break existing code.)
+    #[doc(hidden)]
+    __Nonexhaustive,
+}
+
+impl error::Error for Error {
+    fn description(&self) -> &str {
+        use self::ErrorKind::*;
+        match self.kind {
+            CaptureLimitExceeded => "capture group limit exceeded",
+            ClassEscapeInvalid => "invalid escape sequence in character class",
+            ClassRangeInvalid => "invalid character class range",
+            ClassRangeLiteral => "invalid range boundary, must be a literal",
+            ClassUnclosed => "unclosed character class",
+            DecimalEmpty => "empty decimal literal",
+            DecimalInvalid => "invalid decimal literal",
+            EscapeHexEmpty => "empty hexadecimal literal",
+            EscapeHexInvalid => "invalid hexadecimal literal",
+            EscapeHexInvalidDigit => "invalid hexadecimal digit",
+            EscapeUnexpectedEof => "unexpected eof (escape sequence)",
+            EscapeUnrecognized => "unrecognized escape sequence",
+            FlagDanglingNegation => "dangling flag negation operator",
+            FlagDuplicate{..} => "duplicate flag",
+            FlagRepeatedNegation{..} => "repeated negation",
+            FlagUnexpectedEof => "unexpected eof (flag)",
+            FlagUnrecognized => "unrecognized flag",
+            GroupNameDuplicate{..} => "duplicate capture group name",
+            GroupNameEmpty => "empty capture group name",
+            GroupNameInvalid => "invalid capture group name",
+            GroupNameUnexpectedEof => "unclosed capture group name",
+            GroupUnclosed => "unclosed group",
+            GroupUnopened => "unopened group",
+            NestLimitExceeded(_) => "nest limit exceeded",
+            RepetitionCountInvalid => "invalid repetition count range",
+            RepetitionCountUnclosed => "unclosed counted repetition",
+            RepetitionMissing => "repetition operator missing expression",
+            UnsupportedBackreference => "backreferences are not supported",
+            UnsupportedLookAround => "look-around is not supported",
+            _ => unreachable!(),
+        }
+    }
+}
+
+impl fmt::Display for Error {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        ::error::Formatter::from(self).fmt(f)
+    }
+}
+
+impl fmt::Display for ErrorKind {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        use self::ErrorKind::*;
+        match *self {
+            CaptureLimitExceeded => {
+                write!(f, "exceeded the maximum number of \
+                           capturing groups ({})", ::std::u32::MAX)
+            }
+            ClassEscapeInvalid => {
+                write!(f, "invalid escape sequence found in character class")
+            }
+            ClassRangeInvalid => {
+                write!(f, "invalid character class range, \
+                           the start must be <= the end")
+            }
+            ClassRangeLiteral => {
+                write!(f, "invalid range boundary, must be a literal")
+            }
+            ClassUnclosed => {
+                write!(f, "unclosed character class")
+            }
+            DecimalEmpty => {
+                write!(f, "decimal literal empty")
+            }
+            DecimalInvalid => {
+                write!(f, "decimal literal invalid")
+            }
+            EscapeHexEmpty => {
+                write!(f, "hexadecimal literal empty")
+            }
+            EscapeHexInvalid => {
+                write!(f, "hexadecimal literal is not a Unicode scalar value")
+            }
+            EscapeHexInvalidDigit => {
+                write!(f, "invalid hexadecimal digit")
+            }
+            EscapeUnexpectedEof => {
+                write!(f, "incomplete escape sequence, \
+                           reached end of pattern prematurely")
+            }
+            EscapeUnrecognized => {
+                write!(f, "unrecognized escape sequence")
+            }
+            FlagDanglingNegation => {
+                write!(f, "dangling flag negation operator")
+            }
+            FlagDuplicate{..} => {
+                write!(f, "duplicate flag")
+            }
+            FlagRepeatedNegation{..} => {
+                write!(f, "flag negation operator repeated")
+            }
+            FlagUnexpectedEof => {
+                write!(f, "expected flag but got end of regex")
+            }
+            FlagUnrecognized => {
+                write!(f, "unrecognized flag")
+            }
+            GroupNameDuplicate{..} => {
+                write!(f, "duplicate capture group name")
+            }
+            GroupNameEmpty => {
+                write!(f, "empty capture group name")
+            }
+            GroupNameInvalid => {
+                write!(f, "invalid capture group character")
+            }
+            GroupNameUnexpectedEof => {
+                write!(f, "unclosed capture group name")
+            }
+            GroupUnclosed => {
+                write!(f, "unclosed group")
+            }
+            GroupUnopened => {
+                write!(f, "unopened group")
+            }
+            NestLimitExceeded(limit) => {
+                write!(f, "exceed the maximum number of \
+                           nested parentheses/brackets ({})", limit)
+            }
+            RepetitionCountInvalid => {
+                write!(f, "invalid repetition count range, \
+                           the start must be <= the end")
+            }
+            RepetitionCountUnclosed => {
+                write!(f, "unclosed counted repetition")
+            }
+            RepetitionMissing => {
+                write!(f, "repetition operator missing expression")
+            }
+            UnsupportedBackreference => {
+                write!(f, "backreferences are not supported")
+            }
+            UnsupportedLookAround => {
+                write!(f, "look-around, including look-ahead and look-behind, \
+                           is not supported")
+            }
+            _ => unreachable!(),
+        }
+    }
+}
+
+/// Span represents the position information of a single AST item.
+///
+/// All span positions are absolute byte offsets that can be used on the
+/// original regular expression that was parsed.
+#[derive(Clone, Copy, Eq, PartialEq)]
+pub struct Span {
+    /// The start byte offset.
+    pub start: Position,
+    /// The end byte offset.
+    pub end: Position,
+}
+
+impl fmt::Debug for Span {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "Span({:?}, {:?})", self.start, self.end)
+    }
+}
+
+impl Ord for Span {
+    fn cmp(&self, other: &Span) -> Ordering {
+        (&self.start, &self.end).cmp(&(&other.start, &other.end))
+    }
+}
+
+impl PartialOrd for Span {
+    fn partial_cmp(&self, other: &Span) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+/// A single position in a regular expression.
+///
+/// A position encodes one half of a span, and includes the byte offset, line
+/// number and column number.
+#[derive(Clone, Copy, Eq, PartialEq)]
+pub struct Position {
+    /// The absolute offset of this position, starting at `0` from the
+    /// beginning of the regular expression pattern string.
+    pub offset: usize,
+    /// The line number, starting at `1`.
+    pub line: usize,
+    /// The approximate column number, starting at `1`.
+    pub column: usize,
+}
+
+impl fmt::Debug for Position {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(
+            f,
+            "Position(o: {:?}, l: {:?}, c: {:?})",
+            self.offset, self.line, self.column)
+    }
+}
+
+impl Ord for Position {
+    fn cmp(&self, other: &Position) -> Ordering {
+        self.offset.cmp(&other.offset)
+    }
+}
+
+impl PartialOrd for Position {
+    fn partial_cmp(&self, other: &Position) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl Span {
+    /// Create a new span with the given positions.
+    pub fn new(start: Position, end: Position) -> Span {
+        Span { start: start, end: end }
+    }
+
+    /// Create a new span using the given position as the start and end.
+    pub fn splat(pos: Position) -> Span {
+        Span::new(pos, pos)
+    }
+
+    /// Create a new span by replacing the starting position with the one
+    /// given.
+    pub fn with_start(self, pos: Position) -> Span {
+        Span { start: pos, ..self }
+    }
+
+    /// Create a new span by replacing the ending position with the one
+    /// given.
+    pub fn with_end(self, pos: Position) -> Span {
+        Span { end: pos, ..self }
+    }
+
+    /// Returns true if and only if this span occurs on a single line.
+    pub fn is_one_line(&self) -> bool {
+        self.start.line == self.end.line
+    }
+
+    /// Returns true if and only if this span is empty. That is, it points to
+    /// a single position in the concrete syntax of a regular expression.
+    pub fn is_empty(&self) -> bool {
+        self.start.offset == self.end.offset
+    }
+}
+
+impl Position {
+    /// Create a new position with the given information.
+    ///
+    /// `offset` is the absolute offset of the position, starting at `0` from
+    /// the beginning of the regular expression pattern string.
+    ///
+    /// `line` is the line number, starting at `1`.
+    ///
+    /// `column` is the approximate column number, starting at `1`.
+    pub fn new(offset: usize, line: usize, column: usize) -> Position {
+        Position { offset: offset, line: line, column: column }
+    }
+}
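+
+// NOTE (editorial sketch, not part of the upstream regex-syntax source): how
+// `Span` and `Position` fit together; offsets are byte offsets, while line
+// and column numbers start at `1`:
+//
+//     use regex_syntax::ast::{Position, Span};
+//
+//     let start = Position::new(0, 1, 1);
+//     let end = Position::new(3, 1, 4);
+//     let span = Span::new(start, end);
+//     assert!(span.is_one_line());
+//     assert!(!span.is_empty());
+//     // A "splatted" span starts and ends at the same position.
+//     assert!(Span::splat(start).is_empty());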
+
+/// An abstract syntax tree for a single regular expression, along with any
+/// comments found.
+///
+/// Comments are not stored in the tree itself to avoid complexity. Each
+/// comment contains a span of precisely where it occurred in the original
+/// regular expression.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct WithComments {
+    /// The actual ast.
+    pub ast: Ast,
+    /// All comments found in the original regular expression.
+    pub comments: Vec<Comment>,
+}
+
+/// A comment from a regular expression with an associated span.
+///
+/// A regular expression can only contain comments when the `x` flag is
+/// enabled.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Comment {
+    /// The span of this comment, including the beginning `#` and ending `\n`.
+    pub span: Span,
+    /// The comment text, starting with the first character following the `#`
+    /// and ending with the last character preceding the `\n`.
+    pub comment: String,
+}
+
+/// An abstract syntax tree for a single regular expression.
+///
+/// An `Ast`'s `fmt::Display` implementation uses constant stack space and heap
+/// space proportional to the size of the `Ast`.
+///
+/// This type defines its own destructor that uses constant stack space and
+/// heap space proportional to the size of the `Ast`.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum Ast {
+    /// An empty regex that matches everything.
+    Empty(Span),
+    /// A set of flags, e.g., `(?is)`.
+    Flags(SetFlags),
+    /// A single character literal, which includes escape sequences.
+    Literal(Literal),
+    /// The "any character" class.
+    Dot(Span),
+    /// A single zero-width assertion.
+    Assertion(Assertion),
+    /// A single character class. This includes all forms of character classes
+    /// except for `.`. e.g., `\d`, `\pN`, `[a-z]` and `[[:alpha:]]`.
+    Class(Class),
+    /// A repetition operator applied to an arbitrary regular expression.
+    Repetition(Repetition),
+    /// A grouped regular expression.
+    Group(Group),
+    /// An alternation of regular expressions.
+    Alternation(Alternation),
+    /// A concatenation of regular expressions.
+    Concat(Concat),
+}
+
+impl Ast {
+    /// Return the span of this abstract syntax tree.
+    pub fn span(&self) -> &Span {
+        match *self {
+            Ast::Empty(ref span) => span,
+            Ast::Flags(ref x) => &x.span,
+            Ast::Literal(ref x) => &x.span,
+            Ast::Dot(ref span) => span,
+            Ast::Assertion(ref x) => &x.span,
+            Ast::Class(ref x) => x.span(),
+            Ast::Repetition(ref x) => &x.span,
+            Ast::Group(ref x) => &x.span,
+            Ast::Alternation(ref x) => &x.span,
+            Ast::Concat(ref x) => &x.span,
+        }
+    }
+
+    /// Return true if and only if this Ast is empty.
+    pub fn is_empty(&self) -> bool {
+        match *self {
+            Ast::Empty(_) => true,
+            _ => false,
+        }
+    }
+
+    /// Returns true if and only if this AST has any (including possibly empty)
+    /// subexpressions.
+    fn has_subexprs(&self) -> bool {
+        match *self {
+            Ast::Empty(_)
+            | Ast::Flags(_)
+            | Ast::Literal(_)
+            | Ast::Dot(_)
+            | Ast::Assertion(_) => false,
+            Ast::Class(_)
+            | Ast::Repetition(_)
+            | Ast::Group(_)
+            | Ast::Alternation(_)
+            | Ast::Concat(_) => true,
+        }
+    }
+}
+
+/// Print a display representation of this Ast.
+///
+/// This does not preserve any of the original whitespace formatting that may
+/// have originally been present in the concrete syntax from which this Ast
+/// was generated.
+///
+/// This implementation uses constant stack space and heap space proportional
+/// to the size of the `Ast`.
+impl fmt::Display for Ast {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        use ast::print::Printer;
+        Printer::new().print(self, f)
+    }
+}
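+
+// NOTE (editorial sketch, not part of the upstream regex-syntax source): the
+// `Display` impl prints the parsed structure rather than the original text,
+// so the printed form can be fed back into the parser:
+//
+//     use regex_syntax::ast::parse::Parser;
+//
+//     let ast = Parser::new().parse(r"a|b|c").unwrap();
+//     let printed = ast.to_string();
+//     // The printed form is itself a valid pattern that parses again.
+//     assert!(Parser::new().parse(&printed).is_ok());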
+
+/// An alternation of regular expressions.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Alternation {
+    /// The span of this alternation.
+    pub span: Span,
+    /// The alternate regular expressions.
+    pub asts: Vec<Ast>,
+}
+
+impl Alternation {
+    /// Return this alternation as an AST.
+    ///
+    /// If this alternation contains zero ASTs, then Ast::Empty is
+    /// returned. If this alternation contains exactly 1 AST, then the
+    /// corresponding AST is returned. Otherwise, Ast::Alternation is returned.
+    pub fn into_ast(mut self) -> Ast {
+        match self.asts.len() {
+            0 => Ast::Empty(self.span),
+            1 => self.asts.pop().unwrap(),
+            _ => Ast::Alternation(self),
+        }
+    }
+}
+
+/// A concatenation of regular expressions.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Concat {
+    /// The span of this concatenation.
+    pub span: Span,
+    /// The concatenated regular expressions.
+    pub asts: Vec<Ast>,
+}
+
+impl Concat {
+    /// Return this concatenation as an AST.
+    ///
+    /// If this concatenation contains zero ASTs, then Ast::Empty is
+    /// returned. If this concatenation contains exactly 1 AST, then the
+    /// corresponding AST is returned. Otherwise, Ast::Concat is returned.
+    pub fn into_ast(mut self) -> Ast {
+        match self.asts.len() {
+            0 => Ast::Empty(self.span),
+            1 => self.asts.pop().unwrap(),
+            _ => Ast::Concat(self),
+        }
+    }
+}
+
+/// A single literal expression.
+///
+/// A literal corresponds to a single Unicode scalar value. Literals may be
+/// represented in their literal form, e.g., `a` or in their escaped form,
+/// e.g., `\x61`.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Literal {
+    /// The span of this literal.
+    pub span: Span,
+    /// The kind of this literal.
+    pub kind: LiteralKind,
+    /// The Unicode scalar value corresponding to this literal.
+    pub c: char,
+}
+
+impl Literal {
+    /// If this literal was written as a `\x` hex escape, then this returns
+    /// the corresponding byte value. Otherwise, this returns `None`.
+    pub fn byte(&self) -> Option<u8> {
+        let short_hex = LiteralKind::HexFixed(HexLiteralKind::X);
+        if self.c as u32 <= 255 && self.kind == short_hex {
+            Some(self.c as u8)
+        } else {
+            None
+        }
+    }
+}
+
+/// The kind of a single literal expression.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum LiteralKind {
+    /// The literal is written verbatim, e.g., `a` or `☃`.
+    Verbatim,
+    /// The literal is written as an escape because it is punctuation, e.g.,
+    /// `\*` or `\[`.
+    Punctuation,
+    /// The literal is written as an octal escape, e.g., `\141`.
+    Octal,
+    /// The literal is written as a hex code with a fixed number of digits
+    /// depending on the type of the escape, e.g., `\x61` or `\u0061` or
+    /// `\U00000061`.
+    HexFixed(HexLiteralKind),
+    /// The literal is written as a hex code with a bracketed number of
+    /// digits. The only restriction is that the bracketed hex code must refer
+    /// to a valid Unicode scalar value.
+    HexBrace(HexLiteralKind),
+    /// The literal is written as a specially recognized escape, e.g., `\f`
+    /// or `\n`.
+    Special(SpecialLiteralKind),
+}
+
+/// The type of a special literal.
+///
+/// A special literal is a special escape sequence recognized by the regex
+/// parser, e.g., `\f` or `\n`.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum SpecialLiteralKind {
+    /// Bell, spelled `\a` (`\x07`).
+    Bell,
+    /// Form feed, spelled `\f` (`\x0C`).
+    FormFeed,
+    /// Tab, spelled `\t` (`\x09`).
+    Tab,
+    /// Line feed, spelled `\n` (`\x0A`).
+    LineFeed,
+    /// Carriage return, spelled `\r` (`\x0D`).
+    CarriageReturn,
+    /// Vertical tab, spelled `\v` (`\x0B`).
+    VerticalTab,
+    /// Space, spelled `\ ` (`\x20`). Note that this can only appear when
+    /// parsing in verbose mode.
+    Space,
+}
+
+/// The type of a Unicode hex literal.
+///
+/// Note that all variants behave the same when used with brackets. They only
+/// differ when used without brackets in the number of hex digits that must
+/// follow.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum HexLiteralKind {
+    /// A `\x` prefix. When used without brackets, this form is limited to
+    /// two digits.
+    X,
+    /// A `\u` prefix. When used without brackets, this form is limited to
+    /// four digits.
+    UnicodeShort,
+    /// A `\U` prefix. When used without brackets, this form is limited to
+    /// eight digits.
+    UnicodeLong,
+}
+
+impl HexLiteralKind {
+    /// The number of digits that must be used with this literal form when
+    /// used without brackets. When used with brackets, there is no
+    /// restriction on the number of digits.
+    pub fn digits(&self) -> u32 {
+        match *self {
+            HexLiteralKind::X => 2,
+            HexLiteralKind::UnicodeShort => 4,
+            HexLiteralKind::UnicodeLong => 8,
+        }
+    }
+}
+
+/// A single character class expression.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum Class {
+    /// A Unicode character class, e.g., `\pL` or `\p{Greek}`.
+    Unicode(ClassUnicode),
+    /// A perl character class, e.g., `\d` or `\W`.
+    Perl(ClassPerl),
+    /// A bracketed character class set, which may contain zero or more
+    /// character ranges and/or zero or more nested classes. e.g.,
+    /// `[a-zA-Z\pL]`.
+    Bracketed(ClassBracketed),
+}
+
+impl Class {
+    /// Return the span of this character class.
+    pub fn span(&self) -> &Span {
+        match *self {
+            Class::Perl(ref x) => &x.span,
+            Class::Unicode(ref x) => &x.span,
+            Class::Bracketed(ref x) => &x.span,
+        }
+    }
+}
+
+/// A Perl character class.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct ClassPerl {
+    /// The span of this class.
+    pub span: Span,
+    /// The kind of Perl class.
+    pub kind: ClassPerlKind,
+    /// Whether the class is negated or not. e.g., `\d` is not negated but
+    /// `\D` is.
+    pub negated: bool,
+}
+
+/// The available Perl character classes.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum ClassPerlKind {
+    /// Decimal numbers.
+    Digit,
+    /// Whitespace.
+    Space,
+    /// Word characters.
+    Word,
+}
+
+/// An ASCII character class.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct ClassAscii {
+    /// The span of this class.
+    pub span: Span,
+    /// The kind of ASCII class.
+    pub kind: ClassAsciiKind,
+    /// Whether the class is negated or not. e.g., `[[:alpha:]]` is not negated
+    /// but `[[:^alpha:]]` is.
+    pub negated: bool,
+}
+
+/// The available ASCII character classes.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum ClassAsciiKind {
+    /// `[0-9A-Za-z]`
+    Alnum,
+    /// `[A-Za-z]`
+    Alpha,
+    /// `[\x00-\x7F]`
+    Ascii,
+    /// `[ \t]`
+    Blank,
+    /// `[\x00-\x1F\x7F]`
+    Cntrl,
+    /// `[0-9]`
+    Digit,
+    /// `[!-~]`
+    Graph,
+    /// `[a-z]`
+    Lower,
+    /// `[ -~]`
+    Print,
+    /// `[!-/:-@\[-`{-~]`
+    Punct,
+    /// `[\t\n\v\f\r ]`
+    Space,
+    /// `[A-Z]`
+    Upper,
+    /// `[0-9A-Za-z_]`
+    Word,
+    /// `[0-9A-Fa-f]`
+    Xdigit,
+}
+
+impl ClassAsciiKind {
+    /// Return the corresponding ClassAsciiKind variant for the given name.
+    ///
+    /// The name given should correspond to the lowercase version of the
+    /// variant name. e.g., `cntrl` is the name for `ClassAsciiKind::Cntrl`.
+    ///
+    /// If no variant with the corresponding name exists, then `None` is
+    /// returned.
+    pub fn from_name(name: &str) -> Option<ClassAsciiKind> {
+        use self::ClassAsciiKind::*;
+        match name {
+            "alnum" => Some(Alnum),
+            "alpha" => Some(Alpha),
+            "ascii" => Some(Ascii),
+            "blank" => Some(Blank),
+            "cntrl" => Some(Cntrl),
+            "digit" => Some(Digit),
+            "graph" => Some(Graph),
+            "lower" => Some(Lower),
+            "print" => Some(Print),
+            "punct" => Some(Punct),
+            "space" => Some(Space),
+            "upper" => Some(Upper),
+            "word" => Some(Word),
+            "xdigit" => Some(Xdigit),
+            _ => None,
+        }
+    }
+}
+
+/// A Unicode character class.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct ClassUnicode {
+    /// The span of this class.
+    pub span: Span,
+    /// Whether this class is negated or not.
+    ///
+    /// Note: be careful when using this attribute. This specifically refers
+    /// to whether the class is written as `\p` or `\P`, where the latter
+    /// is `negated = true`. However, it is also possible to write something like
+    /// `\P{scx!=Katakana}` which is actually equivalent to
+    /// `\p{scx=Katakana}` and is therefore not actually negated even though
+    /// `negated = true` here. To test whether this class is truly negated
+    /// or not, use the `is_negated` method.
+    pub negated: bool,
+    /// The kind of Unicode class.
+    pub kind: ClassUnicodeKind,
+}
+
+impl ClassUnicode {
+    /// Returns true if this class has been negated.
+    ///
+    /// Note that this takes the Unicode op into account, if it's present.
+    /// e.g., `is_negated` for `\P{scx!=Katakana}` will return `false`.
+    pub fn is_negated(&self) -> bool {
+        match self.kind {
+            ClassUnicodeKind::NamedValue {
+                op: ClassUnicodeOpKind::NotEqual, ..
+            } => !self.negated,
+            _ => self.negated,
+        }
+    }
+}
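+
+// NOTE (editorial sketch, not part of the upstream regex-syntax source): the
+// `negated` vs. `is_negated` distinction above, building the node for
+// `\P{scx!=Katakana}` by hand with a dummy span:
+//
+//     use regex_syntax::ast::*;
+//
+//     let span = Span::splat(Position::new(0, 1, 1));
+//     let class = ClassUnicode {
+//         span: span,
+//         negated: true, // written with `\P`
+//         kind: ClassUnicodeKind::NamedValue {
+//             op: ClassUnicodeOpKind::NotEqual,
+//             name: "scx".to_string(),
+//             value: "Katakana".to_string(),
+//         },
+//     };
+//     // The `!=` op cancels the `\P`, so the class is not actually negated.
+//     assert!(!class.is_negated());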
+
+/// The available forms of Unicode character classes.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum ClassUnicodeKind {
+    /// A one letter abbreviated class, e.g., `\pN`.
+    OneLetter(char),
+    /// A binary property, general category or script. The string may be
+    /// empty.
+    Named(String),
+    /// A property name and an associated value.
+    NamedValue {
+        /// The type of Unicode op used to associate `name` with `value`.
+        op: ClassUnicodeOpKind,
+        /// The property name (which may be empty).
+        name: String,
+        /// The property value (which may be empty).
+        value: String,
+    },
+}
+
+/// The type of op used in a Unicode character class.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum ClassUnicodeOpKind {
+    /// A property set to a specific value, e.g., `\p{scx=Katakana}`.
+    Equal,
+    /// A property set to a specific value using a colon, e.g.,
+    /// `\p{scx:Katakana}`.
+    Colon,
+    /// A property that isn't a particular value, e.g., `\p{scx!=Katakana}`.
+    NotEqual,
+}
+
+impl ClassUnicodeOpKind {
+    /// Whether the op is an equality op or not.
+    pub fn is_equal(&self) -> bool {
+        match *self {
+            ClassUnicodeOpKind::Equal|ClassUnicodeOpKind::Colon => true,
+            _ => false,
+        }
+    }
+}
+
+/// A bracketed character class, e.g., `[a-z0-9]`.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct ClassBracketed {
+    /// The span of this class.
+    pub span: Span,
+    /// Whether this class is negated or not. e.g., `[a]` is not negated but
+    /// `[^a]` is.
+    pub negated: bool,
+    /// The type of this set. A set is either a normal union of things, e.g.,
+    /// `[abc]` or a result of applying set operations, e.g., `[\pL--c]`.
+    pub kind: ClassSet,
+}
+
+/// A character class set.
+///
+/// This type corresponds to the internal structure of a bracketed character
+/// class. That is, every bracketed character class is one of two types: a union
+/// items (literals, ranges, other bracketed classes) or a tree of binary set
+/// operations.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum ClassSet {
+    /// An item, which can be a single literal, range, nested character class
+    /// or a union of items.
+    Item(ClassSetItem),
+    /// A single binary operation (i.e., &&, -- or ~~).
+    BinaryOp(ClassSetBinaryOp),
+}
+
+impl ClassSet {
+    /// Build a set from a union.
+    pub fn union(ast: ClassSetUnion) -> ClassSet {
+        ClassSet::Item(ClassSetItem::Union(ast))
+    }
+
+    /// Return the span of this character class set.
+    pub fn span(&self) -> &Span {
+        match *self {
+            ClassSet::Item(ref x) => x.span(),
+            ClassSet::BinaryOp(ref x) => &x.span,
+        }
+    }
+
+    /// Return true if and only if this class set is empty.
+    fn is_empty(&self) -> bool {
+        match *self {
+            ClassSet::Item(ClassSetItem::Empty(_)) => true,
+            _ => false,
+        }
+    }
+}
+
+/// A single component of a character class set.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum ClassSetItem {
+    /// An empty item.
+    ///
+    /// Note that a bracketed character class cannot contain a single empty
+    /// item. Empty items can appear when using one of the binary operators.
+    /// For example, `[&&]` is the intersection of two empty classes.
+    Empty(Span),
+    /// A single literal.
+    Literal(Literal),
+    /// A range between two literals.
+    Range(ClassSetRange),
+    /// An ASCII character class, e.g., `[:alnum:]` or `[:punct:]`.
+    Ascii(ClassAscii),
+    /// A Unicode character class, e.g., `\pL` or `\p{Greek}`.
+    Unicode(ClassUnicode),
+    /// A perl character class, e.g., `\d` or `\W`.
+    Perl(ClassPerl),
+    /// A bracketed character class set, which may contain zero or more
+    /// character ranges and/or zero or more nested classes. e.g.,
+    /// `[a-zA-Z\pL]`.
+    Bracketed(Box<ClassBracketed>),
+    /// A union of items.
+    Union(ClassSetUnion),
+}
+
+impl ClassSetItem {
+    /// Return the span of this character class set item.
+    pub fn span(&self) -> &Span {
+        match *self {
+            ClassSetItem::Empty(ref span) => span,
+            ClassSetItem::Literal(ref x) => &x.span,
+            ClassSetItem::Range(ref x) => &x.span,
+            ClassSetItem::Ascii(ref x) => &x.span,
+            ClassSetItem::Perl(ref x) => &x.span,
+            ClassSetItem::Unicode(ref x) => &x.span,
+            ClassSetItem::Bracketed(ref x) => &x.span,
+            ClassSetItem::Union(ref x) => &x.span,
+        }
+    }
+}
+
+/// A single character class range in a set.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct ClassSetRange {
+    /// The span of this range.
+    pub span: Span,
+    /// The start of this range.
+    pub start: Literal,
+    /// The end of this range.
+    pub end: Literal,
+}
+
+impl ClassSetRange {
+    /// Returns true if and only if this character class range is valid.
+    ///
+    /// The only case where a range is invalid is if its start is greater than
+    /// its end.
+    pub fn is_valid(&self) -> bool {
+        self.start.c <= self.end.c
+    }
+}
+
+/// A union of items inside a character class set.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct ClassSetUnion {
+    /// The span of the items in this operation. e.g., the `a-z0-9` in
+    /// `[^a-z0-9]`
+    pub span: Span,
+    /// The sequence of items that make up this union.
+    pub items: Vec<ClassSetItem>,
+}
+
+impl ClassSetUnion {
+    /// Push a new item in this union.
+    ///
+    /// The ending position of this union's span is updated to the ending
+    /// position of the span of the item given. If the union is empty, then
+    /// the starting position of this union is set to the starting position
+    /// of this item.
+    ///
+    /// In other words, if you only use this method to add items to a union
+    /// and you set the spans on each item correctly, then you should never
+    /// need to adjust the span of the union directly.
+    pub fn push(&mut self, item: ClassSetItem) {
+        if self.items.is_empty() {
+            self.span.start = item.span().start;
+        }
+        self.span.end = item.span().end;
+        self.items.push(item);
+    }
+
+    /// Return this union as a character class set item.
+    ///
+    /// If this union contains zero items, then an empty union is
+    /// returned. If this union contains exactly 1 item, then the
+    /// corresponding item is returned. Otherwise, ClassSetItem::Union is
+    /// returned.
+    pub fn into_item(mut self) -> ClassSetItem {
+        match self.items.len() {
+            0 => ClassSetItem::Empty(self.span),
+            1 => self.items.pop().unwrap(),
+            _ => ClassSetItem::Union(self),
+        }
+    }
+}
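+
+// NOTE (editorial sketch, not part of the upstream regex-syntax source): how
+// `push` keeps the union's span in sync with its items, using hand-built
+// verbatim literals and dummy positions:
+//
+//     use regex_syntax::ast::*;
+//
+//     let pos = |o| Position::new(o, 1, o + 1);
+//     let lit = |o, c| ClassSetItem::Literal(Literal {
+//         span: Span::new(pos(o), pos(o + 1)),
+//         kind: LiteralKind::Verbatim,
+//         c: c,
+//     });
+//     let mut union = ClassSetUnion { span: Span::splat(pos(0)), items: vec![] };
+//     union.push(lit(1, 'a'));
+//     union.push(lit(2, 'b'));
+//     // The union now spans from the start of 'a' to the end of 'b'.
+//     assert_eq!(union.span, Span::new(pos(1), pos(3)));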
+
+/// A Unicode character class set operation.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct ClassSetBinaryOp {
+    /// The span of this operation. e.g., the `a-z--[h-p]` in `[a-z--[h-p]]`.
+    pub span: Span,
+    /// The type of this set operation.
+    pub kind: ClassSetBinaryOpKind,
+    /// The left hand side of the operation.
+    pub lhs: Box<ClassSet>,
+    /// The right hand side of the operation.
+    pub rhs: Box<ClassSet>,
+}
+
+/// The type of a Unicode character class set operation.
+///
+/// Note that this doesn't explicitly represent union since there is no
+/// explicit union operator. Concatenation inside a character class corresponds
+/// to the union operation.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub enum ClassSetBinaryOpKind {
+    /// The intersection of two sets, e.g., `\pN&&[a-z]`.
+    Intersection,
+    /// The difference of two sets, e.g., `\pN--[0-9]`.
+    Difference,
+    /// The symmetric difference of two sets. The symmetric difference is the
+    /// set of elements belonging to one but not both sets.
+    /// e.g., `[\pL~~[:ascii:]]`.
+    SymmetricDifference,
+}
+
+/// A single zero-width assertion.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Assertion {
+    /// The span of this assertion.
+    pub span: Span,
+    /// The assertion kind, e.g., `\b` or `^`.
+    pub kind: AssertionKind,
+}
+
+/// An assertion kind.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum AssertionKind {
+    /// `^`
+    StartLine,
+    /// `$`
+    EndLine,
+    /// `\A`
+    StartText,
+    /// `\z`
+    EndText,
+    /// `\b`
+    WordBoundary,
+    /// `\B`
+    NotWordBoundary,
+}
+
+/// A repetition operation applied to a regular expression.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Repetition {
+    /// The span of this operation.
+    pub span: Span,
+    /// The actual operation.
+    pub op: RepetitionOp,
+    /// Whether this operation was applied greedily or not.
+    pub greedy: bool,
+    /// The regular expression under repetition.
+    pub ast: Box<Ast>,
+}
+
+/// The repetition operator itself.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct RepetitionOp {
+    /// The span of this operator. This includes things like `+`, `*?` and
+    /// `{m,n}`.
+    pub span: Span,
+    /// The type of operation.
+    pub kind: RepetitionKind,
+}
+
+/// The kind of a repetition operator.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum RepetitionKind {
+    /// `?`
+    ZeroOrOne,
+    /// `*`
+    ZeroOrMore,
+    /// `+`
+    OneOrMore,
+    /// `{m,n}`
+    Range(RepetitionRange),
+}
+
+/// A range repetition operator.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum RepetitionRange {
+    /// `{m}`
+    Exactly(u32),
+    /// `{m,}`
+    AtLeast(u32),
+    /// `{m,n}`
+    Bounded(u32, u32),
+}
+
+impl RepetitionRange {
+    /// Returns true if and only if this repetition range is valid.
+    ///
+    /// The only case where a repetition range is invalid is if it is bounded
+    /// and its start is greater than its end.
+    pub fn is_valid(&self) -> bool {
+        match *self {
+            RepetitionRange::Bounded(s, e) if s > e => false,
+            _ => true,
+        }
+    }
+}
+
+/// A grouped regular expression.
+///
+/// This includes both capturing and non-capturing groups. This does **not**
+/// include flag-only groups like `(?is)`, but does contain any group that
+/// contains a sub-expression, e.g., `(a)`, `(?P<name>a)`, `(?:a)` and
+/// `(?is:a)`.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Group {
+    /// The span of this group.
+    pub span: Span,
+    /// The kind of this group.
+    pub kind: GroupKind,
+    /// The regular expression in this group.
+    pub ast: Box<Ast>,
+}
+
+impl Group {
+    /// If this group is non-capturing, then this returns the (possibly empty)
+    /// set of flags. Otherwise, `None` is returned.
+    pub fn flags(&self) -> Option<&Flags> {
+        match self.kind {
+            GroupKind::NonCapturing(ref flags) => Some(flags),
+            _ => None,
+        }
+    }
+
+    /// Returns true if and only if this group is capturing.
+    pub fn is_capturing(&self) -> bool {
+        match self.kind {
+            GroupKind::CaptureIndex(_) | GroupKind::CaptureName(_) => true,
+            GroupKind::NonCapturing(_) => false,
+        }
+    }
+
+    /// Returns the capture index of this group, if this is a capturing group.
+    ///
+    /// This returns a capture index precisely when `is_capturing` is `true`.
+    pub fn capture_index(&self) -> Option<u32> {
+        match self.kind {
+            GroupKind::CaptureIndex(i) => Some(i),
+            GroupKind::CaptureName(ref x) => Some(x.index),
+            GroupKind::NonCapturing(_) => None,
+        }
+    }
+}
+
+/// The kind of a group.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum GroupKind {
+    /// `(a)`
+    CaptureIndex(u32),
+    /// `(?P<name>a)`
+    CaptureName(CaptureName),
+    /// `(?:a)` and `(?i:a)`
+    NonCapturing(Flags),
+}
+
+/// A capture name.
+///
+/// This corresponds to the name itself between the angle brackets in, e.g.,
+/// `(?P<foo>expr)`.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct CaptureName {
+    /// The span of this capture name.
+    pub span: Span,
+    /// The capture name.
+    pub name: String,
+    /// The capture index.
+    pub index: u32,
+}
+
+/// A group of flags that is not applied to a particular regular expression.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct SetFlags {
+    /// The span of these flags, including the grouping parentheses.
+    pub span: Span,
+    /// The actual sequence of flags.
+    pub flags: Flags,
+}
+
+/// A group of flags.
+///
+/// This corresponds only to the sequence of flags themselves, e.g., `is-u`.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Flags {
+    /// The span of this group of flags.
+    pub span: Span,
+    /// A sequence of flag items. Each item is either a flag or a negation
+    /// operator.
+    pub items: Vec<FlagsItem>,
+}
+
+impl Flags {
+    /// Add the given item to this sequence of flags.
+    ///
+    /// If the item was added successfully, then `None` is returned. If the
+    /// given item is a duplicate, then `Some(i)` is returned, where
+    /// `items[i].kind == item.kind`.
+    pub fn add_item(&mut self, item: FlagsItem) -> Option<usize> {
+        for (i, x) in self.items.iter().enumerate() {
+            if x.kind == item.kind {
+                return Some(i);
+            }
+        }
+        self.items.push(item);
+        None
+    }
+
+    /// Returns the state of the given flag in this set.
+    ///
+    /// If the given flag is in the set but is negated, then `Some(false)` is
+    /// returned.
+    ///
+    /// If the given flag is in the set and is not negated, then `Some(true)`
+    /// is returned.
+    ///
+    /// Otherwise, `None` is returned.
+    pub fn flag_state(&self, flag: Flag) -> Option<bool> {
+        let mut negated = false;
+        for x in &self.items {
+            match x.kind {
+                FlagsItemKind::Negation => {
+                    negated = true;
+                }
+                FlagsItemKind::Flag(ref xflag) if xflag == &flag => {
+                    return Some(!negated);
+                }
+                _ => {}
+            }
+        }
+        None
+    }
+}
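+
+// NOTE (editorial sketch, not part of the upstream regex-syntax source):
+// `flag_state` for a flag sequence equivalent to `i-s` (case-insensitive on,
+// dot-matches-new-line explicitly off), using a dummy span:
+//
+//     use regex_syntax::ast::*;
+//
+//     let span = Span::splat(Position::new(0, 1, 1));
+//     let item = |kind| FlagsItem { span: span, kind: kind };
+//     let flags = Flags {
+//         span: span,
+//         items: vec![
+//             item(FlagsItemKind::Flag(Flag::CaseInsensitive)),
+//             item(FlagsItemKind::Negation),
+//             item(FlagsItemKind::Flag(Flag::DotMatchesNewLine)),
+//         ],
+//     };
+//     assert_eq!(flags.flag_state(Flag::CaseInsensitive), Some(true));
+//     assert_eq!(flags.flag_state(Flag::DotMatchesNewLine), Some(false));
+//     assert_eq!(flags.flag_state(Flag::Unicode), None);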
+
+/// A single item in a group of flags.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct FlagsItem {
+    /// The span of this item.
+    pub span: Span,
+    /// The kind of this item.
+    pub kind: FlagsItemKind,
+}
+
+/// The kind of an item in a group of flags.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum FlagsItemKind {
+    /// A negation operator applied to all subsequent flags in the enclosing
+    /// group.
+    Negation,
+    /// A single flag in a group.
+    Flag(Flag),
+}
+
+impl FlagsItemKind {
+    /// Returns true if and only if this item is a negation operator.
+    pub fn is_negation(&self) -> bool {
+        match *self {
+            FlagsItemKind::Negation => true,
+            _ => false,
+        }
+    }
+}
+
+/// A single flag.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub enum Flag {
+    /// `i`
+    CaseInsensitive,
+    /// `m`
+    MultiLine,
+    /// `s`
+    DotMatchesNewLine,
+    /// `U`
+    SwapGreed,
+    /// `u`
+    Unicode,
+    /// `x`
+    IgnoreWhitespace,
+}
+
+/// A custom `Drop` impl is used for `Ast` such that it uses constant stack
+/// space but heap space proportional to the depth of the `Ast`.
+impl Drop for Ast {
+    fn drop(&mut self) {
+        use std::mem;
+
+        match *self {
+            Ast::Empty(_)
+            | Ast::Flags(_)
+            | Ast::Literal(_)
+            | Ast::Dot(_)
+            | Ast::Assertion(_)
+            // Classes are recursive, so they get their own Drop impl.
+            | Ast::Class(_) => return,
+            Ast::Repetition(ref x) if !x.ast.has_subexprs() => return,
+            Ast::Group(ref x) if !x.ast.has_subexprs() => return,
+            Ast::Alternation(ref x) if x.asts.is_empty() => return,
+            Ast::Concat(ref x) if x.asts.is_empty() => return,
+            _ => {}
+        }
+
+        let empty_span = || Span::splat(Position::new(0, 0, 0));
+        let empty_ast = || Ast::Empty(empty_span());
+        let mut stack = vec![mem::replace(self, empty_ast())];
+        while let Some(mut ast) = stack.pop() {
+            match ast {
+                Ast::Empty(_)
+                | Ast::Flags(_)
+                | Ast::Literal(_)
+                | Ast::Dot(_)
+                | Ast::Assertion(_)
+                // Classes are recursive, so they get their own Drop impl.
+                | Ast::Class(_) => {}
+                Ast::Repetition(ref mut x) => {
+                    stack.push(mem::replace(&mut x.ast, empty_ast()));
+                }
+                Ast::Group(ref mut x) => {
+                    stack.push(mem::replace(&mut x.ast, empty_ast()));
+                }
+                Ast::Alternation(ref mut x) => {
+                    stack.extend(x.asts.drain(..));
+                }
+                Ast::Concat(ref mut x) => {
+                    stack.extend(x.asts.drain(..));
+                }
+            }
+        }
+    }
+}
+
+/// A custom `Drop` impl is used for `ClassSet` such that it uses constant
+/// stack space but heap space proportional to the depth of the `ClassSet`.
+impl Drop for ClassSet {
+    fn drop(&mut self) {
+        use std::mem;
+
+        match *self {
+            ClassSet::Item(ref item) => {
+                match *item {
+                    ClassSetItem::Empty(_)
+                    | ClassSetItem::Literal(_)
+                    | ClassSetItem::Range(_)
+                    | ClassSetItem::Ascii(_)
+                    | ClassSetItem::Unicode(_)
+                    | ClassSetItem::Perl(_) => return,
+                    ClassSetItem::Bracketed(ref x) => {
+                        if x.kind.is_empty() {
+                            return;
+                        }
+                    }
+                    ClassSetItem::Union(ref x) => {
+                        if x.items.is_empty() {
+                            return;
+                        }
+                    }
+                }
+            }
+            ClassSet::BinaryOp(ref op) => {
+                if op.lhs.is_empty() && op.rhs.is_empty() {
+                    return;
+                }
+            }
+        }
+
+        let empty_span = || Span::splat(Position::new(0, 0, 0));
+        let empty_set = || ClassSet::Item(ClassSetItem::Empty(empty_span()));
+        let mut stack = vec![mem::replace(self, empty_set())];
+        while let Some(mut set) = stack.pop() {
+            match set {
+                ClassSet::Item(ref mut item) => {
+                    match *item {
+                        ClassSetItem::Empty(_)
+                        | ClassSetItem::Literal(_)
+                        | ClassSetItem::Range(_)
+                        | ClassSetItem::Ascii(_)
+                        | ClassSetItem::Unicode(_)
+                        | ClassSetItem::Perl(_) => {}
+                        ClassSetItem::Bracketed(ref mut x) => {
+                            stack.push(mem::replace(&mut x.kind, empty_set()));
+                        }
+                        ClassSetItem::Union(ref mut x) => {
+                            stack.extend(
+                                x.items.drain(..).map(ClassSet::Item));
+                        }
+                    }
+                }
+                ClassSet::BinaryOp(ref mut op) => {
+                    stack.push(mem::replace(&mut op.lhs, empty_set()));
+                    stack.push(mem::replace(&mut op.rhs, empty_set()));
+                }
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // We use a thread with an explicit stack size to test that our destructor
+    // for Ast can handle arbitrarily sized expressions in constant stack
+    // space. In case we run on a platform without threads (WASM?), we limit
+    // this test to Windows/Unix.
+    #[test]
+    #[cfg(any(unix, windows))]
+    fn no_stack_overflow_on_drop() {
+        use std::thread;
+
+        let run = || {
+            let span = || Span::splat(Position::new(0, 0, 0));
+            let mut ast = Ast::Empty(span());
+            for i in 0..200 {
+                ast = Ast::Group(Group {
+                    span: span(),
+                    kind: GroupKind::CaptureIndex(i),
+                    ast: Box::new(ast),
+                });
+            }
+            assert!(!ast.is_empty());
+        };
+
+        // We run our test on a thread with a small stack size so we can
+        // force the issue more easily.
+        thread::Builder::new()
+            .stack_size(1<<10)
+            .spawn(run)
+            .unwrap()
+            .join()
+            .unwrap();
+    }
+}
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-syntax/src/ast/parse.rs
@@ -0,0 +1,5365 @@
+// Copyright 2018 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+/*!
+This module provides a regular expression parser.
+*/
+
+use std::borrow::Borrow;
+use std::cell::{Cell, RefCell};
+use std::mem;
+use std::result;
+
+use ast::{self, Ast, Position, Span};
+use either::Either;
+
+use is_meta_character;
+
+type Result<T> = result::Result<T, ast::Error>;
+
+/// A primitive is an expression with no sub-expressions. This includes
+/// literals, assertions and non-set character classes. This representation
+/// is used as intermediate state in the parser.
+///
+/// This does not include ASCII character classes, since they can only appear
+/// within a set character class.
+#[derive(Clone, Debug, Eq, PartialEq)]
+enum Primitive {
+    Literal(ast::Literal),
+    Assertion(ast::Assertion),
+    Dot(Span),
+    Perl(ast::ClassPerl),
+    Unicode(ast::ClassUnicode),
+}
+
+impl Primitive {
+    /// Return the span of this primitive.
+    fn span(&self) -> &Span {
+        match *self {
+            Primitive::Literal(ref x) => &x.span,
+            Primitive::Assertion(ref x) => &x.span,
+            Primitive::Dot(ref span) => span,
+            Primitive::Perl(ref x) => &x.span,
+            Primitive::Unicode(ref x) => &x.span,
+        }
+    }
+
+    /// Convert this primitive into a proper AST.
+    fn into_ast(self) -> Ast {
+        match self {
+            Primitive::Literal(lit) => Ast::Literal(lit),
+            Primitive::Assertion(assert) => Ast::Assertion(assert),
+            Primitive::Dot(span) => Ast::Dot(span),
+            Primitive::Perl(cls) => Ast::Class(ast::Class::Perl(cls)),
+            Primitive::Unicode(cls) => Ast::Class(ast::Class::Unicode(cls)),
+        }
+    }
+
+    /// Convert this primitive into an item in a character class.
+    ///
+    /// If this primitive is not a legal item (i.e., an assertion or a dot),
+    /// then return an error.
+    fn into_class_set_item<P: Borrow<Parser>>(
+        self,
+        p: &ParserI<P>,
+    ) -> Result<ast::ClassSetItem> {
+        use ast::ClassSetItem;
+        use self::Primitive::*;
+
+        match self {
+            Literal(lit) => Ok(ClassSetItem::Literal(lit)),
+            Perl(cls) => Ok(ClassSetItem::Perl(cls)),
+            Unicode(cls) => Ok(ClassSetItem::Unicode(cls)),
+            x => Err(p.error(*x.span(), ast::ErrorKind::ClassEscapeInvalid)),
+        }
+    }
+
+    /// Convert this primitive into a literal in a character class. In
+    /// particular, literals are the only valid items that can appear in
+    /// ranges.
+    ///
+    /// If this primitive is not a legal item (i.e., a class, assertion or a
+    /// dot), then return an error.
+    fn into_class_literal<P: Borrow<Parser>>(
+        self,
+        p: &ParserI<P>,
+    ) -> Result<ast::Literal> {
+        use self::Primitive::*;
+
+        match self {
+            Literal(lit) => Ok(lit),
+            x => Err(p.error(*x.span(), ast::ErrorKind::ClassRangeLiteral)),
+        }
+    }
+}
+
+/// Returns true if the given character is a hexadecimal digit.
+fn is_hex(c: char) -> bool {
+    ('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F')
+}
+
+/// Returns true if the given character is valid in a capture group name.
+///
+/// If `first` is true, then `c` is treated as the first character in the
+/// group name (which is not allowed to be a digit).
+fn is_capture_char(c: char, first: bool) -> bool {
+    c == '_' || (!first && c >= '0' && c <= '9')
+    || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
+}
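+
+// A short sketch of the two predicates above:
+//
+//     assert!(is_hex('7') && is_hex('a') && is_hex('F'));
+//     assert!(!is_hex('g'));
+//     assert!(is_capture_char('_', true));
+//     assert!(!is_capture_char('5', true)); // a digit cannot start a name
+//     assert!(is_capture_char('5', false));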
+
+/// A builder for a regular expression parser.
+///
+/// This builder permits modifying configuration options for the parser.
+#[derive(Clone, Debug)]
+pub struct ParserBuilder {
+    ignore_whitespace: bool,
+    nest_limit: u32,
+    octal: bool,
+}
+
+impl Default for ParserBuilder {
+    fn default() -> ParserBuilder {
+        ParserBuilder::new()
+    }
+}
+
+impl ParserBuilder {
+    /// Create a new parser builder with a default configuration.
+    pub fn new() -> ParserBuilder {
+        ParserBuilder {
+            ignore_whitespace: false,
+            nest_limit: 250,
+            octal: false,
+        }
+    }
+
+    /// Build a parser from this configuration.
+    ///
+    /// The pattern to parse is supplied later, via `Parser::parse` or
+    /// `Parser::parse_with_comments`.
+    pub fn build(&self) -> Parser {
+        Parser {
+            pos: Cell::new(Position { offset: 0, line: 1, column: 1 }),
+            capture_index: Cell::new(0),
+            nest_limit: self.nest_limit,
+            octal: self.octal,
+            initial_ignore_whitespace: self.ignore_whitespace,
+            ignore_whitespace: Cell::new(self.ignore_whitespace),
+            comments: RefCell::new(vec![]),
+            stack_group: RefCell::new(vec![]),
+            stack_class: RefCell::new(vec![]),
+            capture_names: RefCell::new(vec![]),
+            scratch: RefCell::new(String::new()),
+        }
+    }
+
+    /// Set the nesting limit for this parser.
+    ///
+    /// The nesting limit controls how deep the abstract syntax tree is allowed
+    /// to be. If the AST exceeds the given limit (e.g., with too many nested
+    /// groups), then an error is returned by the parser.
+    ///
+    /// The purpose of this limit is to act as a heuristic to prevent stack
+    /// overflow for consumers that do structural induction on an `Ast` using
+    /// explicit recursion. While this crate never does this (instead using
+    /// constant stack space and moving the call stack to the heap), other
+    /// crates may.
+    ///
+    /// This limit is not checked until the entire Ast is parsed. Therefore,
+    /// if callers want to put a limit on the amount of heap space used, then
+    /// they should impose a limit on the length, in bytes, of the concrete
+    /// pattern string. In particular, this is viable since this parser
+    /// implementation will limit itself to heap space proportional to the
+    /// length of the pattern string.
+    ///
+    /// Note that a nest limit of `0` will return a nest limit error for most
+    /// patterns but not all. For example, a nest limit of `0` permits `a` but
+    /// not `ab`, since `ab` requires a concatenation, which results in a nest
+    /// depth of `1`. In general, a nest limit is not something that manifests
+    /// in an obvious way in the concrete syntax; therefore, it should not be
+    /// used in a granular way.
+    pub fn nest_limit(&mut self, limit: u32) -> &mut ParserBuilder {
+        self.nest_limit = limit;
+        self
+    }
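+
+    // A small sketch of the limit described above, relying on the documented
+    // behavior that a nest limit of `0` permits `a` but not `ab`:
+    //
+    //     let mut builder = ParserBuilder::new();
+    //     builder.nest_limit(0);
+    //     assert!(builder.build().parse("a").is_ok());
+    //     assert!(builder.build().parse("ab").is_err());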
+
+    /// Whether to support octal syntax or not.
+    ///
+    /// Octal syntax is a little-known way of uttering Unicode codepoints in
+    /// a regular expression. For example, `a`, `\x61`, `\u0061` and
+    /// `\141` are all equivalent regular expressions, where the last example
+    /// shows octal syntax.
+    ///
+    /// While supporting octal syntax isn't in and of itself a problem, it does
+    /// make good error messages harder. That is, in PCRE based regex engines,
+    /// syntax like `\0` invokes a backreference, which is explicitly
+    /// unsupported in Rust's regex engine. However, many users expect it to
+    /// be supported. Therefore, when octal support is disabled, the error
+    /// message will explicitly mention that backreferences aren't supported.
+    ///
+    /// Octal syntax is disabled by default.
+    pub fn octal(&mut self, yes: bool) -> &mut ParserBuilder {
+        self.octal = yes;
+        self
+    }
+
+    /// Enable verbose mode in the regular expression.
+    ///
+    /// When enabled, verbose mode permits insignificant whitespace in many
+    /// places in the regular expression, as well as comments. Comments are
+    /// started using `#` and continue until the end of the line.
+    ///
+    /// By default, this is disabled. It may be selectively enabled in the
+    /// regular expression by using the `x` flag regardless of this setting.
+    pub fn ignore_whitespace(&mut self, yes: bool) -> &mut ParserBuilder {
+        self.ignore_whitespace = yes;
+        self
+    }
+}
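+
+// A minimal configuration sketch. The comment text asserted below follows
+// from `bump_space` (further down in this file) collecting comments whenever
+// whitespace insensitivity is enabled:
+//
+//     let mut parser = ParserBuilder::new()
+//         .nest_limit(100)
+//         .octal(false)
+//         .ignore_whitespace(true)
+//         .build();
+//     let astc = parser.parse_with_comments("a # letter a\nb").unwrap();
+//     assert_eq!(1, astc.comments.len());
+//     assert_eq!(" letter a", astc.comments[0].comment);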
+
+/// A regular expression parser.
+///
+/// This parses a string representation of a regular expression into an
+/// abstract syntax tree. The size of the tree is proportional to the length
+/// of the regular expression pattern.
+///
+/// A `Parser` can be configured in more detail via a
+/// [`ParserBuilder`](struct.ParserBuilder.html).
+#[derive(Clone, Debug)]
+pub struct Parser {
+    /// The current position of the parser.
+    pos: Cell<Position>,
+    /// The current capture index.
+    capture_index: Cell<u32>,
+    /// The maximum number of open parens/brackets allowed. If the parser
+    /// exceeds this number, then an error is returned.
+    nest_limit: u32,
+    /// Whether to support octal syntax or not. When `false`, the parser will
+    /// return an error helpfully pointing out that backreferences are not
+    /// supported.
+    octal: bool,
+    /// The initial setting for `ignore_whitespace` as provided by
+    /// the `ParserBuilder`. This is used when resetting the parser's state.
+    initial_ignore_whitespace: bool,
+    /// Whether whitespace should be ignored. When enabled, comments are
+    /// also permitted.
+    ignore_whitespace: Cell<bool>,
+    /// A list of comments, in order of appearance.
+    comments: RefCell<Vec<ast::Comment>>,
+    /// A stack of grouped sub-expressions, including alternations.
+    stack_group: RefCell<Vec<GroupState>>,
+    /// A stack of nested character classes. This is only non-empty when
+    /// parsing a class.
+    stack_class: RefCell<Vec<ClassState>>,
+    /// A sorted sequence of capture names. This is used to detect duplicate
+    /// capture names and report an error if one is detected.
+    capture_names: RefCell<Vec<ast::CaptureName>>,
+    /// A scratch buffer used in various places. Mostly this is used to
+    /// accumulate relevant characters from parts of a pattern.
+    scratch: RefCell<String>,
+}
+
+/// ParserI is the internal parser implementation.
+///
+/// We use this separate type so that we can carry the provided pattern string
+/// along with us. In particular, a `Parser`'s internal state is not tied to any
+/// one pattern, but `ParserI` is.
+///
+/// This type also lets us use `ParserI<&Parser>` in production code while
+/// retaining the convenience of `ParserI<Parser>` for tests, which sometimes
+/// work against the internal interface of the parser.
+#[derive(Clone, Debug)]
+struct ParserI<'s, P> {
+    /// The parser state/configuration.
+    parser: P,
+    /// The full regular expression provided by the user.
+    pattern: &'s str,
+}
+
+/// GroupState represents a single stack frame while parsing nested groups
+/// and alternations. Each frame records the state up to an opening parenthesis
+/// or an alternation `|`.
+#[derive(Clone, Debug)]
+enum GroupState {
+    /// This state is pushed whenever an opening group is found.
+    Group {
+        /// The concatenation immediately preceding the opening group.
+        concat: ast::Concat,
+        /// The group that has been opened. Its sub-AST is always empty.
+        group: ast::Group,
+        /// Whether this group has the `x` flag enabled or not.
+        ignore_whitespace: bool,
+    },
+    /// This state is pushed whenever a new alternation branch is found. If
+    /// an alternation branch is found and this state is at the top of the
+    /// stack, then this state should be modified to include the new
+    /// alternation.
+    Alternation(ast::Alternation),
+}
+
+/// ClassState represents a single stack frame while parsing character classes.
+/// Each frame records the state up to an intersection, difference, symmetric
+/// difference or nested class.
+///
+/// Note that a parser's character class stack is only non-empty when parsing
+/// a character class. In all other cases, it is empty.
+#[derive(Clone, Debug)]
+enum ClassState {
+    /// This state is pushed whenever an opening bracket is found.
+    Open {
+        /// The union of class items immediately preceding this class.
+        union: ast::ClassSetUnion,
+        /// The class that has been opened. Typically this just corresponds
+        /// to the `[`, but it can also include `[^` since `^` indicates
+        /// negation of the class.
+        set: ast::ClassBracketed,
+    },
+    /// This state is pushed when an operator is seen. When popped, the stored
+    /// set becomes the left hand side of the operator.
+    Op {
+        /// The type of the operation, i.e., &&, -- or ~~.
+        kind: ast::ClassSetBinaryOpKind,
+        /// The left-hand side of the operator.
+        lhs: ast::ClassSet,
+    },
+}
+
+impl Parser {
+    /// Create a new parser with a default configuration.
+    ///
+    /// The parser can be run with either the `parse` or `parse_with_comments`
+    /// methods. The parse methods return an abstract syntax tree.
+    ///
+    /// To set configuration options on the parser, use
+    /// [`ParserBuilder`](struct.ParserBuilder.html).
+    pub fn new() -> Parser {
+        ParserBuilder::new().build()
+    }
+
+    /// Parse the regular expression into an abstract syntax tree.
+    pub fn parse(&mut self, pattern: &str) -> Result<Ast> {
+        ParserI::new(self, pattern).parse()
+    }
+
+    /// Parse the regular expression and return an abstract syntax tree with
+    /// all of the comments found in the pattern.
+    pub fn parse_with_comments(
+        &mut self,
+        pattern: &str,
+    ) -> Result<ast::WithComments> {
+        ParserI::new(self, pattern).parse_with_comments()
+    }
+
+    /// Reset the internal state of a parser.
+    ///
+    /// This is called at the beginning of every parse. This prevents the
+    /// parser from running with inconsistent state (say, if a previous
+    /// invocation returned an error and the parser is reused).
+    fn reset(&self) {
+        // These settings should be in line with the construction
+        // in `ParserBuilder::build`.
+        self.pos.set(Position { offset: 0, line: 1, column: 1});
+        self.ignore_whitespace.set(self.initial_ignore_whitespace);
+        self.comments.borrow_mut().clear();
+        self.stack_group.borrow_mut().clear();
+        self.stack_class.borrow_mut().clear();
+    }
+}
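+
+// A small sketch of the public entry points (a fresh `Parser` per pattern):
+//
+//     assert!(Parser::new().parse("a|b").is_ok());
+//
+//     let err = Parser::new().parse("(a").unwrap_err();
+//     match err.kind {
+//         ast::ErrorKind::GroupUnclosed => {}
+//         _ => panic!("expected a GroupUnclosed error"),
+//     }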
+
+impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
+    /// Build an internal parser from a parser configuration and a pattern.
+    fn new(parser: P, pattern: &'s str) -> ParserI<'s, P> {
+        ParserI { parser: parser, pattern: pattern }
+    }
+
+    /// Return a reference to the parser state.
+    fn parser(&self) -> &Parser {
+        self.parser.borrow()
+    }
+
+    /// Return a reference to the pattern being parsed.
+    fn pattern(&self) -> &str {
+        self.pattern.borrow()
+    }
+
+    /// Create a new error with the given span and error type.
+    fn error(&self, span: Span, kind: ast::ErrorKind) -> ast::Error {
+        ast::Error {
+            kind: kind,
+            pattern: self.pattern().to_string(),
+            span: span,
+        }
+    }
+
+    /// Return the current offset of the parser.
+    ///
+    /// The offset starts at `0` from the beginning of the regular expression
+    /// pattern string.
+    fn offset(&self) -> usize {
+        self.parser().pos.get().offset
+    }
+
+    /// Return the current line number of the parser.
+    ///
+    /// The line number starts at `1`.
+    fn line(&self) -> usize {
+        self.parser().pos.get().line
+    }
+
+    /// Return the current column of the parser.
+    ///
+    /// The column number starts at `1` and is reset whenever a `\n` is seen.
+    fn column(&self) -> usize {
+        self.parser().pos.get().column
+    }
+
+    /// Return the next capturing index. Each subsequent call increments the
+    /// internal index.
+    ///
+    /// The span given should correspond to the location of the opening
+    /// parenthesis.
+    ///
+    /// If the capture limit is exceeded, then an error is returned.
+    fn next_capture_index(&self, span: Span) -> Result<u32> {
+        let current = self.parser().capture_index.get();
+        let i = current.checked_add(1).ok_or_else(|| {
+            self.error(span, ast::ErrorKind::CaptureLimitExceeded)
+        })?;
+        self.parser().capture_index.set(i);
+        Ok(i)
+    }
+
+    /// Adds the given capture name to this parser. If this capture name has
+    /// already been used, then an error is returned.
+    fn add_capture_name(&self, cap: &ast::CaptureName) -> Result<()> {
+        let mut names = self.parser().capture_names.borrow_mut();
+        match names.binary_search_by_key(
+            &cap.name.as_str(),
+            |c| c.name.as_str(),
+        ) {
+            Err(i) => {
+                names.insert(i, cap.clone());
+                Ok(())
+            }
+            Ok(i) => {
+                Err(self.error(cap.span, ast::ErrorKind::GroupNameDuplicate {
+                    original: names[i].span,
+                }))
+            }
+        }
+    }
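+
+    // A brief sketch of the duplicate-name detection above, driven through
+    // the public parser:
+    //
+    //     let err = Parser::new().parse("(?P<x>a)(?P<x>b)").unwrap_err();
+    //     match err.kind {
+    //         ast::ErrorKind::GroupNameDuplicate { .. } => {}
+    //         _ => panic!("expected a GroupNameDuplicate error"),
+    //     }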
+
+    /// Return whether the parser should ignore whitespace or not.
+    fn ignore_whitespace(&self) -> bool {
+        self.parser().ignore_whitespace.get()
+    }
+
+    /// Return the character at the current position of the parser.
+    ///
+    /// This panics if the current position does not point to a valid char.
+    fn char(&self) -> char {
+        self.char_at(self.offset())
+    }
+
+    /// Return the character at the given position.
+    ///
+    /// This panics if the given position does not point to a valid char.
+    fn char_at(&self, i: usize) -> char {
+        self.pattern()[i..].chars().next()
+            .unwrap_or_else(|| {
+                panic!("expected char at offset {}", i)
+            })
+    }
+
+    /// Bump the parser to the next Unicode scalar value.
+    ///
+    /// If the end of the input has been reached, then `false` is returned.
+    fn bump(&self) -> bool {
+        if self.is_eof() {
+            return false;
+        }
+        let Position { mut offset, mut line, mut column } = self.pos();
+        if self.char() == '\n' {
+            line = line.checked_add(1).unwrap();
+            column = 1;
+        } else {
+            column = column.checked_add(1).unwrap();
+        }
+        offset += self.char().len_utf8();
+        self.parser().pos.set(Position {
+            offset: offset,
+            line: line,
+            column: column,
+        });
+        self.pattern()[self.offset()..].chars().next().is_some()
+    }
+
+    /// If the substring starting at the current position of the parser has
+    /// the given prefix, then bump the parser to the character immediately
+    /// following the prefix and return true. Otherwise, don't bump the parser
+    /// and return false.
+    fn bump_if(&self, prefix: &str) -> bool {
+        if self.pattern()[self.offset()..].starts_with(prefix) {
+            for _ in 0..prefix.chars().count() {
+                self.bump();
+            }
+            true
+        } else {
+            false
+        }
+    }
+
+    /// Returns true if and only if the parser is positioned at a look-around
+    /// prefix. The conditions under which this returns true must always
+    /// correspond to a regular expression that would otherwise be considered
+    /// invalid.
+    ///
+    /// This should only be called immediately after parsing the opening of
+    /// a group or a set of flags.
+    fn is_lookaround_prefix(&self) -> bool {
+        self.bump_if("?=")
+        || self.bump_if("?!")
+        || self.bump_if("?<=")
+        || self.bump_if("?<!")
+    }
+
+    /// Bump the parser, and if the `x` flag is enabled, bump through any
+    /// subsequent spaces. Return true if and only if the parser is not at
+    /// EOF.
+    fn bump_and_bump_space(&self) -> bool {
+        if !self.bump() {
+            return false;
+        }
+        self.bump_space();
+        !self.is_eof()
+    }
+
+    /// If the `x` flag is enabled (i.e., whitespace insensitivity with
+    /// comments), then this will advance the parser through all whitespace
+    /// and comments to the next non-whitespace non-comment byte.
+    ///
+    /// If the `x` flag is disabled, then this is a no-op.
+    ///
+    /// This should be used selectively throughout the parser where
+    /// arbitrary whitespace is permitted when the `x` flag is enabled. For
+    /// example, `{   5  , 6}` is equivalent to `{5,6}`.
+    fn bump_space(&self) {
+        if !self.ignore_whitespace() {
+            return;
+        }
+        while !self.is_eof() {
+            if self.char().is_whitespace() {
+                self.bump();
+            } else if self.char() == '#' {
+                let start = self.pos();
+                let mut comment_text = String::new();
+                self.bump();
+                while !self.is_eof() {
+                    let c = self.char();
+                    self.bump();
+                    if c == '\n' {
+                        break;
+                    }
+                    comment_text.push(c);
+                }
+                let comment = ast::Comment {
+                    span: Span::new(start, self.pos()),
+                    comment: comment_text,
+                };
+                self.parser().comments.borrow_mut().push(comment);
+            } else {
+                break;
+            }
+        }
+    }
+
+    /// Peek at the next character in the input without advancing the parser.
+    ///
+    /// If the input has been exhausted, then this returns `None`.
+    fn peek(&self) -> Option<char> {
+        if self.is_eof() {
+            return None;
+        }
+        self.pattern()[self.offset() + self.char().len_utf8()..].chars().next()
+    }
+
+    /// Like peek, but will ignore spaces when the parser is in whitespace
+    /// insensitive mode.
+    fn peek_space(&self) -> Option<char> {
+        if !self.ignore_whitespace() {
+            return self.peek();
+        }
+        if self.is_eof() {
+            return None;
+        }
+        let mut start = self.offset() + self.char().len_utf8();
+        let mut in_comment = false;
+        for (i, c) in self.pattern()[start..].char_indices() {
+            if c.is_whitespace() {
+                continue;
+            } else if !in_comment && c == '#' {
+                in_comment = true;
+            } else if in_comment && c == '\n' {
+                in_comment = false;
+            } else {
+                start += i;
+                break;
+            }
+        }
+        self.pattern()[start..].chars().next()
+    }
+
+    /// Returns true if the next call to `bump` would return false.
+    fn is_eof(&self) -> bool {
+        self.offset() == self.pattern().len()
+    }
+
+    /// Return the current position of the parser, which includes the offset,
+    /// line and column.
+    fn pos(&self) -> Position {
+        self.parser().pos.get()
+    }
+
+    /// Create a span at the current position of the parser. Both the start
+    /// and end of the span are set.
+    fn span(&self) -> Span {
+        Span::splat(self.pos())
+    }
+
+    /// Create a span that covers the current character.
+    fn span_char(&self) -> Span {
+        let mut next = Position {
+            offset: self.offset().checked_add(self.char().len_utf8()).unwrap(),
+            line: self.line(),
+            column: self.column().checked_add(1).unwrap(),
+        };
+        if self.char() == '\n' {
+            next.line += 1;
+            next.column = 1;
+        }
+        Span::new(self.pos(), next)
+    }
+
+    /// Parse and push a single alternation on to the parser's internal stack.
+    /// If the top of the stack already has an alternation, then add to that
+    /// instead of pushing a new one.
+    ///
+    /// The concatenation given corresponds to a single alternation branch.
+    /// The concatenation returned starts the next branch and is empty.
+    ///
+    /// This assumes the parser is currently positioned at `|` and will advance
+    /// the parser to the character following `|`.
+    fn push_alternate(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
+        assert_eq!(self.char(), '|');
+        concat.span.end = self.pos();
+        self.push_or_add_alternation(concat);
+        self.bump();
+        Ok(ast::Concat {
+            span: self.span(),
+            asts: vec![],
+        })
+    }
+
+    /// Pushes or adds the given branch of an alternation to the parser's
+    /// internal stack of state.
+    fn push_or_add_alternation(&self, concat: ast::Concat) {
+        use self::GroupState::*;
+
+        let mut stack = self.parser().stack_group.borrow_mut();
+        if let Some(&mut Alternation(ref mut alts)) = stack.last_mut() {
+            alts.asts.push(concat.into_ast());
+            return;
+        }
+        stack.push(Alternation(ast::Alternation {
+            span: Span::new(concat.span.start, self.pos()),
+            asts: vec![concat.into_ast()],
+        }));
+    }
+
+    /// Parse and push a group AST (and its parent concatenation) on to the
+    /// parser's internal stack. Return a fresh concatenation corresponding
+    /// to the group's sub-AST.
+    ///
+    /// If a set of flags was found (with no group), then the concatenation
+    /// is returned with that set of flags added.
+    ///
+    /// This assumes that the parser is currently positioned on the opening
+    /// parenthesis. It advances the parser to the character at the start
+    /// of the sub-expression (or adjoining expression).
+    ///
+    /// If there was a problem parsing the start of the group, then an error
+    /// is returned.
+    fn push_group(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
+        assert_eq!(self.char(), '(');
+        match self.parse_group()? {
+            Either::Left(set) => {
+                let ignore = set.flags.flag_state(ast::Flag::IgnoreWhitespace);
+                if let Some(v) = ignore {
+                    self.parser().ignore_whitespace.set(v);
+                }
+
+                concat.asts.push(Ast::Flags(set));
+                Ok(concat)
+            }
+            Either::Right(group) => {
+                let old_ignore_whitespace = self.ignore_whitespace();
+                let new_ignore_whitespace = group
+                    .flags()
+                    .and_then(|f| f.flag_state(ast::Flag::IgnoreWhitespace))
+                    .unwrap_or(old_ignore_whitespace);
+                self.parser().stack_group.borrow_mut().push(GroupState::Group {
+                    concat: concat,
+                    group: group,
+                    ignore_whitespace: old_ignore_whitespace,
+                });
+                self.parser().ignore_whitespace.set(new_ignore_whitespace);
+                Ok(ast::Concat {
+                    span: self.span(),
+                    asts: vec![],
+                })
+            }
+        }
+    }
+
+    /// Pop a group AST from the parser's internal stack and set the group's
+    /// AST to the given concatenation. Return the concatenation containing
+    /// the group.
+    ///
+    /// This assumes that the parser is currently positioned on the closing
+    /// parenthesis and advances the parser to the character following the `)`.
+    ///
+    /// If no such group could be popped, then an unopened group error is
+    /// returned.
+    fn pop_group(&self, mut group_concat: ast::Concat) -> Result<ast::Concat> {
+        use self::GroupState::*;
+
+        assert_eq!(self.char(), ')');
+        let mut stack = self.parser().stack_group.borrow_mut();
+        let (mut prior_concat, mut group, ignore_whitespace, alt) =
+            match stack.pop() {
+                Some(Group { concat, group, ignore_whitespace }) => {
+                    (concat, group, ignore_whitespace, None)
+                }
+                Some(Alternation(alt)) => {
+                    match stack.pop() {
+                        Some(Group { concat, group, ignore_whitespace }) => {
+                            (concat, group, ignore_whitespace, Some(alt))
+                        }
+                        None | Some(Alternation(_)) => {
+                            return Err(self.error(
+                                self.span_char(),
+                                ast::ErrorKind::GroupUnopened,
+                            ));
+                        }
+                    }
+                }
+                None => {
+                    return Err(self.error(
+                        self.span_char(),
+                        ast::ErrorKind::GroupUnopened,
+                    ));
+                }
+            };
+        self.parser().ignore_whitespace.set(ignore_whitespace);
+        group_concat.span.end = self.pos();
+        self.bump();
+        group.span.end = self.pos();
+        match alt {
+            Some(mut alt) => {
+                alt.span.end = group_concat.span.end;
+                alt.asts.push(group_concat.into_ast());
+                group.ast = Box::new(alt.into_ast());
+            }
+            None => {
+                group.ast = Box::new(group_concat.into_ast());
+            }
+        }
+        prior_concat.asts.push(Ast::Group(group));
+        Ok(prior_concat)
+    }
+
+    /// Pop the last state from the parser's internal stack, if it exists, and
+    /// add the given concatenation to it. There either must be no state or a
+    /// single alternation item on the stack. Any other scenario produces an
+    /// error.
+    ///
+    /// This assumes that the parser has advanced to the end.
+    fn pop_group_end(&self, mut concat: ast::Concat) -> Result<Ast> {
+        concat.span.end = self.pos();
+        let mut stack = self.parser().stack_group.borrow_mut();
+        let ast = match stack.pop() {
+            None => Ok(concat.into_ast()),
+            Some(GroupState::Alternation(mut alt)) => {
+                alt.span.end = self.pos();
+                alt.asts.push(concat.into_ast());
+                Ok(Ast::Alternation(alt))
+            }
+            Some(GroupState::Group { group, .. }) => {
+                return Err(self.error(
+                    group.span,
+                    ast::ErrorKind::GroupUnclosed,
+                ));
+            }
+        };
+        // If we try to pop again, there should be nothing.
+        match stack.pop() {
+            None => ast,
+            Some(GroupState::Alternation(_)) => {
+                // This unreachable is unfortunate. This case can't happen
+                // because the only way we can be here is if there were two
+                // `GroupState::Alternation`s adjacent in the parser's stack,
+                // which we guarantee to never happen because we never push a
+                // `GroupState::Alternation` if one is already at the top of
+                // the stack.
+                unreachable!()
+            }
+            Some(GroupState::Group { group, .. }) => {
+                Err(self.error(group.span, ast::ErrorKind::GroupUnclosed))
+            }
+        }
+    }
+
+    /// Parse the opening of a character class and push the current class
+    /// parsing context onto the parser's stack. This assumes that the parser
+    /// is positioned at an opening `[`. The given union should correspond to
+    /// the union of set items built up before seeing the `[`.
+    ///
+    /// If there was a problem parsing the opening of the class, then an error
+    /// is returned. Otherwise, a new union of set items for the class is
+    /// returned (which may be populated with either a `]` or a `-`).
+    fn push_class_open(
+        &self,
+        parent_union: ast::ClassSetUnion,
+    ) -> Result<ast::ClassSetUnion> {
+        assert_eq!(self.char(), '[');
+
+        let (nested_set, nested_union) = self.parse_set_class_open()?;
+        self.parser().stack_class.borrow_mut().push(ClassState::Open {
+            union: parent_union,
+            set: nested_set,
+        });
+        Ok(nested_union)
+    }
+
+    /// Parse the end of a character class set and pop the character class
+    /// parser stack. The union given corresponds to the last union built
+    /// before seeing the closing `]`. The union returned corresponds to the
+    /// parent character class set with the nested class added to it.
+    ///
+    /// This assumes that the parser is positioned at a `]` and will advance
+    /// the parser to the byte immediately following the `]`.
+    ///
+    /// If the stack is empty after popping, then this returns the final
+    /// "top-level" character class AST (where a "top-level" character class
+    /// is one that is not nested inside any other character class).
+    ///
+    /// If there is no corresponding opening bracket on the parser's stack,
+    /// then an error is returned.
+    fn pop_class(
+        &self,
+        nested_union: ast::ClassSetUnion,
+    ) -> Result<Either<ast::ClassSetUnion, ast::Class>> {
+        assert_eq!(self.char(), ']');
+
+        let item = ast::ClassSet::Item(nested_union.into_item());
+        let prevset = self.pop_class_op(item);
+        let mut stack = self.parser().stack_class.borrow_mut();
+        match stack.pop() {
+            None => {
+                // We can never observe an empty stack:
+                //
+                // 1) We are guaranteed to start with a non-empty stack since
+                //    the character class parser is only initiated when it sees
+                //    a `[`.
+                // 2) If we ever observe an empty stack while popping after
+                //    seeing a `]`, then we signal the character class parser
+                //    to terminate.
+                panic!("unexpected empty character class stack")
+            },
+            Some(ClassState::Op { .. }) => {
+                // This panic is unfortunate, but this case is impossible
+                // since we already popped the Op state if one exists above.
+                // Namely, every push to the class parser stack is guarded by
+                // whether an existing Op is already on the top of the stack.
+                // If it is, the existing Op is modified. That is, the stack
+                // can never have consecutive Op states.
+                panic!("unexpected ClassState::Op")
+            }
+            Some(ClassState::Open { mut union, mut set }) => {
+                self.bump();
+                set.span.end = self.pos();
+                set.kind = prevset;
+                if stack.is_empty() {
+                    Ok(Either::Right(ast::Class::Bracketed(set)))
+                } else {
+                    union.push(ast::ClassSetItem::Bracketed(Box::new(set)));
+                    Ok(Either::Left(union))
+                }
+            }
+        }
+    }
+
+    /// Return an "unclosed class" error whose span points to the most
+    /// recently opened class.
+    ///
+    /// This should only be called while parsing a character class.
+    fn unclosed_class_error(&self) -> ast::Error {
+        for state in self.parser().stack_class.borrow().iter().rev() {
+            match *state {
+                ClassState::Open { ref set, .. } => {
+                    return self.error(set.span, ast::ErrorKind::ClassUnclosed);
+                }
+                _ => {}
+            }
+        }
+        // We are guaranteed to have a non-empty stack with at least
+        // one open bracket, so we should never get here.
+        panic!("no open character class found")
+    }
+
+    /// Push the current set of class items on to the class parser's stack as
+    /// the left hand side of the given operator.
+    ///
+    /// A fresh set union is returned, which should be used to build the right
+    /// hand side of this operator.
+    fn push_class_op(
+        &self,
+        next_kind: ast::ClassSetBinaryOpKind,
+        next_union: ast::ClassSetUnion,
+    ) -> ast::ClassSetUnion {
+
+        let item = ast::ClassSet::Item(next_union.into_item());
+        let new_lhs = self.pop_class_op(item);
+        self.parser().stack_class.borrow_mut().push(ClassState::Op {
+            kind: next_kind,
+            lhs: new_lhs,
+        });
+        ast::ClassSetUnion { span: self.span(), items: vec![] }
+    }
+
+    /// Pop a character class set from the character class parser stack. If the
+    /// top of the stack is just an item (not an operation), then return the
+    /// given set unchanged. If the top of the stack is an operation, then the
+    /// given set will be used as the rhs of the operation on the top of the
+    /// stack. In that case, the binary operation is returned as a set.
+    fn pop_class_op(&self, rhs: ast::ClassSet) -> ast::ClassSet {
+        let mut stack = self.parser().stack_class.borrow_mut();
+        let (kind, lhs) = match stack.pop() {
+            Some(ClassState::Op { kind, lhs }) => (kind, lhs),
+            Some(state @ ClassState::Open { .. }) => {
+                stack.push(state);
+                return rhs;
+            }
+            None => unreachable!(),
+        };
+        let span = Span::new(lhs.span().start, rhs.span().end);
+        ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
+            span: span,
+            kind: kind,
+            lhs: Box::new(lhs),
+            rhs: Box::new(rhs),
+        })
+    }
+}
+
+impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
+    /// Parse the regular expression into an abstract syntax tree.
+    fn parse(&self) -> Result<Ast> {
+        self.parse_with_comments().map(|astc| astc.ast)
+    }
+
+    /// Parse the regular expression and return an abstract syntax tree with
+    /// all of the comments found in the pattern.
+    fn parse_with_comments(&self) -> Result<ast::WithComments> {
+        assert_eq!(self.offset(), 0, "parser can only be used once");
+        self.parser().reset();
+        let mut concat = ast::Concat {
+            span: self.span(),
+            asts: vec![],
+        };
+        loop {
+            self.bump_space();
+            if self.is_eof() {
+                break;
+            }
+            match self.char() {
+                '(' => concat = self.push_group(concat)?,
+                ')' => concat = self.pop_group(concat)?,
+                '|' => concat = self.push_alternate(concat)?,
+                '[' => {
+                    let class = self.parse_set_class()?;
+                    concat.asts.push(Ast::Class(class));
+                }
+                '?' => {
+                    concat = self.parse_uncounted_repetition(
+                        concat, ast::RepetitionKind::ZeroOrOne)?;
+                }
+                '*' => {
+                    concat = self.parse_uncounted_repetition(
+                        concat, ast::RepetitionKind::ZeroOrMore)?;
+                }
+                '+' => {
+                    concat = self.parse_uncounted_repetition(
+                        concat, ast::RepetitionKind::OneOrMore)?;
+                }
+                '{' => {
+                    concat = self.parse_counted_repetition(concat)?;
+                }
+                _ => concat.asts.push(self.parse_primitive()?.into_ast()),
+            }
+        }
+        let ast = self.pop_group_end(concat)?;
+        NestLimiter::new(self).check(&ast)?;
+        Ok(ast::WithComments {
+            ast: ast,
+            comments: mem::replace(
+                &mut *self.parser().comments.borrow_mut(),
+                vec![],
+            ),
+        })
+    }
+
+    /// Parses an uncounted repetition operation. An uncounted repetition
+    /// operator includes ?, * and +, but does not include the {m,n} syntax.
+    /// The given `kind` should correspond to the operator observed by the
+    /// caller.
+    ///
+    /// This assumes that the parser is currently positioned at the repetition
+    /// operator and advances the parser to the first character after the
+    /// operator. (Note that the operator may include a single additional `?`,
+    /// which makes the operator ungreedy.)
+    ///
+    /// The caller should include the concatenation that is being built. The
+    /// concatenation returned includes the repetition operator applied to the
+    /// last expression in the given concatenation.
+    fn parse_uncounted_repetition(
+        &self,
+        mut concat: ast::Concat,
+        kind: ast::RepetitionKind,
+    ) -> Result<ast::Concat> {
+        assert!(
+            self.char() == '?' || self.char() == '*' || self.char() == '+');
+        let op_start = self.pos();
+        let ast = match concat.asts.pop() {
+            Some(ast) => ast,
+            None => return Err(self.error(
+                self.span(),
+                ast::ErrorKind::RepetitionMissing,
+            )),
+        };
+        match ast {
+            Ast::Empty(_) | Ast::Flags(_) => return Err(self.error(
+                self.span(),
+                ast::ErrorKind::RepetitionMissing,
+            )),
+            _ => {}
+        }
+        let mut greedy = true;
+        if self.bump() && self.char() == '?' {
+            greedy = false;
+            self.bump();
+        }
+        concat.asts.push(Ast::Repetition(ast::Repetition {
+            span: ast.span().with_end(self.pos()),
+            op: ast::RepetitionOp {
+                span: Span::new(op_start, self.pos()),
+                kind: kind,
+            },
+            greedy: greedy,
+            ast: Box::new(ast),
+        }));
+        Ok(concat)
+    }
+
+    /// Parses a counted repetition operation. A counted repetition operator
+    /// corresponds to the {m,n} syntax, and does not include the ?, * or +
+    /// operators.
+    ///
+    /// This assumes that the parser is currently positioned at the opening `{`
+    /// and advances the parser to the first character after the operator.
+    /// (Note that the operator may include a single additional `?`, which
+    /// makes the operator ungreedy.)
+    ///
+    /// The caller should include the concatenation that is being built. The
+    /// concatenation returned includes the repetition operator applied to the
+    /// last expression in the given concatenation.
+    fn parse_counted_repetition(
+        &self,
+        mut concat: ast::Concat,
+    ) -> Result<ast::Concat> {
+        assert!(self.char() == '{');
+        let start = self.pos();
+        let ast = match concat.asts.pop() {
+            Some(ast) => ast,
+            None => return Err(self.error(
+                self.span(),
+                ast::ErrorKind::RepetitionMissing,
+            )),
+        };
+        if !self.bump_and_bump_space() {
+            return Err(self.error(
+                Span::new(start, self.pos()),
+                ast::ErrorKind::RepetitionCountUnclosed,
+            ));
+        }
+        let count_start = self.parse_decimal()?;
+        let mut range = ast::RepetitionRange::Exactly(count_start);
+        if self.is_eof() {
+            return Err(self.error(
+                Span::new(start, self.pos()),
+                ast::ErrorKind::RepetitionCountUnclosed,
+            ));
+        }
+        if self.char() == ',' {
+            if !self.bump_and_bump_space() {
+                return Err(self.error(
+                    Span::new(start, self.pos()),
+                    ast::ErrorKind::RepetitionCountUnclosed,
+                ));
+            }
+            if self.char() != '}' {
+                let count_end = self.parse_decimal()?;
+                range = ast::RepetitionRange::Bounded(count_start, count_end);
+            } else {
+                range = ast::RepetitionRange::AtLeast(count_start);
+            }
+        }
+        if self.is_eof() || self.char() != '}' {
+            return Err(self.error(
+                Span::new(start, self.pos()),
+                ast::ErrorKind::RepetitionCountUnclosed,
+            ));
+        }
+
+        let mut greedy = true;
+        if self.bump_and_bump_space() && self.char() == '?' {
+            greedy = false;
+            self.bump();
+        }
+
+        let op_span = Span::new(start, self.pos());
+        if !range.is_valid() {
+            return Err(self.error(
+                op_span,
+                ast::ErrorKind::RepetitionCountInvalid,
+            ));
+        }
+        concat.asts.push(Ast::Repetition(ast::Repetition {
+            span: ast.span().with_end(self.pos()),
+            op: ast::RepetitionOp {
+                span: op_span,
+                kind: ast::RepetitionKind::Range(range),
+            },
+            greedy: greedy,
+            ast: Box::new(ast),
+        }));
+        Ok(concat)
+    }
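+
+    // A sketch of the counted form, assuming (as elsewhere in this crate)
+    // that a single-element concatenation unwraps to its only element:
+    //
+    //     let ast = Parser::new().parse("a{3,}?").unwrap();
+    //     match ast {
+    //         Ast::Repetition(ref rep) => {
+    //             assert!(!rep.greedy);
+    //             match rep.op.kind {
+    //                 ast::RepetitionKind::Range(
+    //                     ast::RepetitionRange::AtLeast(3)) => {}
+    //                 _ => panic!("expected an at-least-3 repetition"),
+    //             }
+    //         }
+    //         _ => panic!("expected a repetition"),
+    //     }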
+
+    /// Parse a group (which contains a sub-expression) or a set of flags.
+    ///
+    /// If a group was found, then it is returned with an empty AST. If a set
+    /// of flags is found, then that set is returned.
+    ///
+    /// The parser should be positioned at the opening parenthesis.
+    ///
+    /// This advances the parser to the character before the start of the
+    /// sub-expression (in the case of a group) or to the closing parenthesis
+    /// immediately following the set of flags.
+    ///
+    /// # Errors
+    ///
+    /// If flags are given and incorrectly specified, then a corresponding
+    /// error is returned.
+    ///
+    /// If a capture name is given and it is incorrectly specified, then a
+    /// corresponding error is returned.
+    fn parse_group(&self) -> Result<Either<ast::SetFlags, ast::Group>> {
+        assert_eq!(self.char(), '(');
+        let open_span = self.span_char();
+        self.bump();
+        self.bump_space();
+        if self.is_lookaround_prefix() {
+            return Err(self.error(
+                Span::new(open_span.start, self.span().end),
+                ast::ErrorKind::UnsupportedLookAround,
+            ));
+        }
+        let inner_span = self.span();
+        if self.bump_if("?P<") {
+            let capture_index = self.next_capture_index(open_span)?;
+            let cap = self.parse_capture_name(capture_index)?;
+            Ok(Either::Right(ast::Group {
+                span: open_span,
+                kind: ast::GroupKind::CaptureName(cap),
+                ast: Box::new(Ast::Empty(self.span())),
+            }))
+        } else if self.bump_if("?") {
+            if self.is_eof() {
+                return Err(self.error(
+                    open_span,
+                    ast::ErrorKind::GroupUnclosed,
+                ));
+            }
+            let flags = self.parse_flags()?;
+            let char_end = self.char();
+            self.bump();
+            if char_end == ')' {
+                // We don't allow empty flags, e.g., `(?)`. We instead
+                // interpret it as a repetition operator missing its argument.
+                if flags.items.is_empty() {
+                    return Err(self.error(
+                        inner_span,
+                        ast::ErrorKind::RepetitionMissing,
+                    ));
+                }
+                Ok(Either::Left(ast::SetFlags {
+                    span: Span { end: self.pos(), ..open_span },
+                    flags: flags,
+                }))
+            } else {
+                assert_eq!(char_end, ':');
+                Ok(Either::Right(ast::Group {
+                    span: open_span,
+                    kind: ast::GroupKind::NonCapturing(flags),
+                    ast: Box::new(Ast::Empty(self.span())),
+                }))
+            }
+        } else {
+            let capture_index = self.next_capture_index(open_span)?;
+            Ok(Either::Right(ast::Group {
+                span: open_span,
+                kind: ast::GroupKind::CaptureIndex(capture_index),
+                ast: Box::new(Ast::Empty(self.span())),
+            }))
+        }
+    }
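+
+    // A sketch of the three outcomes above, observed through the public
+    // parser. Capture indices are assigned left to right starting at `1`,
+    // and this assumes a single-element concatenation unwraps to its
+    // element:
+    //
+    //     assert!(Parser::new().parse("(?i)a").is_ok());   // bare flag set
+    //     assert!(Parser::new().parse("(?i:a)").is_ok());  // non-capturing group
+    //     match Parser::new().parse("(?P<w>a)").unwrap() {
+    //         Ast::Group(ref g) => match g.kind {
+    //             ast::GroupKind::CaptureName(ref cap) => {
+    //                 assert_eq!("w", cap.name);
+    //                 assert_eq!(1, cap.index);
+    //             }
+    //             _ => panic!("expected a named capture group"),
+    //         },
+    //         _ => panic!("expected a group"),
+    //     }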
+
+    /// Parses a capture group name. Assumes that the parser is positioned at
+    /// the first character in the name following the opening `<` (and may
+    /// possibly be EOF). This advances the parser to the first character
+    /// following the closing `>`.
+    ///
+    /// The caller must provide the capture index of the group for this name.
+    fn parse_capture_name(
+        &self,
+        capture_index: u32,
+    ) -> Result<ast::CaptureName> {
+        if self.is_eof() {
+            return Err(self.error(
+                self.span(),
+                ast::ErrorKind::GroupNameUnexpectedEof,
+            ));
+        }
+        let start = self.pos();
+        loop {
+            if self.char() == '>' {
+                break;
+            }
+            if !is_capture_char(self.char(), self.pos() == start) {
+                return Err(self.error(
+                    self.span_char(),
+                    ast::ErrorKind::GroupNameInvalid,
+                ));
+            }
+            if !self.bump() {
+                break;
+            }
+        }
+        let end = self.pos();
+        if self.is_eof() {
+            return Err(self.error(
+                self.span(),
+                ast::ErrorKind::GroupNameUnexpectedEof,
+            ));
+        }
+        assert_eq!(self.char(), '>');
+        self.bump();
+        let name = &self.pattern()[start.offset..end.offset];
+        if name.is_empty() {
+            return Err(self.error(
+                Span::new(start, start),
+                ast::ErrorKind::GroupNameEmpty,
+            ));
+        }
+        let capname = ast::CaptureName {
+            span: Span::new(start, end),
+            name: name.to_string(),
+            index: capture_index,
+        };
+        self.add_capture_name(&capname)?;
+        Ok(capname)
+    }
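+
+    // A sketch of the name validation above (a fresh parser per pattern):
+    //
+    //     let err = Parser::new().parse("(?P<>a)").unwrap_err();
+    //     match err.kind {
+    //         ast::ErrorKind::GroupNameEmpty => {}
+    //         _ => panic!("expected a GroupNameEmpty error"),
+    //     }
+    //     let err = Parser::new().parse("(?P<1a>b)").unwrap_err();
+    //     match err.kind {
+    //         ast::ErrorKind::GroupNameInvalid => {}
+    //         _ => panic!("expected a GroupNameInvalid error"),
+    //     }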
+
+    /// Parse a sequence of flags starting at the current character.
+    ///
+    /// This advances the parser to the character immediately following the
+    /// flags, which is guaranteed to be either `:` or `)`.
+    ///
+    /// # Errors
+    ///
+    /// If any flags are duplicated, then an error is returned.
+    ///
+    /// If the negation operator is used more than once, then an error is
+    /// returned.
+    ///
+    /// If no flags could be found or if the negation operation is not followed
+    /// by any flags, then an error is returned.
+    fn parse_flags(&self) -> Result<ast::Flags> {
+        let mut flags = ast::Flags {
+            span: self.span(),
+            items: vec![],
+        };
+        let mut last_was_negation = None;
+        while self.char() != ':' && self.char() != ')' {
+            if self.char() == '-' {
+                last_was_negation = Some(self.span_char());
+                let item = ast::FlagsItem {
+                    span: self.span_char(),
+                    kind: ast::FlagsItemKind::Negation,
+                };
+                if let Some(i) = flags.add_item(item) {
+                    return Err(self.error(
+                        self.span_char(),
+                        ast::ErrorKind::FlagRepeatedNegation {
+                            original: flags.items[i].span,
+                        },
+                    ));
+                }
+            } else {
+                last_was_negation = None;
+                let item = ast::FlagsItem {
+                    span: self.span_char(),
+                    kind: ast::FlagsItemKind::Flag(self.parse_flag()?),
+                };
+                if let Some(i) = flags.add_item(item) {
+                    return Err(self.error(
+                        self.span_char(),
+                        ast::ErrorKind::FlagDuplicate {
+                            original: flags.items[i].span,
+                        },
+                    ));
+                }
+            }
+            if !self.bump() {
+                return Err(self.error(
+                    self.span(),
+                    ast::ErrorKind::FlagUnexpectedEof,
+                ));
+            }
+        }
+        if let Some(span) = last_was_negation {
+            return Err(self.error(span, ast::ErrorKind::FlagDanglingNegation));
+        }
+        flags.span.end = self.pos();
+        Ok(flags)
+    }
+
+    /// Parse the current character as a flag. Do not advance the parser.
+    ///
+    /// # Errors
+    ///
+    /// If the flag is not recognized, then an error is returned.
+    fn parse_flag(&self) -> Result<ast::Flag> {
+        match self.char() {
+            'i' => Ok(ast::Flag::CaseInsensitive),
+            'm' => Ok(ast::Flag::MultiLine),
+            's' => Ok(ast::Flag::DotMatchesNewLine),
+            'U' => Ok(ast::Flag::SwapGreed),
+            'u' => Ok(ast::Flag::Unicode),
+            'x' => Ok(ast::Flag::IgnoreWhitespace),
+            _ => Err(self.error(
+                self.span_char(),
+                ast::ErrorKind::FlagUnrecognized,
+            )),
+        }
+    }
+
+    /// Parse a primitive AST, e.g., a literal, non-set character class or
+    /// assertion.
+    ///
+    /// This assumes that the parser expects a primitive at the current
+    /// location. i.e., All other non-primitive cases have been handled.
+    /// For example, if the parser's position is at `|`, then `|` will be
+    /// treated as a literal (e.g., inside a character class).
+    ///
+    /// This advances the parser to the first character immediately following
+    /// the primitive.
+    fn parse_primitive(&self) -> Result<Primitive> {
+        match self.char() {
+            '\\' => self.parse_escape(),
+            '.' => {
+                let ast = Primitive::Dot(self.span_char());
+                self.bump();
+                Ok(ast)
+            }
+            '^' => {
+                let ast = Primitive::Assertion(ast::Assertion {
+                    span: self.span_char(),
+                    kind: ast::AssertionKind::StartLine,
+                });
+                self.bump();
+                Ok(ast)
+            }
+            '$' => {
+                let ast = Primitive::Assertion(ast::Assertion {
+                    span: self.span_char(),
+                    kind: ast::AssertionKind::EndLine,
+                });
+                self.bump();
+                Ok(ast)
+            }
+            c => {
+                let ast = Primitive::Literal(ast::Literal {
+                    span: self.span_char(),
+                    kind: ast::LiteralKind::Verbatim,
+                    c: c,
+                });
+                self.bump();
+                Ok(ast)
+            }
+        }
+    }
+
+    /// Parse an escape sequence as a primitive AST.
+    ///
+    /// This assumes the parser is positioned at the start of the escape
+    /// sequence, i.e., `\`. It advances the parser to the first position
+    /// immediately following the escape sequence.
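+    ///
+    /// For example, `\n`, `\x7F`, `\p{Greek}` and `\b` are all escape
+    /// sequences handled by this routine.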
+    fn parse_escape(&self) -> Result<Primitive> {
+        assert_eq!(self.char(), '\\');
+        let start = self.pos();
+        if !self.bump() {
+            return Err(self.error(
+                Span::new(start, self.pos()),
+                ast::ErrorKind::EscapeUnexpectedEof,
+            ));
+        }
+        let c = self.char();
+        // Put some of the more complicated routines into helpers.
+        match c {
+            '0'...'7' => {
+                if !self.parser().octal {
+                    return Err(self.error(
+                        Span::new(start, self.span_char().end),
+                        ast::ErrorKind::UnsupportedBackreference,
+                    ));
+                }
+                let mut lit = self.parse_octal();
+                lit.span.start = start;
+                return Ok(Primitive::Literal(lit));
+            }
+            '8'...'9' if !self.parser().octal => {
+                return Err(self.error(
+                    Span::new(start, self.span_char().end),
+                    ast::ErrorKind::UnsupportedBackreference,
+                ));
+            }
+            'x' | 'u' | 'U' => {
+                let mut lit = self.parse_hex()?;
+                lit.span.start = start;
+                return Ok(Primitive::Literal(lit));
+            }
+            'p' | 'P' => {
+                let mut cls = self.parse_unicode_class()?;
+                cls.span.start = start;
+                return Ok(Primitive::Unicode(cls));
+            }
+            'd' | 's' | 'w' | 'D' | 'S' | 'W' => {
+                let mut cls = self.parse_perl_class();
+                cls.span.start = start;
+                return Ok(Primitive::Perl(cls));
+            }
+            _ => {}
+        }
+
+        // Handle all of the one letter sequences inline.
+        self.bump();
+        let span = Span::new(start, self.pos());
+        if is_meta_character(c) {
+            return Ok(Primitive::Literal(ast::Literal {
+                span: span,
+                kind: ast::LiteralKind::Punctuation,
+                c: c,
+            }));
+        }
+        let special = |kind, c| Ok(Primitive::Literal(ast::Literal {
+            span: span,
+            kind: ast::LiteralKind::Special(kind),
+            c: c,
+        }));
+        match c {
+            'a' => special(ast::SpecialLiteralKind::Bell, '\x07'),
+            'f' => special(ast::SpecialLiteralKind::FormFeed, '\x0C'),
+            't' => special(ast::SpecialLiteralKind::Tab, '\t'),
+            'n' => special(ast::SpecialLiteralKind::LineFeed, '\n'),
+            'r' => special(ast::SpecialLiteralKind::CarriageReturn, '\r'),
+            'v' => special(ast::SpecialLiteralKind::VerticalTab, '\x0B'),
+            ' ' if self.ignore_whitespace() => {
+                special(ast::SpecialLiteralKind::Space, ' ')
+            }
+            'A' => Ok(Primitive::Assertion(ast::Assertion {
+                span: span,
+                kind: ast::AssertionKind::StartText,
+            })),
+            'z' => Ok(Primitive::Assertion(ast::Assertion {
+                span: span,
+                kind: ast::AssertionKind::EndText,
+            })),
+            'b' => Ok(Primitive::Assertion(ast::Assertion {
+                span: span,
+                kind: ast::AssertionKind::WordBoundary,
+            })),
+            'B' => Ok(Primitive::Assertion(ast::Assertion {
+                span: span,
+                kind: ast::AssertionKind::NotWordBoundary,
+            })),
+            _ => Err(self.error(span, ast::ErrorKind::EscapeUnrecognized)),
+        }
+    }
+
+    /// Parse an octal representation of a Unicode codepoint up to 3 digits
+    /// long. This expects the parser to be positioned at the first octal
+    /// digit and advances the parser to the first character immediately
+    /// following the octal number. This also assumes that parsing octal
+    /// escapes is enabled.
+    ///
+    /// Assuming the preconditions are met, this routine can never fail.
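+    ///
+    /// For example, `\123` denotes the codepoint `0o123` (decimal 83),
+    /// i.e., the literal `S`.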
+    fn parse_octal(&self) -> ast::Literal {
+        use std::char;
+        use std::u32;
+
+        assert!(self.parser().octal);
+        assert!('0' <= self.char() && self.char() <= '7');
+        let start = self.pos();
+        // Parse up to two more digits.
+        while
+            self.bump() &&
+            '0' <= self.char() && self.char() <= '7' &&
+            self.pos().offset - start.offset <= 2
+        {}
+        let end = self.pos();
+        let octal = &self.pattern()[start.offset..end.offset];
+        // Parsing the octal should never fail since the above guarantees a
+        // valid number.
+        let codepoint =
+            u32::from_str_radix(octal, 8).expect("valid octal number");
+        // The max value for 3 digit octal is 0777 = 511 and [0, 511] has no
+        // invalid Unicode scalar values.
+        let c = char::from_u32(codepoint).expect("Unicode scalar value");
+        ast::Literal {
+            span: Span::new(start, end),
+            kind: ast::LiteralKind::Octal,
+            c: c,
+        }
+    }
+
+    /// Parse a hex representation of a Unicode codepoint. This handles both
+    /// hex notations, i.e., `\xFF` and `\x{FFFF}`. This expects the parser to
+    /// be positioned at the `x`, `u` or `U` prefix. The parser is advanced to
+    /// the first character immediately following the hexadecimal literal.
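+    ///
+    /// For example, `\x53`, `\u0053`, `\U00000053` and `\x{53}` all denote
+    /// the literal `S`.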
+    fn parse_hex(&self) -> Result<ast::Literal> {
+        assert!(self.char() == 'x'
+                || self.char() == 'u'
+                || self.char() == 'U');
+
+        let hex_kind = match self.char() {
+            'x' => ast::HexLiteralKind::X,
+            'u' => ast::HexLiteralKind::UnicodeShort,
+            _ => ast::HexLiteralKind::UnicodeLong,
+        };
+        if !self.bump_and_bump_space() {
+            return Err(self.error(
+                self.span(),
+                ast::ErrorKind::EscapeUnexpectedEof,
+            ));
+        }
+        if self.char() == '{' {
+            self.parse_hex_brace(hex_kind)
+        } else {
+            self.parse_hex_digits(hex_kind)
+        }
+    }
+
+    /// Parse an N-digit hex representation of a Unicode codepoint. This
+    /// expects the parser to be positioned at the first digit and will advance
+    /// the parser to the first character immediately following the escape
+    /// sequence.
+    ///
+    /// The number of digits given must be 2 (for `\xNN`), 4 (for `\uNNNN`)
+    /// or 8 (for `\UNNNNNNNN`).
+    fn parse_hex_digits(
+        &self,
+        kind: ast::HexLiteralKind,
+    ) -> Result<ast::Literal> {
+        use std::char;
+        use std::u32;
+
+        let mut scratch = self.parser().scratch.borrow_mut();
+        scratch.clear();
+
+        let start = self.pos();
+        for i in 0..kind.digits() {
+            if i > 0 && !self.bump_and_bump_space() {
+                return Err(self.error(
+                    self.span(),
+                    ast::ErrorKind::EscapeUnexpectedEof,
+                ));
+            }
+            if !is_hex(self.char()) {
+                return Err(self.error(
+                    self.span_char(),
+                    ast::ErrorKind::EscapeHexInvalidDigit,
+                ));
+            }
+            scratch.push(self.char());
+        }
+        // The final bump just moves the parser past the literal, which may
+        // be EOF.
+        self.bump_and_bump_space();
+        let end = self.pos();
+        let hex = scratch.as_str();
+        match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
+            None => Err(self.error(
+                Span::new(start, end),
+                ast::ErrorKind::EscapeHexInvalid,
+            )),
+            Some(c) => Ok(ast::Literal {
+                span: Span::new(start, end),
+                kind: ast::LiteralKind::HexFixed(kind),
+                c: c,
+            }),
+        }
+    }
+
+    /// Parse a hex representation of any Unicode scalar value. This expects
+    /// the parser to be positioned at the opening brace `{` and will advance
+    /// the parser to the first character following the closing brace `}`.
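+    ///
+    /// For example, the `{1F4A9}` portion of `\x{1F4A9}` is handled by this
+    /// routine, yielding the codepoint U+1F4A9.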
+    fn parse_hex_brace(
+        &self,
+        kind: ast::HexLiteralKind,
+    ) -> Result<ast::Literal> {
+        use std::char;
+        use std::u32;
+
+        let mut scratch = self.parser().scratch.borrow_mut();
+        scratch.clear();
+
+        let brace_pos = self.pos();
+        let start = self.span_char().end;
+        while self.bump_and_bump_space() && self.char() != '}' {
+            if !is_hex(self.char()) {
+                return Err(self.error(
+                    self.span_char(),
+                    ast::ErrorKind::EscapeHexInvalidDigit,
+                ));
+            }
+            scratch.push(self.char());
+        }
+        if self.is_eof() {
+            return Err(self.error(
+                Span::new(brace_pos, self.pos()),
+                ast::ErrorKind::EscapeUnexpectedEof,
+            ));
+        }
+        let end = self.pos();
+        let hex = scratch.as_str();
+        assert_eq!(self.char(), '}');
+        self.bump_and_bump_space();
+
+        if hex.is_empty() {
+            return Err(self.error(
+                Span::new(brace_pos, self.pos()),
+                ast::ErrorKind::EscapeHexEmpty,
+            ));
+        }
+        match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
+            None => Err(self.error(
+                Span::new(start, end),
+                ast::ErrorKind::EscapeHexInvalid,
+            )),
+            Some(c) => Ok(ast::Literal {
+                span: Span::new(start, self.pos()),
+                kind: ast::LiteralKind::HexBrace(kind),
+                c: c,
+            }),
+        }
+    }
+
+    /// Parse a decimal number into a u32 while trimming leading and trailing
+    /// whitespace.
+    ///
+    /// This expects the parser to be positioned at the first position where
+    /// a decimal digit could occur. This will advance the parser to the byte
+    /// immediately following the last contiguous decimal digit.
+    ///
+    /// If no decimal digit could be found or if there was a problem parsing
+    /// the complete set of digits into a u32, then an error is returned.
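+    ///
+    /// For example, this routine is used to parse the bounds of a counted
+    /// repetition such as `a{5,123}`.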
+    fn parse_decimal(&self) -> Result<u32> {
+        let mut scratch = self.parser().scratch.borrow_mut();
+        scratch.clear();
+
+        while !self.is_eof() && self.char().is_whitespace() {
+            self.bump();
+        }
+        let start = self.pos();
+        while !self.is_eof() && '0' <= self.char() && self.char() <= '9' {
+            scratch.push(self.char());
+            self.bump_and_bump_space();
+        }
+        let span = Span::new(start, self.pos());
+        while !self.is_eof() && self.char().is_whitespace() {
+            self.bump_and_bump_space();
+        }
+        let digits = scratch.as_str();
+        if digits.is_empty() {
+            return Err(self.error(span, ast::ErrorKind::DecimalEmpty));
+        }
+        match u32::from_str_radix(digits, 10).ok() {
+            Some(n) => Ok(n),
+            None => Err(self.error(span, ast::ErrorKind::DecimalInvalid)),
+        }
+    }
+
+    /// Parse a standard character class consisting primarily of characters or
+    /// character ranges, but which can also contain nested character classes
+    /// of any type (sans `.`).
+    ///
+    /// This assumes the parser is positioned at the opening `[`. If parsing
+    /// is successful, then the parser is advanced to the position immediately
+    /// following the closing `]`.
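+    ///
+    /// For example, `[a-z0-9]`, `[^\n]`, `[[:alpha:]]` and `[\w&&a-z]` are
+    /// all parsed by this routine.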
+    fn parse_set_class(&self) -> Result<ast::Class> {
+        assert_eq!(self.char(), '[');
+
+        let mut union = ast::ClassSetUnion {
+            span: self.span(),
+            items: vec![],
+        };
+        loop {
+            self.bump_space();
+            if self.is_eof() {
+                return Err(self.unclosed_class_error());
+            }
+            match self.char() {
+                '[' => {
+                    // If we've already parsed the opening bracket, then
+                    // attempt to treat this as the beginning of an ASCII
+                    // class. If ASCII class parsing fails, then the parser
+                    // backs up to `[`.
+                    if !self.parser().stack_class.borrow().is_empty() {
+                        if let Some(cls) = self.maybe_parse_ascii_class() {
+                            union.push(ast::ClassSetItem::Ascii(cls));
+                            continue;
+                        }
+                    }
+                    union = self.push_class_open(union)?;
+                }
+                ']' => {
+                    match self.pop_class(union)? {
+                        Either::Left(nested_union) => { union = nested_union; }
+                        Either::Right(class) => return Ok(class),
+                    }
+                }
+                '&' if self.peek() == Some('&') => {
+                    assert!(self.bump_if("&&"));
+                    union = self.push_class_op(
+                        ast::ClassSetBinaryOpKind::Intersection, union);
+                }
+                '-' if self.peek() == Some('-') => {
+                    assert!(self.bump_if("--"));
+                    union = self.push_class_op(
+                        ast::ClassSetBinaryOpKind::Difference, union);
+                }
+                '~' if self.peek() == Some('~') => {
+                    assert!(self.bump_if("~~"));
+                    union = self.push_class_op(
+                        ast::ClassSetBinaryOpKind::SymmetricDifference, union);
+                }
+                _ => {
+                    union.push(self.parse_set_class_range()?);
+                }
+            }
+        }
+    }
+
+    /// Parse a single primitive item in a character class set. The item to
+    /// be parsed can be one of: a simple literal character, a range
+    /// between two simple literal characters, or a "primitive" character
+    /// class like `\w` or `\p{Greek}`.
+    ///
+    /// If an invalid escape is found, or if a character class is found where
+    /// a simple literal is expected (e.g., in a range), then an error is
+    /// returned.
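+    ///
+    /// For example, in `[a-z0]`, this routine parses the range `a-z` and, on
+    /// a subsequent call, the literal `0`.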
+    fn parse_set_class_range(&self) -> Result<ast::ClassSetItem> {
+        let prim1 = self.parse_set_class_item()?;
+        self.bump_space();
+        if self.is_eof() {
+            return Err(self.unclosed_class_error());
+        }
+        // If the next char isn't a `-`, then we don't have a range.
+        // There are two exceptions. If the char after a `-` is a `]`, then
+        // `-` is interpreted as a literal `-`. Alternatively, if the char
+        // after a `-` is a `-`, then `--` corresponds to a "difference"
+        // operation.
+        if self.char() != '-'
+            || self.peek_space() == Some(']')
+            || self.peek_space() == Some('-')
+        {
+            return prim1.into_class_set_item(self);
+        }
+        // OK, now we're parsing a range, so bump past the `-` and parse the
+        // second half of the range.
+        if !self.bump_and_bump_space() {
+            return Err(self.unclosed_class_error());
+        }
+        let prim2 = self.parse_set_class_item()?;
+        let range = ast::ClassSetRange {
+            span: Span::new(prim1.span().start, prim2.span().end),
+            start: prim1.into_class_literal(self)?,
+            end: prim2.into_class_literal(self)?,
+        };
+        if !range.is_valid() {
+            return Err(self.error(
+                range.span,
+                ast::ErrorKind::ClassRangeInvalid,
+            ));
+        }
+        Ok(ast::ClassSetItem::Range(range))
+    }
+
+    /// Parse a single item in a character class as a primitive, where the
+    /// primitive either consists of a verbatim literal or a single escape
+    /// sequence.
+    ///
+    /// This assumes the parser is positioned at the beginning of a primitive,
+    /// and advances the parser to the first position after the primitive if
+    /// successful.
+    ///
+    /// Note that it is the caller's responsibility to report an error if an
+    /// illegal primitive was parsed.
+    fn parse_set_class_item(&self) -> Result<Primitive> {
+        if self.char() == '\\' {
+            self.parse_escape()
+        } else {
+            let x = Primitive::Literal(ast::Literal {
+                span: self.span_char(),
+                kind: ast::LiteralKind::Verbatim,
+                c: self.char(),
+            });
+            self.bump();
+            Ok(x)
+        }
+    }
+
+    /// Parses the opening of a character class set. This includes the opening
+    /// bracket along with `^` if present to indicate negation. This also
+    /// starts parsing the opening set of unioned items if applicable, since
+    /// there are special rules applied to certain characters in the opening
+    /// of a character class. For example, `[^]]` is the class of all
+    /// characters not equal to `]`. (`]` would need to be escaped in any other
+    /// position.) Similarly for `-`.
+    ///
+    /// In all cases, the op inside the returned `ast::ClassBracketed` is an
+    /// empty union. This empty union should be replaced with the actual item
+    /// when it is popped from the parser's stack.
+    ///
+    /// This assumes the parser is positioned at the opening `[` and advances
+    /// the parser to the first non-special byte of the character class.
+    ///
+    /// An error is returned if EOF is found.
+    fn parse_set_class_open(
+        &self,
+    ) -> Result<(ast::ClassBracketed, ast::ClassSetUnion)> {
+        assert_eq!(self.char(), '[');
+        let start = self.pos();
+        if !self.bump_and_bump_space() {
+            return Err(self.error(
+                Span::new(start, self.pos()),
+                ast::ErrorKind::ClassUnclosed,
+            ));
+        }
+
+        let negated =
+            if self.char() != '^' {
+                false
+            } else {
+                if !self.bump_and_bump_space() {
+                    return Err(self.error(
+                        Span::new(start, self.pos()),
+                        ast::ErrorKind::ClassUnclosed,
+                    ));
+                }
+                true
+            };
+        // Accept any number of `-` as literal `-`.
+        let mut union = ast::ClassSetUnion {
+            span: self.span(),
+            items: vec![],
+        };
+        while self.char() == '-' {
+            union.push(ast::ClassSetItem::Literal(ast::Literal {
+                span: self.span_char(),
+                kind: ast::LiteralKind::Verbatim,
+                c: '-',
+            }));
+            if !self.bump_and_bump_space() {
+                return Err(self.error(
+                    Span::new(start, self.pos()),
+                    ast::ErrorKind::ClassUnclosed,
+                ));
+            }
+        }
+        // If `]` is the *first* char in a set, then interpret it as a literal
+        // `]`. That is, an empty class is impossible to write.
+        if union.items.is_empty() && self.char() == ']' {
+            union.push(ast::ClassSetItem::Literal(ast::Literal {
+                span: self.span_char(),
+                kind: ast::LiteralKind::Verbatim,
+                c: ']',
+            }));
+            if !self.bump_and_bump_space() {
+                return Err(self.error(
+                    Span::new(start, self.pos()),
+                    ast::ErrorKind::ClassUnclosed,
+                ));
+            }
+        }
+        let set = ast::ClassBracketed {
+            span: Span::new(start, self.pos()),
+            negated: negated,
+            kind: ast::ClassSet::union(ast::ClassSetUnion {
+                span: Span::new(union.span.start, union.span.start),
+                items: vec![],
+            }),
+        };
+        Ok((set, union))
+    }
+
+    /// Attempt to parse an ASCII character class, e.g., `[:alnum:]`.
+    ///
+    /// This assumes the parser is positioned at the opening `[`.
+    ///
+    /// If no valid ASCII character class could be found, then this does not
+    /// advance the parser and `None` is returned. Otherwise, the parser is
+    /// advanced to the first byte following the closing `]` and the
+    /// corresponding ASCII class is returned.
+    fn maybe_parse_ascii_class(&self) -> Option<ast::ClassAscii> {
+        // ASCII character classes are interesting from a parsing perspective
+        // because parsing cannot fail with any interesting error. For example,
+        // in order to use an ASCII character class, it must be enclosed in
+        // double brackets, e.g., `[[:alnum:]]`. Alternatively, you might think
+        // of it as "ASCII character characters have the syntax `[:NAME:]`
+        // which can only appear within character brackets." This means that
+        // things like `[[:lower:]A]` are legal constructs.
+        //
+        // However, if one types an incorrect ASCII character class, e.g.,
+        // `[[:loower:]]`, then we treat that as a normal nested character
+        // class containing the characters `:elorw`. One might argue that we
+        // should return an error instead since the repeated colons give away
+        // the intent to write an ASCII class. But what if the user typed
+        // `[[:lower]]` instead? How can we tell that was intended to be an
+        // ASCII class and not just a normal nested class?
+        //
+        // Reasonable people can probably disagree over this, but for better
+        // or worse, we implement semantics that never fail, at the expense
+        // of better failure modes.
+        assert_eq!(self.char(), '[');
+        // If parsing fails, then we back up the parser to this starting point.
+        let start = self.pos();
+        let mut negated = false;
+        if !self.bump() || self.char() != ':' {
+            self.parser().pos.set(start);
+            return None;
+        }
+        if !self.bump() {
+            self.parser().pos.set(start);
+            return None;
+        }
+        if self.char() == '^' {
+            negated = true;
+            if !self.bump() {
+                self.parser().pos.set(start);
+                return None;
+            }
+        }
+        let name_start = self.offset();
+        while self.char() != ':' && self.bump() {}
+        if self.is_eof() {
+            self.parser().pos.set(start);
+            return None;
+        }
+        let name = &self.pattern()[name_start..self.offset()];
+        if !self.bump_if(":]") {
+            self.parser().pos.set(start);
+            return None;
+        }
+        let kind = match ast::ClassAsciiKind::from_name(name) {
+            Some(kind) => kind,
+            None => {
+                self.parser().pos.set(start);
+                return None;
+            }
+        };
+        Some(ast::ClassAscii {
+            span: Span::new(start, self.pos()),
+            kind: kind,
+            negated: negated,
+        })
+    }
+
+    /// Parse a Unicode class in either the single character notation, `\pN`
+    /// or the multi-character bracketed notation, `\p{Greek}`. This assumes
+    /// the parser is positioned at the `p` (or `P` for negation) and will
+    /// advance the parser to the character immediately following the class.
+    ///
+    /// Note that this does not check whether the class name is valid or not.
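+    ///
+    /// For example, `\pL`, `\p{Greek}`, `\p{scx=Katakana}` and `\P{alpha}`
+    /// are all accepted by this routine.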
+    fn parse_unicode_class(&self) -> Result<ast::ClassUnicode> {
+        assert!(self.char() == 'p' || self.char() == 'P');
+
+        let mut scratch = self.parser().scratch.borrow_mut();
+        scratch.clear();
+
+        let negated = self.char() == 'P';
+        if !self.bump_and_bump_space() {
+            return Err(self.error(
+                self.span(),
+                ast::ErrorKind::EscapeUnexpectedEof,
+            ));
+        }
+        let (start, kind) =
+            if self.char() == '{' {
+                let start = self.span_char().end;
+                while self.bump_and_bump_space() && self.char() != '}' {
+                    scratch.push(self.char());
+                }
+                if self.is_eof() {
+                    return Err(self.error(
+                        self.span(),
+                        ast::ErrorKind::EscapeUnexpectedEof,
+                    ));
+                }
+                assert_eq!(self.char(), '}');
+                self.bump();
+
+                let name = scratch.as_str();
+                if let Some(i) = name.find("!=") {
+                    (start, ast::ClassUnicodeKind::NamedValue {
+                        op: ast::ClassUnicodeOpKind::NotEqual,
+                        name: name[..i].to_string(),
+                        value: name[i+2..].to_string(),
+                    })
+                } else if let Some(i) = name.find(':') {
+                    (start, ast::ClassUnicodeKind::NamedValue {
+                        op: ast::ClassUnicodeOpKind::Colon,
+                        name: name[..i].to_string(),
+                        value: name[i+1..].to_string(),
+                    })
+                } else if let Some(i) = name.find('=') {
+                    (start, ast::ClassUnicodeKind::NamedValue {
+                        op: ast::ClassUnicodeOpKind::Equal,
+                        name: name[..i].to_string(),
+                        value: name[i+1..].to_string(),
+                    })
+                } else {
+                    (start, ast::ClassUnicodeKind::Named(name.to_string()))
+                }
+            } else {
+                let start = self.pos();
+                let c = self.char();
+                self.bump_and_bump_space();
+                let kind = ast::ClassUnicodeKind::OneLetter(c);
+                (start, kind)
+            };
+        Ok(ast::ClassUnicode {
+            span: Span::new(start, self.pos()),
+            negated: negated,
+            kind: kind,
+        })
+    }
+
+    /// Parse a Perl character class, e.g., `\d` or `\W`. This assumes the
+    /// parser is currently at a valid character class name and will be
+    /// advanced to the character immediately following the class.
+    fn parse_perl_class(&self) -> ast::ClassPerl {
+        let c = self.char();
+        let span = self.span_char();
+        self.bump();
+        let (negated, kind) = match c {
+            'd' => (false, ast::ClassPerlKind::Digit),
+            'D' => (true, ast::ClassPerlKind::Digit),
+            's' => (false, ast::ClassPerlKind::Space),
+            'S' => (true, ast::ClassPerlKind::Space),
+            'w' => (false, ast::ClassPerlKind::Word),
+            'W' => (true, ast::ClassPerlKind::Word),
+            c => panic!("expected valid Perl class but got '{}'", c),
+        };
+        ast::ClassPerl { span: span, kind: kind, negated: negated }
+    }
+}
+
+/// A type that traverses a fully parsed Ast and checks whether its depth
+/// exceeds the specified nesting limit. If it does, then an error is returned.
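+///
+/// For example, `(a)+` has a nesting depth of 2: one level for the
+/// repetition operator and one for the group. Literals and other leaf nodes
+/// do not contribute to the depth.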
+#[derive(Debug)]
+struct NestLimiter<'p, 's: 'p, P: 'p + 's> {
+    /// The parser that is checking the nest limit.
+    p: &'p ParserI<'s, P>,
+    /// The current depth while walking an Ast.
+    depth: u32,
+}
+
+impl<'p, 's, P: Borrow<Parser>> NestLimiter<'p, 's, P> {
+    fn new(p: &'p ParserI<'s, P>) -> NestLimiter<'p, 's, P> {
+        NestLimiter { p: p, depth: 0 }
+    }
+
+    fn check(self, ast: &Ast) -> Result<()> {
+        ast::visit(ast, self)
+    }
+
+    fn increment_depth(&mut self, span: &Span) -> Result<()> {
+        let new = self.depth.checked_add(1).ok_or_else(|| self.p.error(
+            span.clone(),
+            ast::ErrorKind::NestLimitExceeded(::std::u32::MAX),
+        ))?;
+        let limit = self.p.parser().nest_limit;
+        if new > limit {
+            return Err(self.p.error(
+                span.clone(),
+                ast::ErrorKind::NestLimitExceeded(limit),
+            ));
+        }
+        self.depth = new;
+        Ok(())
+    }
+
+    fn decrement_depth(&mut self) {
+        // Assuming the correctness of the visitor, this should never drop
+        // below 0.
+        self.depth = self.depth.checked_sub(1).unwrap();
+    }
+}
+
+impl<'p, 's, P: Borrow<Parser>> ast::Visitor for NestLimiter<'p, 's, P> {
+    type Output = ();
+    type Err = ast::Error;
+
+    fn finish(self) -> Result<()> {
+        Ok(())
+    }
+
+    fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
+        let span = match *ast {
+            Ast::Empty(_)
+            | Ast::Flags(_)
+            | Ast::Literal(_)
+            | Ast::Dot(_)
+            | Ast::Assertion(_)
+            | Ast::Class(ast::Class::Unicode(_))
+            | Ast::Class(ast::Class::Perl(_)) => {
+                // These are all base cases, so we don't increment depth.
+                return Ok(());
+            }
+            Ast::Class(ast::Class::Bracketed(ref x)) => &x.span,
+            Ast::Repetition(ref x) => &x.span,
+            Ast::Group(ref x) => &x.span,
+            Ast::Alternation(ref x) => &x.span,
+            Ast::Concat(ref x) => &x.span,
+        };
+        self.increment_depth(span)
+    }
+
+    fn visit_post(&mut self, ast: &Ast) -> Result<()> {
+        match *ast {
+            Ast::Empty(_)
+            | Ast::Flags(_)
+            | Ast::Literal(_)
+            | Ast::Dot(_)
+            | Ast::Assertion(_)
+            | Ast::Class(ast::Class::Unicode(_))
+            | Ast::Class(ast::Class::Perl(_)) => {
+                // These are all base cases, so we don't decrement depth.
+                Ok(())
+            }
+            Ast::Class(ast::Class::Bracketed(_))
+            | Ast::Repetition(_)
+            | Ast::Group(_)
+            | Ast::Alternation(_)
+            | Ast::Concat(_) => {
+                self.decrement_depth();
+                Ok(())
+            }
+        }
+    }
+
+    fn visit_class_set_item_pre(
+        &mut self,
+        ast: &ast::ClassSetItem,
+    ) -> Result<()> {
+        let span = match *ast {
+            ast::ClassSetItem::Empty(_)
+            | ast::ClassSetItem::Literal(_)
+            | ast::ClassSetItem::Range(_)
+            | ast::ClassSetItem::Ascii(_)
+            | ast::ClassSetItem::Unicode(_)
+            | ast::ClassSetItem::Perl(_) => {
+                // These are all base cases, so we don't increment depth.
+                return Ok(());
+            }
+            ast::ClassSetItem::Bracketed(ref x) => &x.span,
+            ast::ClassSetItem::Union(ref x) => &x.span,
+        };
+        self.increment_depth(span)
+    }
+
+    fn visit_class_set_item_post(
+        &mut self,
+        ast: &ast::ClassSetItem,
+    ) -> Result<()> {
+        match *ast {
+            ast::ClassSetItem::Empty(_)
+            | ast::ClassSetItem::Literal(_)
+            | ast::ClassSetItem::Range(_)
+            | ast::ClassSetItem::Ascii(_)
+            | ast::ClassSetItem::Unicode(_)
+            | ast::ClassSetItem::Perl(_) => {
+                // These are all base cases, so we don't decrement depth.
+                Ok(())
+            }
+            ast::ClassSetItem::Bracketed(_)
+            | ast::ClassSetItem::Union(_) => {
+                self.decrement_depth();
+                Ok(())
+            }
+        }
+    }
+
+    fn visit_class_set_binary_op_pre(
+        &mut self,
+        ast: &ast::ClassSetBinaryOp,
+    ) -> Result<()> {
+        self.increment_depth(&ast.span)
+    }
+
+    fn visit_class_set_binary_op_post(
+        &mut self,
+        _ast: &ast::ClassSetBinaryOp,
+    ) -> Result<()> {
+        self.decrement_depth();
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::ops::Range;
+
+    use ast::{self, Ast, Position, Span};
+    use super::{Parser, ParserI, ParserBuilder, Primitive};
+
+    // Our own assert_eq, which has slightly better formatting (but honestly
+    // still kind of crappy).
+    macro_rules! assert_eq {
+        ($left:expr, $right:expr) => ({
+            match (&$left, &$right) {
+                (left_val, right_val) => {
+                    if !(*left_val == *right_val) {
+                        panic!("assertion failed: `(left == right)`\n\n\
+                               left:  `{:?}`\nright: `{:?}`\n\n",
+                               left_val, right_val)
+                    }
+                }
+            }
+        });
+    }
+
+    // We create these errors to compare with real ast::Errors in the tests.
+    // We define equality between TestError and ast::Error to disregard the
+    // pattern string in ast::Error, which is annoying to provide in tests.
+    #[derive(Clone, Debug)]
+    struct TestError {
+        span: Span,
+        kind: ast::ErrorKind,
+    }
+
+    impl PartialEq<ast::Error> for TestError {
+        fn eq(&self, other: &ast::Error) -> bool {
+            self.span == other.span && self.kind == other.kind
+        }
+    }
+
+    impl PartialEq<TestError> for ast::Error {
+        fn eq(&self, other: &TestError) -> bool {
+            self.span == other.span && self.kind == other.kind
+        }
+    }
+
+    fn s(str: &str) -> String {
+        str.to_string()
+    }
+
+    fn parser(pattern: &str) -> ParserI<Parser> {
+        ParserI::new(Parser::new(), pattern)
+    }
+
+    fn parser_octal(pattern: &str) -> ParserI<Parser> {
+        let parser = ParserBuilder::new().octal(true).build();
+        ParserI::new(parser, pattern)
+    }
+
+    fn parser_nest_limit(pattern: &str, nest_limit: u32) -> ParserI<Parser> {
+        let p = ParserBuilder::new().nest_limit(nest_limit).build();
+        ParserI::new(p, pattern)
+    }
+
+    fn parser_ignore_whitespace(pattern: &str) -> ParserI<Parser> {
+        let p = ParserBuilder::new().ignore_whitespace(true).build();
+        ParserI::new(p, pattern)
+    }
+
+    /// Short alias for creating a new span.
+    fn nspan(start: Position, end: Position) -> Span {
+        Span::new(start, end)
+    }
+
+    /// Short alias for creating a new position.
+    fn npos(offset: usize, line: usize, column: usize) -> Position {
+        Position::new(offset, line, column)
+    }
+
+    /// Create a new span from the given offset range. This assumes a single
+    /// line and sets the columns based on the offsets. i.e., This only works
+    /// out of the box for ASCII, which is fine for most tests.
+    fn span(range: Range<usize>) -> Span {
+        let start = Position::new(range.start, 1, range.start + 1);
+        let end = Position::new(range.end, 1, range.end + 1);
+        Span::new(start, end)
+    }
+
+    /// Create a new span for the corresponding byte range in the given string.
+    fn span_range(subject: &str, range: Range<usize>) -> Span {
+        let start = Position {
+            offset: range.start,
+            line: 1 + subject[..range.start].matches('\n').count(),
+            column: 1 + subject[..range.start]
+                .chars()
+                .rev()
+                .position(|c| c == '\n')
+                .unwrap_or(subject[..range.start].chars().count()),
+        };
+        let end = Position {
+            offset: range.end,
+            line: 1 + subject[..range.end].matches('\n').count(),
+            column: 1 + subject[..range.end]
+                .chars()
+                .rev()
+                .position(|c| c == '\n')
+                .unwrap_or(subject[..range.end].chars().count()),
+        };
+        Span::new(start, end)
+    }
+
+    /// Create a verbatim literal starting at the given position.
+    fn lit(c: char, start: usize) -> Ast {
+        lit_with(c, span(start..start + c.len_utf8()))
+    }
+
+    /// Create a punctuation literal with the given span.
+    fn punct_lit(c: char, span: Span) -> Ast {
+        Ast::Literal(ast::Literal {
+            span: span,
+            kind: ast::LiteralKind::Punctuation,
+            c: c,
+        })
+    }
+
+    /// Create a verbatim literal with the given span.
+    fn lit_with(c: char, span: Span) -> Ast {
+        Ast::Literal(ast::Literal {
+            span: span,
+            kind: ast::LiteralKind::Verbatim,
+            c: c,
+        })
+    }
+
+    /// Create a concatenation with the given range.
+    fn concat(range: Range<usize>, asts: Vec<Ast>) -> Ast {
+        concat_with(span(range), asts)
+    }
+
+    /// Create a concatenation with the given span.
+    fn concat_with(span: Span, asts: Vec<Ast>) -> Ast {
+        Ast::Concat(ast::Concat { span: span, asts: asts })
+    }
+
+    /// Create an alternation with the given range.
+    fn alt(range: Range<usize>, asts: Vec<Ast>) -> Ast {
+        Ast::Alternation(ast::Alternation { span: span(range), asts: asts })
+    }
+
+    /// Create a capturing group with the given range.
+    fn group(range: Range<usize>, index: u32, ast: Ast) -> Ast {
+        Ast::Group(ast::Group {
+            span: span(range),
+            kind: ast::GroupKind::CaptureIndex(index),
+            ast: Box::new(ast),
+        })
+    }
+
+    /// Create an ast::SetFlags.
+    ///
+    /// The given pattern should be the full pattern string. The range given
+    /// should correspond to the byte offsets where the flag set occurs.
+    ///
+    /// If negated is true, then the set is interpreted as beginning with a
+    /// negation.
+    fn flag_set(
+        pat: &str,
+        range: Range<usize>,
+        flag: ast::Flag,
+        negated: bool,
+    ) -> Ast {
+        let mut items = vec![
+            ast::FlagsItem {
+                span: span_range(pat, (range.end - 2)..(range.end - 1)),
+                kind: ast::FlagsItemKind::Flag(flag),
+            },
+        ];
+        if negated {
+            items.insert(0, ast::FlagsItem {
+                span: span_range(pat, (range.start + 2)..(range.end - 2)),
+                kind: ast::FlagsItemKind::Negation,
+            });
+        }
+        Ast::Flags(ast::SetFlags {
+            span: span_range(pat, range.clone()),
+            flags: ast::Flags {
+                span: span_range(pat, (range.start + 2)..(range.end - 1)),
+                items: items,
+            },
+        })
+    }
+
+    #[test]
+    fn parse_nest_limit() {
+        // A nest limit of 0 still allows some types of regexes.
+        assert_eq!(
+            parser_nest_limit("", 0).parse(),
+            Ok(Ast::Empty(span(0..0))));
+        assert_eq!(
+            parser_nest_limit("a", 0).parse(),
+            Ok(lit('a', 0)));
+
+        // Test repetition operations, which require one level of nesting.
+        assert_eq!(
+            parser_nest_limit("a+", 0).parse().unwrap_err(),
+            TestError {
+                span: span(0..2),
+                kind: ast::ErrorKind::NestLimitExceeded(0),
+            });
+        assert_eq!(
+            parser_nest_limit("a+", 1).parse(),
+            Ok(Ast::Repetition(ast::Repetition {
+                span: span(0..2),
+                op: ast::RepetitionOp {
+                    span: span(1..2),
+                    kind: ast::RepetitionKind::OneOrMore,
+                },
+                greedy: true,
+                ast: Box::new(lit('a', 0)),
+            })));
+        assert_eq!(
+            parser_nest_limit("(a)+", 1).parse().unwrap_err(),
+            TestError {
+                span: span(0..3),
+                kind: ast::ErrorKind::NestLimitExceeded(1),
+            });
+        assert_eq!(
+            parser_nest_limit("a+*", 1).parse().unwrap_err(),
+            TestError {
+                span: span(0..2),
+                kind: ast::ErrorKind::NestLimitExceeded(1),
+            });
+        assert_eq!(
+            parser_nest_limit("a+*", 2).parse(),
+            Ok(Ast::Repetition(ast::Repetition {
+                span: span(0..3),
+                op: ast::RepetitionOp {
+                    span: span(2..3),
+                    kind: ast::RepetitionKind::ZeroOrMore,
+                },
+                greedy: true,
+                ast: Box::new(Ast::Repetition(ast::Repetition {
+                    span: span(0..2),
+                    op: ast::RepetitionOp {
+                        span: span(1..2),
+                        kind: ast::RepetitionKind::OneOrMore,
+                    },
+                    greedy: true,
+                    ast: Box::new(lit('a', 0)),
+                })),
+            })));
+
+        // Test concatenations. A concatenation requires one level of nesting.
+        assert_eq!(
+            parser_nest_limit("ab", 0).parse().unwrap_err(),
+            TestError {
+                span: span(0..2),
+                kind: ast::ErrorKind::NestLimitExceeded(0),
+            });
+        assert_eq!(
+            parser_nest_limit("ab", 1).parse(),
+            Ok(concat(0..2, vec![lit('a', 0), lit('b', 1)])));
+        assert_eq!(
+            parser_nest_limit("abc", 1).parse(),
+            Ok(concat(0..3, vec![lit('a', 0), lit('b', 1), lit('c', 2)])));
+
+        // Test alternations. An alternation requires one level of nesting.
+        assert_eq!(
+            parser_nest_limit("a|b", 0).parse().unwrap_err(),
+            TestError {
+                span: span(0..3),
+                kind: ast::ErrorKind::NestLimitExceeded(0),
+            });
+        assert_eq!(
+            parser_nest_limit("a|b", 1).parse(),
+            Ok(alt(0..3, vec![lit('a', 0), lit('b', 2)])));
+        assert_eq!(
+            parser_nest_limit("a|b|c", 1).parse(),
+            Ok(alt(0..5, vec![lit('a', 0), lit('b', 2), lit('c', 4)])));
+
+        // Test character classes. Classes form their own mini-recursive
+        // syntax!
+        assert_eq!(
+            parser_nest_limit("[a]", 0).parse().unwrap_err(),
+            TestError {
+                span: span(0..3),
+                kind: ast::ErrorKind::NestLimitExceeded(0),
+            });
+        assert_eq!(
+            parser_nest_limit("[a]", 1).parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..3),
+                negated: false,
+                kind: ast::ClassSet::Item(
+                    ast::ClassSetItem::Literal(ast::Literal {
+                        span: span(1..2),
+                        kind: ast::LiteralKind::Verbatim,
+                        c: 'a',
+                    })
+                ),
+            }))));
+        assert_eq!(
+            parser_nest_limit("[ab]", 1).parse().unwrap_err(),
+            TestError {
+                span: span(1..3),
+                kind: ast::ErrorKind::NestLimitExceeded(1),
+            });
+        assert_eq!(
+            parser_nest_limit("[ab[cd]]", 2).parse().unwrap_err(),
+            TestError {
+                span: span(3..7),
+                kind: ast::ErrorKind::NestLimitExceeded(2),
+            });
+        assert_eq!(
+            parser_nest_limit("[ab[cd]]", 3).parse().unwrap_err(),
+            TestError {
+                span: span(4..6),
+                kind: ast::ErrorKind::NestLimitExceeded(3),
+            });
+        assert_eq!(
+            parser_nest_limit("[a--b]", 1).parse().unwrap_err(),
+            TestError {
+                span: span(1..5),
+                kind: ast::ErrorKind::NestLimitExceeded(1),
+            });
+        assert_eq!(
+            parser_nest_limit("[a--bc]", 2).parse().unwrap_err(),
+            TestError {
+                span: span(4..6),
+                kind: ast::ErrorKind::NestLimitExceeded(2),
+            });
+    }
+
+    #[test]
+    fn parse_comments() {
+        let pat = "(?x)
+# This is comment 1.
+foo # This is comment 2.
+  # This is comment 3.
+bar
+# This is comment 4.";
+        let astc = parser(pat).parse_with_comments().unwrap();
+        assert_eq!(
+            astc.ast,
+            concat_with(span_range(pat, 0..pat.len()), vec![
+                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
+                lit_with('f', span_range(pat, 26..27)),
+                lit_with('o', span_range(pat, 27..28)),
+                lit_with('o', span_range(pat, 28..29)),
+                lit_with('b', span_range(pat, 74..75)),
+                lit_with('a', span_range(pat, 75..76)),
+                lit_with('r', span_range(pat, 76..77)),
+            ]));
+        assert_eq!(astc.comments, vec![
+            ast::Comment {
+                span: span_range(pat, 5..26),
+                comment: s(" This is comment 1."),
+            },
+            ast::Comment {
+                span: span_range(pat, 30..51),
+                comment: s(" This is comment 2."),
+            },
+            ast::Comment {
+                span: span_range(pat, 53..74),
+                comment: s(" This is comment 3."),
+            },
+            ast::Comment {
+                span: span_range(pat, 78..98),
+                comment: s(" This is comment 4."),
+            },
+        ]);
+    }
+
+    #[test]
+    fn parse_holistic() {
+        assert_eq!(
+            parser("]").parse(),
+            Ok(lit(']', 0)));
+        assert_eq!(
+            parser(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#\&\-\~").parse(),
+            Ok(concat(0..36, vec![
+                punct_lit('\\', span(0..2)),
+                punct_lit('.', span(2..4)),
+                punct_lit('+', span(4..6)),
+                punct_lit('*', span(6..8)),
+                punct_lit('?', span(8..10)),
+                punct_lit('(', span(10..12)),
+                punct_lit(')', span(12..14)),
+                punct_lit('|', span(14..16)),
+                punct_lit('[', span(16..18)),
+                punct_lit(']', span(18..20)),
+                punct_lit('{', span(20..22)),
+                punct_lit('}', span(22..24)),
+                punct_lit('^', span(24..26)),
+                punct_lit('$', span(26..28)),
+                punct_lit('#', span(28..30)),
+                punct_lit('&', span(30..32)),
+                punct_lit('-', span(32..34)),
+                punct_lit('~', span(34..36)),
+            ])));
+    }
+
+    #[test]
+    fn parse_ignore_whitespace() {
+        // Test that basic whitespace insensitivity works.
+        let pat = "(?x)a b";
+        assert_eq!(
+            parser(pat).parse(),
+            Ok(concat_with(nspan(npos(0, 1, 1), npos(7, 1, 8)), vec![
+                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
+                lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
+                lit_with('b', nspan(npos(6, 1, 7), npos(7, 1, 8))),
+            ])));
+
+        // Test that we can toggle whitespace insensitivity.
+        let pat = "(?x)a b(?-x)a b";
+        assert_eq!(
+            parser(pat).parse(),
+            Ok(concat_with(nspan(npos(0, 1, 1), npos(15, 1, 16)), vec![
+                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
+                lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
+                lit_with('b', nspan(npos(6, 1, 7), npos(7, 1, 8))),
+                flag_set(pat, 7..12, ast::Flag::IgnoreWhitespace, true),
+                lit_with('a', nspan(npos(12, 1, 13), npos(13, 1, 14))),
+                lit_with(' ', nspan(npos(13, 1, 14), npos(14, 1, 15))),
+                lit_with('b', nspan(npos(14, 1, 15), npos(15, 1, 16))),
+            ])));
+
+        // Test that nesting whitespace insensitive flags works.
+        let pat = "a (?x:a )a ";
+        assert_eq!(
+            parser(pat).parse(),
+            Ok(concat_with(span_range(pat, 0..11), vec![
+                lit_with('a', span_range(pat, 0..1)),
+                lit_with(' ', span_range(pat, 1..2)),
+                Ast::Group(ast::Group {
+                    span: span_range(pat, 2..9),
+                    kind: ast::GroupKind::NonCapturing(ast::Flags {
+                        span: span_range(pat, 4..5),
+                        items: vec![
+                            ast::FlagsItem {
+                                span: span_range(pat, 4..5),
+                                kind: ast::FlagsItemKind::Flag(
+                                    ast::Flag::IgnoreWhitespace),
+                            },
+                        ],
+                    }),
+                    ast: Box::new(lit_with('a', span_range(pat, 6..7))),
+                }),
+                lit_with('a', span_range(pat, 9..10)),
+                lit_with(' ', span_range(pat, 10..11)),
+            ])));
+
+        // Test that whitespace after an opening paren is insignificant.
+        let pat = "(?x)( ?P<foo> a )";
+        assert_eq!(
+            parser(pat).parse(),
+            Ok(concat_with(span_range(pat, 0..pat.len()), vec![
+                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
+                Ast::Group(ast::Group {
+                    span: span_range(pat, 4..pat.len()),
+                    kind: ast::GroupKind::CaptureName(ast::CaptureName {
+                        span: span_range(pat, 9..12),
+                        name: s("foo"),
+                        index: 1,
+                    }),
+                    ast: Box::new(lit_with('a', span_range(pat, 14..15))),
+                }),
+            ])));
+        let pat = "(?x)(  a )";
+        assert_eq!(
+            parser(pat).parse(),
+            Ok(concat_with(span_range(pat, 0..pat.len()), vec![
+                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
+                Ast::Group(ast::Group {
+                    span: span_range(pat, 4..pat.len()),
+                    kind: ast::GroupKind::CaptureIndex(1),
+                    ast: Box::new(lit_with('a', span_range(pat, 7..8))),
+                }),
+            ])));
+        let pat = "(?x)(  ?:  a )";
+        assert_eq!(
+            parser(pat).parse(),
+            Ok(concat_with(span_range(pat, 0..pat.len()), vec![
+                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
+                Ast::Group(ast::Group {
+                    span: span_range(pat, 4..pat.len()),
+                    kind: ast::GroupKind::NonCapturing(ast::Flags {
+                        span: span_range(pat, 8..8),
+                        items: vec![],
+                    }),
+                    ast: Box::new(lit_with('a', span_range(pat, 11..12))),
+                }),
+            ])));
+        let pat = r"(?x)\x { 53 }";
+        assert_eq!(
+            parser(pat).parse(),
+            Ok(concat_with(span_range(pat, 0..pat.len()), vec![
+                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
+                Ast::Literal(ast::Literal {
+                    span: span(4..13),
+                    kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
+                    c: 'S',
+                }),
+            ])));
+
+        // Test that whitespace after an escape is OK.
+        let pat = r"(?x)\ ";
+        assert_eq!(
+            parser(pat).parse(),
+            Ok(concat_with(span_range(pat, 0..pat.len()), vec![
+                flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
+                Ast::Literal(ast::Literal {
+                    span: span_range(pat, 4..6),
+                    kind: ast::LiteralKind::Special(
+                        ast::SpecialLiteralKind::Space),
+                    c: ' ',
+                }),
+            ])));
+        // ... but only when `x` mode is enabled.
+        let pat = r"\ ";
+        assert_eq!(
+            parser(pat).parse().unwrap_err(),
+            TestError {
+                span: span_range(pat, 0..2),
+                kind: ast::ErrorKind::EscapeUnrecognized,
+            });
+    }
+
+    #[test]
+    fn parse_newlines() {
+        let pat = ".\n.";
+        assert_eq!(
+            parser(pat).parse(),
+            Ok(concat_with(span_range(pat, 0..3), vec![
+                Ast::Dot(span_range(pat, 0..1)),
+                lit_with('\n', span_range(pat, 1..2)),
+                Ast::Dot(span_range(pat, 2..3)),
+            ])));
+
+        let pat = "foobar\nbaz\nquux\n";
+        assert_eq!(
+            parser(pat).parse(),
+            Ok(concat_with(span_range(pat, 0..pat.len()), vec![
+                lit_with('f', nspan(npos(0, 1, 1), npos(1, 1, 2))),
+                lit_with('o', nspan(npos(1, 1, 2), npos(2, 1, 3))),
+                lit_with('o', nspan(npos(2, 1, 3), npos(3, 1, 4))),
+                lit_with('b', nspan(npos(3, 1, 4), npos(4, 1, 5))),
+                lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
+                lit_with('r', nspan(npos(5, 1, 6), npos(6, 1, 7))),
+                lit_with('\n', nspan(npos(6, 1, 7), npos(7, 2, 1))),
+                lit_with('b', nspan(npos(7, 2, 1), npos(8, 2, 2))),
+                lit_with('a', nspan(npos(8, 2, 2), npos(9, 2, 3))),
+                lit_with('z', nspan(npos(9, 2, 3), npos(10, 2, 4))),
+                lit_with('\n', nspan(npos(10, 2, 4), npos(11, 3, 1))),
+                lit_with('q', nspan(npos(11, 3, 1), npos(12, 3, 2))),
+                lit_with('u', nspan(npos(12, 3, 2), npos(13, 3, 3))),
+                lit_with('u', nspan(npos(13, 3, 3), npos(14, 3, 4))),
+                lit_with('x', nspan(npos(14, 3, 4), npos(15, 3, 5))),
+                lit_with('\n', nspan(npos(15, 3, 5), npos(16, 4, 1))),
+            ])));
+    }
+
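+    // `*`, `+` and `?` repetition operators, including the non-greedy `??`
+    // form and the errors produced when the operator has nothing to repeat.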
+    #[test]
+    fn parse_uncounted_repetition() {
+        assert_eq!(
+            parser(r"a*").parse(),
+            Ok(Ast::Repetition(ast::Repetition {
+                span: span(0..2),
+                op: ast::RepetitionOp {
+                    span: span(1..2),
+                    kind: ast::RepetitionKind::ZeroOrMore,
+                },
+                greedy: true,
+                ast: Box::new(lit('a', 0)),
+            })));
+        assert_eq!(
+            parser(r"a+").parse(),
+            Ok(Ast::Repetition(ast::Repetition {
+                span: span(0..2),
+                op: ast::RepetitionOp {
+                    span: span(1..2),
+                    kind: ast::RepetitionKind::OneOrMore,
+                },
+                greedy: true,
+                ast: Box::new(lit('a', 0)),
+            })));
+
+        assert_eq!(
+            parser(r"a?").parse(),
+            Ok(Ast::Repetition(ast::Repetition {
+                span: span(0..2),
+                op: ast::RepetitionOp {
+                    span: span(1..2),
+                    kind: ast::RepetitionKind::ZeroOrOne,
+                },
+                greedy: true,
+                ast: Box::new(lit('a', 0)),
+            })));
+        assert_eq!(
+            parser(r"a??").parse(),
+            Ok(Ast::Repetition(ast::Repetition {
+                span: span(0..3),
+                op: ast::RepetitionOp {
+                    span: span(1..3),
+                    kind: ast::RepetitionKind::ZeroOrOne,
+                },
+                greedy: false,
+                ast: Box::new(lit('a', 0)),
+            })));
+        assert_eq!(
+            parser(r"a?").parse(),
+            Ok(Ast::Repetition(ast::Repetition {
+                span: span(0..2),
+                op: ast::RepetitionOp {
+                    span: span(1..2),
+                    kind: ast::RepetitionKind::ZeroOrOne,
+                },
+                greedy: true,
+                ast: Box::new(lit('a', 0)),
+            })));
+        assert_eq!(
+            parser(r"a?b").parse(),
+            Ok(concat(0..3, vec![
+                Ast::Repetition(ast::Repetition {
+                    span: span(0..2),
+                    op: ast::RepetitionOp {
+                        span: span(1..2),
+                        kind: ast::RepetitionKind::ZeroOrOne,
+                    },
+                    greedy: true,
+                    ast: Box::new(lit('a', 0)),
+                }),
+                lit('b', 2),
+            ])));
+        assert_eq!(
+            parser(r"a??b").parse(),
+            Ok(concat(0..4, vec![
+                Ast::Repetition(ast::Repetition {
+                    span: span(0..3),
+                    op: ast::RepetitionOp {
+                        span: span(1..3),
+                        kind: ast::RepetitionKind::ZeroOrOne,
+                    },
+                    greedy: false,
+                    ast: Box::new(lit('a', 0)),
+                }),
+                lit('b', 3),
+            ])));
+        assert_eq!(
+            parser(r"ab?").parse(),
+            Ok(concat(0..3, vec![
+                lit('a', 0),
+                Ast::Repetition(ast::Repetition {
+                    span: span(1..3),
+                    op: ast::RepetitionOp {
+                        span: span(2..3),
+                        kind: ast::RepetitionKind::ZeroOrOne,
+                    },
+                    greedy: true,
+                    ast: Box::new(lit('b', 1)),
+                }),
+            ])));
+        assert_eq!(
+            parser(r"(ab)?").parse(),
+            Ok(Ast::Repetition(ast::Repetition {
+                span: span(0..5),
+                op: ast::RepetitionOp {
+                    span: span(4..5),
+                    kind: ast::RepetitionKind::ZeroOrOne,
+                },
+                greedy: true,
+                ast: Box::new(group(0..4, 1, concat(1..3, vec![
+                    lit('a', 1),
+                    lit('b', 2),
+                ]))),
+            })));
+        assert_eq!(
+            parser(r"|a?").parse(),
+            Ok(alt(0..3, vec![
+                Ast::Empty(span(0..0)),
+                Ast::Repetition(ast::Repetition {
+                    span: span(1..3),
+                    op: ast::RepetitionOp {
+                        span: span(2..3),
+                        kind: ast::RepetitionKind::ZeroOrOne,
+                    },
+                    greedy: true,
+                    ast: Box::new(lit('a', 1)),
+                }),
+            ])));
+
+        assert_eq!(
+            parser(r"*").parse().unwrap_err(),
+            TestError {
+                span: span(0..0),
+                kind: ast::ErrorKind::RepetitionMissing,
+            });
+        assert_eq!(
+            parser(r"(?i)*").parse().unwrap_err(),
+            TestError {
+                span: span(4..4),
+                kind: ast::ErrorKind::RepetitionMissing,
+            });
+        assert_eq!(
+            parser(r"(*)").parse().unwrap_err(),
+            TestError {
+                span: span(1..1),
+                kind: ast::ErrorKind::RepetitionMissing,
+            });
+        assert_eq!(
+            parser(r"(?:?)").parse().unwrap_err(),
+            TestError {
+                span: span(3..3),
+                kind: ast::ErrorKind::RepetitionMissing,
+            });
+        assert_eq!(
+            parser(r"+").parse().unwrap_err(),
+            TestError {
+                span: span(0..0),
+                kind: ast::ErrorKind::RepetitionMissing,
+            });
+        assert_eq!(
+            parser(r"?").parse().unwrap_err(),
+            TestError {
+                span: span(0..0),
+                kind: ast::ErrorKind::RepetitionMissing,
+            });
+        assert_eq!(
+            parser(r"(?)").parse().unwrap_err(),
+            TestError {
+                span: span(1..1),
+                kind: ast::ErrorKind::RepetitionMissing,
+            });
+        assert_eq!(
+            parser(r"|*").parse().unwrap_err(),
+            TestError {
+                span: span(1..1),
+                kind: ast::ErrorKind::RepetitionMissing,
+            });
+        assert_eq!(
+            parser(r"|+").parse().unwrap_err(),
+            TestError {
+                span: span(1..1),
+                kind: ast::ErrorKind::RepetitionMissing,
+            });
+        assert_eq!(
+            parser(r"|?").parse().unwrap_err(),
+            TestError {
+                span: span(1..1),
+                kind: ast::ErrorKind::RepetitionMissing,
+            });
+    }
+
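+    // Counted repetitions: `{n}`, `{n,}` and `{n,m}`, with and without
+    // surrounding whitespace, plus malformed or out-of-range counts.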
+    #[test]
+    fn parse_counted_repetition() {
+        assert_eq!(
+            parser(r"a{5}").parse(),
+            Ok(Ast::Repetition(ast::Repetition {
+                span: span(0..4),
+                op: ast::RepetitionOp {
+                    span: span(1..4),
+                    kind: ast::RepetitionKind::Range(
+                        ast::RepetitionRange::Exactly(5)),
+                },
+                greedy: true,
+                ast: Box::new(lit('a', 0)),
+            })));
+        assert_eq!(
+            parser(r"a{5,}").parse(),
+            Ok(Ast::Repetition(ast::Repetition {
+                span: span(0..5),
+                op: ast::RepetitionOp {
+                    span: span(1..5),
+                    kind: ast::RepetitionKind::Range(
+                        ast::RepetitionRange::AtLeast(5)),
+                },
+                greedy: true,
+                ast: Box::new(lit('a', 0)),
+            })));
+        assert_eq!(
+            parser(r"a{5,9}").parse(),
+            Ok(Ast::Repetition(ast::Repetition {
+                span: span(0..6),
+                op: ast::RepetitionOp {
+                    span: span(1..6),
+                    kind: ast::RepetitionKind::Range(
+                        ast::RepetitionRange::Bounded(5, 9)),
+                },
+                greedy: true,
+                ast: Box::new(lit('a', 0)),
+            })));
+        assert_eq!(
+            parser(r"a{5}?").parse(),
+            Ok(Ast::Repetition(ast::Repetition {
+                span: span(0..5),
+                op: ast::RepetitionOp {
+                    span: span(1..5),
+                    kind: ast::RepetitionKind::Range(
+                        ast::RepetitionRange::Exactly(5)),
+                },
+                greedy: false,
+                ast: Box::new(lit('a', 0)),
+            })));
+        assert_eq!(
+            parser(r"ab{5}").parse(),
+            Ok(concat(0..5, vec![
+                lit('a', 0),
+                Ast::Repetition(ast::Repetition {
+                    span: span(1..5),
+                    op: ast::RepetitionOp {
+                        span: span(2..5),
+                        kind: ast::RepetitionKind::Range(
+                            ast::RepetitionRange::Exactly(5)),
+                    },
+                    greedy: true,
+                    ast: Box::new(lit('b', 1)),
+                }),
+            ])));
+        assert_eq!(
+            parser(r"ab{5}c").parse(),
+            Ok(concat(0..6, vec![
+                lit('a', 0),
+                Ast::Repetition(ast::Repetition {
+                    span: span(1..5),
+                    op: ast::RepetitionOp {
+                        span: span(2..5),
+                        kind: ast::RepetitionKind::Range(
+                            ast::RepetitionRange::Exactly(5)),
+                    },
+                    greedy: true,
+                    ast: Box::new(lit('b', 1)),
+                }),
+                lit('c', 5),
+            ])));
+
+        assert_eq!(
+            parser(r"a{ 5 }").parse(),
+            Ok(Ast::Repetition(ast::Repetition {
+                span: span(0..6),
+                op: ast::RepetitionOp {
+                    span: span(1..6),
+                    kind: ast::RepetitionKind::Range(
+                        ast::RepetitionRange::Exactly(5)),
+                },
+                greedy: true,
+                ast: Box::new(lit('a', 0)),
+            })));
+        assert_eq!(
+            parser(r"a{ 5 , 9 }").parse(),
+            Ok(Ast::Repetition(ast::Repetition {
+                span: span(0..10),
+                op: ast::RepetitionOp {
+                    span: span(1..10),
+                    kind: ast::RepetitionKind::Range(
+                        ast::RepetitionRange::Bounded(5, 9)),
+                },
+                greedy: true,
+                ast: Box::new(lit('a', 0)),
+            })));
+        assert_eq!(
+            parser_ignore_whitespace(r"a{5,9} ?").parse(),
+            Ok(Ast::Repetition(ast::Repetition {
+                span: span(0..8),
+                op: ast::RepetitionOp {
+                    span: span(1..8),
+                    kind: ast::RepetitionKind::Range(
+                        ast::RepetitionRange::Bounded(5, 9)),
+                },
+                greedy: false,
+                ast: Box::new(lit('a', 0)),
+            })));
+
+        assert_eq!(
+            parser(r"a{").parse().unwrap_err(),
+            TestError {
+                span: span(1..2),
+                kind: ast::ErrorKind::RepetitionCountUnclosed,
+            });
+        assert_eq!(
+            parser(r"a{}").parse().unwrap_err(),
+            TestError {
+                span: span(2..2),
+                kind: ast::ErrorKind::DecimalEmpty,
+            });
+        assert_eq!(
+            parser(r"a{a").parse().unwrap_err(),
+            TestError {
+                span: span(2..2),
+                kind: ast::ErrorKind::DecimalEmpty,
+            });
+        assert_eq!(
+            parser(r"a{9999999999}").parse().unwrap_err(),
+            TestError {
+                span: span(2..12),
+                kind: ast::ErrorKind::DecimalInvalid,
+            });
+        assert_eq!(
+            parser(r"a{9").parse().unwrap_err(),
+            TestError {
+                span: span(1..3),
+                kind: ast::ErrorKind::RepetitionCountUnclosed,
+            });
+        assert_eq!(
+            parser(r"a{9,a").parse().unwrap_err(),
+            TestError {
+                span: span(4..4),
+                kind: ast::ErrorKind::DecimalEmpty,
+            });
+        assert_eq!(
+            parser(r"a{9,9999999999}").parse().unwrap_err(),
+            TestError {
+                span: span(4..14),
+                kind: ast::ErrorKind::DecimalInvalid,
+            });
+        assert_eq!(
+            parser(r"a{9,").parse().unwrap_err(),
+            TestError {
+                span: span(1..4),
+                kind: ast::ErrorKind::RepetitionCountUnclosed,
+            });
+        assert_eq!(
+            parser(r"a{9,11").parse().unwrap_err(),
+            TestError {
+                span: span(1..6),
+                kind: ast::ErrorKind::RepetitionCountUnclosed,
+            });
+        assert_eq!(
+            parser(r"a{2,1}").parse().unwrap_err(),
+            TestError {
+                span: span(1..6),
+                kind: ast::ErrorKind::RepetitionCountInvalid,
+            });
+        assert_eq!(
+            parser(r"{5}").parse().unwrap_err(),
+            TestError {
+                span: span(0..0),
+                kind: ast::ErrorKind::RepetitionMissing,
+            });
+        assert_eq!(
+            parser(r"|{5}").parse().unwrap_err(),
+            TestError {
+                span: span(1..1),
+                kind: ast::ErrorKind::RepetitionMissing,
+            });
+    }
+
+    #[test]
+    fn parse_alternate() {
+        assert_eq!(
+            parser(r"a|b").parse(),
+            Ok(Ast::Alternation(ast::Alternation {
+                span: span(0..3),
+                asts: vec![lit('a', 0), lit('b', 2)],
+            })));
+        assert_eq!(
+            parser(r"(a|b)").parse(),
+            Ok(group(0..5, 1, Ast::Alternation(ast::Alternation {
+                span: span(1..4),
+                asts: vec![lit('a', 1), lit('b', 3)],
+            }))));
+
+        assert_eq!(
+            parser(r"a|b|c").parse(),
+            Ok(Ast::Alternation(ast::Alternation {
+                span: span(0..5),
+                asts: vec![lit('a', 0), lit('b', 2), lit('c', 4)],
+            })));
+        assert_eq!(
+            parser(r"ax|by|cz").parse(),
+            Ok(Ast::Alternation(ast::Alternation {
+                span: span(0..8),
+                asts: vec![
+                    concat(0..2, vec![lit('a', 0), lit('x', 1)]),
+                    concat(3..5, vec![lit('b', 3), lit('y', 4)]),
+                    concat(6..8, vec![lit('c', 6), lit('z', 7)]),
+                ],
+            })));
+        assert_eq!(
+            parser(r"(ax|by|cz)").parse(),
+            Ok(group(0..10, 1, Ast::Alternation(ast::Alternation {
+                span: span(1..9),
+                asts: vec![
+                    concat(1..3, vec![lit('a', 1), lit('x', 2)]),
+                    concat(4..6, vec![lit('b', 4), lit('y', 5)]),
+                    concat(7..9, vec![lit('c', 7), lit('z', 8)]),
+                ],
+            }))));
+        assert_eq!(
+            parser(r"(ax|(by|(cz)))").parse(),
+            Ok(group(0..14, 1, alt(1..13, vec![
+                concat(1..3, vec![lit('a', 1), lit('x', 2)]),
+                group(4..13, 2, alt(5..12, vec![
+                    concat(5..7, vec![lit('b', 5), lit('y', 6)]),
+                    group(8..12, 3, concat(9..11, vec![
+                        lit('c', 9),
+                        lit('z', 10),
+                    ])),
+                ])),
+            ]))));
+
+        assert_eq!(
+            parser(r"|").parse(), Ok(alt(0..1, vec![
+                Ast::Empty(span(0..0)), Ast::Empty(span(1..1)),
+            ])));
+        assert_eq!(
+            parser(r"||").parse(), Ok(alt(0..2, vec![
+                Ast::Empty(span(0..0)),
+                Ast::Empty(span(1..1)),
+                Ast::Empty(span(2..2)),
+            ])));
+        assert_eq!(
+            parser(r"a|").parse(), Ok(alt(0..2, vec![
+                lit('a', 0), Ast::Empty(span(2..2)),
+            ])));
+        assert_eq!(
+            parser(r"|a").parse(), Ok(alt(0..2, vec![
+                Ast::Empty(span(0..0)), lit('a', 1),
+            ])));
+
+        assert_eq!(
+            parser(r"(|)").parse(), Ok(group(0..3, 1, alt(1..2, vec![
+                Ast::Empty(span(1..1)), Ast::Empty(span(2..2)),
+            ]))));
+        assert_eq!(
+            parser(r"(a|)").parse(), Ok(group(0..4, 1, alt(1..3, vec![
+                lit('a', 1), Ast::Empty(span(3..3)),
+            ]))));
+        assert_eq!(
+            parser(r"(|a)").parse(), Ok(group(0..4, 1, alt(1..3, vec![
+                Ast::Empty(span(1..1)), lit('a', 2),
+            ]))));
+
+        assert_eq!(
+            parser(r"a|b)").parse().unwrap_err(),
+            TestError {
+                span: span(3..4),
+                kind: ast::ErrorKind::GroupUnopened,
+            });
+        assert_eq!(
+            parser(r"(a|b").parse().unwrap_err(),
+            TestError {
+                span: span(0..1),
+                kind: ast::ErrorKind::GroupUnclosed,
+            });
+    }
+
+    #[test]
+    fn parse_unsupported_lookaround() {
+        assert_eq!(
+            parser(r"(?=a)").parse().unwrap_err(),
+            TestError {
+                span: span(0..3),
+                kind: ast::ErrorKind::UnsupportedLookAround,
+            });
+        assert_eq!(
+            parser(r"(?!a)").parse().unwrap_err(),
+            TestError {
+                span: span(0..3),
+                kind: ast::ErrorKind::UnsupportedLookAround,
+            });
+        assert_eq!(
+            parser(r"(?<=a)").parse().unwrap_err(),
+            TestError {
+                span: span(0..4),
+                kind: ast::ErrorKind::UnsupportedLookAround,
+            });
+        assert_eq!(
+            parser(r"(?<!a)").parse().unwrap_err(),
+            TestError {
+                span: span(0..4),
+                kind: ast::ErrorKind::UnsupportedLookAround,
+            });
+    }
+
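+    // Groups: flag-only `(?i)`, capturing `(..)`, non-capturing `(?:..)`
+    // and flagged non-capturing `(?i-U:..)`, along with unclosed and
+    // unopened group errors.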
+    #[test]
+    fn parse_group() {
+        assert_eq!(parser("(?i)").parse(), Ok(Ast::Flags(ast::SetFlags {
+            span: span(0..4),
+            flags: ast::Flags {
+                span: span(2..3),
+                items: vec![ast::FlagsItem {
+                    span: span(2..3),
+                    kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
+                }],
+            },
+        })));
+        assert_eq!(parser("(?iU)").parse(), Ok(Ast::Flags(ast::SetFlags {
+            span: span(0..5),
+            flags: ast::Flags {
+                span: span(2..4),
+                items: vec![
+                    ast::FlagsItem {
+                        span: span(2..3),
+                        kind: ast::FlagsItemKind::Flag(
+                            ast::Flag::CaseInsensitive),
+                    },
+                    ast::FlagsItem {
+                        span: span(3..4),
+                        kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
+                    },
+                ],
+            },
+        })));
+        assert_eq!(parser("(?i-U)").parse(), Ok(Ast::Flags(ast::SetFlags {
+            span: span(0..6),
+            flags: ast::Flags {
+                span: span(2..5),
+                items: vec![
+                    ast::FlagsItem {
+                        span: span(2..3),
+                        kind: ast::FlagsItemKind::Flag(
+                            ast::Flag::CaseInsensitive),
+                    },
+                    ast::FlagsItem {
+                        span: span(3..4),
+                        kind: ast::FlagsItemKind::Negation,
+                    },
+                    ast::FlagsItem {
+                        span: span(4..5),
+                        kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
+                    },
+                ],
+            },
+        })));
+
+        assert_eq!(parser("()").parse(), Ok(Ast::Group(ast::Group {
+            span: span(0..2),
+            kind: ast::GroupKind::CaptureIndex(1),
+            ast: Box::new(Ast::Empty(span(1..1))),
+        })));
+        assert_eq!(parser("(a)").parse(), Ok(Ast::Group(ast::Group {
+            span: span(0..3),
+            kind: ast::GroupKind::CaptureIndex(1),
+            ast: Box::new(lit('a', 1)),
+        })));
+        assert_eq!(parser("(())").parse(), Ok(Ast::Group(ast::Group {
+            span: span(0..4),
+            kind: ast::GroupKind::CaptureIndex(1),
+            ast: Box::new(Ast::Group(ast::Group {
+                span: span(1..3),
+                kind: ast::GroupKind::CaptureIndex(2),
+                ast: Box::new(Ast::Empty(span(2..2))),
+            })),
+        })));
+
+        assert_eq!(parser("(?:a)").parse(), Ok(Ast::Group(ast::Group {
+            span: span(0..5),
+            kind: ast::GroupKind::NonCapturing(ast::Flags {
+                span: span(2..2),
+                items: vec![],
+            }),
+            ast: Box::new(lit('a', 3)),
+        })));
+
+        assert_eq!(parser("(?i:a)").parse(), Ok(Ast::Group(ast::Group {
+            span: span(0..6),
+            kind: ast::GroupKind::NonCapturing(ast::Flags {
+                span: span(2..3),
+                items: vec![
+                    ast::FlagsItem {
+                        span: span(2..3),
+                        kind: ast::FlagsItemKind::Flag(
+                            ast::Flag::CaseInsensitive),
+                    },
+                ],
+            }),
+            ast: Box::new(lit('a', 4)),
+        })));
+        assert_eq!(parser("(?i-U:a)").parse(), Ok(Ast::Group(ast::Group {
+            span: span(0..8),
+            kind: ast::GroupKind::NonCapturing(ast::Flags {
+                span: span(2..5),
+                items: vec![
+                    ast::FlagsItem {
+                        span: span(2..3),
+                        kind: ast::FlagsItemKind::Flag(
+                            ast::Flag::CaseInsensitive),
+                    },
+                    ast::FlagsItem {
+                        span: span(3..4),
+                        kind: ast::FlagsItemKind::Negation,
+                    },
+                    ast::FlagsItem {
+                        span: span(4..5),
+                        kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
+                    },
+                ],
+            }),
+            ast: Box::new(lit('a', 6)),
+        })));
+
+        assert_eq!(
+            parser("(").parse().unwrap_err(),
+            TestError {
+                span: span(0..1),
+                kind: ast::ErrorKind::GroupUnclosed,
+            });
+        assert_eq!(
+            parser("(?").parse().unwrap_err(),
+            TestError {
+                span: span(0..1),
+                kind: ast::ErrorKind::GroupUnclosed,
+            });
+        assert_eq!(
+            parser("(?P").parse().unwrap_err(),
+            TestError {
+                span: span(2..3),
+                kind: ast::ErrorKind::FlagUnrecognized,
+            });
+        assert_eq!(
+            parser("(?P<").parse().unwrap_err(),
+            TestError {
+                span: span(4..4),
+                kind: ast::ErrorKind::GroupNameUnexpectedEof,
+            });
+        assert_eq!(
+            parser("(a").parse().unwrap_err(),
+            TestError {
+                span: span(0..1),
+                kind: ast::ErrorKind::GroupUnclosed,
+            });
+        assert_eq!(
+            parser("(()").parse().unwrap_err(),
+            TestError {
+                span: span(0..1),
+                kind: ast::ErrorKind::GroupUnclosed,
+            });
+        assert_eq!(
+            parser(")").parse().unwrap_err(),
+            TestError {
+                span: span(0..1),
+                kind: ast::ErrorKind::GroupUnopened,
+            });
+        assert_eq!(
+            parser("a)").parse().unwrap_err(),
+            TestError {
+                span: span(1..2),
+                kind: ast::ErrorKind::GroupUnopened,
+            });
+    }
+
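+    // Named capture groups via `(?P<name>..)`, including empty, invalid
+    // and duplicate names.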
+    #[test]
+    fn parse_capture_name() {
+        assert_eq!(parser("(?P<a>z)").parse(), Ok(Ast::Group(ast::Group {
+            span: span(0..8),
+            kind: ast::GroupKind::CaptureName(ast::CaptureName {
+                span: span(4..5),
+                name: s("a"),
+                index: 1,
+            }),
+            ast: Box::new(lit('z', 6)),
+        })));
+        assert_eq!(parser("(?P<abc>z)").parse(), Ok(Ast::Group(ast::Group {
+            span: span(0..10),
+            kind: ast::GroupKind::CaptureName(ast::CaptureName {
+                span: span(4..7),
+                name: s("abc"),
+                index: 1,
+            }),
+            ast: Box::new(lit('z', 8)),
+        })));
+
+        assert_eq!(
+            parser("(?P<").parse().unwrap_err(),
+            TestError {
+                span: span(4..4),
+                kind: ast::ErrorKind::GroupNameUnexpectedEof,
+            });
+        assert_eq!(
+            parser("(?P<>z)").parse().unwrap_err(),
+            TestError {
+                span: span(4..4),
+                kind: ast::ErrorKind::GroupNameEmpty,
+            });
+        assert_eq!(
+            parser("(?P<a").parse().unwrap_err(),
+            TestError {
+                span: span(5..5),
+                kind: ast::ErrorKind::GroupNameUnexpectedEof,
+            });
+        assert_eq!(
+            parser("(?P<ab").parse().unwrap_err(),
+            TestError {
+                span: span(6..6),
+                kind: ast::ErrorKind::GroupNameUnexpectedEof,
+            });
+        assert_eq!(
+            parser("(?P<0a").parse().unwrap_err(),
+            TestError {
+                span: span(4..5),
+                kind: ast::ErrorKind::GroupNameInvalid,
+            });
+        assert_eq!(
+            parser("(?P<~").parse().unwrap_err(),
+            TestError {
+                span: span(4..5),
+                kind: ast::ErrorKind::GroupNameInvalid,
+            });
+        assert_eq!(
+            parser("(?P<abc~").parse().unwrap_err(),
+            TestError {
+                span: span(7..8),
+                kind: ast::ErrorKind::GroupNameInvalid,
+            });
+        assert_eq!(
+            parser("(?P<a>y)(?P<a>z)").parse().unwrap_err(),
+            TestError {
+                span: span(12..13),
+                kind: ast::ErrorKind::GroupNameDuplicate {
+                    original: span(4..5),
+                },
+            });
+    }
+
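+    // `parse_flags` is called directly here; a flag sequence ends at `:`
+    // or `)`, and duplicate flags or repeated negations are errors.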
+    #[test]
+    fn parse_flags() {
+        assert_eq!(parser("i:").parse_flags(), Ok(ast::Flags {
+            span: span(0..1),
+            items: vec![ast::FlagsItem {
+                span: span(0..1),
+                kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
+            }],
+        }));
+        assert_eq!(parser("i)").parse_flags(), Ok(ast::Flags {
+            span: span(0..1),
+            items: vec![ast::FlagsItem {
+                span: span(0..1),
+                kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
+            }],
+        }));
+
+        assert_eq!(parser("isU:").parse_flags(), Ok(ast::Flags {
+            span: span(0..3),
+            items: vec![
+                ast::FlagsItem {
+                    span: span(0..1),
+                    kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
+                },
+                ast::FlagsItem {
+                    span: span(1..2),
+                    kind: ast::FlagsItemKind::Flag(
+                        ast::Flag::DotMatchesNewLine),
+                },
+                ast::FlagsItem {
+                    span: span(2..3),
+                    kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
+                },
+            ],
+        }));
+
+        assert_eq!(parser("-isU:").parse_flags(), Ok(ast::Flags {
+            span: span(0..4),
+            items: vec![
+                ast::FlagsItem {
+                    span: span(0..1),
+                    kind: ast::FlagsItemKind::Negation,
+                },
+                ast::FlagsItem {
+                    span: span(1..2),
+                    kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
+                },
+                ast::FlagsItem {
+                    span: span(2..3),
+                    kind: ast::FlagsItemKind::Flag(
+                        ast::Flag::DotMatchesNewLine),
+                },
+                ast::FlagsItem {
+                    span: span(3..4),
+                    kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
+                },
+            ],
+        }));
+        assert_eq!(parser("i-sU:").parse_flags(), Ok(ast::Flags {
+            span: span(0..4),
+            items: vec![
+                ast::FlagsItem {
+                    span: span(0..1),
+                    kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
+                },
+                ast::FlagsItem {
+                    span: span(1..2),
+                    kind: ast::FlagsItemKind::Negation,
+                },
+                ast::FlagsItem {
+                    span: span(2..3),
+                    kind: ast::FlagsItemKind::Flag(
+                        ast::Flag::DotMatchesNewLine),
+                },
+                ast::FlagsItem {
+                    span: span(3..4),
+                    kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
+                },
+            ],
+        }));
+
+        assert_eq!(
+            parser("isU").parse_flags().unwrap_err(),
+            TestError {
+                span: span(3..3),
+                kind: ast::ErrorKind::FlagUnexpectedEof,
+            });
+        assert_eq!(
+            parser("isUa:").parse_flags().unwrap_err(),
+            TestError {
+                span: span(3..4),
+                kind: ast::ErrorKind::FlagUnrecognized,
+            });
+        assert_eq!(
+            parser("isUi:").parse_flags().unwrap_err(),
+            TestError {
+                span: span(3..4),
+                kind: ast::ErrorKind::FlagDuplicate {
+                    original: span(0..1),
+                },
+            });
+        assert_eq!(
+            parser("i-sU-i:").parse_flags().unwrap_err(),
+            TestError {
+                span: span(4..5),
+                kind: ast::ErrorKind::FlagRepeatedNegation {
+                    original: span(1..2),
+                },
+            });
+        assert_eq!(
+            parser("-)").parse_flags().unwrap_err(),
+            TestError {
+                span: span(0..1),
+                kind: ast::ErrorKind::FlagDanglingNegation,
+            });
+        assert_eq!(
+            parser("i-)").parse_flags().unwrap_err(),
+            TestError {
+                span: span(1..2),
+                kind: ast::ErrorKind::FlagDanglingNegation,
+            });
+        assert_eq!(
+            parser("iU-)").parse_flags().unwrap_err(),
+            TestError {
+                span: span(2..3),
+                kind: ast::ErrorKind::FlagDanglingNegation,
+            });
+    }
+
+    #[test]
+    fn parse_flag() {
+        assert_eq!(parser("i").parse_flag(), Ok(ast::Flag::CaseInsensitive));
+        assert_eq!(parser("m").parse_flag(), Ok(ast::Flag::MultiLine));
+        assert_eq!(parser("s").parse_flag(), Ok(ast::Flag::DotMatchesNewLine));
+        assert_eq!(parser("U").parse_flag(), Ok(ast::Flag::SwapGreed));
+        assert_eq!(parser("u").parse_flag(), Ok(ast::Flag::Unicode));
+        assert_eq!(parser("x").parse_flag(), Ok(ast::Flag::IgnoreWhitespace));
+
+        assert_eq!(
+            parser("a").parse_flag().unwrap_err(),
+            TestError {
+                span: span(0..1),
+                kind: ast::ErrorKind::FlagUnrecognized,
+            });
+        assert_eq!(
+            parser("☃").parse_flag().unwrap_err(),
+            TestError {
+                span: span_range("☃", 0..3),
+                kind: ast::ErrorKind::FlagUnrecognized,
+            });
+    }
+
+    #[test]
+    fn parse_primitive_non_escape() {
+        assert_eq!(
+            parser(r".").parse_primitive(),
+            Ok(Primitive::Dot(span(0..1))));
+        assert_eq!(
+            parser(r"^").parse_primitive(),
+            Ok(Primitive::Assertion(ast::Assertion {
+                span: span(0..1),
+                kind: ast::AssertionKind::StartLine,
+            })));
+        assert_eq!(
+            parser(r"$").parse_primitive(),
+            Ok(Primitive::Assertion(ast::Assertion {
+                span: span(0..1),
+                kind: ast::AssertionKind::EndLine,
+            })));
+
+        assert_eq!(
+            parser(r"a").parse_primitive(),
+            Ok(Primitive::Literal(ast::Literal {
+                span: span(0..1),
+                kind: ast::LiteralKind::Verbatim,
+                c: 'a',
+            })));
+        assert_eq!(
+            parser(r"|").parse_primitive(),
+            Ok(Primitive::Literal(ast::Literal {
+                span: span(0..1),
+                kind: ast::LiteralKind::Verbatim,
+                c: '|',
+            })));
+        assert_eq!(
+            parser(r"☃").parse_primitive(),
+            Ok(Primitive::Literal(ast::Literal {
+                span: span_range("☃", 0..3),
+                kind: ast::LiteralKind::Verbatim,
+                c: '☃',
+            })));
+    }
+
+    #[test]
+    fn parse_escape() {
+        assert_eq!(
+            parser(r"\|").parse_primitive(),
+            Ok(Primitive::Literal(ast::Literal {
+                span: span(0..2),
+                kind: ast::LiteralKind::Punctuation,
+                c: '|',
+            })));
+        let specials = &[
+            (r"\a", '\x07', ast::SpecialLiteralKind::Bell),
+            (r"\f", '\x0C', ast::SpecialLiteralKind::FormFeed),
+            (r"\t", '\t', ast::SpecialLiteralKind::Tab),
+            (r"\n", '\n', ast::SpecialLiteralKind::LineFeed),
+            (r"\r", '\r', ast::SpecialLiteralKind::CarriageReturn),
+            (r"\v", '\x0B', ast::SpecialLiteralKind::VerticalTab),
+        ];
+        for &(pat, c, ref kind) in specials {
+            assert_eq!(
+                parser(pat).parse_primitive(),
+                Ok(Primitive::Literal(ast::Literal {
+                    span: span(0..2),
+                    kind: ast::LiteralKind::Special(kind.clone()),
+                    c: c,
+                })));
+        }
+        assert_eq!(
+            parser(r"\A").parse_primitive(),
+            Ok(Primitive::Assertion(ast::Assertion {
+                span: span(0..2),
+                kind: ast::AssertionKind::StartText,
+            })));
+        assert_eq!(
+            parser(r"\z").parse_primitive(),
+            Ok(Primitive::Assertion(ast::Assertion {
+                span: span(0..2),
+                kind: ast::AssertionKind::EndText,
+            })));
+        assert_eq!(
+            parser(r"\b").parse_primitive(),
+            Ok(Primitive::Assertion(ast::Assertion {
+                span: span(0..2),
+                kind: ast::AssertionKind::WordBoundary,
+            })));
+        assert_eq!(
+            parser(r"\B").parse_primitive(),
+            Ok(Primitive::Assertion(ast::Assertion {
+                span: span(0..2),
+                kind: ast::AssertionKind::NotWordBoundary,
+            })));
+
+        assert_eq!(
+            parser(r"\").parse_escape().unwrap_err(),
+            TestError {
+                span: span(0..1),
+                kind: ast::ErrorKind::EscapeUnexpectedEof,
+            });
+        assert_eq!(
+            parser(r"\y").parse_escape().unwrap_err(),
+            TestError {
+                span: span(0..2),
+                kind: ast::ErrorKind::EscapeUnrecognized,
+            });
+    }
+
+    #[test]
+    fn parse_unsupported_backreference() {
+        assert_eq!(
+            parser(r"\0").parse_escape().unwrap_err(),
+            TestError {
+                span: span(0..2),
+                kind: ast::ErrorKind::UnsupportedBackreference,
+            });
+        assert_eq!(
+            parser(r"\9").parse_escape().unwrap_err(),
+            TestError {
+                span: span(0..2),
+                kind: ast::ErrorKind::UnsupportedBackreference,
+            });
+    }
+
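+    // Octal escapes are only recognized when the parser is built via
+    // `parser_octal`; at most three octal digits are consumed, and `\8`
+    // is still an unrecognized escape.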
+    #[test]
+    fn parse_octal() {
+        for i in 0..511 {
+            let pat = format!(r"\{:o}", i);
+            assert_eq!(
+                parser_octal(&pat).parse_escape(),
+                Ok(Primitive::Literal(ast::Literal {
+                    span: span(0..pat.len()),
+                    kind: ast::LiteralKind::Octal,
+                    c: ::std::char::from_u32(i).unwrap(),
+                })));
+        }
+        assert_eq!(
+            parser_octal(r"\778").parse_escape(),
+            Ok(Primitive::Literal(ast::Literal {
+                span: span(0..3),
+                kind: ast::LiteralKind::Octal,
+                c: '?',
+            })));
+        assert_eq!(
+            parser_octal(r"\7777").parse_escape(),
+            Ok(Primitive::Literal(ast::Literal {
+                span: span(0..4),
+                kind: ast::LiteralKind::Octal,
+                c: '\u{01FF}',
+            })));
+        assert_eq!(
+            parser_octal(r"\778").parse(),
+            Ok(Ast::Concat(ast::Concat {
+                span: span(0..4),
+                asts: vec![
+                    Ast::Literal(ast::Literal {
+                        span: span(0..3),
+                        kind: ast::LiteralKind::Octal,
+                        c: '?',
+                    }),
+                    Ast::Literal(ast::Literal {
+                        span: span(3..4),
+                        kind: ast::LiteralKind::Verbatim,
+                        c: '8',
+                    }),
+                ],
+            })));
+        assert_eq!(
+            parser_octal(r"\7777").parse(),
+            Ok(Ast::Concat(ast::Concat {
+                span: span(0..5),
+                asts: vec![
+                    Ast::Literal(ast::Literal {
+                        span: span(0..4),
+                        kind: ast::LiteralKind::Octal,
+                        c: '\u{01FF}',
+                    }),
+                    Ast::Literal(ast::Literal {
+                        span: span(4..5),
+                        kind: ast::LiteralKind::Verbatim,
+                        c: '7',
+                    }),
+                ],
+            })));
+
+        assert_eq!(
+            parser_octal(r"\8").parse_escape().unwrap_err(),
+            TestError {
+                span: span(0..2),
+                kind: ast::ErrorKind::EscapeUnrecognized,
+            });
+    }
+
+    #[test]
+    fn parse_hex_two() {
+        for i in 0..256 {
+            let pat = format!(r"\x{:02x}", i);
+            assert_eq!(
+                parser(&pat).parse_escape(),
+                Ok(Primitive::Literal(ast::Literal {
+                    span: span(0..pat.len()),
+                    kind: ast::LiteralKind::HexFixed(ast::HexLiteralKind::X),
+                    c: ::std::char::from_u32(i).unwrap(),
+                })));
+        }
+
+        assert_eq!(
+            parser(r"\xF").parse_escape().unwrap_err(),
+            TestError {
+                span: span(3..3),
+                kind: ast::ErrorKind::EscapeUnexpectedEof,
+            });
+        assert_eq!(
+            parser(r"\xG").parse_escape().unwrap_err(),
+            TestError {
+                span: span(2..3),
+                kind: ast::ErrorKind::EscapeHexInvalidDigit,
+            });
+        assert_eq!(
+            parser(r"\xFG").parse_escape().unwrap_err(),
+            TestError {
+                span: span(3..4),
+                kind: ast::ErrorKind::EscapeHexInvalidDigit,
+            });
+    }
+
+    #[test]
+    fn parse_hex_four() {
+        for i in 0..65536 {
+            let c = match ::std::char::from_u32(i) {
+                None => continue,
+                Some(c) => c,
+            };
+            let pat = format!(r"\u{:04x}", i);
+            assert_eq!(
+                parser(&pat).parse_escape(),
+                Ok(Primitive::Literal(ast::Literal {
+                    span: span(0..pat.len()),
+                    kind: ast::LiteralKind::HexFixed(
+                        ast::HexLiteralKind::UnicodeShort),
+                    c: c,
+                })));
+        }
+
+        assert_eq!(
+            parser(r"\uF").parse_escape().unwrap_err(),
+            TestError {
+                span: span(3..3),
+                kind: ast::ErrorKind::EscapeUnexpectedEof,
+            });
+        assert_eq!(
+            parser(r"\uG").parse_escape().unwrap_err(),
+            TestError {
+                span: span(2..3),
+                kind: ast::ErrorKind::EscapeHexInvalidDigit,
+            });
+        assert_eq!(
+            parser(r"\uFG").parse_escape().unwrap_err(),
+            TestError {
+                span: span(3..4),
+                kind: ast::ErrorKind::EscapeHexInvalidDigit,
+            });
+        assert_eq!(
+            parser(r"\uFFG").parse_escape().unwrap_err(),
+            TestError {
+                span: span(4..5),
+                kind: ast::ErrorKind::EscapeHexInvalidDigit,
+            });
+        assert_eq!(
+            parser(r"\uFFFG").parse_escape().unwrap_err(),
+            TestError {
+                span: span(5..6),
+                kind: ast::ErrorKind::EscapeHexInvalidDigit,
+            });
+        assert_eq!(
+            parser(r"\uD800").parse_escape().unwrap_err(),
+            TestError {
+                span: span(2..6),
+                kind: ast::ErrorKind::EscapeHexInvalid,
+            });
+    }
+
+    #[test]
+    fn parse_hex_eight() {
+        for i in 0..65536 {
+            let c = match ::std::char::from_u32(i) {
+                None => continue,
+                Some(c) => c,
+            };
+            let pat = format!(r"\U{:08x}", i);
+            assert_eq!(
+                parser(&pat).parse_escape(),
+                Ok(Primitive::Literal(ast::Literal {
+                    span: span(0..pat.len()),
+                    kind: ast::LiteralKind::HexFixed(
+                        ast::HexLiteralKind::UnicodeLong),
+                    c: c,
+                })));
+        }
+
+        assert_eq!(
+            parser(r"\UF").parse_escape().unwrap_err(),
+            TestError {
+                span: span(3..3),
+                kind: ast::ErrorKind::EscapeUnexpectedEof,
+            });
+        assert_eq!(
+            parser(r"\UG").parse_escape().unwrap_err(),
+            TestError {
+                span: span(2..3),
+                kind: ast::ErrorKind::EscapeHexInvalidDigit,
+            });
+        assert_eq!(
+            parser(r"\UFG").parse_escape().unwrap_err(),
+            TestError {
+                span: span(3..4),
+                kind: ast::ErrorKind::EscapeHexInvalidDigit,
+            });
+        assert_eq!(
+            parser(r"\UFFG").parse_escape().unwrap_err(),
+            TestError {
+                span: span(4..5),
+                kind: ast::ErrorKind::EscapeHexInvalidDigit,
+            });
+        assert_eq!(
+            parser(r"\UFFFG").parse_escape().unwrap_err(),
+            TestError {
+                span: span(5..6),
+                kind: ast::ErrorKind::EscapeHexInvalidDigit,
+            });
+        assert_eq!(
+            parser(r"\UFFFFG").parse_escape().unwrap_err(),
+            TestError {
+                span: span(6..7),
+                kind: ast::ErrorKind::EscapeHexInvalidDigit,
+            });
+        assert_eq!(
+            parser(r"\UFFFFFG").parse_escape().unwrap_err(),
+            TestError {
+                span: span(7..8),
+                kind: ast::ErrorKind::EscapeHexInvalidDigit,
+            });
+        assert_eq!(
+            parser(r"\UFFFFFFG").parse_escape().unwrap_err(),
+            TestError {
+                span: span(8..9),
+                kind: ast::ErrorKind::EscapeHexInvalidDigit,
+            });
+        assert_eq!(
+            parser(r"\UFFFFFFFG").parse_escape().unwrap_err(),
+            TestError {
+                span: span(9..10),
+                kind: ast::ErrorKind::EscapeHexInvalidDigit,
+            });
+    }
+
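+    // Braced hex escapes (`\x{..}`, `\u{..}`, `\U{..}`) accept any Unicode
+    // scalar value up to `\x{10FFFF}` and reject surrogates, empty braces
+    // and out-of-range values.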
+    #[test]
+    fn parse_hex_brace() {
+        assert_eq!(
+            parser(r"\u{26c4}").parse_escape(),
+            Ok(Primitive::Literal(ast::Literal {
+                span: span(0..8),
+                kind: ast::LiteralKind::HexBrace(
+                    ast::HexLiteralKind::UnicodeShort),
+                c: '⛄',
+            })));
+        assert_eq!(
+            parser(r"\U{26c4}").parse_escape(),
+            Ok(Primitive::Literal(ast::Literal {
+                span: span(0..8),
+                kind: ast::LiteralKind::HexBrace(
+                    ast::HexLiteralKind::UnicodeLong),
+                c: '⛄',
+            })));
+        assert_eq!(
+            parser(r"\x{26c4}").parse_escape(),
+            Ok(Primitive::Literal(ast::Literal {
+                span: span(0..8),
+                kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
+                c: '⛄',
+            })));
+        assert_eq!(
+            parser(r"\x{26C4}").parse_escape(),
+            Ok(Primitive::Literal(ast::Literal {
+                span: span(0..8),
+                kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
+                c: '⛄',
+            })));
+        assert_eq!(
+            parser(r"\x{10fFfF}").parse_escape(),
+            Ok(Primitive::Literal(ast::Literal {
+                span: span(0..10),
+                kind: ast::LiteralKind::HexBrace(ast::HexLiteralKind::X),
+                c: '\u{10FFFF}',
+            })));
+
+        assert_eq!(
+            parser(r"\x").parse_escape().unwrap_err(),
+            TestError {
+                span: span(2..2),
+                kind: ast::ErrorKind::EscapeUnexpectedEof,
+            });
+        assert_eq!(
+            parser(r"\x{").parse_escape().unwrap_err(),
+            TestError {
+                span: span(2..3),
+                kind: ast::ErrorKind::EscapeUnexpectedEof,
+            });
+        assert_eq!(
+            parser(r"\x{FF").parse_escape().unwrap_err(),
+            TestError {
+                span: span(2..5),
+                kind: ast::ErrorKind::EscapeUnexpectedEof,
+            });
+        assert_eq!(
+            parser(r"\x{}").parse_escape().unwrap_err(),
+            TestError {
+                span: span(2..4),
+                kind: ast::ErrorKind::EscapeHexEmpty,
+            });
+        assert_eq!(
+            parser(r"\x{FGF}").parse_escape().unwrap_err(),
+            TestError {
+                span: span(4..5),
+                kind: ast::ErrorKind::EscapeHexInvalidDigit,
+            });
+        assert_eq!(
+            parser(r"\x{FFFFFF}").parse_escape().unwrap_err(),
+            TestError {
+                span: span(3..9),
+                kind: ast::ErrorKind::EscapeHexInvalid,
+            });
+        assert_eq!(
+            parser(r"\x{D800}").parse_escape().unwrap_err(),
+            TestError {
+                span: span(3..7),
+                kind: ast::ErrorKind::EscapeHexInvalid,
+            });
+        assert_eq!(
+            parser(r"\x{FFFFFFFFF}").parse_escape().unwrap_err(),
+            TestError {
+                span: span(3..12),
+                kind: ast::ErrorKind::EscapeHexInvalid,
+            });
+    }
+
+    #[test]
+    fn parse_decimal() {
+        assert_eq!(parser("123").parse_decimal(), Ok(123));
+        assert_eq!(parser("0").parse_decimal(), Ok(0));
+        assert_eq!(parser("01").parse_decimal(), Ok(1));
+
+        assert_eq!(
+            parser("-1").parse_decimal().unwrap_err(),
+            TestError {
+                span: span(0..0),
+                kind: ast::ErrorKind::DecimalEmpty,
+            });
+        assert_eq!(
+            parser("").parse_decimal().unwrap_err(),
+            TestError {
+                span: span(0..0),
+                kind: ast::ErrorKind::DecimalEmpty,
+            });
+        assert_eq!(
+            parser("9999999999").parse_decimal().unwrap_err(),
+            TestError {
+                span: span(0..10),
+                kind: ast::ErrorKind::DecimalInvalid,
+            });
+    }
+
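+    // Bracketed character classes. The helpers below build the expected
+    // `ClassSet` values for unions, intersections (`&&`), differences
+    // (`--`) and symmetric differences (`~~`).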
+    #[test]
+    fn parse_set_class() {
+        fn union(span: Span, items: Vec<ast::ClassSetItem>) -> ast::ClassSet {
+            ast::ClassSet::union(ast::ClassSetUnion {
+                span: span,
+                items: items,
+            })
+        }
+
+        fn intersection(
+            span: Span,
+            lhs: ast::ClassSet,
+            rhs: ast::ClassSet,
+        ) -> ast::ClassSet {
+            ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
+                span: span,
+                kind: ast::ClassSetBinaryOpKind::Intersection,
+                lhs: Box::new(lhs),
+                rhs: Box::new(rhs),
+            })
+        }
+
+        fn difference(
+            span: Span,
+            lhs: ast::ClassSet,
+            rhs: ast::ClassSet,
+        ) -> ast::ClassSet {
+            ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
+                span: span,
+                kind: ast::ClassSetBinaryOpKind::Difference,
+                lhs: Box::new(lhs),
+                rhs: Box::new(rhs),
+            })
+        }
+
+        fn symdifference(
+            span: Span,
+            lhs: ast::ClassSet,
+            rhs: ast::ClassSet,
+        ) -> ast::ClassSet {
+            ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
+                span: span,
+                kind: ast::ClassSetBinaryOpKind::SymmetricDifference,
+                lhs: Box::new(lhs),
+                rhs: Box::new(rhs),
+            })
+        }
+
+        fn itemset(item: ast::ClassSetItem) -> ast::ClassSet {
+            ast::ClassSet::Item(item)
+        }
+
+        fn item_ascii(cls: ast::ClassAscii) -> ast::ClassSetItem {
+            ast::ClassSetItem::Ascii(cls)
+        }
+
+        fn item_unicode(cls: ast::ClassUnicode) -> ast::ClassSetItem {
+            ast::ClassSetItem::Unicode(cls)
+        }
+
+        fn item_perl(cls: ast::ClassPerl) -> ast::ClassSetItem {
+            ast::ClassSetItem::Perl(cls)
+        }
+
+        fn item_bracket(cls: ast::ClassBracketed) -> ast::ClassSetItem {
+            ast::ClassSetItem::Bracketed(Box::new(cls))
+        }
+
+        fn lit(span: Span, c: char) -> ast::ClassSetItem {
+            ast::ClassSetItem::Literal(ast::Literal {
+                span: span,
+                kind: ast::LiteralKind::Verbatim,
+                c: c,
+            })
+        }
+
+        fn empty(span: Span) -> ast::ClassSetItem {
+            ast::ClassSetItem::Empty(span)
+        }
+
+        fn range(span: Span, start: char, end: char) -> ast::ClassSetItem {
+            let pos1 = Position {
+                offset: span.start.offset + start.len_utf8(),
+                column: span.start.column + 1,
+                ..span.start
+            };
+            let pos2 = Position {
+                offset: span.end.offset - end.len_utf8(),
+                column: span.end.column - 1,
+                ..span.end
+            };
+            ast::ClassSetItem::Range(ast::ClassSetRange {
+                span: span,
+                start: ast::Literal {
+                    span: Span { end: pos1, ..span },
+                    kind: ast::LiteralKind::Verbatim,
+                    c: start,
+                },
+                end: ast::Literal {
+                    span: Span { start: pos2, ..span },
+                    kind: ast::LiteralKind::Verbatim,
+                    c: end,
+                },
+            })
+        }
+
+        fn alnum(span: Span, negated: bool) -> ast::ClassAscii {
+            ast::ClassAscii {
+                span: span,
+                kind: ast::ClassAsciiKind::Alnum,
+                negated: negated,
+            }
+        }
+
+        fn lower(span: Span, negated: bool) -> ast::ClassAscii {
+            ast::ClassAscii {
+                span: span,
+                kind: ast::ClassAsciiKind::Lower,
+                negated: negated,
+            }
+        }
+
+        assert_eq!(
+            parser("[[:alnum:]]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..11),
+                negated: false,
+                kind: itemset(item_ascii(alnum(span(1..10), false))),
+            }))));
+        assert_eq!(
+            parser("[[[:alnum:]]]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..13),
+                negated: false,
+                kind: itemset(item_bracket(ast::ClassBracketed {
+                    span: span(1..12),
+                    negated: false,
+                    kind: itemset(item_ascii(alnum(span(2..11), false))),
+                })),
+            }))));
+        assert_eq!(
+            parser("[[:alnum:]&&[:lower:]]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..22),
+                negated: false,
+                kind: intersection(
+                    span(1..21),
+                    itemset(item_ascii(alnum(span(1..10), false))),
+                    itemset(item_ascii(lower(span(12..21), false))),
+                ),
+            }))));
+        assert_eq!(
+            parser("[[:alnum:]--[:lower:]]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..22),
+                negated: false,
+                kind: difference(
+                    span(1..21),
+                    itemset(item_ascii(alnum(span(1..10), false))),
+                    itemset(item_ascii(lower(span(12..21), false))),
+                ),
+            }))));
+        assert_eq!(
+            parser("[[:alnum:]~~[:lower:]]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..22),
+                negated: false,
+                kind: symdifference(
+                    span(1..21),
+                    itemset(item_ascii(alnum(span(1..10), false))),
+                    itemset(item_ascii(lower(span(12..21), false))),
+                ),
+            }))));
+
+        assert_eq!(
+            parser("[a]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..3),
+                negated: false,
+                kind: itemset(lit(span(1..2), 'a')),
+            }))));
+        assert_eq!(
+            parser(r"[a\]]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..5),
+                negated: false,
+                kind: union(span(1..4), vec![
+                    lit(span(1..2), 'a'),
+                    ast::ClassSetItem::Literal(ast::Literal {
+                        span: span(2..4),
+                        kind: ast::LiteralKind::Punctuation,
+                        c: ']',
+                    }),
+                ]),
+            }))));
+        assert_eq!(
+            parser(r"[a\-z]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..6),
+                negated: false,
+                kind: union(span(1..5), vec![
+                    lit(span(1..2), 'a'),
+                    ast::ClassSetItem::Literal(ast::Literal {
+                        span: span(2..4),
+                        kind: ast::LiteralKind::Punctuation,
+                        c: '-',
+                    }),
+                    lit(span(4..5), 'z'),
+                ]),
+            }))));
+        assert_eq!(
+            parser("[ab]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..4),
+                negated: false,
+                kind: union(span(1..3), vec![
+                    lit(span(1..2), 'a'),
+                    lit(span(2..3), 'b'),
+                ]),
+            }))));
+        assert_eq!(
+            parser("[a-]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..4),
+                negated: false,
+                kind: union(span(1..3), vec![
+                    lit(span(1..2), 'a'),
+                    lit(span(2..3), '-'),
+                ]),
+            }))));
+        assert_eq!(
+            parser("[-a]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..4),
+                negated: false,
+                kind: union(span(1..3), vec![
+                    lit(span(1..2), '-'),
+                    lit(span(2..3), 'a'),
+                ]),
+            }))));
+        assert_eq!(
+            parser(r"[\pL]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..5),
+                negated: false,
+                kind: itemset(item_unicode(ast::ClassUnicode {
+                    span: span(1..4),
+                    negated: false,
+                    kind: ast::ClassUnicodeKind::OneLetter('L'),
+                })),
+            }))));
+        assert_eq!(
+            parser(r"[\w]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..4),
+                negated: false,
+                kind: itemset(item_perl(ast::ClassPerl {
+                    span: span(1..3),
+                    kind: ast::ClassPerlKind::Word,
+                    negated: false,
+                })),
+            }))));
+        assert_eq!(
+            parser(r"[a\wz]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..6),
+                negated: false,
+                kind: union(span(1..5), vec![
+                    lit(span(1..2), 'a'),
+                    item_perl(ast::ClassPerl {
+                        span: span(2..4),
+                        kind: ast::ClassPerlKind::Word,
+                        negated: false,
+                    }),
+                    lit(span(4..5), 'z'),
+                ]),
+            }))));
+
+        assert_eq!(
+            parser("[a-z]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..5),
+                negated: false,
+                kind: itemset(range(span(1..4), 'a', 'z')),
+            }))));
+        assert_eq!(
+            parser("[a-cx-z]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..8),
+                negated: false,
+                kind: union(span(1..7), vec![
+                    range(span(1..4), 'a', 'c'),
+                    range(span(4..7), 'x', 'z'),
+                ]),
+            }))));
+        assert_eq!(
+            parser(r"[\w&&a-cx-z]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..12),
+                negated: false,
+                kind: intersection(
+                    span(1..11),
+                    itemset(item_perl(ast::ClassPerl {
+                        span: span(1..3),
+                        kind: ast::ClassPerlKind::Word,
+                        negated: false,
+                    })),
+                    union(span(5..11), vec![
+                        range(span(5..8), 'a', 'c'),
+                        range(span(8..11), 'x', 'z'),
+                    ]),
+                ),
+            }))));
+        assert_eq!(
+            parser(r"[a-cx-z&&\w]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..12),
+                negated: false,
+                kind: intersection(
+                    span(1..11),
+                    union(span(1..7), vec![
+                        range(span(1..4), 'a', 'c'),
+                        range(span(4..7), 'x', 'z'),
+                    ]),
+                    itemset(item_perl(ast::ClassPerl {
+                        span: span(9..11),
+                        kind: ast::ClassPerlKind::Word,
+                        negated: false,
+                    })),
+                ),
+            }))));
+        assert_eq!(
+            parser(r"[a--b--c]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..9),
+                negated: false,
+                kind: difference(
+                    span(1..8),
+                    difference(
+                        span(1..5),
+                        itemset(lit(span(1..2), 'a')),
+                        itemset(lit(span(4..5), 'b')),
+                    ),
+                    itemset(lit(span(7..8), 'c')),
+                ),
+            }))));
+        assert_eq!(
+            parser(r"[a~~b~~c]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..9),
+                negated: false,
+                kind: symdifference(
+                    span(1..8),
+                    symdifference(
+                        span(1..5),
+                        itemset(lit(span(1..2), 'a')),
+                        itemset(lit(span(4..5), 'b')),
+                    ),
+                    itemset(lit(span(7..8), 'c')),
+                ),
+            }))));
+        assert_eq!(
+            parser(r"[\^&&^]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..7),
+                negated: false,
+                kind: intersection(
+                    span(1..6),
+                    itemset(ast::ClassSetItem::Literal(ast::Literal {
+                        span: span(1..3),
+                        kind: ast::LiteralKind::Punctuation,
+                        c: '^',
+                    })),
+                    itemset(lit(span(5..6), '^')),
+                ),
+            }))));
+        assert_eq!(
+            parser(r"[\&&&&]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..7),
+                negated: false,
+                kind: intersection(
+                    span(1..6),
+                    itemset(ast::ClassSetItem::Literal(ast::Literal {
+                        span: span(1..3),
+                        kind: ast::LiteralKind::Punctuation,
+                        c: '&',
+                    })),
+                    itemset(lit(span(5..6), '&')),
+                ),
+            }))));
+        assert_eq!(
+            parser(r"[&&&&]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..6),
+                negated: false,
+                kind: intersection(
+                    span(1..5),
+                    intersection(
+                        span(1..3),
+                        itemset(empty(span(1..1))),
+                        itemset(empty(span(3..3))),
+                    ),
+                    itemset(empty(span(5..5))),
+                ),
+            }))));
+
+        let pat = "[☃-⛄]";
+        assert_eq!(
+            parser(pat).parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span_range(pat, 0..9),
+                negated: false,
+                kind: itemset(ast::ClassSetItem::Range(ast::ClassSetRange {
+                    span: span_range(pat, 1..8),
+                    start: ast::Literal {
+                        span: span_range(pat, 1..4),
+                        kind: ast::LiteralKind::Verbatim,
+                        c: '☃',
+                    },
+                    end: ast::Literal {
+                        span: span_range(pat, 5..8),
+                        kind: ast::LiteralKind::Verbatim,
+                        c: '⛄',
+                    },
+                })),
+            }))));
+
+        assert_eq!(
+            parser(r"[]]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..3),
+                negated: false,
+                kind: itemset(lit(span(1..2), ']')),
+            }))));
+        assert_eq!(
+            parser(r"[]\[]").parse(),
+            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                span: span(0..5),
+                negated: false,
+                kind: union(span(1..4), vec![
+                    lit(span(1..2), ']'),
+                    ast::ClassSetItem::Literal(ast::Literal {
+                        span: span(2..4),
+                        kind: ast::LiteralKind::Punctuation,
+                        c: '[',
+                    }),
+                ]),
+            }))));
+        assert_eq!(
+            parser(r"[\[]]").parse(),
+            Ok(concat(0..5, vec![
+                Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
+                    span: span(0..4),
+                    negated: false,
+                    kind: itemset(ast::ClassSetItem::Literal(ast::Literal {
+                        span: span(1..3),
+                        kind: ast::LiteralKind::Punctuation,
+                        c: '[',
+                    })),
+                })),
+                Ast::Literal(ast::Literal {
+                    span: span(4..5),
+                    kind: ast::LiteralKind::Verbatim,
+                    c: ']',
+                }),
+            ])));
+
+        assert_eq!(
+            parser("[").parse().unwrap_err(),
+            TestError {
+                span: span(0..1),
+                kind: ast::ErrorKind::ClassUnclosed,
+            });
+        assert_eq!(
+            parser("[[").parse().unwrap_err(),
+            TestError {
+                span: span(1..2),
+                kind: ast::ErrorKind::ClassUnclosed,
+            });
+        assert_eq!(
+            parser("[[-]").parse().unwrap_err(),
+            TestError {
+                span: span(0..1),
+                kind: ast::ErrorKind::ClassUnclosed,
+            });
+        assert_eq!(
+            parser("[[[:alnum:]").parse().unwrap_err(),
+            TestError {
+                span: span(1..2),
+                kind: ast::ErrorKind::ClassUnclosed,
+            });
+        assert_eq!(
+            parser(r"[\b]").parse().unwrap_err(),
+            TestError {
+                span: span(1..3),
+                kind: ast::ErrorKind::ClassEscapeInvalid,
+            });
+        assert_eq!(
+            parser(r"[\w-a]").parse().unwrap_err(),
+            TestError {
+                span: span(1..3),
+                kind: ast::ErrorKind::ClassRangeLiteral,
+            });
+        assert_eq!(
+            parser(r"[a-\w]").parse().unwrap_err(),
+            TestError {
+                span: span(3..5),
+                kind: ast::ErrorKind::ClassRangeLiteral,
+            });
+        assert_eq!(
+            parser(r"[z-a]").parse().unwrap_err(),
+            TestError {
+                span: span(1..4),
+                kind: ast::ErrorKind::ClassRangeInvalid,
+            });
+
+        assert_eq!(
+            parser_ignore_whitespace("[a ").parse().unwrap_err(),
+            TestError {
+                span: span(0..1),
+                kind: ast::ErrorKind::ClassUnclosed,
+            });
+        assert_eq!(
+            parser_ignore_whitespace("[a- ").parse().unwrap_err(),
+            TestError {
+                span: span(0..1),
+                kind: ast::ErrorKind::ClassUnclosed,
+            });
+    }
+
+    #[test]
+    fn parse_set_class_open() {
+        assert_eq!(
+            parser("[a]").parse_set_class_open(), {
+                let set = ast::ClassBracketed {
+                    span: span(0..1),
+                    negated: false,
+                    kind: ast::ClassSet::union(ast::ClassSetUnion {
+                        span: span(1..1),
+                        items: vec![],
+                    }),
+                };
+                let union = ast::ClassSetUnion {
+                    span: span(1..1),
+                    items: vec![],
+                };
+                Ok((set, union))
+            });
+        assert_eq!(
+            parser_ignore_whitespace("[   a]").parse_set_class_open(), {
+                let set = ast::ClassBracketed {
+                    span: span(0..4),
+                    negated: false,
+                    kind: ast::ClassSet::union(ast::ClassSetUnion {
+                        span: span(4..4),
+                        items: vec![],
+                    }),
+                };
+                let union = ast::ClassSetUnion {
+                    span: span(4..4),
+                    items: vec![],
+                };
+                Ok((set, union))
+            });
+        assert_eq!(
+            parser("[^a]").parse_set_class_open(), {
+                let set = ast::ClassBracketed {
+                    span: span(0..2),
+                    negated: true,
+                    kind: ast::ClassSet::union(ast::ClassSetUnion {
+                        span: span(2..2),
+                        items: vec![],
+                    }),
+                };
+                let union = ast::ClassSetUnion {
+                    span: span(2..2),
+                    items: vec![],
+                };
+                Ok((set, union))
+            });
+        assert_eq!(
+            parser_ignore_whitespace("[ ^ a]").parse_set_class_open(), {
+                let set = ast::ClassBracketed {
+                    span: span(0..4),
+                    negated: true,
+                    kind: ast::ClassSet::union(ast::ClassSetUnion {
+                        span: span(4..4),
+                        items: vec![],
+                    }),
+                };
+                let union = ast::ClassSetUnion {
+                    span: span(4..4),
+                    items: vec![],
+                };
+                Ok((set, union))
+            });
+        assert_eq!(
+            parser("[-a]").parse_set_class_open(), {
+                let set = ast::ClassBracketed {
+                    span: span(0..2),
+                    negated: false,
+                    kind: ast::ClassSet::union(ast::ClassSetUnion {
+                        span: span(1..1),
+                        items: vec![],
+                    }),
+                };
+                let union = ast::ClassSetUnion {
+                    span: span(1..2),
+                    items: vec![
+                        ast::ClassSetItem::Literal(ast::Literal {
+                            span: span(1..2),
+                            kind: ast::LiteralKind::Verbatim,
+                            c: '-',
+                        }),
+                    ],
+                };
+                Ok((set, union))
+            });
+        assert_eq!(
+            parser_ignore_whitespace("[ - a]").parse_set_class_open(), {
+                let set = ast::ClassBracketed {
+                    span: span(0..4),
+                    negated: false,
+                    kind: ast::ClassSet::union(ast::ClassSetUnion {
+                        span: span(2..2),
+                        items: vec![],
+                    }),
+                };
+                let union = ast::ClassSetUnion {
+                    span: span(2..3),
+                    items: vec![
+                        ast::ClassSetItem::Literal(ast::Literal {
+                            span: span(2..3),
+                            kind: ast::LiteralKind::Verbatim,
+                            c: '-',
+                        }),
+                    ],
+                };
+                Ok((set, union))
+            });
+        assert_eq!(
+            parser("[^-a]").parse_set_class_open(), {
+                let set = ast::ClassBracketed {
+                    span: span(0..3),
+                    negated: true,
+                    kind: ast::ClassSet::union(ast::ClassSetUnion {
+                        span: span(2..2),
+                        items: vec![],
+                    }),
+                };
+                let union = ast::ClassSetUnion {
+                    span: span(2..3),
+                    items: vec![
+                        ast::ClassSetItem::Literal(ast::Literal {
+                            span: span(2..3),
+                            kind: ast::LiteralKind::Verbatim,
+                            c: '-',
+                        }),
+                    ],
+                };
+                Ok((set, union))
+            });
+        assert_eq!(
+            parser("[--a]").parse_set_class_open(), {
+                let set = ast::ClassBracketed {
+                    span: span(0..3),
+                    negated: false,
+                    kind: ast::ClassSet::union(ast::ClassSetUnion {
+                        span: span(1..1),
+                        items: vec![],
+                    }),
+                };
+                let union = ast::ClassSetUnion {
+                    span: span(1..3),
+                    items: vec![
+                        ast::ClassSetItem::Literal(ast::Literal {
+                            span: span(1..2),
+                            kind: ast::LiteralKind::Verbatim,
+                            c: '-',
+                        }),
+                        ast::ClassSetItem::Literal(ast::Literal {
+                            span: span(2..3),
+                            kind: ast::LiteralKind::Verbatim,
+                            c: '-',
+                        }),
+                    ],
+                };
+                Ok((set, union))
+            });
+        assert_eq!(
+            parser("[]a]").parse_set_class_open(), {
+                let set = ast::ClassBracketed {
+                    span: span(0..2),
+                    negated: false,
+                    kind: ast::ClassSet::union(ast::ClassSetUnion {
+                        span: span(1..1),
+                        items: vec![],
+                    }),
+                };
+                let union = ast::ClassSetUnion {
+                    span: span(1..2),
+                    items: vec![
+                        ast::ClassSetItem::Literal(ast::Literal {
+                            span: span(1..2),
+                            kind: ast::LiteralKind::Verbatim,
+                            c: ']',
+                        }),
+                    ],
+                };
+                Ok((set, union))
+            });
+        assert_eq!(
+            parser_ignore_whitespace("[ ] a]").parse_set_class_open(), {
+                let set = ast::ClassBracketed {
+                    span: span(0..4),
+                    negated: false,
+                    kind: ast::ClassSet::union(ast::ClassSetUnion {
+                        span: span(2..2),
+                        items: vec![],
+                    }),
+                };
+                let union = ast::ClassSetUnion {
+                    span: span(2..3),
+                    items: vec![
+                        ast::ClassSetItem::Literal(ast::Literal {
+                            span: span(2..3),
+                            kind: ast::LiteralKind::Verbatim,
+                            c: ']',
+                        }),
+                    ],
+                };
+                Ok((set, union))
+            });
+        assert_eq!(
+            parser("[^]a]").parse_set_class_open(), {
+                let set = ast::ClassBracketed {
+                    span: span(0..3),
+                    negated: true,
+                    kind: ast::ClassSet::union(ast::ClassSetUnion {
+                        span: span(2..2),
+                        items: vec![],
+                    }),
+                };
+                let union = ast::ClassSetUnion {
+                    span: span(2..3),
+                    items: vec![
+                        ast::ClassSetItem::Literal(ast::Literal {
+                            span: span(2..3),
+                            kind: ast::LiteralKind::Verbatim,
+                            c: ']',
+                        }),
+                    ],
+                };
+                Ok((set, union))
+            });
+        assert_eq!(
+            parser("[-]a]").parse_set_class_open(), {
+                let set = ast::ClassBracketed {
+                    span: span(0..2),
+                    negated: false,
+                    kind: ast::ClassSet::union(ast::ClassSetUnion {
+                        span: span(1..1),
+                        items: vec![],
+                    }),
+                };
+                let union = ast::ClassSetUnion {
+                    span: span(1..2),
+                    items: vec![
+                        ast::ClassSetItem::Literal(ast::Literal {
+                            span: span(1..2),
+                            kind: ast::LiteralKind::Verbatim,
+                            c: '-',
+                        }),
+                    ],
+                };
+                Ok((set, union))
+            });
+
+        assert_eq!(
+            parser("[").parse_set_class_open().unwrap_err(),
+            TestError {
+                span: span(0..1),
+                kind: ast::ErrorKind::ClassUnclosed,
+            });
+        assert_eq!(
+            parser_ignore_whitespace("[    ")
+            .parse_set_class_open()
+            .unwrap_err(),
+            TestError {
+                span: span(0..5),
+                kind: ast::ErrorKind::ClassUnclosed,
+            });
+        assert_eq!(
+            parser("[^").parse_set_class_open().unwrap_err(),
+            TestError {
+                span: span(0..2),
+                kind: ast::ErrorKind::ClassUnclosed,
+            });
+        assert_eq!(
+            parser("[]").parse_set_class_open().unwrap_err(),
+            TestError {
+                span: span(0..2),
+                kind: ast::ErrorKind::ClassUnclosed,
+            });
+        assert_eq!(
+            parser("[-").parse_set_class_open().unwrap_err(),
+            TestError {
+                span: span(0..2),
+                kind: ast::ErrorKind::ClassUnclosed,
+            });
+        assert_eq!(
+            parser("[--").parse_set_class_open().unwrap_err(),
+            TestError {
+                span: span(0..3),
+                kind: ast::ErrorKind::ClassUnclosed,
+            });
+    }
+
+    #[test]
+    fn maybe_parse_ascii_class() {
+        assert_eq!(
+            parser(r"[:alnum:]").maybe_parse_ascii_class(),
+            Some(ast::ClassAscii {
+                span: span(0..9),
+                kind: ast::ClassAsciiKind::Alnum,
+                negated: false,
+            }));
+        assert_eq!(
+            parser(r"[:alnum:]A").maybe_parse_ascii_class(),
+            Some(ast::ClassAscii {
+                span: span(0..9),
+                kind: ast::ClassAsciiKind::Alnum,
+                negated: false,
+            }));
+        assert_eq!(
+            parser(r"[:^alnum:]").maybe_parse_ascii_class(),
+            Some(ast::ClassAscii {
+                span: span(0..10),
+                kind: ast::ClassAsciiKind::Alnum,
+                negated: true,
+            }));
+
+        let p = parser(r"[:");
+        assert_eq!(p.maybe_parse_ascii_class(), None);
+        assert_eq!(p.offset(), 0);
+
+        let p = parser(r"[:^");
+        assert_eq!(p.maybe_parse_ascii_class(), None);
+        assert_eq!(p.offset(), 0);
+
+        let p = parser(r"[^:alnum:]");
+        assert_eq!(p.maybe_parse_ascii_class(), None);
+        assert_eq!(p.offset(), 0);
+
+        let p = parser(r"[:alnnum:]");
+        assert_eq!(p.maybe_parse_ascii_class(), None);
+        assert_eq!(p.offset(), 0);
+
+        let p = parser(r"[:alnum]");
+        assert_eq!(p.maybe_parse_ascii_class(), None);
+        assert_eq!(p.offset(), 0);
+
+        let p = parser(r"[:alnum:");
+        assert_eq!(p.maybe_parse_ascii_class(), None);
+        assert_eq!(p.offset(), 0);
+    }
+
+    #[test]
+    fn parse_unicode_class() {
+        assert_eq!(
+            parser(r"\pN").parse_escape(),
+            Ok(Primitive::Unicode(ast::ClassUnicode {
+                span: span(0..3),
+                negated: false,
+                kind: ast::ClassUnicodeKind::OneLetter('N'),
+            })));
+        assert_eq!(
+            parser(r"\PN").parse_escape(),
+            Ok(Primitive::Unicode(ast::ClassUnicode {
+                span: span(0..3),
+                negated: true,
+                kind: ast::ClassUnicodeKind::OneLetter('N'),
+            })));
+        assert_eq!(
+            parser(r"\p{N}").parse_escape(),
+            Ok(Primitive::Unicode(ast::ClassUnicode {
+                span: span(0..5),
+                negated: false,
+                kind: ast::ClassUnicodeKind::Named(s("N")),
+            })));
+        assert_eq!(
+            parser(r"\P{N}").parse_escape(),
+            Ok(Primitive::Unicode(ast::ClassUnicode {
+                span: span(0..5),
+                negated: true,
+                kind: ast::ClassUnicodeKind::Named(s("N")),
+            })));
+        assert_eq!(
+            parser(r"\p{Greek}").parse_escape(),
+            Ok(Primitive::Unicode(ast::ClassUnicode {
+                span: span(0..9),
+                negated: false,
+                kind: ast::ClassUnicodeKind::Named(s("Greek")),
+            })));
+
+        assert_eq!(
+            parser(r"\p{scx:Katakana}").parse_escape(),
+            Ok(Primitive::Unicode(ast::ClassUnicode {
+                span: span(0..16),
+                negated: false,
+                kind: ast::ClassUnicodeKind::NamedValue {
+                    op: ast::ClassUnicodeOpKind::Colon,
+                    name: s("scx"),
+                    value: s("Katakana"),
+                },
+            })));
+        assert_eq!(
+            parser(r"\p{scx=Katakana}").parse_escape(),
+            Ok(Primitive::Unicode(ast::ClassUnicode {
+                span: span(0..16),
+                negated: false,
+                kind: ast::ClassUnicodeKind::NamedValue {
+                    op: ast::ClassUnicodeOpKind::Equal,
+                    name: s("scx"),
+                    value: s("Katakana"),
+                },
+            })));
+        assert_eq!(
+            parser(r"\p{scx!=Katakana}").parse_escape(),
+            Ok(Primitive::Unicode(ast::ClassUnicode {
+                span: span(0..17),
+                negated: false,
+                kind: ast::ClassUnicodeKind::NamedValue {
+                    op: ast::ClassUnicodeOpKind::NotEqual,
+                    name: s("scx"),
+                    value: s("Katakana"),
+                },
+            })));
+
+        assert_eq!(
+            parser(r"\p{:}").parse_escape(),
+            Ok(Primitive::Unicode(ast::ClassUnicode {
+                span: span(0..5),
+                negated: false,
+                kind: ast::ClassUnicodeKind::NamedValue {
+                    op: ast::ClassUnicodeOpKind::Colon,
+                    name: s(""),
+                    value: s(""),
+                },
+            })));
+        assert_eq!(
+            parser(r"\p{=}").parse_escape(),
+            Ok(Primitive::Unicode(ast::ClassUnicode {
+                span: span(0..5),
+                negated: false,
+                kind: ast::ClassUnicodeKind::NamedValue {
+                    op: ast::ClassUnicodeOpKind::Equal,
+                    name: s(""),
+                    value: s(""),
+                },
+            })));
+        assert_eq!(
+            parser(r"\p{!=}").parse_escape(),
+            Ok(Primitive::Unicode(ast::ClassUnicode {
+                span: span(0..6),
+                negated: false,
+                kind: ast::ClassUnicodeKind::NamedValue {
+                    op: ast::ClassUnicodeOpKind::NotEqual,
+                    name: s(""),
+                    value: s(""),
+                },
+            })));
+
+        assert_eq!(
+            parser(r"\p").parse_escape().unwrap_err(),
+            TestError {
+                span: span(2..2),
+                kind: ast::ErrorKind::EscapeUnexpectedEof,
+            });
+        assert_eq!(
+            parser(r"\p{").parse_escape().unwrap_err(),
+            TestError {
+                span: span(3..3),
+                kind: ast::ErrorKind::EscapeUnexpectedEof,
+            });
+        assert_eq!(
+            parser(r"\p{N").parse_escape().unwrap_err(),
+            TestError {
+                span: span(4..4),
+                kind: ast::ErrorKind::EscapeUnexpectedEof,
+            });
+        assert_eq!(
+            parser(r"\p{Greek").parse_escape().unwrap_err(),
+            TestError {
+                span: span(8..8),
+                kind: ast::ErrorKind::EscapeUnexpectedEof,
+            });
+
+        assert_eq!(
+            parser(r"\pNz").parse(),
+            Ok(Ast::Concat(ast::Concat {
+                span: span(0..4),
+                asts: vec![
+                    Ast::Class(ast::Class::Unicode(ast::ClassUnicode {
+                        span: span(0..3),
+                        negated: false,
+                        kind: ast::ClassUnicodeKind::OneLetter('N'),
+                    })),
+                    Ast::Literal(ast::Literal {
+                        span: span(3..4),
+                        kind: ast::LiteralKind::Verbatim,
+                        c: 'z',
+                    }),
+                ],
+            })));
+        assert_eq!(
+            parser(r"\p{Greek}z").parse(),
+            Ok(Ast::Concat(ast::Concat {
+                span: span(0..10),
+                asts: vec![
+                    Ast::Class(ast::Class::Unicode(ast::ClassUnicode {
+                        span: span(0..9),
+                        negated: false,
+                        kind: ast::ClassUnicodeKind::Named(s("Greek")),
+                    })),
+                    Ast::Literal(ast::Literal {
+                        span: span(9..10),
+                        kind: ast::LiteralKind::Verbatim,
+                        c: 'z',
+                    }),
+                ],
+            })));
+    }
+
+    #[test]
+    fn parse_perl_class() {
+        assert_eq!(
+            parser(r"\d").parse_escape(),
+            Ok(Primitive::Perl(ast::ClassPerl {
+                span: span(0..2),
+                kind: ast::ClassPerlKind::Digit,
+                negated: false,
+            })));
+        assert_eq!(
+            parser(r"\D").parse_escape(),
+            Ok(Primitive::Perl(ast::ClassPerl {
+                span: span(0..2),
+                kind: ast::ClassPerlKind::Digit,
+                negated: true,
+            })));
+        assert_eq!(
+            parser(r"\s").parse_escape(),
+            Ok(Primitive::Perl(ast::ClassPerl {
+                span: span(0..2),
+                kind: ast::ClassPerlKind::Space,
+                negated: false,
+            })));
+        assert_eq!(
+            parser(r"\S").parse_escape(),
+            Ok(Primitive::Perl(ast::ClassPerl {
+                span: span(0..2),
+                kind: ast::ClassPerlKind::Space,
+                negated: true,
+            })));
+        assert_eq!(
+            parser(r"\w").parse_escape(),
+            Ok(Primitive::Perl(ast::ClassPerl {
+                span: span(0..2),
+                kind: ast::ClassPerlKind::Word,
+                negated: false,
+            })));
+        assert_eq!(
+            parser(r"\W").parse_escape(),
+            Ok(Primitive::Perl(ast::ClassPerl {
+                span: span(0..2),
+                kind: ast::ClassPerlKind::Word,
+                negated: true,
+            })));
+
+        assert_eq!(
+            parser(r"\d").parse(),
+            Ok(Ast::Class(ast::Class::Perl(ast::ClassPerl {
+                span: span(0..2),
+                kind: ast::ClassPerlKind::Digit,
+                negated: false,
+            }))));
+        assert_eq!(
+            parser(r"\dz").parse(),
+            Ok(Ast::Concat(ast::Concat {
+                span: span(0..3),
+                asts: vec![
+                    Ast::Class(ast::Class::Perl(ast::ClassPerl {
+                        span: span(0..2),
+                        kind: ast::ClassPerlKind::Digit,
+                        negated: false,
+                    })),
+                    Ast::Literal(ast::Literal {
+                        span: span(2..3),
+                        kind: ast::LiteralKind::Verbatim,
+                        c: 'z',
+                    }),
+                ],
+            })));
+    }
+
+    // This tests a bug fix where the nest limit checker wasn't decrementing
+    // its depth during post-traversal, which caused long regexes to trip
+    // the default limit too aggressively.
+    #[test]
+    fn regression_454_nest_too_big() {
+        let pattern = r#"
+        2(?:
+          [45]\d{3}|
+          7(?:
+            1[0-267]|
+            2[0-289]|
+            3[0-29]|
+            4[01]|
+            5[1-3]|
+            6[013]|
+            7[0178]|
+            91
+          )|
+          8(?:
+            0[125]|
+            [139][1-6]|
+            2[0157-9]|
+            41|
+            6[1-35]|
+            7[1-5]|
+            8[1-8]|
+            90
+          )|
+          9(?:
+            0[0-2]|
+            1[0-4]|
+            2[568]|
+            3[3-6]|
+            5[5-7]|
+            6[0167]|
+            7[15]|
+            8[0146-9]
+          )
+        )\d{4}
+        "#;
+        assert!(parser_nest_limit(pattern, 50).parse().is_ok());
+    }
+
+    // This tests that we treat a trailing `-` in a character class as a
+    // literal `-` even when whitespace mode is enabled and there is whitespace
+    // after the trailing `-`.
+    #[test]
+    fn regression_455_trailing_dash_ignore_whitespace() {
+        assert!(parser("(?x)[ / - ]").parse().is_ok());
+        assert!(parser("(?x)[ a - ]").parse().is_ok());
+        assert!(parser("(?x)[
+            a
+            - ]
+        ").parse().is_ok());
+        assert!(parser("(?x)[
+            a # wat
+            - ]
+        ").parse().is_ok());
+
+        assert!(parser("(?x)[ / -").parse().is_err());
+        assert!(parser("(?x)[ / - ").parse().is_err());
+        assert!(parser("(?x)[
+            / -
+        ").parse().is_err());
+        assert!(parser("(?x)[
+            / - # wat
+        ").parse().is_err());
+    }
+}
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-syntax/src/ast/print.rs
@@ -0,0 +1,586 @@
+// Copyright 2018 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+/*!
+This module provides a regular expression printer for `Ast`.
+*/
+
+use std::fmt;
+
+use ast::{self, Ast};
+use ast::visitor::{self, Visitor};
+
+/// A builder for constructing a printer.
+///
+/// Note that since a printer doesn't have any configuration knobs, this type
+/// remains unexported.
+#[derive(Clone, Debug)]
+struct PrinterBuilder {
+    _priv: (),
+}
+
+impl Default for PrinterBuilder {
+    fn default() -> PrinterBuilder {
+        PrinterBuilder::new()
+    }
+}
+
+impl PrinterBuilder {
+    fn new() -> PrinterBuilder {
+        PrinterBuilder {
+            _priv: (),
+        }
+    }
+
+    fn build(&self) -> Printer {
+        Printer {
+            _priv: (),
+        }
+    }
+}
+
+/// A printer for a regular expression abstract syntax tree.
+///
+/// A printer converts an abstract syntax tree (AST) to a regular expression
+/// pattern string. This particular printer uses constant stack space and heap
+/// space proportional to the size of the AST.
+///
+/// This printer will not necessarily preserve the original formatting of the
+/// regular expression pattern string. For example, all whitespace and comments
+/// are ignored.
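+///
+/// # Example
+///
+/// A minimal usage sketch, assuming the crate's public `ast::parse::Parser`
+/// API (the same parse/print pairing exercised by the round-trip tests at the
+/// bottom of this file):
+///
+/// ```
+/// use regex_syntax::ast::parse::Parser;
+/// use regex_syntax::ast::print::Printer;
+///
+/// // Parse a pattern into an `Ast`, then print it back to a pattern string.
+/// let ast = Parser::new().parse("a|b").unwrap();
+/// let mut pattern = String::new();
+/// Printer::new().print(&ast, &mut pattern).unwrap();
+/// assert_eq!(pattern, "a|b");
+/// ```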
+#[derive(Debug)]
+pub struct Printer {
+    _priv: (),
+}
+
+impl Printer {
+    /// Create a new printer.
+    pub fn new() -> Printer {
+        PrinterBuilder::new().build()
+    }
+
+    /// Print the given `Ast` to the given writer. The writer must implement
+    /// `fmt::Write`. Typical implementations of `fmt::Write` that can be used
+    /// here are a `fmt::Formatter` (which is available in `fmt::Display`
+    /// implementations) or a `&mut String`.
+    pub fn print<W: fmt::Write>(&mut self, ast: &Ast, wtr: W) -> fmt::Result {
+        visitor::visit(ast, Writer { printer: self, wtr: wtr })
+    }
+}
+
+#[derive(Debug)]
+struct Writer<'p, W> {
+    printer: &'p mut Printer,
+    wtr: W,
+}
+
+impl<'p, W: fmt::Write> Visitor for Writer<'p, W> {
+    type Output = ();
+    type Err = fmt::Error;
+
+    fn finish(self) -> fmt::Result {
+        Ok(())
+    }
+
+    fn visit_pre(&mut self, ast: &Ast) -> fmt::Result {
+        match *ast {
+            Ast::Group(ref x) => self.fmt_group_pre(x),
+            Ast::Class(ast::Class::Bracketed(ref x)) => {
+                self.fmt_class_bracketed_pre(x)
+            }
+            _ => Ok(())
+        }
+    }
+
+    fn visit_post(&mut self, ast: &Ast) -> fmt::Result {
+        use ast::Class;
+
+        match *ast {
+            Ast::Empty(_) => Ok(()),
+            Ast::Flags(ref x) => self.fmt_set_flags(x),
+            Ast::Literal(ref x) => self.fmt_literal(x),
+            Ast::Dot(_) => self.wtr.write_str("."),
+            Ast::Assertion(ref x) => self.fmt_assertion(x),
+            Ast::Class(Class::Perl(ref x)) => self.fmt_class_perl(x),
+            Ast::Class(Class::Unicode(ref x)) => self.fmt_class_unicode(x),
+            Ast::Class(Class::Bracketed(ref x)) => {
+                self.fmt_class_bracketed_post(x)
+            }
+            Ast::Repetition(ref x) => self.fmt_repetition(x),
+            Ast::Group(ref x) => self.fmt_group_post(x),
+            Ast::Alternation(_) => Ok(()),
+            Ast::Concat(_) => Ok(()),
+        }
+    }
+
+    fn visit_alternation_in(&mut self) -> fmt::Result {
+        self.wtr.write_str("|")
+    }
+
+    fn visit_class_set_item_pre(
+        &mut self,
+        ast: &ast::ClassSetItem,
+    ) -> Result<(), Self::Err> {
+        match *ast {
+            ast::ClassSetItem::Bracketed(ref x) => {
+                self.fmt_class_bracketed_pre(x)
+            }
+            _ => Ok(()),
+        }
+    }
+
+    fn visit_class_set_item_post(
+        &mut self,
+        ast: &ast::ClassSetItem,
+    ) -> Result<(), Self::Err> {
+        use ast::ClassSetItem::*;
+
+        match *ast {
+            Empty(_) => Ok(()),
+            Literal(ref x) => self.fmt_literal(x),
+            Range(ref x) => {
+                self.fmt_literal(&x.start)?;
+                self.wtr.write_str("-")?;
+                self.fmt_literal(&x.end)?;
+                Ok(())
+            }
+            Ascii(ref x) => self.fmt_class_ascii(x),
+            Unicode(ref x) => self.fmt_class_unicode(x),
+            Perl(ref x) => self.fmt_class_perl(x),
+            Bracketed(ref x) => self.fmt_class_bracketed_post(x),
+            Union(_) => Ok(()),
+        }
+    }
+
+    fn visit_class_set_binary_op_in(
+        &mut self,
+        ast: &ast::ClassSetBinaryOp,
+    ) -> Result<(), Self::Err> {
+        self.fmt_class_set_binary_op_kind(&ast.kind)
+    }
+}
+
+impl<'p, W: fmt::Write> Writer<'p, W> {
+    fn fmt_group_pre(&mut self, ast: &ast::Group) -> fmt::Result {
+        use ast::GroupKind::*;
+        match ast.kind {
+            CaptureIndex(_) => self.wtr.write_str("("),
+            CaptureName(ref x) => {
+                self.wtr.write_str("(?P<")?;
+                self.wtr.write_str(&x.name)?;
+                self.wtr.write_str(">")?;
+                Ok(())
+            }
+            NonCapturing(ref flags) => {
+                self.wtr.write_str("(?")?;
+                self.fmt_flags(flags)?;
+                self.wtr.write_str(":")?;
+                Ok(())
+            }
+        }
+    }
+
+    fn fmt_group_post(&mut self, _ast: &ast::Group) -> fmt::Result {
+        self.wtr.write_str(")")
+    }
+
+    fn fmt_repetition(&mut self, ast: &ast::Repetition) -> fmt::Result {
+        use ast::RepetitionKind::*;
+        match ast.op.kind {
+            ZeroOrOne if ast.greedy => self.wtr.write_str("?"),
+            ZeroOrOne => self.wtr.write_str("??"),
+            ZeroOrMore if ast.greedy => self.wtr.write_str("*"),
+            ZeroOrMore => self.wtr.write_str("*?"),
+            OneOrMore if ast.greedy => self.wtr.write_str("+"),
+            OneOrMore => self.wtr.write_str("+?"),
+            Range(ref x) => {
+                self.fmt_repetition_range(x)?;
+                if !ast.greedy {
+                    self.wtr.write_str("?")?;
+                }
+                Ok(())
+            }
+        }
+    }
+
+    fn fmt_repetition_range(
+        &mut self,
+        ast: &ast::RepetitionRange,
+    ) -> fmt::Result {
+        use ast::RepetitionRange::*;
+        match *ast {
+            Exactly(x) => write!(self.wtr, "{{{}}}", x),
+            AtLeast(x) => write!(self.wtr, "{{{},}}", x),
+            Bounded(x, y) => write!(self.wtr, "{{{},{}}}", x, y),
+        }
+    }
+
+    fn fmt_literal(&mut self, ast: &ast::Literal) -> fmt::Result {
+        use ast::LiteralKind::*;
+
+        match ast.kind {
+            Verbatim => self.wtr.write_char(ast.c),
+            Punctuation => write!(self.wtr, r"\{}", ast.c),
+            Octal => write!(self.wtr, r"\{:o}", ast.c as u32),
+            HexFixed(ast::HexLiteralKind::X) => {
+                write!(self.wtr, r"\x{:02X}", ast.c as u32)
+            }
+            HexFixed(ast::HexLiteralKind::UnicodeShort) => {
+                write!(self.wtr, r"\u{:04X}", ast.c as u32)
+            }
+            HexFixed(ast::HexLiteralKind::UnicodeLong) => {
+                write!(self.wtr, r"\U{:08X}", ast.c as u32)
+            }
+            HexBrace(ast::HexLiteralKind::X) => {
+                write!(self.wtr, r"\x{{{:X}}}", ast.c as u32)
+            }
+            HexBrace(ast::HexLiteralKind::UnicodeShort) => {
+                write!(self.wtr, r"\u{{{:X}}}", ast.c as u32)
+            }
+            HexBrace(ast::HexLiteralKind::UnicodeLong) => {
+                write!(self.wtr, r"\U{{{:X}}}", ast.c as u32)
+            }
+            Special(ast::SpecialLiteralKind::Bell) => {
+                self.wtr.write_str(r"\a")
+            }
+            Special(ast::SpecialLiteralKind::FormFeed) => {
+                self.wtr.write_str(r"\f")
+            }
+            Special(ast::SpecialLiteralKind::Tab) => {
+                self.wtr.write_str(r"\t")
+            }
+            Special(ast::SpecialLiteralKind::LineFeed) => {
+                self.wtr.write_str(r"\n")
+            }
+            Special(ast::SpecialLiteralKind::CarriageReturn) => {
+                self.wtr.write_str(r"\r")
+            }
+            Special(ast::SpecialLiteralKind::VerticalTab) => {
+                self.wtr.write_str(r"\v")
+            }
+            Special(ast::SpecialLiteralKind::Space) => {
+                self.wtr.write_str(r"\ ")
+            }
+        }
+    }
+
+    fn fmt_assertion(&mut self, ast: &ast::Assertion) -> fmt::Result {
+        use ast::AssertionKind::*;
+        match ast.kind {
+            StartLine => self.wtr.write_str("^"),
+            EndLine => self.wtr.write_str("$"),
+            StartText => self.wtr.write_str(r"\A"),
+            EndText => self.wtr.write_str(r"\z"),
+            WordBoundary => self.wtr.write_str(r"\b"),
+            NotWordBoundary => self.wtr.write_str(r"\B"),
+        }
+    }
+
+    fn fmt_set_flags(&mut self, ast: &ast::SetFlags) -> fmt::Result {
+        self.wtr.write_str("(?")?;
+        self.fmt_flags(&ast.flags)?;
+        self.wtr.write_str(")")?;
+        Ok(())
+    }
+
+    fn fmt_flags(&mut self, ast: &ast::Flags) -> fmt::Result {
+        use ast::{Flag, FlagsItemKind};
+
+        for item in &ast.items {
+            match item.kind {
+                FlagsItemKind::Negation => self.wtr.write_str("-"),
+                FlagsItemKind::Flag(ref flag) => {
+                    match *flag {
+                        Flag::CaseInsensitive => self.wtr.write_str("i"),
+                        Flag::MultiLine => self.wtr.write_str("m"),
+                        Flag::DotMatchesNewLine => self.wtr.write_str("s"),
+                        Flag::SwapGreed => self.wtr.write_str("U"),
+                        Flag::Unicode => self.wtr.write_str("u"),
+                        Flag::IgnoreWhitespace => self.wtr.write_str("x"),
+                    }
+                }
+            }?;
+        }
+        Ok(())
+    }
+
+    fn fmt_class_bracketed_pre(
+        &mut self,
+        ast: &ast::ClassBracketed,
+    ) -> fmt::Result {
+        if ast.negated {
+            self.wtr.write_str("[^")
+        } else {
+            self.wtr.write_str("[")
+        }
+    }
+
+    fn fmt_class_bracketed_post(
+        &mut self,
+        _ast: &ast::ClassBracketed,
+    ) -> fmt::Result {
+        self.wtr.write_str("]")
+    }
+
+    fn fmt_class_set_binary_op_kind(
+        &mut self,
+        ast: &ast::ClassSetBinaryOpKind,
+    ) -> fmt::Result {
+        use ast::ClassSetBinaryOpKind::*;
+        match *ast {
+            Intersection => self.wtr.write_str("&&"),
+            Difference => self.wtr.write_str("--"),
+            SymmetricDifference => self.wtr.write_str("~~"),
+        }
+    }
+
+    fn fmt_class_perl(&mut self, ast: &ast::ClassPerl) -> fmt::Result {
+        use ast::ClassPerlKind::*;
+        match ast.kind {
+            Digit if ast.negated => self.wtr.write_str(r"\D"),
+            Digit => self.wtr.write_str(r"\d"),
+            Space if ast.negated => self.wtr.write_str(r"\S"),
+            Space => self.wtr.write_str(r"\s"),
+            Word if ast.negated => self.wtr.write_str(r"\W"),
+            Word => self.wtr.write_str(r"\w"),
+        }
+    }
+
+    fn fmt_class_ascii(&mut self, ast: &ast::ClassAscii) -> fmt::Result {
+        use ast::ClassAsciiKind::*;
+        match ast.kind {
+            Alnum if ast.negated => self.wtr.write_str("[:^alnum:]"),
+            Alnum => self.wtr.write_str("[:alnum:]"),
+            Alpha if ast.negated => self.wtr.write_str("[:^alpha:]"),
+            Alpha => self.wtr.write_str("[:alpha:]"),
+            Ascii if ast.negated => self.wtr.write_str("[:^ascii:]"),
+            Ascii => self.wtr.write_str("[:ascii:]"),
+            Blank if ast.negated => self.wtr.write_str("[:^blank:]"),
+            Blank => self.wtr.write_str("[:blank:]"),
+            Cntrl if ast.negated => self.wtr.write_str("[:^cntrl:]"),
+            Cntrl => self.wtr.write_str("[:cntrl:]"),
+            Digit if ast.negated => self.wtr.write_str("[:^digit:]"),
+            Digit => self.wtr.write_str("[:digit:]"),
+            Graph if ast.negated => self.wtr.write_str("[:^graph:]"),
+            Graph => self.wtr.write_str("[:graph:]"),
+            Lower if ast.negated => self.wtr.write_str("[:^lower:]"),
+            Lower => self.wtr.write_str("[:lower:]"),
+            Print if ast.negated => self.wtr.write_str("[:^print:]"),
+            Print => self.wtr.write_str("[:print:]"),
+            Punct if ast.negated => self.wtr.write_str("[:^punct:]"),
+            Punct => self.wtr.write_str("[:punct:]"),
+            Space if ast.negated => self.wtr.write_str("[:^space:]"),
+            Space => self.wtr.write_str("[:space:]"),
+            Upper if ast.negated => self.wtr.write_str("[:^upper:]"),
+            Upper => self.wtr.write_str("[:upper:]"),
+            Word if ast.negated => self.wtr.write_str("[:^word:]"),
+            Word => self.wtr.write_str("[:word:]"),
+            Xdigit if ast.negated => self.wtr.write_str("[:^xdigit:]"),
+            Xdigit => self.wtr.write_str("[:xdigit:]"),
+        }
+    }
+
+    fn fmt_class_unicode(&mut self, ast: &ast::ClassUnicode) -> fmt::Result {
+        use ast::ClassUnicodeKind::*;
+        use ast::ClassUnicodeOpKind::*;
+
+        if ast.negated {
+            self.wtr.write_str(r"\P")?;
+        } else {
+            self.wtr.write_str(r"\p")?;
+        }
+        match ast.kind {
+            OneLetter(c) => self.wtr.write_char(c),
+            Named(ref x) => write!(self.wtr, "{{{}}}", x),
+            NamedValue { op: Equal, ref name, ref value } => {
+                write!(self.wtr, "{{{}={}}}", name, value)
+            }
+            NamedValue { op: Colon, ref name, ref value } => {
+                write!(self.wtr, "{{{}:{}}}", name, value)
+            }
+            NamedValue { op: NotEqual, ref name, ref value } => {
+                write!(self.wtr, "{{{}!={}}}", name, value)
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use ast::parse::ParserBuilder;
+    use super::Printer;
+
+    fn roundtrip(given: &str) {
+        roundtrip_with(|b| b, given);
+    }
+
+    fn roundtrip_with<F>(mut f: F, given: &str)
+    where F: FnMut(&mut ParserBuilder) -> &mut ParserBuilder
+    {
+        let mut builder = ParserBuilder::new();
+        f(&mut builder);
+        let ast = builder.build().parse(given).unwrap();
+
+        let mut printer = Printer::new();
+        let mut dst = String::new();
+        printer.print(&ast, &mut dst).unwrap();
+        assert_eq!(given, dst);
+    }
+
+    #[test]
+    fn print_literal() {
+        roundtrip("a");
+        roundtrip(r"\[");
+        roundtrip_with(|b| b.octal(true), r"\141");
+        roundtrip(r"\x61");
+        roundtrip(r"\x7F");
+        roundtrip(r"\u0061");
+        roundtrip(r"\U00000061");
+        roundtrip(r"\x{61}");
+        roundtrip(r"\x{7F}");
+        roundtrip(r"\u{61}");
+        roundtrip(r"\U{61}");
+
+        roundtrip(r"\a");
+        roundtrip(r"\f");
+        roundtrip(r"\t");
+        roundtrip(r"\n");
+        roundtrip(r"\r");
+        roundtrip(r"\v");
+        roundtrip(r"(?x)\ ");
+    }
+
+    #[test]
+    fn print_dot() {
+        roundtrip(".");
+    }
+
+    #[test]
+    fn print_concat() {
+        roundtrip("ab");
+        roundtrip("abcde");
+        roundtrip("a(bcd)ef");
+    }
+
+    #[test]
+    fn print_alternation() {
+        roundtrip("a|b");
+        roundtrip("a|b|c|d|e");
+        roundtrip("|a|b|c|d|e");
+        roundtrip("|a|b|c|d|e|");
+        roundtrip("a(b|c|d)|e|f");
+    }
+
+    #[test]
+    fn print_assertion() {
+        roundtrip(r"^");
+        roundtrip(r"$");
+        roundtrip(r"\A");
+        roundtrip(r"\z");
+        roundtrip(r"\b");
+        roundtrip(r"\B");
+    }
+
+    #[test]
+    fn print_repetition() {
+        roundtrip("a?");
+        roundtrip("a??");
+        roundtrip("a*");
+        roundtrip("a*?");
+        roundtrip("a+");
+        roundtrip("a+?");
+        roundtrip("a{5}");
+        roundtrip("a{5}?");
+        roundtrip("a{5,}");
+        roundtrip("a{5,}?");
+        roundtrip("a{5,10}");
+        roundtrip("a{5,10}?");
+    }
+
+    #[test]
+    fn print_flags() {
+        roundtrip("(?i)");
+        roundtrip("(?-i)");
+        roundtrip("(?s-i)");
+        roundtrip("(?-si)");
+        roundtrip("(?siUmux)");
+    }
+
+    #[test]
+    fn print_group() {
+        roundtrip("(?i:a)");
+        roundtrip("(?P<foo>a)");
+        roundtrip("(a)");
+    }
+
+    #[test]
+    fn print_class() {
+        roundtrip(r"[abc]");
+        roundtrip(r"[a-z]");
+        roundtrip(r"[^a-z]");
+        roundtrip(r"[a-z0-9]");
+        roundtrip(r"[-a-z0-9]");
+        roundtrip(r"[-a-z0-9]");
+        roundtrip(r"[a-z0-9---]");
+        roundtrip(r"[a-z&&m-n]");
+        roundtrip(r"[[a-z&&m-n]]");
+        roundtrip(r"[a-z--m-n]");
+        roundtrip(r"[a-z~~m-n]");
+        roundtrip(r"[a-z[0-9]]");
+        roundtrip(r"[a-z[^0-9]]");
+
+        roundtrip(r"\d");
+        roundtrip(r"\D");
+        roundtrip(r"\s");
+        roundtrip(r"\S");
+        roundtrip(r"\w");
+        roundtrip(r"\W");
+
+        roundtrip(r"[[:alnum:]]");
+        roundtrip(r"[[:^alnum:]]");
+        roundtrip(r"[[:alpha:]]");
+        roundtrip(r"[[:^alpha:]]");
+        roundtrip(r"[[:ascii:]]");
+        roundtrip(r"[[:^ascii:]]");
+        roundtrip(r"[[:blank:]]");
+        roundtrip(r"[[:^blank:]]");
+        roundtrip(r"[[:cntrl:]]");
+        roundtrip(r"[[:^cntrl:]]");
+        roundtrip(r"[[:digit:]]");
+        roundtrip(r"[[:^digit:]]");
+        roundtrip(r"[[:graph:]]");
+        roundtrip(r"[[:^graph:]]");
+        roundtrip(r"[[:lower:]]");
+        roundtrip(r"[[:^lower:]]");
+        roundtrip(r"[[:print:]]");
+        roundtrip(r"[[:^print:]]");
+        roundtrip(r"[[:punct:]]");
+        roundtrip(r"[[:^punct:]]");
+        roundtrip(r"[[:space:]]");
+        roundtrip(r"[[:^space:]]");
+        roundtrip(r"[[:upper:]]");
+        roundtrip(r"[[:^upper:]]");
+        roundtrip(r"[[:word:]]");
+        roundtrip(r"[[:^word:]]");
+        roundtrip(r"[[:xdigit:]]");
+        roundtrip(r"[[:^xdigit:]]");
+
+        roundtrip(r"\pL");
+        roundtrip(r"\PL");
+        roundtrip(r"\p{L}");
+        roundtrip(r"\P{L}");
+        roundtrip(r"\p{X=Y}");
+        roundtrip(r"\P{X=Y}");
+        roundtrip(r"\p{X:Y}");
+        roundtrip(r"\P{X:Y}");
+        roundtrip(r"\p{X!=Y}");
+        roundtrip(r"\P{X!=Y}");
+    }
+}
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-syntax/src/ast/visitor.rs
@@ -0,0 +1,557 @@
+// Copyright 2018 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use std::fmt;
+
+use ast::{self, Ast};
+
+/// A trait for visiting an abstract syntax tree (AST) in depth first order.
+///
+/// The principal aim of this trait is to enable callers to perform case
+/// analysis on an abstract syntax tree without necessarily using recursion.
+/// In particular, this permits callers to do case analysis with constant stack
+/// usage, which can be important since the size of an abstract syntax tree
+/// may be proportional to end user input.
+///
+/// Typical usage of this trait involves providing an implementation and then
+/// running it using the [`visit`](fn.visit.html) function.
+///
+/// Note that the abstract syntax tree for a regular expression is quite
+/// complex. Unless you specifically need it, you might be able to use the
+/// much simpler
+/// [high-level intermediate representation](../hir/struct.Hir.html)
+/// and its
+/// [corresponding `Visitor` trait](../hir/trait.Visitor.html)
+/// instead.
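+///
+/// # Example
+///
+/// A minimal sketch of an implementor: the `LiteralCounter` type below is
+/// purely illustrative (it is not part of this crate) and counts literal
+/// nodes by overriding only `visit_pre`:
+///
+/// ```
+/// use regex_syntax::ast::parse::Parser;
+/// use regex_syntax::ast::{visit, Ast, Visitor};
+///
+/// struct LiteralCounter(usize);
+///
+/// impl Visitor for LiteralCounter {
+///     type Output = usize;
+///     type Err = ();
+///
+///     fn finish(self) -> Result<usize, ()> {
+///         Ok(self.0)
+///     }
+///
+///     fn visit_pre(&mut self, ast: &Ast) -> Result<(), ()> {
+///         // Count every literal node encountered during traversal.
+///         if let Ast::Literal(_) = *ast {
+///             self.0 += 1;
+///         }
+///         Ok(())
+///     }
+/// }
+///
+/// let ast = Parser::new().parse("a|bc").unwrap();
+/// assert_eq!(visit(&ast, LiteralCounter(0)), Ok(3));
+/// ```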
+pub trait Visitor {
+    /// The result of visiting an AST.
+    type Output;
+    /// An error that visiting an AST might return.
+    type Err;
+
+    /// All implementors of `Visitor` must provide a `finish` method, which
+    /// yields the result of visiting the AST or an error.
+    fn finish(self) -> Result<Self::Output, Self::Err>;
+
+    /// This method is called before beginning traversal of the AST.
+    fn start(&mut self) {}
+
+    /// This method is called on an `Ast` before descending into child `Ast`
+    /// nodes.
+    fn visit_pre(&mut self, _ast: &Ast) -> Result<(), Self::Err> {
+        Ok(())
+    }
+
+    /// This method is called on an `Ast` after descending all of its child
+    /// `Ast` nodes.
+    fn visit_post(&mut self, _ast: &Ast) -> Result<(), Self::Err> {
+        Ok(())
+    }
+
+    /// This method is called between child nodes of an
+    /// [`Alternation`](struct.Alternation.html).
+    fn visit_alternation_in(&mut self) -> Result<(), Self::Err> {
+        Ok(())
+    }
+
+    /// This method is called on every
+    /// [`ClassSetItem`](enum.ClassSetItem.html)
+    /// before descending into child nodes.
+    fn visit_class_set_item_pre(
+        &mut self,
+        _ast: &ast::ClassSetItem,
+    ) -> Result<(), Self::Err> {
+        Ok(())
+    }
+
+    /// This method is called on every
+    /// [`ClassSetItem`](enum.ClassSetItem.html)
+    /// after descending into child nodes.
+    fn visit_class_set_item_post(
+        &mut self,
+        _ast: &ast::ClassSetItem,
+    ) -> Result<(), Self::Err> {
+        Ok(())
+    }
+
+    /// This method is called on every
+    /// [`ClassSetBinaryOp`](struct.ClassSetBinaryOp.html)
+    /// before descending into child nodes.
+    fn visit_class_set_binary_op_pre(
+        &mut self,
+        _ast: &ast::ClassSetBinaryOp,
+    ) -> Result<(), Self::Err> {
+        Ok(())
+    }
+
+    /// This method is called on every
+    /// [`ClassSetBinaryOp`](struct.ClassSetBinaryOp.html)
+    /// after descending into child nodes.
+    fn visit_class_set_binary_op_post(
+        &mut self,
+        _ast: &ast::ClassSetBinaryOp,
+    ) -> Result<(), Self::Err> {
+        Ok(())
+    }
+
+    /// This method is called between the left hand and right hand child nodes
+    /// of a [`ClassSetBinaryOp`](struct.ClassSetBinaryOp.html).
+    fn visit_class_set_binary_op_in(
+        &mut self,
+        _ast: &ast::ClassSetBinaryOp,
+    ) -> Result<(), Self::Err> {
+        Ok(())
+    }
+}
+
+/// Executes an implementation of `Visitor` in constant stack space.
+///
+/// This function will visit every node in the given `Ast` while calling the
+/// appropriate methods provided by the
+/// [`Visitor`](trait.Visitor.html) trait.
+///
+/// The primary use case for this method is when one wants to perform case
+/// analysis over an `Ast` without using a stack size proportional to the depth
+/// of the `Ast`. Namely, this method will instead use constant stack size, but
+/// will use heap space proportional to the size of the `Ast`. This may be
+/// desirable in cases where the size of `Ast` is proportional to end user
+/// input.
+///
+/// If the visitor returns an error at any point, then visiting is stopped and
+/// the error is returned.
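+///
+/// As a rough sketch of how this fits together, a visitor that merely counts
+/// `Ast` nodes might look like the following (the `NodeCounter` type is
+/// purely illustrative):
+///
+/// ```ignore
+/// use regex_syntax::ast::parse::Parser;
+/// use regex_syntax::ast::{visit, Ast, Visitor};
+///
+/// struct NodeCounter(usize);
+///
+/// impl Visitor for NodeCounter {
+///     type Output = usize;
+///     type Err = ();
+///
+///     fn finish(self) -> Result<usize, ()> {
+///         Ok(self.0)
+///     }
+///
+///     fn visit_pre(&mut self, _ast: &Ast) -> Result<(), ()> {
+///         self.0 += 1;
+///         Ok(())
+///     }
+/// }
+///
+/// let ast = Parser::new().parse("a|b").unwrap();
+/// // The alternation node plus its two literal children.
+/// assert_eq!(visit(&ast, NodeCounter(0)), Ok(3));
+/// ```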
+pub fn visit<V: Visitor>(ast: &Ast, visitor: V) -> Result<V::Output, V::Err> {
+    HeapVisitor::new().visit(ast, visitor)
+}
+
+/// HeapVisitor visits every item in an `Ast` in depth first order using
+/// constant stack size and heap space proportional to the size of the `Ast`.
+struct HeapVisitor<'a> {
+    /// A stack of `Ast` nodes. This is roughly analogous to the call stack
+    /// used in a typical recursive visitor.
+    stack: Vec<(&'a Ast, Frame<'a>)>,
+    /// Similar to the `Ast` stack above, but is used only for character
+    /// classes. In particular, character classes embed their own mini
+    /// recursive syntax.
+    stack_class: Vec<(ClassInduct<'a>, ClassFrame<'a>)>,
+}
+
+/// Represents a single stack frame while performing structural induction over
+/// an `Ast`.
+enum Frame<'a> {
+    /// A stack frame allocated just before descending into a repetition
+    /// operator's child node.
+    Repetition(&'a ast::Repetition),
+    /// A stack frame allocated just before descending into a group's child
+    /// node.
+    Group(&'a ast::Group),
+    /// The stack frame used while visiting every child node of a concatenation
+    /// of expressions.
+    Concat {
+        /// The child node we are currently visiting.
+        head: &'a Ast,
+        /// The remaining child nodes to visit (which may be empty).
+        tail: &'a [Ast],
+    },
+    /// The stack frame used while visiting every child node of an alternation
+    /// of expressions.
+    Alternation {
+        /// The child node we are currently visiting.
+        head: &'a Ast,
+        /// The remaining child nodes to visit (which may be empty).
+        tail: &'a [Ast],
+    },
+}
+
+/// Represents a single stack frame while performing structural induction over
+/// a character class.
+enum ClassFrame<'a> {
+    /// The stack frame used while visiting every child node of a union of
+    /// character class items.
+    Union {
+        /// The child node we are currently visiting.
+        head: &'a ast::ClassSetItem,
+        /// The remaining child nodes to visit (which may be empty).
+        tail: &'a [ast::ClassSetItem],
+    },
+    /// The stack frame used while visiting a binary class operation.
+    Binary {
+        op: &'a ast::ClassSetBinaryOp,
+    },
+    /// A stack frame allocated just before descending into a binary operator's
+    /// left hand child node.
+    BinaryLHS {
+        op: &'a ast::ClassSetBinaryOp,
+        lhs: &'a ast::ClassSet,
+        rhs: &'a ast::ClassSet,
+    },
+    /// A stack frame allocated just before descending into a binary operator's
+    /// right hand child node.
+    BinaryRHS {
+        op: &'a ast::ClassSetBinaryOp,
+        rhs: &'a ast::ClassSet,
+    },
+}
+
+/// A representation of the inductive step when performing structural induction
+/// over a character class.
+///
+/// Note that there is no analogous explicit type for the inductive step for
+/// `Ast` nodes because the inductive step is just an `Ast`. For character
+/// classes, the inductive step can produce one of two possible child nodes:
+/// an item or a binary operation. (An item cannot be a binary operation
+/// because that would imply binary operations can be unioned in the concrete
+/// syntax, which is not possible.)
+enum ClassInduct<'a> {
+    Item(&'a ast::ClassSetItem),
+    BinaryOp(&'a ast::ClassSetBinaryOp),
+}
+
+impl<'a> HeapVisitor<'a> {
+    fn new() -> HeapVisitor<'a> {
+        HeapVisitor { stack: vec![], stack_class: vec![] }
+    }
+
+    fn visit<V: Visitor>(
+        &mut self,
+        mut ast: &'a Ast,
+        mut visitor: V,
+    ) -> Result<V::Output, V::Err> {
+        self.stack.clear();
+        self.stack_class.clear();
+
+        visitor.start();
+        loop {
+            visitor.visit_pre(ast)?;
+            if let Some(x) = self.induct(ast, &mut visitor)? {
+                let child = x.child();
+                self.stack.push((ast, x));
+                ast = child;
+                continue;
+            }
+            // No induction means we have a base case, so we can post visit
+            // it now.
+            visitor.visit_post(ast)?;
+
+            // At this point, we now try to pop our call stack until it is
+            // either empty or we hit another inductive case.
+            loop {
+                let (post_ast, frame) = match self.stack.pop() {
+                    None => return visitor.finish(),
+                    Some((post_ast, frame)) => (post_ast, frame),
+                };
+                // If this is a concat/alternate, then we might have additional
+                // inductive steps to process.
+                if let Some(x) = self.pop(frame) {
+                    if let Frame::Alternation {..} = x {
+                        visitor.visit_alternation_in()?;
+                    }
+                    ast = x.child();
+                    self.stack.push((post_ast, x));
+                    break;
+                }
+                // Otherwise, we've finished visiting all the child nodes for
+                // this AST, so we can post visit it now.
+                visitor.visit_post(post_ast)?;
+            }
+        }
+    }
+
+    /// Build a stack frame for the given AST if one is needed (which occurs if
+    /// and only if there are child nodes in the AST). Otherwise, return None.
+    ///
+    /// If this visits a class, then the underlying visitor implementation may
+    /// return an error which will be passed on here.
+    fn induct<V: Visitor>(
+        &mut self,
+        ast: &'a Ast,
+        visitor: &mut V,
+    ) -> Result<Option<Frame<'a>>, V::Err> {
+        Ok(match *ast {
+            Ast::Class(ast::Class::Bracketed(ref x)) => {
+                self.visit_class(x, visitor)?;
+                None
+            }
+            Ast::Repetition(ref x) => Some(Frame::Repetition(x)),
+            Ast::Group(ref x) => Some(Frame::Group(x)),
+            Ast::Concat(ref x) if x.asts.is_empty() => None,
+            Ast::Concat(ref x) => {
+                Some(Frame::Concat {
+                    head: &x.asts[0],
+                    tail: &x.asts[1..],
+                })
+            }
+            Ast::Alternation(ref x) if x.asts.is_empty() => None,
+            Ast::Alternation(ref x) => {
+                Some(Frame::Alternation {
+                    head: &x.asts[0],
+                    tail: &x.asts[1..],
+                })
+            }
+            _ => None,
+        })
+    }
+
+    /// Pops the given frame. If the frame has an additional inductive step,
+    /// then return it, otherwise return `None`.
+    fn pop(&self, induct: Frame<'a>) -> Option<Frame<'a>> {
+        match induct {
+            Frame::Repetition(_) => None,
+            Frame::Group(_) => None,
+            Frame::Concat { tail, .. } => {
+                if tail.is_empty() {
+                    None
+                } else {
+                    Some(Frame::Concat {
+                        head: &tail[0],
+                        tail: &tail[1..],
+                    })
+                }
+            }
+            Frame::Alternation { tail, .. } => {
+                if tail.is_empty() {
+                    None
+                } else {
+                    Some(Frame::Alternation {
+                        head: &tail[0],
+                        tail: &tail[1..],
+                    })
+                }
+            }
+        }
+    }
+
+    fn visit_class<V: Visitor>(
+        &mut self,
+        ast: &'a ast::ClassBracketed,
+        visitor: &mut V,
+    ) -> Result<(), V::Err> {
+        let mut ast = ClassInduct::from_bracketed(ast);
+        loop {
+            self.visit_class_pre(&ast, visitor)?;
+            if let Some(x) = self.induct_class(&ast) {
+                let child = x.child();
+                self.stack_class.push((ast, x));
+                ast = child;
+                continue;
+            }
+            self.visit_class_post(&ast, visitor)?;
+
+            // At this point, we now try to pop our call stack until it is
+            // either empty or we hit another inductive case.
+            loop {
+                let (post_ast, frame) = match self.stack_class.pop() {
+                    None => return Ok(()),
+                    Some((post_ast, frame)) => (post_ast, frame),
+                };
+                // If this is a union or a binary op, then we might have
+                // additional inductive steps to process.
+                if let Some(x) = self.pop_class(frame) {
+                    if let ClassFrame::BinaryRHS { ref op, .. } = x {
+                        visitor.visit_class_set_binary_op_in(op)?;
+                    }
+                    ast = x.child();
+                    self.stack_class.push((post_ast, x));
+                    break;
+                }
+                // Otherwise, we've finished visiting all the child nodes for
+                // this class node, so we can post visit it now.
+                self.visit_class_post(&post_ast, visitor)?;
+            }
+        }
+    }
+
+    /// Call the appropriate `Visitor` methods given an inductive step.
+    fn visit_class_pre<V: Visitor>(
+        &self,
+        ast: &ClassInduct<'a>,
+        visitor: &mut V,
+    ) -> Result<(), V::Err> {
+        match *ast {
+            ClassInduct::Item(item) => {
+                visitor.visit_class_set_item_pre(item)?;
+            }
+            ClassInduct::BinaryOp(op) => {
+                visitor.visit_class_set_binary_op_pre(op)?;
+            }
+        }
+        Ok(())
+    }
+
+    /// Call the appropriate `Visitor` methods given an inductive step.
+    fn visit_class_post<V: Visitor>(
+        &self,
+        ast: &ClassInduct<'a>,
+        visitor: &mut V,
+    ) -> Result<(), V::Err> {
+        match *ast {
+            ClassInduct::Item(item) => {
+                visitor.visit_class_set_item_post(item)?;
+            }
+            ClassInduct::BinaryOp(op) => {
+                visitor.visit_class_set_binary_op_post(op)?;
+            }
+        }
+        Ok(())
+    }
+
+    /// Build a stack frame for the given class node if one is needed (which
+    /// occurs if and only if there are child nodes). Otherwise, return None.
+    fn induct_class(
+        &self,
+        ast: &ClassInduct<'a>,
+    ) -> Option<ClassFrame<'a>> {
+        match *ast {
+            ClassInduct::Item(&ast::ClassSetItem::Bracketed(ref x)) => {
+                match x.kind {
+                    ast::ClassSet::Item(ref item) => {
+                        Some(ClassFrame::Union {
+                            head: item,
+                            tail: &[],
+                        })
+                    }
+                    ast::ClassSet::BinaryOp(ref op) => {
+                        Some(ClassFrame::Binary { op: op })
+                    }
+                }
+            }
+            ClassInduct::Item(&ast::ClassSetItem::Union(ref x)) => {
+                if x.items.is_empty() {
+                    None
+                } else {
+                    Some(ClassFrame::Union {
+                        head: &x.items[0],
+                        tail: &x.items[1..],
+                    })
+                }
+            }
+            ClassInduct::BinaryOp(op) => {
+                Some(ClassFrame::BinaryLHS {
+                    op: op,
+                    lhs: &op.lhs,
+                    rhs: &op.rhs,
+                })
+            }
+            _ => None,
+        }
+    }
+
+    /// Pops the given frame. If the frame has an additional inductive step,
+    /// then return it, otherwise return `None`.
+    fn pop_class(&self, induct: ClassFrame<'a>) -> Option<ClassFrame<'a>> {
+        match induct {
+            ClassFrame::Union { tail, .. } => {
+                if tail.is_empty() {
+                    None
+                } else {
+                    Some(ClassFrame::Union {
+                        head: &tail[0],
+                        tail: &tail[1..],
+                    })
+                }
+            }
+            ClassFrame::Binary {..} => None,
+            ClassFrame::BinaryLHS { op, rhs, .. } => {
+                Some(ClassFrame::BinaryRHS {
+                    op: op,
+                    rhs: rhs,
+                })
+            }
+            ClassFrame::BinaryRHS {..} => None,
+        }
+    }
+}
+
+impl<'a> Frame<'a> {
+    /// Perform the next inductive step on this frame and return the next
+    /// child AST node to visit.
+    fn child(&self) -> &'a Ast {
+        match *self {
+            Frame::Repetition(rep) => &rep.ast,
+            Frame::Group(group) => &group.ast,
+            Frame::Concat { head, .. } => head,
+            Frame::Alternation { head, .. } => head,
+        }
+    }
+}
+
+impl<'a> ClassFrame<'a> {
+    /// Perform the next inductive step on this frame and return the next
+    /// child class node to visit.
+    fn child(&self) -> ClassInduct<'a> {
+        match *self {
+            ClassFrame::Union { head, .. } => ClassInduct::Item(head),
+            ClassFrame::Binary { op, .. } => ClassInduct::BinaryOp(op),
+            ClassFrame::BinaryLHS { ref lhs, .. } => {
+                ClassInduct::from_set(lhs)
+            }
+            ClassFrame::BinaryRHS { ref rhs, .. } => {
+                ClassInduct::from_set(rhs)
+            }
+        }
+    }
+}
+
+impl<'a> ClassInduct<'a> {
+    fn from_bracketed(ast: &'a ast::ClassBracketed) -> ClassInduct<'a> {
+        ClassInduct::from_set(&ast.kind)
+    }
+
+    fn from_set(ast: &'a ast::ClassSet) -> ClassInduct<'a> {
+        match *ast {
+            ast::ClassSet::Item(ref item) => ClassInduct::Item(item),
+            ast::ClassSet::BinaryOp(ref op) => ClassInduct::BinaryOp(op),
+        }
+    }
+}
+
+impl<'a> fmt::Debug for ClassFrame<'a> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let x = match *self {
+            ClassFrame::Union{..} => "Union",
+            ClassFrame::Binary{..} => "Binary",
+            ClassFrame::BinaryLHS{..} => "BinaryLHS",
+            ClassFrame::BinaryRHS{..} => "BinaryRHS",
+        };
+        write!(f, "{}", x)
+    }
+}
+
+impl<'a> fmt::Debug for ClassInduct<'a> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let x = match *self {
+            ClassInduct::Item(it) => {
+                match *it {
+                    ast::ClassSetItem::Empty(_) => "Item(Empty)",
+                    ast::ClassSetItem::Literal(_) => "Item(Literal)",
+                    ast::ClassSetItem::Range(_) => "Item(Range)",
+                    ast::ClassSetItem::Ascii(_) => "Item(Ascii)",
+                    ast::ClassSetItem::Perl(_) => "Item(Perl)",
+                    ast::ClassSetItem::Unicode(_) => "Item(Unicode)",
+                    ast::ClassSetItem::Bracketed(_) => "Item(Bracketed)",
+                    ast::ClassSetItem::Union(_) => "Item(Union)",
+                }
+            }
+            ClassInduct::BinaryOp(it) => {
+                match it.kind {
+                    ast::ClassSetBinaryOpKind::Intersection => {
+                        "BinaryOp(Intersection)"
+                    }
+                    ast::ClassSetBinaryOpKind::Difference => {
+                        "BinaryOp(Difference)"
+                    }
+                    ast::ClassSetBinaryOpKind::SymmetricDifference => {
+                        "BinaryOp(SymmetricDifference)"
+                    }
+                }
+            }
+        };
+        write!(f, "{}", x)
+    }
+}
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-syntax/src/either.rs
@@ -0,0 +1,8 @@
+/// A simple binary sum type.
+///
+/// This is occasionally useful in an ad hoc fashion.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum Either<Left, Right> {
+    Left(Left),
+    Right(Right),
+}
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-syntax/src/error.rs
@@ -0,0 +1,297 @@
+use std::cmp;
+use std::error;
+use std::fmt;
+use std::result;
+
+use ast;
+use hir;
+
+/// A type alias for dealing with errors returned by this crate.
+pub type Result<T> = result::Result<T, Error>;
+
+/// This error type encompasses any error that can be returned by this crate.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum Error {
+    /// An error that occurred while translating concrete syntax into abstract
+    /// syntax (AST).
+    Parse(ast::Error),
+    /// An error that occurred while translating abstract syntax into a high
+    /// level intermediate representation (HIR).
+    Translate(hir::Error),
+    /// Hints that destructuring should not be exhaustive.
+    ///
+    /// This enum may grow additional variants, so this makes sure clients
+    /// don't count on exhaustive matching. (Otherwise, adding a new variant
+    /// could break existing code.)
+    #[doc(hidden)]
+    __Nonexhaustive,
+}
+
+impl From<ast::Error> for Error {
+    fn from(err: ast::Error) -> Error {
+        Error::Parse(err)
+    }
+}
+
+impl From<hir::Error> for Error {
+    fn from(err: hir::Error) -> Error {
+        Error::Translate(err)
+    }
+}
+
+impl error::Error for Error {
+    fn description(&self) -> &str {
+        match *self {
+            Error::Parse(ref x) => x.description(),
+            Error::Translate(ref x) => x.description(),
+            _ => unreachable!(),
+        }
+    }
+}
+
+impl fmt::Display for Error {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match *self {
+            Error::Parse(ref x) => x.fmt(f),
+            Error::Translate(ref x) => x.fmt(f),
+            _ => unreachable!(),
+        }
+    }
+}
+
+/// A helper type for formatting nice error messages.
+///
+/// This type is responsible for reporting regex parse errors in a nice human
+/// readable format. Most of its complexity is from interspersing notational
+/// markers pointing out the position where an error occurred.
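+///
+/// For a pattern that fits on a single line, the rendered error looks
+/// roughly like the following (the exact message text depends on the error
+/// kind):
+///
+/// ```text
+/// regex parse error:
+///     a{
+///      ^
+/// error: <description of the error>
+/// ```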
+#[derive(Debug)]
+pub struct Formatter<'e, E: 'e> {
+    /// The original regex pattern in which the error occurred.
+    pattern: &'e str,
+    /// The error kind. It must impl fmt::Display.
+    err: &'e E,
+    /// The primary span of the error.
+    span: &'e ast::Span,
+    /// An auxiliary and optional span, in case the error needs to point to
+    /// two locations (e.g., when reporting a duplicate capture group name).
+    aux_span: Option<&'e ast::Span>,
+}
+
+impl<'e> From<&'e ast::Error> for Formatter<'e, ast::ErrorKind> {
+    fn from(err: &'e ast::Error) -> Self {
+        Formatter {
+            pattern: err.pattern(),
+            err: err.kind(),
+            span: err.span(),
+            aux_span: err.auxiliary_span(),
+        }
+    }
+}
+
+impl<'e> From<&'e hir::Error> for Formatter<'e, hir::ErrorKind> {
+    fn from(err: &'e hir::Error) -> Self {
+        Formatter {
+            pattern: err.pattern(),
+            err: err.kind(),
+            span: err.span(),
+            aux_span: None,
+        }
+    }
+}
+
+impl<'e, E: fmt::Display> fmt::Display for Formatter<'e, E> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let spans = Spans::from_formatter(self);
+        if self.pattern.contains('\n') {
+            let divider = repeat_char('~', 79);
+
+            writeln!(f, "regex parse error:")?;
+            writeln!(f, "{}", divider)?;
+            let notated = spans.notate();
+            write!(f, "{}", notated)?;
+            writeln!(f, "{}", divider)?;
+            // If we have error spans that cover multiple lines, then we just
+            // note the line numbers.
+            if !spans.multi_line.is_empty() {
+                let mut notes = vec![];
+                for span in &spans.multi_line {
+                    notes.push(format!(
+                        "on line {} (column {}) through line {} (column {})",
+                        span.start.line, span.start.column,
+                        span.end.line, span.end.column - 1));
+                }
+                writeln!(f, "{}", notes.join("\n"))?;
+            }
+            write!(f, "error: {}", self.err)?;
+        } else {
+            writeln!(f, "regex parse error:")?;
+            let notated = Spans::from_formatter(self).notate();
+            write!(f, "{}", notated)?;
+            write!(f, "error: {}", self.err)?;
+        }
+        Ok(())
+    }
+}
+
+/// This type represents an arbitrary number of error spans in a way that makes
+/// it convenient to notate the regex pattern. ("Notate" means "point out
+/// exactly where the error occurred in the regex pattern.")
+///
+/// Technically, we can only ever have two spans given our current error
+/// structure. However, after toiling with a specific algorithm for handling
+/// two spans, it became obvious that an algorithm to handle an arbitrary
+/// number of spans was actually much simpler.
+struct Spans<'p> {
+    /// The original regex pattern string.
+    pattern: &'p str,
+    /// The total width that should be used for line numbers. The width is
+    /// used for left padding the line numbers for alignment.
+    ///
+    /// A value of `0` means line numbers should not be displayed. That is,
+    /// the pattern is itself only one line.
+    line_number_width: usize,
+    /// All error spans that occur on a single line. This sequence always has
+    /// length equivalent to the number of lines in `pattern`, where the index
+    /// of the sequence represents a line number, starting at `0`. The spans
+    /// in each line are sorted in ascending order.
+    by_line: Vec<Vec<ast::Span>>,
+    /// All error spans that occur over one or more lines. That is, the start
+    /// and end position of the span have different line numbers. The spans are
+    /// sorted in ascending order.
+    multi_line: Vec<ast::Span>,
+}
+
+impl<'p> Spans<'p> {
+    /// Build a sequence of spans from a formatter.
+    fn from_formatter<'e, E: fmt::Display>(
+        fmter: &'p Formatter<'e, E>,
+    ) -> Spans<'p> {
+        let mut line_count = fmter.pattern.lines().count();
+        // If the pattern ends with a `\n` literal, then our line count is
+        // off by one, since a span can occur immediately after the last `\n`,
+        // which is considered to be an additional line.
+        if fmter.pattern.ends_with('\n') {
+            line_count += 1;
+        }
+        let line_number_width =
+            if line_count <= 1 {
+                0
+            } else {
+                line_count.to_string().len()
+            };
+        let mut spans = Spans {
+            pattern: &fmter.pattern,
+            line_number_width: line_number_width,
+            by_line: vec![vec![]; line_count],
+            multi_line: vec![],
+        };
+        spans.add(fmter.span.clone());
+        if let Some(span) = fmter.aux_span {
+            spans.add(span.clone());
+        }
+        spans
+    }
+
+    /// Add the given span to this sequence, putting it in the right place.
+    fn add(&mut self, span: ast::Span) {
+        // This is grossly inefficient since we sort after each add, but right
+        // now, we only ever add two spans at most.
+        if span.is_one_line() {
+            let i = span.start.line - 1; // because lines are 1-indexed
+            self.by_line[i].push(span);
+            self.by_line[i].sort();
+        } else {
+            self.multi_line.push(span);
+            self.multi_line.sort();
+        }
+    }
+
+    /// Notate the pattern string with carets (`^`) pointing at each span
+    /// location. This only applies to spans that occur within a single line.
+    fn notate(&self) -> String {
+        let mut notated = String::new();
+        for (i, line) in self.pattern.lines().enumerate() {
+            if self.line_number_width > 0 {
+                notated.push_str(&self.left_pad_line_number(i + 1));
+                notated.push_str(": ");
+            } else {
+                notated.push_str("    ");
+            }
+            notated.push_str(line);
+            notated.push('\n');
+            if let Some(notes) = self.notate_line(i) {
+                notated.push_str(&notes);
+                notated.push('\n');
+            }
+        }
+        notated
+    }
+
+    /// Return notes for the line indexed at `i` (zero-based). If there are no
+    /// spans for the given line, then `None` is returned. Otherwise, an
+    /// appropriately space padded string with correctly positioned `^` is
+    /// returned, accounting for line numbers.
+    fn notate_line(&self, i: usize) -> Option<String> {
+        let spans = &self.by_line[i];
+        if spans.is_empty() {
+            return None;
+        }
+        let mut notes = String::new();
+        for _ in 0..self.line_number_padding() {
+            notes.push(' ');
+        }
+        let mut pos = 0;
+        for span in spans {
+            for _ in pos..(span.start.column - 1) {
+                notes.push(' ');
+                pos += 1;
+            }
+            let note_len = span.end.column.saturating_sub(span.start.column);
+            for _ in 0..cmp::max(1, note_len) {
+                notes.push('^');
+                pos += 1;
+            }
+        }
+        Some(notes)
+    }
+
+    /// Left pad the given line number with spaces such that it is aligned with
+    /// other line numbers.
+    fn left_pad_line_number(&self, n: usize) -> String {
+        let n = n.to_string();
+        let pad = self.line_number_width.checked_sub(n.len()).unwrap();
+        let mut result = repeat_char(' ', pad);
+        result.push_str(&n);
+        result
+    }
+
+    /// Return the line number padding beginning at the start of each line of
+    /// the pattern.
+    ///
+    /// If the pattern is only one line, then this returns a fixed padding
+    /// for visual indentation.
+    fn line_number_padding(&self) -> usize {
+        if self.line_number_width == 0 {
+            4
+        } else {
+            2 + self.line_number_width
+        }
+    }
+}
+
+fn repeat_char(c: char, count: usize) -> String {
+    ::std::iter::repeat(c).take(count).collect()
+}
+
+#[cfg(test)]
+mod tests {
+    use ast::parse::Parser;
+
+    // See: https://github.com/rust-lang/regex/issues/464
+    #[test]
+    fn regression_464() {
+        let err = Parser::new().parse("a{\n").unwrap_err();
+        // This test checks that the error formatter doesn't panic.
+        assert!(!err.to_string().is_empty());
+    }
+}
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-syntax/src/hir/interval.rs
@@ -0,0 +1,490 @@
+use std::char;
+use std::cmp;
+use std::fmt::Debug;
+use std::slice;
+use std::u8;
+
+// This module contains an *internal* implementation of interval sets.
+//
+// The primary invariant that an interval set guards is canonical ordering. That
+// is, every interval set contains an ordered sequence of intervals where
+// no two intervals are overlapping or adjacent. While this invariant is
+// occasionally broken within the implementation, it should be impossible for
+// callers to observe it.
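+//
+// For example, the intervals `a-c`, `b-f` and `g-i` canonicalize to the
+// single interval `a-i`: `b-f` overlaps `a-c`, and `g-i` is adjacent to the
+// result of merging the first two.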
+//
+// Since case folding (as implemented below) breaks that invariant, we roll
+// that into this API even though it is a little out of place in an otherwise
+// generic interval set.
+//
+// Some of the implementation complexity here is a result of me wanting to
+// preserve the sequential representation without using additional memory.
+// In many cases, we do use linear extra memory, but it is at most 2x and it
+// is amortized. If we relaxed the memory requirements, this implementation
+// could become much simpler. The extra memory is honestly probably OK, but
+// character classes (especially of the Unicode variety) can become quite
+// large, and it would be nice to keep regex compilation snappy even in debug
+// builds. (In the past, I have been careless with this area of code and it has
+// caused slow regex compilations in debug mode, so this isn't entirely
+// unwarranted.)
+//
+// Tests on this are relegated to the public API of HIR in src/hir.rs.
+
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct IntervalSet<I> {
+    ranges: Vec<I>,
+}
+
+impl<I: Interval> IntervalSet<I> {
+    /// Create a new set from a sequence of intervals. Each interval is
+    /// specified as a pair of bounds, where both bounds are inclusive.
+    ///
+    /// The given ranges do not need to be in any specific order, and ranges
+    /// may overlap.
+    pub fn new<T: IntoIterator<Item=I>>(intervals: T) -> IntervalSet<I> {
+        let mut set = IntervalSet { ranges: intervals.into_iter().collect() };
+        set.canonicalize();
+        set
+    }
+
+    /// Add a new interval to this set.
+    pub fn push(&mut self, interval: I) {
+        // TODO: This could be faster. e.g., Push the interval such that
+        // it preserves canonicalization.
+        self.ranges.push(interval);
+        self.canonicalize();
+    }
+
+    /// Return an iterator over all intervals in this set.
+    ///
+    /// The iterator yields intervals in ascending order.
+    pub fn iter(&self) -> IntervalSetIter<I> {
+        IntervalSetIter(self.ranges.iter())
+    }
+
+    /// Return an immutable slice of intervals in this set.
+    ///
+    /// The sequence returned is in canonical ordering.
+    pub fn intervals(&self) -> &[I] {
+        &self.ranges
+    }
+
+    /// Expand this interval set such that it contains all case folded
+    /// characters. For example, if this class consists of the range `a-z`,
+    /// then applying case folding will result in the class containing both the
+    /// ranges `a-z` and `A-Z`.
+    pub fn case_fold_simple(&mut self) {
+        let len = self.ranges.len();
+        for i in 0..len {
+            let range = self.ranges[i];
+            range.case_fold_simple(&mut self.ranges);
+        }
+        self.canonicalize();
+    }
+
+    /// Union this set with the given set, in place.
+    pub fn union(&mut self, other: &IntervalSet<I>) {
+        // This could almost certainly be done more efficiently.
+        self.ranges.extend(&other.ranges);
+        self.canonicalize();
+    }
+
+    /// Intersect this set with the given set, in place.
+    pub fn intersect(&mut self, other: &IntervalSet<I>) {
+        if self.ranges.is_empty() {
+            return;
+        }
+        if other.ranges.is_empty() {
+            self.ranges.clear();
+            return;
+        }
+
+        // There should be a way to do this in-place with constant memory,
+        // but I couldn't figure out a simple way to do it. So just append
+        // the intersection to the end of this range, and then drain it before
+        // we're done.
+        let drain_end = self.ranges.len();
+
+        let mut ita = (0..drain_end).into_iter();
+        let mut itb = (0..other.ranges.len()).into_iter();
+        let mut a = ita.next().unwrap();
+        let mut b = itb.next().unwrap();
+        loop {
+            if let Some(ab) = self.ranges[a].intersect(&other.ranges[b]) {
+                self.ranges.push(ab);
+            }
+            let (it, aorb) =
+                if self.ranges[a].upper() < other.ranges[b].upper() {
+                    (&mut ita, &mut a)
+                } else {
+                    (&mut itb, &mut b)
+                };
+            match it.next() {
+                Some(v) => *aorb = v,
+                None => break,
+            }
+        }
+        self.ranges.drain(..drain_end);
+    }
+
+    /// Subtract the given set from this set, in place.
+    pub fn difference(&mut self, other: &IntervalSet<I>) {
+        if self.ranges.is_empty() || other.ranges.is_empty() {
+            return;
+        }
+
+        // This algorithm is (to me) surprisingly complex. A search of the
+        // interwebs indicates that this is a potentially interesting problem.
+        // Folks seem to suggest interval or segment trees, but I'd like to
+        // avoid the overhead (both runtime and conceptual) of that.
+        //
+        // The following is basically my Shitty First Draft. Therefore, in
+        // order to grok it, you probably need to read each line carefully.
+        // Simplifications are most welcome!
+        //
+        // Remember, we can assume the canonical format invariant here, which
+        // says that all ranges are sorted, not overlapping and not adjacent in
+        // each class.
+        let drain_end = self.ranges.len();
+        let (mut a, mut b) = (0, 0);
+    'LOOP:
+        while a < drain_end && b < other.ranges.len() {
+            // Basically, the easy cases are when neither range overlaps with
+            // each other. If the `b` range is less than our current `a`
+            // range, then we can skip it and move on.
+            if other.ranges[b].upper() < self.ranges[a].lower() {
+                b += 1;
+                continue;
+            }
+            // ... similarly for the `a` range. If it's less than the smallest
+            // `b` range, then we can add it as-is.
+            if self.ranges[a].upper() < other.ranges[b].lower() {
+                let range = self.ranges[a];
+                self.ranges.push(range);
+                a += 1;
+                continue;
+            }
+            // Otherwise, we have overlapping ranges.
+            assert!(!self.ranges[a].is_intersection_empty(&other.ranges[b]));
+
+            // This part is tricky and was non-obvious to me without looking
+            // at explicit examples (see the tests). The trickiness stems from
+            // two things: 1) subtracting a range from another range could
+            // yield two ranges and 2) after subtracting a range, it's possible
+            // that future ranges can have an impact. The loop below advances
+            // the `b` ranges until they can't possibly impact the current
+            // range.
+            //
+            // For example, if our `a` range is `a-t` and our next four `b`
+            // ranges are `a-c`, `g-i`, `r-t` and `x-z`, then we need to apply
+            // subtraction three times before moving on to the next `a` range.
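+            // Working through that example: `a-t` minus `a-c` leaves `d-t`,
+            // minus `g-i` leaves `d-f` and `j-t`, and minus `r-t` leaves
+            // `j-q`, so the ranges that survive are `d-f` and `j-q`.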
+            let mut range = self.ranges[a];
+            while b < other.ranges.len()
+                && !range.is_intersection_empty(&other.ranges[b])
+            {
+                let old_range = range;
+                range = match range.difference(&other.ranges[b]) {
+                    (None, None) => {
+                        // We lost the entire range, so move on to the next
+                        // without adding this one.
+                        a += 1;
+                        continue 'LOOP;
+                    }
+                    (Some(range1), None) | (None, Some(range1)) => range1,
+                    (Some(range1), Some(range2)) => {
+                        self.ranges.push(range1);
+                        range2
+                    }
+                };
+                // It's possible that the `b` range has more to contribute
+                // here. In particular, if it is greater than the original
+                // range, then it might impact the next `a` range *and* it
+                // has impacted the current `a` range as much as possible,
+                // so we can quit. We don't bump `b` so that the next `a`
+                // range can apply it.
+                if other.ranges[b].upper() > old_range.upper() {
+                    break;
+                }
+                // Otherwise, the next `b` range might apply to the current
+                // `a` range.
+                b += 1;
+            }
+            self.ranges.push(range);
+            a += 1;
+        }
+        while a < drain_end {
+            let range = self.ranges[a];
+            self.ranges.push(range);
+            a += 1;
+        }
+        self.ranges.drain(..drain_end);
+    }
+
+    /// Compute the symmetric difference of the two sets, in place.
+    ///
+    /// This computes the symmetric difference of two interval sets. This
+    /// removes all elements in this set that are also in the given set,
+    /// but also adds all elements from the given set that aren't in this
+    /// set. That is, the set will contain all elements in either set,
+    /// but will not contain any elements that are in both sets.
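+    ///
+    /// For example, the symmetric difference of the sets `{a-g}` and `{d-m}`
+    /// is `{a-c, h-m}`.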
+    pub fn symmetric_difference(&mut self, other: &IntervalSet<I>) {
+        // TODO(burntsushi): Fix this so that it amortizes allocation.
+        let mut intersection = self.clone();
+        intersection.intersect(other);
+        self.union(other);
+        self.difference(&intersection);
+    }
+
+    /// Negate this interval set.
+    ///
+    /// For all `x` where `x` is any element, if `x` was in this set, then it
+    /// will not be in this set after negation.
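+    ///
+    /// For example, negating a `char` set containing only `b-d` yields the
+    /// two ranges `\x00-a` and `e-\u{10FFFF}`.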
+    pub fn negate(&mut self) {
+        if self.ranges.is_empty() {
+            let (min, max) = (I::Bound::min_value(), I::Bound::max_value());
+            self.ranges.push(I::create(min, max));
+            return;
+        }
+
+        // There should be a way to do this in-place with constant memory,
+        // but I couldn't figure out a simple way to do it. So just append
+        // the negation to the end of this range, and then drain it before
+        // we're done.
+        let drain_end = self.ranges.len();
+
+        // We do checked arithmetic below because of the canonical ordering
+        // invariant.
+        if self.ranges[0].lower() > I::Bound::min_value() {
+            let upper = self.ranges[0].lower().decrement();
+            self.ranges.push(I::create(I::Bound::min_value(), upper));
+        }
+        for i in 1..drain_end {
+            let lower = self.ranges[i - 1].upper().increment();
+            let upper = self.ranges[i].lower().decrement();
+            self.ranges.push(I::create(lower, upper));
+        }
+        if self.ranges[drain_end - 1].upper() < I::Bound::max_value() {
+            let lower = self.ranges[drain_end - 1].upper().increment();
+            self.ranges.push(I::create(lower, I::Bound::max_value()));
+        }
+        self.ranges.drain(..drain_end);
+    }
+
+    /// Converts this set into a canonical ordering.
+    fn canonicalize(&mut self) {
+        if self.is_canonical() {
+            return;
+        }
+        self.ranges.sort();
+        assert!(!self.ranges.is_empty());
+
+        // Is there a way to do this in-place with constant memory? I couldn't
+        // figure out a way to do it. So just append the canonicalization to
+        // the end of this range, and then drain it before we're done.
+        let drain_end = self.ranges.len();
+        for oldi in 0..drain_end {
+            // If we've added at least one new range, then check if we can
+            // merge this range in the previously added range.
+            if self.ranges.len() > drain_end {
+                let (last, rest) = self.ranges.split_last_mut().unwrap();
+                if let Some(union) = last.union(&rest[oldi]) {
+                    *last = union;
+                    continue;
+                }
+            }
+            let range = self.ranges[oldi];
+            self.ranges.push(range);
+        }
+        self.ranges.drain(..drain_end);
+    }
+
+    /// Returns true if and only if this class is in a canonical ordering.
+    fn is_canonical(&self) -> bool {
+        for pair in self.ranges.windows(2) {
+            if pair[0] >= pair[1] {
+                return false;
+            }
+            if pair[0].is_contiguous(&pair[1]) {
+                return false;
+            }
+        }
+        true
+    }
+}
+
+/// An iterator over intervals.
+#[derive(Debug)]
+pub struct IntervalSetIter<'a, I: 'a>(slice::Iter<'a, I>);
+
+impl<'a, I> Iterator for IntervalSetIter<'a, I> {
+    type Item = &'a I;
+
+    fn next(&mut self) -> Option<&'a I> {
+        self.0.next()
+    }
+}
+
+pub trait Interval:
+    Clone + Copy + Debug + Default + Eq + PartialEq + PartialOrd + Ord
+{
+    type Bound: Bound;
+
+    fn lower(&self) -> Self::Bound;
+    fn upper(&self) -> Self::Bound;
+    fn set_lower(&mut self, bound: Self::Bound);
+    fn set_upper(&mut self, bound: Self::Bound);
+    fn case_fold_simple(&self, intervals: &mut Vec<Self>);
+
+    /// Create a new interval.
+    fn create(lower: Self::Bound, upper: Self::Bound) -> Self {
+        let mut int = Self::default();
+        if lower <= upper {
+            int.set_lower(lower);
+            int.set_upper(upper);
+        } else {
+            int.set_lower(upper);
+            int.set_upper(lower);
+        }
+        int
+    }
+
+    /// Union the given overlapping range into this range.
+    ///
+    /// If the two ranges aren't contiguous, then this returns `None`.
+    fn union(&self, other: &Self) -> Option<Self> {
+        if !self.is_contiguous(other) {
+            return None;
+        }
+        let lower = cmp::min(self.lower(), other.lower());
+        let upper = cmp::max(self.upper(), other.upper());
+        Some(Self::create(lower, upper))
+    }
+
+    /// Intersect this range with the given range and return the result.
+    ///
+    /// If the intersection is empty, then this returns `None`.
+    fn intersect(&self, other: &Self) -> Option<Self> {
+        let lower = cmp::max(self.lower(), other.lower());
+        let upper = cmp::min(self.upper(), other.upper());
+        if lower <= upper {
+            Some(Self::create(lower, upper))
+        } else {
+            None
+        }
+    }
+
+    /// Subtract the given range from this range and return the resulting
+    /// ranges.
+    ///
+    /// If subtraction would result in an empty range, then no ranges are
+    /// returned.
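+    ///
+    /// For example, subtracting `g-p` from `b-y` yields the two ranges `b-f`
+    /// and `q-y`, while subtracting `b-y` from `g-p` yields no ranges.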
+    fn difference(&self, other: &Self) -> (Option<Self>, Option<Self>) {
+        if self.is_subset(other) {
+            return (None, None);
+        }
+        if self.is_intersection_empty(other) {
+            return (Some(self.clone()), None);
+        }
+        let add_lower = other.lower() > self.lower();
+        let add_upper = other.upper() < self.upper();
+        // We know this because !self.is_subset(other) and the ranges have
+        // a non-empty intersection.
+        assert!(add_lower || add_upper);
+        let mut ret = (None, None);
+        if add_lower {
+            let upper = other.lower().decrement();
+            ret.0 = Some(Self::create(self.lower(), upper));
+        }
+        if add_upper {
+            let lower = other.upper().increment();
+            let range = Self::create(lower, self.upper());
+            if ret.0.is_none() {
+                ret.0 = Some(range);
+            } else {
+                ret.1 = Some(range);
+            }
+        }
+        ret
+    }
+
+    /// Compute the symmetric difference of this range and the given range. This
+    /// returns the union of the two ranges minus its intersection.
+    fn symmetric_difference(
+        &self,
+        other: &Self,
+    ) -> (Option<Self>, Option<Self>) {
+        let union = match self.union(other) {
+            None => return (Some(self.clone()), Some(other.clone())),
+            Some(union) => union,
+        };
+        let intersection = match self.intersect(other) {
+            None => return (Some(self.clone()), Some(other.clone())),
+            Some(intersection) => intersection,
+        };
+        union.difference(&intersection)
+    }
+
+    /// Returns true if and only if the two ranges are contiguous. Two ranges
+    /// are contiguous if and only if the ranges are either overlapping or
+    /// adjacent.
+    fn is_contiguous(&self, other: &Self) -> bool {
+        let lower1 = self.lower().as_u32();
+        let upper1 = self.upper().as_u32();
+        let lower2 = other.lower().as_u32();
+        let upper2 = other.upper().as_u32();
+        cmp::max(lower1, lower2) <= cmp::min(upper1, upper2).saturating_add(1)
+    }
+
+    /// Returns true if and only if the intersection of this range and the
+    /// other range is empty.
+    fn is_intersection_empty(&self, other: &Self) -> bool {
+        let (lower1, upper1) = (self.lower(), self.upper());
+        let (lower2, upper2) = (other.lower(), other.upper());
+        cmp::max(lower1, lower2) > cmp::min(upper1, upper2)
+    }
+
+    /// Returns true if and only if this range is a subset of the other range.
+    fn is_subset(&self, other: &Self) -> bool {
+        let (lower1, upper1) = (self.lower(), self.upper());
+        let (lower2, upper2) = (other.lower(), other.upper());
+        (lower2 <= lower1 && lower1 <= upper2)
+        && (lower2 <= upper1 && upper1 <= upper2)
+    }
+}
+
+pub trait Bound: Copy + Clone + Debug + Eq + PartialEq + PartialOrd + Ord {
+    fn min_value() -> Self;
+    fn max_value() -> Self;
+    fn as_u32(self) -> u32;
+    fn increment(self) -> Self;
+    fn decrement(self) -> Self;
+}
+
+impl Bound for u8 {
+    fn min_value() -> Self { u8::MIN }
+    fn max_value() -> Self { u8::MAX }
+    fn as_u32(self) -> u32 { self as u32 }
+    fn increment(self) -> Self { self.checked_add(1).unwrap() }
+    fn decrement(self) -> Self { self.checked_sub(1).unwrap() }
+}
+
+impl Bound for char {
+    fn min_value() -> Self { '\x00' }
+    fn max_value() -> Self { '\u{10FFFF}' }
+    fn as_u32(self) -> u32 { self as u32 }
+
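+    // `increment` and `decrement` step over the surrogate code points
+    // U+D800 through U+DFFF, which are not valid `char` values.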
+    fn increment(self) -> Self {
+        match self {
+            '\u{D7FF}' => '\u{E000}',
+            c => char::from_u32((c as u32).checked_add(1).unwrap()).unwrap(),
+        }
+    }
+
+    fn decrement(self) -> Self {
+        match self {
+            '\u{E000}' => '\u{D7FF}',
+            c => char::from_u32((c as u32).checked_sub(1).unwrap()).unwrap(),
+        }
+    }
+}
+
+// Tests for interval sets are written in src/hir.rs against the public API.
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-syntax/src/hir/literal/mod.rs
@@ -0,0 +1,1551 @@
+// Copyright 2018 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+/*!
+Provides routines for extracting literal prefixes and suffixes from an `Hir`.
+*/
+
+use std::cmp;
+use std::fmt;
+use std::iter;
+use std::mem;
+use std::ops;
+
+use hir::{self, Hir, HirKind};
+
+/// A set of literal byte strings extracted from a regular expression.
+///
+/// Every member of the set is a `Literal`, which is represented by a
+/// `Vec<u8>`. (Notably, it may contain invalid UTF-8.) Every member is
+/// said to be either *complete* or *cut*. A complete literal means that
+/// it extends until the beginning (or end) of the regular expression. In
+/// some circumstances, this can be used to indicate a match in the regular
+/// expression.
+///
+/// A key aspect of literal extraction is knowing when to stop. It is not
+/// feasible to blindly extract all literals from a regular expression, even if
+/// there are finitely many. For example, the regular expression `[0-9]{10}`
+/// has `10^10` distinct literals. For this reason, literal extraction is
+/// bounded to some low number by default using heuristics, but the limits can
+/// be tweaked.
+///
+/// **WARNING**: Literal extraction uses stack space proportional to the size
+/// of the `Hir` expression. At some point, this drawback will be eliminated.
+/// To protect yourself, set a reasonable
+/// [`nest_limit` on your `Parser`](../../struct.ParserBuilder.html#method.nest_limit).
+/// This is done for you by default.
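+///
+/// As a rough sketch of typical usage (the crate paths and pattern here are
+/// assumptions for illustration), prefixes can be extracted from a parsed
+/// `Hir`:
+///
+/// ```ignore
+/// use regex_syntax::Parser;
+/// use regex_syntax::hir::literal::Literals;
+///
+/// let hir = Parser::new().parse("foo|foobar").unwrap();
+/// let prefixes = Literals::prefixes(&hir);
+/// // Both alternatives begin with "foo", so it is the longest common prefix.
+/// assert_eq!(prefixes.longest_common_prefix(), &b"foo"[..]);
+/// ```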
+#[derive(Clone, Eq, PartialEq)]
+pub struct Literals {
+    lits: Vec<Literal>,
+    limit_size: usize,
+    limit_class: usize,
+}
+
+/// A single member of a set of literals extracted from a regular expression.
+///
+/// This type has `Deref` and `DerefMut` impls to `Vec<u8>` so that all slice
+/// and `Vec` operations are available.
+#[derive(Clone, Eq, Ord)]
+pub struct Literal {
+    v: Vec<u8>,
+    cut: bool,
+}
+
+impl Literals {
+    /// Returns a new empty set of literals using default limits.
+    pub fn empty() -> Literals {
+        Literals {
+            lits: vec![],
+            limit_size: 250,
+            limit_class: 10,
+        }
+    }
+
+    /// Returns a set of literal prefixes extracted from the given `Hir`.
+    pub fn prefixes(expr: &Hir) -> Literals {
+        let mut lits = Literals::empty();
+        lits.union_prefixes(expr);
+        lits
+    }
+
+    /// Returns a set of literal suffixes extracted from the given `Hir`.
+    pub fn suffixes(expr: &Hir) -> Literals {
+        let mut lits = Literals::empty();
+        lits.union_suffixes(expr);
+        lits
+    }
+
+    /// Get the approximate size limit (in bytes) of this set.
+    pub fn limit_size(&self) -> usize {
+        self.limit_size
+    }
+
+    /// Set the approximate size limit (in bytes) of this set.
+    ///
+    /// If extracting a literal would put the set over this limit, then
+    /// extraction stops.
+    ///
+    /// The new limits will only apply to additions to this set. Existing
+    /// members remain unchanged, even if the set exceeds the new limit.
+    pub fn set_limit_size(&mut self, size: usize) -> &mut Literals {
+        self.limit_size = size;
+        self
+    }
+
+    /// Get the character class size limit for this set.
+    pub fn limit_class(&self) -> usize {
+        self.limit_class
+    }
+
+    /// Limits the size of character (or byte) classes considered.
+    ///
+    /// A value of `0` prevents all character classes from being considered.
+    ///
+    /// This limit also applies to case insensitive literals, since each
+    /// character in the case insensitive literal is converted to a class, and
+    /// then case folded.
+    ///
+    /// The new limits will only apply to additions to this set. Existing
+    /// members remain unchanged, even if the set exceeds the new limit.
+    pub fn set_limit_class(&mut self, size: usize) -> &mut Literals {
+        self.limit_class = size;
+        self
+    }
+
+    /// Returns the set of literals as a slice. Its order is unspecified.
+    pub fn literals(&self) -> &[Literal] {
+        &self.lits
+    }
+
+    /// Returns the length of the smallest literal.
+    ///
+    /// Returns `None` if there are no literals in the set.
+    pub fn min_len(&self) -> Option<usize> {
+        let mut min = None;
+        for lit in &self.lits {
+            match min {
+                None => min = Some(lit.len()),
+                Some(m) if lit.len() < m => min = Some(lit.len()),
+                _ => {}
+            }
+        }
+        min
+    }
+
+    /// Returns true if all members in this set are complete.
+    pub fn all_complete(&self) -> bool {
+        !self.lits.is_empty() && self.lits.iter().all(|l| !l.is_cut())
+    }
+
+    /// Returns true if any member in this set is complete.
+    pub fn any_complete(&self) -> bool {
+        self.lits.iter().any(|lit| !lit.is_cut())
+    }
+
+    /// Returns true if this set contains an empty literal.
+    pub fn contains_empty(&self) -> bool {
+        self.lits.iter().any(|lit| lit.is_empty())
+    }
+
+    /// Returns true if this set is empty or if all of its members are empty.
+    pub fn is_empty(&self) -> bool {
+        self.lits.is_empty() || self.lits.iter().all(|lit| lit.is_empty())
+    }
+
+    /// Returns a new empty set of literals using this set's limits.
+    pub fn to_empty(&self) -> Literals {
+        let mut lits = Literals::empty();
+        lits.set_limit_size(self.limit_size)
+            .set_limit_class(self.limit_class);
+        lits
+    }
+
+    /// Returns the longest common prefix of all members in this set.
+    pub fn longest_common_prefix(&self) -> &[u8] {
+        if self.is_empty() {
+            return &[];
+        }
+        let lit0 = &*self.lits[0];
+        let mut len = lit0.len();
+        for lit in &self.lits[1..] {
+            len = cmp::min(
+                len,
+                lit.iter()
+                   .zip(lit0)
+                   .take_while(|&(a, b)| a == b)
+                   .count());
+        }
+        &self.lits[0][..len]
+    }
+
+    /// Returns the longest common suffix of all members in this set.
+    pub fn longest_common_suffix(&self) -> &[u8] {
+        if self.is_empty() {
+            return &[];
+        }
+        let lit0 = &*self.lits[0];
+        let mut len = lit0.len();
+        for lit in &self.lits[1..] {
+            len = cmp::min(
+                len,
+                lit.iter()
+                   .rev()
+                   .zip(lit0.iter().rev())
+                   .take_while(|&(a, b)| a == b)
+                   .count());
+        }
+        &self.lits[0][self.lits[0].len() - len..]
+    }
+
+    /// Returns a new set of literals with the given number of bytes trimmed
+    /// from the suffix of each literal.
+    ///
+    /// If any literal would be cut out completely by trimming, then None is
+    /// returned.
+    ///
+    /// Any duplicates that are created as a result of this transformation are
+    /// removed.
+    pub fn trim_suffix(&self, num_bytes: usize) -> Option<Literals> {
+        if self.min_len().map(|len| len <= num_bytes).unwrap_or(true) {
+            return None;
+        }
+        let mut new = self.to_empty();
+        for mut lit in self.lits.iter().cloned() {
+            let new_len = lit.len() - num_bytes;
+            lit.truncate(new_len);
+            lit.cut();
+            new.lits.push(lit);
+        }
+        new.lits.sort();
+        new.lits.dedup();
+        Some(new)
+    }
+
+    /// Returns a new set of prefixes of this set of literals that are
+    /// guaranteed to be unambiguous.
+    ///
+    /// Any substring match with a member of the returned set is guaranteed
+    /// to never overlap with a substring match of another member of the set
+    /// at the same starting position.
+    ///
+    /// Given any two members of the returned set, neither is a substring of
+    /// the other.
+    pub fn unambiguous_prefixes(&self) -> Literals {
+        if self.lits.is_empty() {
+            return self.to_empty();
+        }
+        let mut old: Vec<Literal> = self.lits.iter().cloned().collect();
+        let mut new = self.to_empty();
+    'OUTER:
+        while let Some(mut candidate) = old.pop() {
+            if candidate.is_empty() {
+                continue;
+            }
+            if new.lits.is_empty() {
+                new.lits.push(candidate);
+                continue;
+            }
+            for lit2 in &mut new.lits {
+                if lit2.is_empty() {
+                    continue;
+                }
+                if &candidate == lit2 {
+                    // If the literal is already in the set, then we can
+                    // just drop it. But make sure that cut literals are
+                    // infectious!
+                    candidate.cut = candidate.cut || lit2.cut;
+                    lit2.cut = candidate.cut;
+                    continue 'OUTER;
+                }
+                if candidate.len() < lit2.len() {
+                    if let Some(i) = position(&candidate, &lit2) {
+                        candidate.cut();
+                        let mut lit3 = lit2.clone();
+                        lit3.truncate(i);
+                        lit3.cut();
+                        old.push(lit3);
+                        lit2.clear();
+                    }
+                } else {
+                    if let Some(i) = position(&lit2, &candidate) {
+                        lit2.cut();
+                        let mut new_candidate = candidate.clone();
+                        new_candidate.truncate(i);
+                        new_candidate.cut();
+                        old.push(new_candidate);
+                        candidate.clear();
+                    }
+                }
+                // Oops, the candidate is already represented in the set.
+                if candidate.is_empty() {
+                    continue 'OUTER;
+                }
+            }
+            new.lits.push(candidate);
+        }
+        new.lits.retain(|lit| !lit.is_empty());
+        new.lits.sort();
+        new.lits.dedup();
+        new
+    }
+
+    /// Returns a new set of suffixes of this set of literals that are
+    /// guaranteed to be unambiguous.
+    ///
+    /// Any substring match with a member of the returned set is guaranteed
+    /// to never overlap with a substring match of another member of the set
+    /// at the same ending position.
+    ///
+    /// Given any two members of the returned set, neither is a substring of
+    /// the other.
+    pub fn unambiguous_suffixes(&self) -> Literals {
+        // This is a touch wasteful...
+        let mut lits = self.clone();
+        lits.reverse();
+        let mut unamb = lits.unambiguous_prefixes();
+        unamb.reverse();
+        unamb
+    }
+
+    /// Unions the prefixes from the given expression to this set.
+    ///
+    /// If prefixes could not be added (for example, this set would exceed its
+    /// size limits or the set of prefixes from `expr` includes the empty
+    /// string), then false is returned.
+    ///
+    /// Note that prefix literals extracted from `expr` are said to be complete
+    /// if and only if the literal extends from the beginning of `expr` to the
+    /// end of `expr`.
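+    ///
+    /// # Example
+    ///
+    /// A small sketch, added for illustration; it assumes the crate's
+    /// top-level `Parser` with its default configuration:
+    ///
+    /// ```
+    /// use regex_syntax::Parser;
+    /// use regex_syntax::hir::literal::Literals;
+    ///
+    /// let hir = Parser::new().parse("foo|bar").unwrap();
+    /// let mut lits = Literals::empty();
+    /// assert!(lits.union_prefixes(&hir));
+    /// assert_eq!(2, lits.literals().len());
+    /// ```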
+    pub fn union_prefixes(&mut self, expr: &Hir) -> bool {
+        let mut lits = self.to_empty();
+        prefixes(expr, &mut lits);
+        !lits.is_empty() && !lits.contains_empty() && self.union(lits)
+    }
+
+    /// Unions the suffixes from the given expression to this set.
+    ///
+    /// If suffixes could not be added (for example, this set would exceed its
+    /// size limits or the set of suffixes from `expr` includes the empty
+    /// string), then false is returned.
+    ///
+    /// Note that suffix literals extracted from `expr` are said to be complete
+    /// if and only if the literal extends from the end of `expr` to the
+    /// beginning of `expr`.
+    pub fn union_suffixes(&mut self, expr: &Hir) -> bool {
+        let mut lits = self.to_empty();
+        suffixes(expr, &mut lits);
+        lits.reverse();
+        !lits.is_empty() && !lits.contains_empty() && self.union(lits)
+    }
+
+    /// Unions this set with another set.
+    ///
+    /// If the union would cause the set to exceed its limits, then the union
+    /// is skipped and it returns false. Otherwise, if the union succeeds, it
+    /// returns true.
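+    ///
+    /// # Example
+    ///
+    /// A minimal sketch, added for illustration (not upstream documentation):
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::{Literal, Literals};
+    ///
+    /// let mut lits = Literals::empty();
+    /// lits.add(Literal::new(b"foo".to_vec()));
+    /// let mut other = Literals::empty();
+    /// other.add(Literal::new(b"bar".to_vec()));
+    /// assert!(lits.union(other));
+    /// assert_eq!(2, lits.literals().len());
+    /// ```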
+    pub fn union(&mut self, lits: Literals) -> bool {
+        if self.num_bytes() + lits.num_bytes() > self.limit_size {
+            return false;
+        }
+        if lits.is_empty() {
+            self.lits.push(Literal::empty());
+        } else {
+            self.lits.extend(lits.lits);
+        }
+        true
+    }
+
+    /// Extends this set with another set.
+    ///
+    /// The set of literals is extended via a cross product.
+    ///
+    /// If a cross product would cause this set to exceed its limits, then the
+    /// cross product is skipped and it returns false. Otherwise, if the cross
+    /// product succeeds, it returns true.
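+    ///
+    /// # Example
+    ///
+    /// A minimal sketch, added for illustration and assuming the default
+    /// limits from `Literals::empty`:
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::{Literal, Literals};
+    ///
+    /// let mut lits = Literals::empty();
+    /// lits.add(Literal::new(b"a".to_vec()));
+    /// lits.add(Literal::new(b"b".to_vec()));
+    ///
+    /// let mut suffixes = Literals::empty();
+    /// suffixes.add(Literal::new(b"x".to_vec()));
+    /// suffixes.add(Literal::new(b"y".to_vec()));
+    ///
+    /// // {a, b} x {x, y} = {ax, ay, bx, by}
+    /// assert!(lits.cross_product(&suffixes));
+    /// let mut got: Vec<&[u8]> =
+    ///     lits.literals().iter().map(|lit| lit.as_ref()).collect();
+    /// got.sort();
+    /// assert_eq!(vec![&b"ax"[..], &b"ay"[..], &b"bx"[..], &b"by"[..]], got);
+    /// ```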
+    pub fn cross_product(&mut self, lits: &Literals) -> bool {
+        if lits.is_empty() {
+            return true;
+        }
+        // Check that we stay within our limits.
+        let mut size_after;
+        if self.is_empty() || !self.any_complete() {
+            size_after = self.num_bytes();
+            for lits_lit in lits.literals() {
+                size_after += lits_lit.len();
+            }
+        } else {
+            size_after = self.lits.iter().fold(0, |accum, lit| {
+                accum + if lit.is_cut() { lit.len() } else { 0 }
+            });
+            for lits_lit in lits.literals() {
+                for self_lit in self.literals() {
+                    if !self_lit.is_cut() {
+                        size_after += self_lit.len() + lits_lit.len();
+                    }
+                }
+            }
+        }
+        if size_after > self.limit_size {
+            return false;
+        }
+
+        let mut base = self.remove_complete();
+        if base.is_empty() {
+            base = vec![Literal::empty()];
+        }
+        for lits_lit in lits.literals() {
+            for mut self_lit in base.clone() {
+                self_lit.extend(&**lits_lit);
+                self_lit.cut = lits_lit.cut;
+                self.lits.push(self_lit);
+            }
+        }
+        true
+    }
+
+    /// Extends each literal in this set with the bytes given.
+    ///
+    /// If the set is empty, then the given literal is added to the set.
+    ///
+    /// If adding any number of bytes to all members of this set causes a limit
+    /// to be exceeded, then no bytes are added and false is returned. If a
+    /// prefix of `bytes` can be fit into this set, then it is used and all
+    /// resulting literals are cut.
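+    ///
+    /// # Example
+    ///
+    /// A minimal sketch, added for illustration (not upstream documentation):
+    ///
+    /// ```
+    /// use regex_syntax::hir::literal::{Literal, Literals};
+    ///
+    /// let mut lits = Literals::empty();
+    /// lits.add(Literal::new(b"foo".to_vec()));
+    /// lits.add(Literal::new(b"bar".to_vec()));
+    /// assert!(lits.cross_add(b"!"));
+    /// assert_eq!(&b"foo!"[..], lits.literals()[0].as_ref());
+    /// assert_eq!(&b"bar!"[..], lits.literals()[1].as_ref());
+    /// ```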
+    pub fn cross_add(&mut self, bytes: &[u8]) -> bool {
+        // N.B. This could be implemented by simply calling cross_product with
+        // a literal set containing just `bytes`, but we can be smarter about
+        // taking shorter prefixes of `bytes` if they'll fit.
+        if bytes.is_empty() {
+            return true;
+        }
+        if self.lits.is_empty() {
+            let i = cmp::min(self.limit_size, bytes.len());
+            self.lits.push(Literal::new(bytes[..i].to_owned()));
+            self.lits[0].cut = i < bytes.len();
+            return !self.lits[0].is_cut();
+        }
+        let size = self.num_bytes();
+        if size + self.lits.len() >= self.limit_size {
+            return false;
+        }
+        let mut i = 1;
+        while size + (i * self.lits.len()) <= self.limit_size
+            && i < bytes.len() {
+            i += 1;
+        }
+        for lit in &mut self.lits {
+            if !lit.is_cut() {
+                lit.extend(&bytes[..i]);
+                if i < bytes.len() {
+                    lit.cut();
+                }
+            }
+        }
+        true
+    }
+
+    /// Adds the given literal to this set.
+    ///
+    /// Returns false if adding this literal would cause the class to be too
+    /// big.
+    pub fn add(&mut self, lit: Literal) -> bool {
+        if self.num_bytes() + lit.len() > self.limit_size {
+            return false;
+        }
+        self.lits.push(lit);
+        true
+    }
+
+    /// Extends each literal in this set with the character class given.
+    ///
+    /// Returns false if the character class was too big to add.
+    pub fn add_char_class(&mut self, cls: &hir::ClassUnicode) -> bool {
+        self._add_char_class(cls, false)
+    }
+
+    /// Extends each literal in this set with the character class given,
+    /// writing the bytes of each character in reverse.
+    ///
+    /// Returns false if the character class was too big to add.
+    fn add_char_class_reverse(&mut self, cls: &hir::ClassUnicode) -> bool {
+        self._add_char_class(cls, true)
+    }
+
+    fn _add_char_class(
+        &mut self,
+        cls: &hir::ClassUnicode,
+        reverse: bool,
+    ) -> bool {
+        use std::char;
+
+        if self.class_exceeds_limits(cls_char_count(cls)) {
+            return false;
+        }
+        let mut base = self.remove_complete();
+        if base.is_empty() {
+            base = vec![Literal::empty()];
+        }
+        for r in cls.iter() {
+            let (s, e) = (r.start as u32, r.end as u32 + 1);
+            for c in (s..e).filter_map(char::from_u32) {
+                for mut lit in base.clone() {
+                    let mut bytes = c.to_string().into_bytes();
+                    if reverse {
+                        bytes.reverse();
+                    }
+                    lit.extend(&bytes);
+                    self.lits.push(lit);
+                }
+            }
+        }
+        true
+    }
+
+    /// Extends each literal in this set with the byte class given.
+    ///
+    /// Returns false if the byte class was too big to add.
+    pub fn add_byte_class(&mut self, cls: &hir::ClassBytes) -> bool {
+        if self.class_exceeds_limits(cls_byte_count(cls)) {
+            return false;
+        }
+        let mut base = self.remove_complete();
+        if base.is_empty() {
+            base = vec![Literal::empty()];
+        }
+        for r in cls.iter() {
+            let (s, e) = (r.start as u32, r.end as u32 + 1);
+            for b in (s..e).map(|b| b as u8) {
+                for mut lit in base.clone() {
+                    lit.push(b);
+                    self.lits.push(lit);
+                }
+            }
+        }
+        true
+    }
+
+    /// Cuts every member of this set. When a member is cut, it can never
+    /// be extended.
+    pub fn cut(&mut self) {
+        for lit in &mut self.lits {
+            lit.cut();
+        }
+    }
+
+    /// Reverses all members in place.
+    pub fn reverse(&mut self) {
+        for lit in &mut self.lits {
+            lit.reverse();
+        }
+    }
+
+    /// Clears this set of all members.
+    pub fn clear(&mut self) {
+        self.lits.clear();
+    }
+
+    /// Pops all complete literals out of this set.
+    fn remove_complete(&mut self) -> Vec<Literal> {
+        let mut base = vec![];
+        for lit in mem::replace(&mut self.lits, vec![]) {
+            if lit.is_cut() {
+                self.lits.push(lit);
+            } else {
+                base.push(lit);
+            }
+        }
+        base
+    }
+
+    /// Returns the total number of bytes in this set.
+    fn num_bytes(&self) -> usize {
+        self.lits.iter().fold(0, |accum, lit| accum + lit.len())
+    }
+
+    /// Returns true if a character class with the given size would cause this
+    /// set to exceed its limits.
+    ///
+    /// The size given should correspond to the number of items in the class.
+    fn class_exceeds_limits(&self, size: usize) -> bool {
+        if size > self.limit_class {
+            return true;
+        }
+        // This is an approximation since codepoints in a char class can encode
+        // to 1-4 bytes.
+        let new_byte_count =
+            if self.lits.is_empty() {
+                size
+            } else {
+                self.lits
+                    .iter()
+                    .fold(0, |accum, lit| {
+                        accum + if lit.is_cut() {
+                            // If the literal is cut, then we'll never add
+                            // anything to it, so don't count it.
+                            0
+                        } else {
+                            (lit.len() + 1) * size
+                        }
+                    })
+            };
+        new_byte_count > self.limit_size
+    }
+}
+
+fn prefixes(expr: &Hir, lits: &mut Literals) {
+    match *expr.kind() {
+        HirKind::Literal(hir::Literal::Unicode(c)) => {
+            let mut buf = [0; 4];
+            lits.cross_add(c.encode_utf8(&mut buf).as_bytes());
+        }
+        HirKind::Literal(hir::Literal::Byte(b)) => {
+            lits.cross_add(&[b]);
+        }
+        HirKind::Class(hir::Class::Unicode(ref cls)) => {
+            if !lits.add_char_class(cls) {
+                lits.cut();
+            }
+        }
+        HirKind::Class(hir::Class::Bytes(ref cls)) => {
+            if !lits.add_byte_class(cls) {
+                lits.cut();
+            }
+        }
+        HirKind::Group(hir::Group { ref hir, .. }) => {
+            prefixes(&**hir, lits);
+        }
+        HirKind::Repetition(ref x) => {
+            match x.kind {
+                hir::RepetitionKind::ZeroOrOne => {
+                    repeat_zero_or_one_literals(&x.hir, lits, prefixes);
+                }
+                hir::RepetitionKind::ZeroOrMore => {
+                    repeat_zero_or_more_literals(&x.hir, lits, prefixes);
+                }
+                hir::RepetitionKind::OneOrMore => {
+                    repeat_one_or_more_literals(&x.hir, lits, prefixes);
+                }
+                hir::RepetitionKind::Range(ref rng) => {
+                    let (min, max) = match *rng {
+                        hir::RepetitionRange::Exactly(m) => {
+                            (m, Some(m))
+                        }
+                        hir::RepetitionRange::AtLeast(m) => {
+                            (m, None)
+                        }
+                        hir::RepetitionRange::Bounded(m, n) => {
+                            (m, Some(n))
+                        }
+                    };
+                    repeat_range_literals(
+                        &x.hir, min, max, x.greedy, lits, prefixes)
+                }
+            }
+        }
+        HirKind::Concat(ref es) if es.is_empty() => {}
+        HirKind::Concat(ref es) if es.len() == 1 => prefixes(&es[0], lits),
+        HirKind::Concat(ref es) => {
+            for e in es {
+                if let HirKind::Anchor(hir::Anchor::StartText) = *e.kind() {
+                    if !lits.is_empty() {
+                        lits.cut();
+                        break;
+                    }
+                    lits.add(Literal::empty());
+                    continue;
+                }
+                let mut lits2 = lits.to_empty();
+                prefixes(e, &mut lits2);
+                if !lits.cross_product(&lits2) || !lits2.any_complete() {
+                    // If this expression couldn't yield any literal that
+                    // could be extended, then we need to quit. Since we're
+                    // short-circuiting, we also need to freeze every member.
+                    lits.cut();
+                    break;
+                }
+            }
+        }
+        HirKind::Alternation(ref es) => {
+            alternate_literals(es, lits, prefixes);
+        }
+        _ => lits.cut(),
+    }
+}
+
+fn suffixes(expr: &Hir, lits: &mut Literals) {
+    match *expr.kind() {
+        HirKind::Literal(hir::Literal::Unicode(c)) => {
+            let mut buf = [0u8; 4];
+            let i = c.encode_utf8(&mut buf).len();
+            let mut buf = &mut buf[..i];
+            buf.reverse();
+            lits.cross_add(buf);
+        }
+        HirKind::Literal(hir::Literal::Byte(b)) => {
+            lits.cross_add(&[b]);
+        }
+        HirKind::Class(hir::Class::Unicode(ref cls)) => {
+            if !lits.add_char_class_reverse(cls) {
+                lits.cut();
+            }
+        }
+        HirKind::Class(hir::Class::Bytes(ref cls)) => {
+            if !lits.add_byte_class(cls) {
+                lits.cut();
+            }
+        }
+        HirKind::Group(hir::Group { ref hir, .. }) => {
+            suffixes(&**hir, lits);
+        }
+        HirKind::Repetition(ref x) => {
+            match x.kind {
+                hir::RepetitionKind::ZeroOrOne => {
+                    repeat_zero_or_one_literals(&x.hir, lits, suffixes);
+                }
+                hir::RepetitionKind::ZeroOrMore => {
+                    repeat_zero_or_more_literals(&x.hir, lits, suffixes);
+                }
+                hir::RepetitionKind::OneOrMore => {
+                    repeat_one_or_more_literals(&x.hir, lits, suffixes);
+                }
+                hir::RepetitionKind::Range(ref rng) => {
+                    let (min, max) = match *rng {
+                        hir::RepetitionRange::Exactly(m) => {
+                            (m, Some(m))
+                        }
+                        hir::RepetitionRange::AtLeast(m) => {
+                            (m, None)
+                        }
+                        hir::RepetitionRange::Bounded(m, n) => {
+                            (m, Some(n))
+                        }
+                    };
+                    repeat_range_literals(
+                        &x.hir, min, max, x.greedy, lits, suffixes)
+                }
+            }
+        }
+        HirKind::Concat(ref es) if es.is_empty() => {}
+        HirKind::Concat(ref es) if es.len() == 1 => suffixes(&es[0], lits),
+        HirKind::Concat(ref es) => {
+            for e in es.iter().rev() {
+                if let HirKind::Anchor(hir::Anchor::EndText) = *e.kind() {
+                    if !lits.is_empty() {
+                        lits.cut();
+                        break;
+                    }
+                    lits.add(Literal::empty());
+                    continue;
+                }
+                let mut lits2 = lits.to_empty();
+                suffixes(e, &mut lits2);
+                if !lits.cross_product(&lits2) || !lits2.any_complete() {
+                    // If this expression couldn't yield any literal that
+                    // could be extended, then we need to quit. Since we're
+                    // short-circuiting, we also need to freeze every member.
+                    lits.cut();
+                    break;
+                }
+            }
+        }
+        HirKind::Alternation(ref es) => {
+            alternate_literals(es, lits, suffixes);
+        }
+        _ => lits.cut(),
+    }
+}
+
+fn repeat_zero_or_one_literals<F: FnMut(&Hir, &mut Literals)>(
+    e: &Hir,
+    lits: &mut Literals,
+    mut f: F,
+) {
+    let (mut lits2, mut lits3) = (lits.clone(), lits.to_empty());
+    lits3.set_limit_size(lits.limit_size() / 2);
+    f(e, &mut lits3);
+
+    if lits3.is_empty() || !lits2.cross_product(&lits3) {
+        lits.cut();
+        return;
+    }
+    lits2.add(Literal::empty());
+    if !lits.union(lits2) {
+        lits.cut();
+    }
+}
+
+fn repeat_zero_or_more_literals<F: FnMut(&Hir, &mut Literals)>(
+    e: &Hir,
+    lits: &mut Literals,
+    mut f: F,
+) {
+    let (mut lits2, mut lits3) = (lits.clone(), lits.to_empty());
+    lits3.set_limit_size(lits.limit_size() / 2);
+    f(e, &mut lits3);
+
+    if lits3.is_empty() || !lits2.cross_product(&lits3) {
+        lits.cut();
+        return;
+    }
+    lits2.cut();
+    lits2.add(Literal::empty());
+    if !lits.union(lits2) {
+        lits.cut();
+    }
+}
+
+fn repeat_one_or_more_literals<F: FnMut(&Hir, &mut Literals)>(
+    e: &Hir,
+    lits: &mut Literals,
+    mut f: F,
+) {
+    f(e, lits);
+    lits.cut();
+}
+
+fn repeat_range_literals<F: FnMut(&Hir, &mut Literals)>(
+    e: &Hir,
+    min: u32,
+    max: Option<u32>,
+    greedy: bool,
+    lits: &mut Literals,
+    mut f: F,
+) {
+    if min == 0 {
+        // This is a bit conservative. If `max` is set, then we could
+        // treat this as a finite set of alternations. For now, we
+        // just treat it as `e*`.
+        f(&Hir::repetition(hir::Repetition {
+            kind: hir::RepetitionKind::ZeroOrMore,
+            greedy: greedy,
+            hir: Box::new(e.clone()),
+        }), lits);
+    } else {
+        if min > 0 {
+            let n = cmp::min(lits.limit_size, min as usize);
+            let es = iter::repeat(e.clone()).take(n).collect();
+            f(&Hir::concat(es), lits);
+            if n < min as usize || lits.contains_empty() {
+                lits.cut();
+            }
+        }
+        if max.map_or(true, |max| min < max) {
+            lits.cut();
+        }
+    }
+}
+
+fn alternate_literals<F: FnMut(&Hir, &mut Literals)>(
+    es: &[Hir],
+    lits: &mut Literals,
+    mut f: F,
+) {
+    let mut lits2 = lits.to_empty();
+    for e in es {
+        let mut lits3 = lits.to_empty();
+        lits3.set_limit_size(lits.limit_size() / 5);
+        f(e, &mut lits3);
+        if lits3.is_empty() || !lits2.union(lits3) {
+            // If we couldn't find literals for *any* of the
+            // alternates, then the entire alternation has to be thrown
+            // away and any existing members must be frozen. Similarly,
+            // if the union couldn't complete, stop and freeze.
+            lits.cut();
+            return;
+        }
+    }
+    if !lits.cross_product(&lits2) {
+        lits.cut();
+    }
+}
+
+impl fmt::Debug for Literals {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        f.debug_struct("Literals")
+         .field("lits", &self.lits)
+         .field("limit_size", &self.limit_size)
+         .field("limit_class", &self.limit_class)
+         .finish()
+    }
+}
+
+impl Literal {
+    /// Returns a new complete literal with the bytes given.
+    pub fn new(bytes: Vec<u8>) -> Literal {
+        Literal { v: bytes, cut: false }
+    }
+
+    /// Returns a new complete empty literal.
+    pub fn empty() -> Literal {
+        Literal { v: vec![], cut: false }
+    }
+
+    /// Returns true if this literal was "cut."
+    pub fn is_cut(&self) -> bool {
+        self.cut
+    }
+
+    /// Cuts this literal.
+    pub fn cut(&mut self) {
+        self.cut = true;
+    }
+}
+
+impl PartialEq for Literal {
+    fn eq(&self, other: &Literal) -> bool {
+        self.v == other.v
+    }
+}
+
+impl PartialOrd for Literal {
+    fn partial_cmp(&self, other: &Literal) -> Option<cmp::Ordering> {
+        self.v.partial_cmp(&other.v)
+    }
+}
+
+impl fmt::Debug for Literal {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        if self.is_cut() {
+            write!(f, "Cut({})", escape_unicode(&self.v))
+        } else {
+            write!(f, "Complete({})", escape_unicode(&self.v))
+        }
+    }
+}
+
+impl AsRef<[u8]> for Literal {
+    fn as_ref(&self) -> &[u8] { &self.v }
+}
+
+impl ops::Deref for Literal {
+    type Target = Vec<u8>;
+    fn deref(&self) -> &Vec<u8> { &self.v }
+}
+
+impl ops::DerefMut for Literal {
+    fn deref_mut(&mut self) -> &mut Vec<u8> { &mut self.v }
+}
+
+fn position(needle: &[u8], mut haystack: &[u8]) -> Option<usize> {
+    let mut i = 0;
+    while haystack.len() >= needle.len() {
+        if needle == &haystack[..needle.len()] {
+            return Some(i);
+        }
+        i += 1;
+        haystack = &haystack[1..];
+    }
+    None
+}
+
+fn escape_unicode(bytes: &[u8]) -> String {
+    let show = match ::std::str::from_utf8(bytes) {
+        Ok(v) => v.to_string(),
+        Err(_) => escape_bytes(bytes),
+    };
+    let mut space_escaped = String::new();
+    for c in show.chars() {
+        if c.is_whitespace() {
+            let escaped = if c as u32 <= 0x7F {
+                escape_byte(c as u8)
+            } else {
+                if c as u32 <= 0xFFFF {
+                    format!(r"\u{{{:04x}}}", c as u32)
+                } else {
+                    format!(r"\U{{{:08x}}}", c as u32)
+                }
+            };
+            space_escaped.push_str(&escaped);
+        } else {
+            space_escaped.push(c);
+        }
+    }
+    space_escaped
+}
+
+fn escape_bytes(bytes: &[u8]) -> String {
+    let mut s = String::new();
+    for &b in bytes {
+        s.push_str(&escape_byte(b));
+    }
+    s
+}
+
+fn escape_byte(byte: u8) -> String {
+    use std::ascii::escape_default;
+
+    let escaped: Vec<u8> = escape_default(byte).collect();
+    String::from_utf8_lossy(&escaped).into_owned()
+}
+
+fn cls_char_count(cls: &hir::ClassUnicode) -> usize {
+    cls.iter()
+        .map(|&r| 1 + (r.end as u32) - (r.start as u32))
+        .sum::<u32>() as usize
+}
+
+fn cls_byte_count(cls: &hir::ClassBytes) -> usize {
+    cls.iter()
+        .map(|&r| 1 + (r.end as u32) - (r.start as u32))
+        .sum::<u32>() as usize
+}
+
+#[cfg(test)]
+mod tests {
+    use std::fmt;
+
+    use ParserBuilder;
+    use hir::Hir;
+    use super::{Literals, Literal, escape_bytes};
+
+    // To make test failures easier to read.
+    #[derive(Debug, Eq, PartialEq)]
+    struct Bytes(Vec<ULiteral>);
+    #[derive(Debug, Eq, PartialEq)]
+    struct Unicode(Vec<ULiteral>);
+
+    fn escape_lits(blits: &[Literal]) -> Vec<ULiteral> {
+        let mut ulits = vec![];
+        for blit in blits {
+            ulits.push(ULiteral {
+                v: escape_bytes(&blit),
+                cut: blit.is_cut(),
+            });
+        }
+        ulits
+    }
+
+    fn create_lits<I: IntoIterator<Item=Literal>>(it: I) -> Literals {
+        Literals {
+            lits: it.into_iter().collect(),
+            limit_size: 0,
+            limit_class: 0,
+        }
+    }
+
+    // Needs to be pub for 1.3?
+    #[derive(Clone, Eq, PartialEq)]
+    pub struct ULiteral {
+        v: String,
+        cut: bool,
+    }
+
+    impl ULiteral {
+        fn is_cut(&self) -> bool { self.cut }
+    }
+
+    impl fmt::Debug for ULiteral {
+        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+            if self.is_cut() {
+                write!(f, "Cut({})", self.v)
+            } else {
+                write!(f, "Complete({})", self.v)
+            }
+        }
+    }
+
+    impl PartialEq<Literal> for ULiteral {
+        fn eq(&self, other: &Literal) -> bool {
+            self.v.as_bytes() == &*other.v && self.is_cut() == other.is_cut()
+        }
+    }
+
+    impl PartialEq<ULiteral> for Literal {
+        fn eq(&self, other: &ULiteral) -> bool {
+            &*self.v == other.v.as_bytes() && self.is_cut() == other.is_cut()
+        }
+    }
+
+    #[allow(non_snake_case)]
+    fn C(s: &'static str) -> ULiteral {
+        ULiteral { v: s.to_owned(), cut: true }
+    }
+    #[allow(non_snake_case)]
+    fn M(s: &'static str) -> ULiteral {
+        ULiteral { v: s.to_owned(), cut: false }
+    }
+
+    fn prefixes(lits: &mut Literals, expr: &Hir) {
+        lits.union_prefixes(expr);
+    }
+
+    fn suffixes(lits: &mut Literals, expr: &Hir) {
+        lits.union_suffixes(expr);
+    }
+
+    macro_rules! assert_lit_eq {
+        ($which:ident, $got_lits:expr, $($expected_lit:expr),*) => {{
+            let expected: Vec<ULiteral> = vec![$($expected_lit),*];
+            let lits = $got_lits;
+            assert_eq!(
+                $which(expected.clone()),
+                $which(escape_lits(lits.literals())));
+            assert_eq!(
+                !expected.is_empty() && expected.iter().all(|l| !l.is_cut()),
+                lits.all_complete());
+            assert_eq!(
+                expected.iter().any(|l| !l.is_cut()),
+                lits.any_complete());
+        }};
+    }
+
+    macro_rules! test_lit {
+        ($name:ident, $which:ident, $re:expr) => {
+            test_lit!($name, $which, $re,);
+        };
+        ($name:ident, $which:ident, $re:expr, $($lit:expr),*) => {
+            #[test]
+            fn $name() {
+                let expr = ParserBuilder::new()
+                    .build()
+                    .parse($re)
+                    .unwrap();
+                let lits = Literals::$which(&expr);
+                assert_lit_eq!(Unicode, lits, $($lit),*);
+
+                let expr = ParserBuilder::new()
+                    .allow_invalid_utf8(true)
+                    .unicode(false)
+                    .build()
+                    .parse($re)
+                    .unwrap();
+                let lits = Literals::$which(&expr);
+                assert_lit_eq!(Bytes, lits, $($lit),*);
+            }
+        };
+    }
+
+    // ************************************************************************
+    // Tests for prefix literal extraction.
+    // ************************************************************************
+
+    // Elementary tests.
+    test_lit!(pfx_one_lit1, prefixes, "a", M("a"));
+    test_lit!(pfx_one_lit2, prefixes, "abc", M("abc"));
+    test_lit!(pfx_one_lit3, prefixes, "(?u)☃", M("\\xe2\\x98\\x83"));
+    test_lit!(pfx_one_lit4, prefixes, "(?ui)☃", M("\\xe2\\x98\\x83"));
+    test_lit!(pfx_class1, prefixes, "[1-4]",
+              M("1"), M("2"), M("3"), M("4"));
+    test_lit!(pfx_class2, prefixes, "(?u)[☃Ⅰ]",
+              M("\\xe2\\x85\\xa0"), M("\\xe2\\x98\\x83"));
+    test_lit!(pfx_class3, prefixes, "(?ui)[☃Ⅰ]",
+              M("\\xe2\\x85\\xa0"), M("\\xe2\\x85\\xb0"),
+              M("\\xe2\\x98\\x83"));
+    test_lit!(pfx_one_lit_casei1, prefixes, "(?i)a",
+              M("A"), M("a"));
+    test_lit!(pfx_one_lit_casei2, prefixes, "(?i)abc",
+              M("ABC"), M("aBC"), M("AbC"), M("abC"),
+              M("ABc"), M("aBc"), M("Abc"), M("abc"));
+    test_lit!(pfx_group1, prefixes, "(a)", M("a"));
+    test_lit!(pfx_rep_zero_or_one1, prefixes, "a?");
+    test_lit!(pfx_rep_zero_or_one2, prefixes, "(?:abc)?");
+    test_lit!(pfx_rep_zero_or_more1, prefixes, "a*");
+    test_lit!(pfx_rep_zero_or_more2, prefixes, "(?:abc)*");
+    test_lit!(pfx_rep_one_or_more1, prefixes, "a+", C("a"));
+    test_lit!(pfx_rep_one_or_more2, prefixes, "(?:abc)+", C("abc"));
+    test_lit!(pfx_rep_nested_one_or_more, prefixes, "(?:a+)+", C("a"));
+    test_lit!(pfx_rep_range1, prefixes, "a{0}");
+    test_lit!(pfx_rep_range2, prefixes, "a{0,}");
+    test_lit!(pfx_rep_range3, prefixes, "a{0,1}");
+    test_lit!(pfx_rep_range4, prefixes, "a{1}", M("a"));
+    test_lit!(pfx_rep_range5, prefixes, "a{2}", M("aa"));
+    test_lit!(pfx_rep_range6, prefixes, "a{1,2}", C("a"));
+    test_lit!(pfx_rep_range7, prefixes, "a{2,3}", C("aa"));
+
+    // Test regexes with concatenations.
+    test_lit!(pfx_cat1, prefixes, "(?:a)(?:b)", M("ab"));
+    test_lit!(pfx_cat2, prefixes, "[ab]z", M("az"), M("bz"));
+    test_lit!(pfx_cat3, prefixes, "(?i)[ab]z",
+              M("AZ"), M("BZ"), M("aZ"), M("bZ"),
+              M("Az"), M("Bz"), M("az"), M("bz"));
+    test_lit!(pfx_cat4, prefixes, "[ab][yz]",
+              M("ay"), M("by"), M("az"), M("bz"));
+    test_lit!(pfx_cat5, prefixes, "a*b", C("a"), M("b"));
+    test_lit!(pfx_cat6, prefixes, "a*b*c", C("a"), C("b"), M("c"));
+    test_lit!(pfx_cat7, prefixes, "a*b*c+", C("a"), C("b"), C("c"));
+    test_lit!(pfx_cat8, prefixes, "a*b+c", C("a"), C("b"));
+    test_lit!(pfx_cat9, prefixes, "a*b+c*", C("a"), C("b"));
+    test_lit!(pfx_cat10, prefixes, "ab*", C("ab"), M("a"));
+    test_lit!(pfx_cat11, prefixes, "ab*c", C("ab"), M("ac"));
+    test_lit!(pfx_cat12, prefixes, "ab+", C("ab"));
+    test_lit!(pfx_cat13, prefixes, "ab+c", C("ab"));
+    test_lit!(pfx_cat14, prefixes, "a^", C("a"));
+    test_lit!(pfx_cat15, prefixes, "$a");
+    test_lit!(pfx_cat16, prefixes, r"ab*c", C("ab"), M("ac"));
+    test_lit!(pfx_cat17, prefixes, r"ab+c", C("ab"));
+    test_lit!(pfx_cat18, prefixes, r"z*azb", C("z"), M("azb"));
+    test_lit!(pfx_cat19, prefixes, "a.z", C("a"));
+
+    // Test regexes with alternations.
+    test_lit!(pfx_alt1, prefixes, "a|b", M("a"), M("b"));
+    test_lit!(pfx_alt2, prefixes, "[1-3]|b", M("1"), M("2"), M("3"), M("b"));
+    test_lit!(pfx_alt3, prefixes, "y(?:a|b)z", M("yaz"), M("ybz"));
+    test_lit!(pfx_alt4, prefixes, "a|b*");
+    test_lit!(pfx_alt5, prefixes, "a|b+", M("a"), C("b"));
+    test_lit!(pfx_alt6, prefixes, "a|(?:b|c*)");
+    test_lit!(pfx_alt7, prefixes, "(a|b)*c|(a|ab)*c",
+              C("a"), C("b"), M("c"), C("a"), C("ab"), M("c"));
+    test_lit!(pfx_alt8, prefixes, "a*b|c", C("a"), M("b"), M("c"));
+
+    // Test regexes with empty assertions.
+    test_lit!(pfx_empty1, prefixes, "^a", M("a"));
+    test_lit!(pfx_empty2, prefixes, "a${2}", C("a"));
+    test_lit!(pfx_empty3, prefixes, "^abc", M("abc"));
+    test_lit!(pfx_empty4, prefixes, "(?:^abc)|(?:^z)", M("abc"), M("z"));
+
+    // Make sure some curious regexes have no prefixes.
+    test_lit!(pfx_nothing1, prefixes, ".");
+    test_lit!(pfx_nothing2, prefixes, "(?s).");
+    test_lit!(pfx_nothing3, prefixes, "^");
+    test_lit!(pfx_nothing4, prefixes, "$");
+    test_lit!(pfx_nothing6, prefixes, "(?m)$");
+    test_lit!(pfx_nothing7, prefixes, r"\b");
+    test_lit!(pfx_nothing8, prefixes, r"\B");
+
+    // Test a few regexes that defeat any prefix literal detection.
+    test_lit!(pfx_defeated1, prefixes, ".a");
+    test_lit!(pfx_defeated2, prefixes, "(?s).a");
+    test_lit!(pfx_defeated3, prefixes, "a*b*c*");
+    test_lit!(pfx_defeated4, prefixes, "a|.");
+    test_lit!(pfx_defeated5, prefixes, ".|a");
+    test_lit!(pfx_defeated6, prefixes, "a|^");
+    test_lit!(pfx_defeated7, prefixes, ".(?:a(?:b)(?:c))");
+    test_lit!(pfx_defeated8, prefixes, "$a");
+    test_lit!(pfx_defeated9, prefixes, "(?m)$a");
+    test_lit!(pfx_defeated10, prefixes, r"\ba");
+    test_lit!(pfx_defeated11, prefixes, r"\Ba");
+    test_lit!(pfx_defeated12, prefixes, "^*a");
+    test_lit!(pfx_defeated13, prefixes, "^+a");
+
+    test_lit!(
+        pfx_crazy1,
+        prefixes,
+        r"M[ou]'?am+[ae]r .*([AEae]l[- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]",
+        C("Mo\\'am"), C("Mu\\'am"), C("Moam"), C("Muam"));
+
+    // ************************************************************************
+    // Tests for quitting prefix literal search.
+    // ************************************************************************
+
+    macro_rules! test_exhausted {
+        ($name:ident, $which:ident, $re:expr) => {
+            test_exhausted!($name, $which, $re,);
+        };
+        ($name:ident, $which:ident, $re:expr, $($lit:expr),*) => {
+            #[test]
+            fn $name() {
+                let expr = ParserBuilder::new()
+                    .build()
+                    .parse($re)
+                    .unwrap();
+                let mut lits = Literals::empty();
+                lits.set_limit_size(20).set_limit_class(10);
+                $which(&mut lits, &expr);
+                assert_lit_eq!(Unicode, lits, $($lit),*);
+
+                let expr = ParserBuilder::new()
+                    .allow_invalid_utf8(true)
+                    .unicode(false)
+                    .build()
+                    .parse($re)
+                    .unwrap();
+                let mut lits = Literals::empty();
+                lits.set_limit_size(20).set_limit_class(10);
+                $which(&mut lits, &expr);
+                assert_lit_eq!(Bytes, lits, $($lit),*);
+            }
+        };
+    }
+
+    // These tests use a much lower limit than the default so that we can
+    // write test cases of reasonable size.
+    test_exhausted!(pfx_exhausted1, prefixes, "[a-z]");
+    test_exhausted!(pfx_exhausted2, prefixes, "[a-z]*A");
+    test_exhausted!(pfx_exhausted3, prefixes, "A[a-z]Z", C("A"));
+    test_exhausted!(pfx_exhausted4, prefixes, "(?i)foobar",
+                    C("FO"), C("fO"), C("Fo"), C("fo"));
+    test_exhausted!(pfx_exhausted5, prefixes, "(?:ab){100}",
+                    C("abababababababababab"));
+    test_exhausted!(pfx_exhausted6, prefixes, "(?:(?:ab){100})*cd",
+                    C("ababababab"), M("cd"));
+    test_exhausted!(pfx_exhausted7, prefixes, "z(?:(?:ab){100})*cd",
+                    C("zababababab"), M("zcd"));
+    test_exhausted!(pfx_exhausted8, prefixes, "aaaaaaaaaaaaaaaaaaaaz",
+                    C("aaaaaaaaaaaaaaaaaaaa"));
+
+    // ************************************************************************
+    // Tests for suffix literal extraction.
+    // ************************************************************************
+
+    // Elementary tests.
+    test_lit!(sfx_one_lit1, suffixes, "a", M("a"));
+    test_lit!(sfx_one_lit2, suffixes, "abc", M("abc"));
+    test_lit!(sfx_one_lit3, suffixes, "(?u)☃", M("\\xe2\\x98\\x83"));
+    test_lit!(sfx_one_lit4, suffixes, "(?ui)☃", M("\\xe2\\x98\\x83"));
+    test_lit!(sfx_class1, suffixes, "[1-4]",
+              M("1"), M("2"), M("3"), M("4"));
+    test_lit!(sfx_class2, suffixes, "(?u)[☃Ⅰ]",
+              M("\\xe2\\x85\\xa0"), M("\\xe2\\x98\\x83"));
+    test_lit!(sfx_class3, suffixes, "(?ui)[☃Ⅰ]",
+              M("\\xe2\\x85\\xa0"), M("\\xe2\\x85\\xb0"),
+              M("\\xe2\\x98\\x83"));
+    test_lit!(sfx_one_lit_casei1, suffixes, "(?i)a",
+              M("A"), M("a"));
+    test_lit!(sfx_one_lit_casei2, suffixes, "(?i)abc",
+              M("ABC"), M("ABc"), M("AbC"), M("Abc"),
+              M("aBC"), M("aBc"), M("abC"), M("abc"));
+    test_lit!(sfx_group1, suffixes, "(a)", M("a"));
+    test_lit!(sfx_rep_zero_or_one1, suffixes, "a?");
+    test_lit!(sfx_rep_zero_or_one2, suffixes, "(?:abc)?");
+    test_lit!(sfx_rep_zero_or_more1, suffixes, "a*");
+    test_lit!(sfx_rep_zero_or_more2, suffixes, "(?:abc)*");
+    test_lit!(sfx_rep_one_or_more1, suffixes, "a+", C("a"));
+    test_lit!(sfx_rep_one_or_more2, suffixes, "(?:abc)+", C("abc"));
+    test_lit!(sfx_rep_nested_one_or_more, suffixes, "(?:a+)+", C("a"));
+    test_lit!(sfx_rep_range1, suffixes, "a{0}");
+    test_lit!(sfx_rep_range2, suffixes, "a{0,}");
+    test_lit!(sfx_rep_range3, suffixes, "a{0,1}");
+    test_lit!(sfx_rep_range4, suffixes, "a{1}", M("a"));
+    test_lit!(sfx_rep_range5, suffixes, "a{2}", M("aa"));
+    test_lit!(sfx_rep_range6, suffixes, "a{1,2}", C("a"));
+    test_lit!(sfx_rep_range7, suffixes, "a{2,3}", C("aa"));
+
+    // Test regexes with concatenations.
+    test_lit!(sfx_cat1, suffixes, "(?:a)(?:b)", M("ab"));
+    test_lit!(sfx_cat2, suffixes, "[ab]z", M("az"), M("bz"));
+    test_lit!(sfx_cat3, suffixes, "(?i)[ab]z",
+              M("AZ"), M("Az"), M("BZ"), M("Bz"),
+              M("aZ"), M("az"), M("bZ"), M("bz"));
+    test_lit!(sfx_cat4, suffixes, "[ab][yz]",
+              M("ay"), M("az"), M("by"), M("bz"));
+    test_lit!(sfx_cat5, suffixes, "a*b", C("ab"), M("b"));
+    test_lit!(sfx_cat6, suffixes, "a*b*c", C("bc"), C("ac"), M("c"));
+    test_lit!(sfx_cat7, suffixes, "a*b*c+", C("c"));
+    test_lit!(sfx_cat8, suffixes, "a*b+c", C("bc"));
+    test_lit!(sfx_cat9, suffixes, "a*b+c*", C("c"), C("b"));
+    test_lit!(sfx_cat10, suffixes, "ab*", C("b"), M("a"));
+    test_lit!(sfx_cat11, suffixes, "ab*c", C("bc"), M("ac"));
+    test_lit!(sfx_cat12, suffixes, "ab+", C("b"));
+    test_lit!(sfx_cat13, suffixes, "ab+c", C("bc"));
+    test_lit!(sfx_cat14, suffixes, "a^");
+    test_lit!(sfx_cat15, suffixes, "$a", C("a"));
+    test_lit!(sfx_cat16, suffixes, r"ab*c", C("bc"), M("ac"));
+    test_lit!(sfx_cat17, suffixes, r"ab+c", C("bc"));
+    test_lit!(sfx_cat18, suffixes, r"z*azb", C("zazb"), M("azb"));
+    test_lit!(sfx_cat19, suffixes, "a.z", C("z"));
+
+    // Test regexes with alternations.
+    test_lit!(sfx_alt1, suffixes, "a|b", M("a"), M("b"));
+    test_lit!(sfx_alt2, suffixes, "[1-3]|b", M("1"), M("2"), M("3"), M("b"));
+    test_lit!(sfx_alt3, suffixes, "y(?:a|b)z", M("yaz"), M("ybz"));
+    test_lit!(sfx_alt4, suffixes, "a|b*");
+    test_lit!(sfx_alt5, suffixes, "a|b+", M("a"), C("b"));
+    test_lit!(sfx_alt6, suffixes, "a|(?:b|c*)");
+    test_lit!(sfx_alt7, suffixes, "(a|b)*c|(a|ab)*c",
+              C("ac"), C("bc"), M("c"), C("ac"), C("abc"), M("c"));
+    test_lit!(sfx_alt8, suffixes, "a*b|c", C("ab"), M("b"), M("c"));
+
+    // Test regexes with empty assertions.
+    test_lit!(sfx_empty1, suffixes, "a$", M("a"));
+    test_lit!(sfx_empty2, suffixes, "${2}a", C("a"));
+
+    // Make sure some curious regexes have no suffixes.
+    test_lit!(sfx_nothing1, suffixes, ".");
+    test_lit!(sfx_nothing2, suffixes, "(?s).");
+    test_lit!(sfx_nothing3, suffixes, "^");
+    test_lit!(sfx_nothing4, suffixes, "$");
+    test_lit!(sfx_nothing6, suffixes, "(?m)$");
+    test_lit!(sfx_nothing7, suffixes, r"\b");
+    test_lit!(sfx_nothing8, suffixes, r"\B");
+
+    // Test a few regexes that defeat any suffix literal detection.
+    test_lit!(sfx_defeated1, suffixes, "a.");
+    test_lit!(sfx_defeated2, suffixes, "(?s)a.");
+    test_lit!(sfx_defeated3, suffixes, "a*b*c*");
+    test_lit!(sfx_defeated4, suffixes, "a|.");
+    test_lit!(sfx_defeated5, suffixes, ".|a");
+    test_lit!(sfx_defeated6, suffixes, "a|^");
+    test_lit!(sfx_defeated7, suffixes, "(?:a(?:b)(?:c)).");
+    test_lit!(sfx_defeated8, suffixes, "a^");
+    test_lit!(sfx_defeated9, suffixes, "(?m)a$");
+    test_lit!(sfx_defeated10, suffixes, r"a\b");
+    test_lit!(sfx_defeated11, suffixes, r"a\B");
+    test_lit!(sfx_defeated12, suffixes, "a^*");
+    test_lit!(sfx_defeated13, suffixes, "a^+");
+
+    // These tests use a much lower limit than the default so that we can
+    // write test cases of reasonable size.
+    test_exhausted!(sfx_exhausted1, suffixes, "[a-z]");
+    test_exhausted!(sfx_exhausted2, suffixes, "A[a-z]*");
+    test_exhausted!(sfx_exhausted3, suffixes, "A[a-z]Z", C("Z"));
+    test_exhausted!(sfx_exhausted4, suffixes, "(?i)foobar",
+                    C("AR"), C("Ar"), C("aR"), C("ar"));
+    test_exhausted!(sfx_exhausted5, suffixes, "(?:ab){100}",
+                    C("abababababababababab"));
+    test_exhausted!(sfx_exhausted6, suffixes, "cd(?:(?:ab){100})*",
+                    C("ababababab"), M("cd"));
+    test_exhausted!(sfx_exhausted7, suffixes, "cd(?:(?:ab){100})*z",
+                    C("abababababz"), M("cdz"));
+    test_exhausted!(sfx_exhausted8, suffixes, "zaaaaaaaaaaaaaaaaaaaa",
+                    C("aaaaaaaaaaaaaaaaaaaa"));
+
+    // ************************************************************************
+    // Tests for generating unambiguous literal sets.
+    // ************************************************************************
+
+    macro_rules! test_unamb {
+        ($name:ident, $given:expr, $expected:expr) => {
+            #[test]
+            fn $name() {
+                let given: Vec<Literal> =
+                    $given
+                    .into_iter()
+                    .map(|ul| {
+                        let cut = ul.is_cut();
+                        Literal { v: ul.v.into_bytes(), cut: cut }
+                    })
+                    .collect();
+                let lits = create_lits(given);
+                let got = lits.unambiguous_prefixes();
+                assert_eq!($expected, escape_lits(got.literals()));
+            }
+        };
+    }
+
+    test_unamb!(unambiguous1, vec![M("z"), M("azb")], vec![C("a"), C("z")]);
+    test_unamb!(unambiguous2,
+                vec![M("zaaaaaa"), M("aa")], vec![C("aa"), C("z")]);
+    test_unamb!(unambiguous3,
+                vec![M("Sherlock"), M("Watson")],
+                vec![M("Sherlock"), M("Watson")]);
+    test_unamb!(unambiguous4, vec![M("abc"), M("bc")], vec![C("a"), C("bc")]);
+    test_unamb!(unambiguous5, vec![M("bc"), M("abc")], vec![C("a"), C("bc")]);
+    test_unamb!(unambiguous6, vec![M("a"), M("aa")], vec![C("a")]);
+    test_unamb!(unambiguous7, vec![M("aa"), M("a")], vec![C("a")]);
+    test_unamb!(unambiguous8, vec![M("ab"), M("a")], vec![C("a")]);
+    test_unamb!(unambiguous9,
+                vec![M("ac"), M("bc"), M("c"), M("ac"), M("abc"), M("c")],
+                vec![C("a"), C("b"), C("c")]);
+    test_unamb!(unambiguous10,
+                vec![M("Mo'"), M("Mu'"), M("Mo"), M("Mu")],
+                vec![C("Mo"), C("Mu")]);
+    test_unamb!(unambiguous11,
+                vec![M("zazb"), M("azb")], vec![C("a"), C("z")]);
+    test_unamb!(unambiguous12, vec![M("foo"), C("foo")], vec![C("foo")]);
+    test_unamb!(unambiguous13,
+                vec![M("ABCX"), M("CDAX"), M("BCX")],
+                vec![C("A"), C("BCX"), C("CD")]);
+    test_unamb!(unambiguous14,
+                vec![M("IMGX"), M("MVIX"), M("MGX"), M("DSX")],
+                vec![M("DSX"), C("I"), C("MGX"), C("MV")]);
+    test_unamb!(unambiguous15,
+                vec![M("IMG_"), M("MG_"), M("CIMG")],
+                vec![C("C"), C("I"), C("MG_")]);
+
+
+    // ************************************************************************
+    // Tests for suffix trimming.
+    // ************************************************************************
+    macro_rules! test_trim {
+        ($name:ident, $trim:expr, $given:expr, $expected:expr) => {
+            #[test]
+            fn $name() {
+                let given: Vec<Literal> =
+                    $given
+                    .into_iter()
+                    .map(|ul| {
+                        let cut = ul.is_cut();
+                        Literal { v: ul.v.into_bytes(), cut: cut }
+                    })
+                    .collect();
+                let lits = create_lits(given);
+                let got = lits.trim_suffix($trim).unwrap();
+                assert_eq!($expected, escape_lits(got.literals()));
+            }
+        }
+    }
+
+    test_trim!(trim1, 1, vec![M("ab"), M("yz")], vec![C("a"), C("y")]);
+    test_trim!(trim2, 1, vec![M("abc"), M("abd")], vec![C("ab")]);
+    test_trim!(trim3, 2, vec![M("abc"), M("abd")], vec![C("a")]);
+    test_trim!(trim4, 2, vec![M("abc"), M("ghij")], vec![C("a"), C("gh")]);
+
+    // ************************************************************************
+    // Tests for longest common prefix.
+    // ************************************************************************
+
+    macro_rules! test_lcp {
+        ($name:ident, $given:expr, $expected:expr) => {
+            #[test]
+            fn $name() {
+                let given: Vec<Literal> =
+                    $given
+                    .into_iter()
+                    .map(|s: &str| Literal {
+                        v: s.to_owned().into_bytes(),
+                        cut: false,
+                    })
+                    .collect();
+                let lits = create_lits(given);
+                let got = lits.longest_common_prefix();
+                assert_eq!($expected, escape_bytes(got));
+            }
+        };
+    }
+
+    test_lcp!(lcp1, vec!["a"], "a");
+    test_lcp!(lcp2, vec![], "");
+    test_lcp!(lcp3, vec!["a", "b"], "");
+    test_lcp!(lcp4, vec!["ab", "ab"], "ab");
+    test_lcp!(lcp5, vec!["ab", "a"], "a");
+    test_lcp!(lcp6, vec!["a", "ab"], "a");
+    test_lcp!(lcp7, vec!["ab", "b"], "");
+    test_lcp!(lcp8, vec!["b", "ab"], "");
+    test_lcp!(lcp9, vec!["foobar", "foobaz"], "fooba");
+    test_lcp!(lcp10, vec!["foobar", "foobaz", "a"], "");
+    test_lcp!(lcp11, vec!["a", "foobar", "foobaz"], "");
+    test_lcp!(lcp12, vec!["foo", "flub", "flab", "floo"], "f");
+
+    // ************************************************************************
+    // Tests for longest common suffix.
+    // ************************************************************************
+
+    macro_rules! test_lcs {
+        ($name:ident, $given:expr, $expected:expr) => {
+            #[test]
+            fn $name() {
+                let given: Vec<Literal> =
+                    $given
+                    .into_iter()
+                    .map(|s: &str| Literal {
+                        v: s.to_owned().into_bytes(),
+                        cut: false,
+                    })
+                    .collect();
+                let lits = create_lits(given);
+                let got = lits.longest_common_suffix();
+                assert_eq!($expected, escape_bytes(got));
+            }
+        };
+    }
+
+    test_lcs!(lcs1, vec!["a"], "a");
+    test_lcs!(lcs2, vec![], "");
+    test_lcs!(lcs3, vec!["a", "b"], "");
+    test_lcs!(lcs4, vec!["ab", "ab"], "ab");
+    test_lcs!(lcs5, vec!["ab", "a"], "");
+    test_lcs!(lcs6, vec!["a", "ab"], "");
+    test_lcs!(lcs7, vec!["ab", "b"], "b");
+    test_lcs!(lcs8, vec!["b", "ab"], "b");
+    test_lcs!(lcs9, vec!["barfoo", "bazfoo"], "foo");
+    test_lcs!(lcs10, vec!["barfoo", "bazfoo", "a"], "");
+    test_lcs!(lcs11, vec!["a", "barfoo", "bazfoo"], "");
+    test_lcs!(lcs12, vec!["flub", "bub", "boob", "dub"], "b");
+}
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-syntax/src/hir/mod.rs
@@ -0,0 +1,2055 @@
+// Copyright 2018 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+/*!
+Defines a high-level intermediate representation for regular expressions.
+*/
+use std::char;
+use std::cmp;
+use std::error;
+use std::fmt;
+use std::u8;
+
+use ast::Span;
+use hir::interval::{Interval, IntervalSet, IntervalSetIter};
+use unicode;
+
+pub use hir::visitor::{Visitor, visit};
+
+mod interval;
+pub mod literal;
+pub mod print;
+pub mod translate;
+mod visitor;
+
+/// An error that can occur while translating an `Ast` to a `Hir`.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Error {
+    /// The kind of error.
+    kind: ErrorKind,
+    /// The original pattern that the translator's Ast was parsed from. Every
+    /// span in an error is a valid range into this string.
+    pattern: String,
+    /// The span of this error, derived from the Ast given to the translator.
+    span: Span,
+}
+
+impl Error {
+    /// Return the type of this error.
+    pub fn kind(&self) -> &ErrorKind {
+        &self.kind
+    }
+
+    /// The original pattern string in which this error occurred.
+    ///
+    /// Every span reported by this error is reported in terms of this string.
+    pub fn pattern(&self) -> &str {
+        &self.pattern
+    }
+
+    /// Return the span at which this error occurred.
+    pub fn span(&self) -> &Span {
+        &self.span
+    }
+}
+
+/// The type of an error that occurred while building an `Hir`.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum ErrorKind {
+    /// This error occurs when a Unicode feature is used when Unicode
+    /// support is disabled. For example `(?-u:\pL)` would trigger this error.
+    UnicodeNotAllowed,
+    /// This error occurs when translating a pattern that could match a byte
+    /// sequence that isn't UTF-8 and `allow_invalid_utf8` was disabled.
+    InvalidUtf8,
+    /// This occurs when a Unicode property name could not be found.
+    UnicodePropertyNotFound,
+    /// This occurs when a Unicode property value could not be found.
+    UnicodePropertyValueNotFound,
+    /// This occurs when the translator attempts to construct a character class
+    /// that is empty.
+    ///
+    /// Note that this restriction in the translator may be removed in the
+    /// future.
+    EmptyClassNotAllowed,
+    /// Hints that destructuring should not be exhaustive.
+    ///
+    /// This enum may grow additional variants, so this makes sure clients
+    /// don't count on exhaustive matching. (Otherwise, adding a new variant
+    /// could break existing code.)
+    #[doc(hidden)]
+    __Nonexhaustive,
+}
+
+impl ErrorKind {
+    fn description(&self) -> &str {
+        use self::ErrorKind::*;
+        match *self {
+            UnicodeNotAllowed => "Unicode not allowed here",
+            InvalidUtf8 => "pattern can match invalid UTF-8",
+            UnicodePropertyNotFound => "Unicode property not found",
+            UnicodePropertyValueNotFound => "Unicode property value not found",
+            EmptyClassNotAllowed => "empty character classes are not allowed",
+            _ => unreachable!(),
+        }
+    }
+}
+
+impl error::Error for Error {
+    fn description(&self) -> &str {
+        self.kind.description()
+    }
+}
+
+impl fmt::Display for Error {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        ::error::Formatter::from(self).fmt(f)
+    }
+}
+
+impl fmt::Display for ErrorKind {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        f.write_str(self.description())
+    }
+}
+
+/// A high-level intermediate representation (HIR) for a regular expression.
+///
+/// The HIR of a regular expression represents an intermediate step between its
+/// abstract syntax (a structured description of the concrete syntax) and
+/// compiled byte codes. The purpose of HIR is to make regular expressions
+/// easier to analyze. In particular, the AST is much more complex than the
+/// HIR. For example, while an AST supports arbitrarily nested character
+/// classes, the HIR will flatten all nested classes into a single set. The HIR
+/// will also "compile away" every flag present in the concrete syntax. For
+/// example, users of HIR expressions never need to worry about case folding;
+/// it is handled automatically by the translator (e.g., by translating `(?i)A`
+/// to `[aA]`).
+///
+/// If the HIR was produced by a translator that disallows invalid UTF-8, then
+/// the HIR is guaranteed to match UTF-8 exclusively.
+///
+/// This type defines its own destructor that uses constant stack space and
+/// heap space proportional to the size of the HIR.
+///
+/// The specific type of an HIR expression can be accessed via its `kind`
+/// or `into_kind` methods. This extra level of indirection exists for two
+/// reasons:
+///
+/// 1. Construction of an HIR expression *must* use the constructor methods
+///    on this `Hir` type instead of building the `HirKind` values directly.
+///    This permits construction to enforce invariants like "concatenations
+///    always consist of two or more sub-expressions."
+/// 2. Every HIR expression contains attributes that are defined inductively,
+///    and can be computed cheaply during the construction process. For
+///    example, one such attribute is whether the expression must match at the
+///    beginning of the text.
+///
+/// Also, an `Hir`'s `fmt::Display` implementation prints an HIR as a regular
+/// expression pattern string, and uses constant stack space and heap space
+/// proportional to the size of the `Hir`.
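+///
+/// # Example
+///
+/// A short sketch, added here for illustration (it is not part of the
+/// upstream docs); it assumes the crate's top-level `Parser` with its
+/// default configuration:
+///
+/// ```
+/// use regex_syntax::Parser;
+/// use regex_syntax::hir::HirKind;
+///
+/// let hir = Parser::new().parse("a").unwrap();
+/// match *hir.kind() {
+///     HirKind::Literal(_) => {} // a single character parses to a literal
+///     _ => unreachable!(),
+/// }
+/// ```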
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Hir {
+    /// The underlying HIR kind.
+    kind: HirKind,
+    /// Analysis info about this HIR, computed during construction.
+    info: HirInfo,
+}
+
+/// The kind of an arbitrary `Hir` expression.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum HirKind {
+    /// The empty regular expression, which matches everything, including the
+    /// empty string.
+    Empty,
+    /// A single literal character that matches exactly this character.
+    Literal(Literal),
+    /// A single character class that matches any of the characters in the
+    /// class. A class can either consist of Unicode scalar values as
+    /// characters, or it can use bytes.
+    Class(Class),
+    /// An anchor assertion. An anchor assertion match always has zero length.
+    Anchor(Anchor),
+    /// A word boundary assertion, which may or may not be Unicode aware. A
+    /// word boundary assertion match always has zero length.
+    WordBoundary(WordBoundary),
+    /// A repetition operation applied to a child expression.
+    Repetition(Repetition),
+    /// A possibly capturing group, which contains a child expression.
+    Group(Group),
+    /// A concatenation of expressions. A concatenation always has at least two
+    /// child expressions.
+    ///
+    /// A concatenation matches only if each of its child expressions matches
+    /// one after the other.
+    Concat(Vec<Hir>),
+    /// An alternation of expressions. An alternation always has at least two
+    /// child expressions.
+    ///
+    /// An alternation matches only if at least one of its child expressions
+    /// matches. If multiple expressions match, then the leftmost is preferred.
+    Alternation(Vec<Hir>),
+}
+
+impl Hir {
+    /// Returns a reference to the underlying HIR kind.
+    pub fn kind(&self) -> &HirKind {
+        &self.kind
+    }
+
+    /// Consumes ownership of this HIR expression and returns its underlying
+    /// `HirKind`.
+    pub fn into_kind(mut self) -> HirKind {
+        use std::mem;
+        mem::replace(&mut self.kind, HirKind::Empty)
+    }
+
+    /// Returns an empty HIR expression.
+    ///
+    /// An empty HIR expression always matches, including the empty string.
+    pub fn empty() -> Hir {
+        let mut info = HirInfo::new();
+        info.set_always_utf8(true);
+        info.set_all_assertions(true);
+        info.set_anchored_start(false);
+        info.set_anchored_end(false);
+        info.set_any_anchored_start(false);
+        info.set_any_anchored_end(false);
+        info.set_match_empty(true);
+        Hir {
+            kind: HirKind::Empty,
+            info: info,
+        }
+    }
+
+    /// Creates a literal HIR expression.
+    ///
+    /// If the given literal has a `Byte` variant with an ASCII byte, then this
+    /// method panics. This enforces the invariant that `Byte` variants are
+    /// only used to express matching of invalid UTF-8.
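+    ///
+    /// # Example
+    ///
+    /// A minimal sketch of the accepted and rejected literal forms (crate
+    /// path assumed to be `regex_syntax`):
+    ///
+    /// ```
+    /// use regex_syntax::hir::{Hir, Literal};
+    ///
+    /// // A Unicode scalar value or a non-ASCII byte is accepted; an ASCII
+    /// // byte such as `Literal::Byte(b'a')` would panic, per the invariant
+    /// // described above.
+    /// let _ = Hir::literal(Literal::Unicode('a'));
+    /// let _ = Hir::literal(Literal::Byte(0xFF));
+    /// ```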
+    pub fn literal(lit: Literal) -> Hir {
+        if let Literal::Byte(b) = lit {
+            assert!(b > 0x7F);
+        }
+
+        let mut info = HirInfo::new();
+        info.set_always_utf8(lit.is_unicode());
+        info.set_all_assertions(false);
+        info.set_anchored_start(false);
+        info.set_anchored_end(false);
+        info.set_any_anchored_start(false);
+        info.set_any_anchored_end(false);
+        info.set_match_empty(false);
+        Hir {
+            kind: HirKind::Literal(lit),
+            info: info,
+        }
+    }
+
+    /// Creates a class HIR expression.
+    pub fn class(class: Class) -> Hir {
+        let mut info = HirInfo::new();
+        info.set_always_utf8(class.is_always_utf8());
+        info.set_all_assertions(false);
+        info.set_anchored_start(false);
+        info.set_anchored_end(false);
+        info.set_any_anchored_start(false);
+        info.set_any_anchored_end(false);
+        info.set_match_empty(false);
+        Hir {
+            kind: HirKind::Class(class),
+            info: info,
+        }
+    }
+
+    /// Creates an anchor assertion HIR expression.
+    pub fn anchor(anchor: Anchor) -> Hir {
+        let mut info = HirInfo::new();
+        info.set_always_utf8(true);
+        info.set_all_assertions(true);
+        info.set_anchored_start(false);
+        info.set_anchored_end(false);
+        info.set_any_anchored_start(false);
+        info.set_any_anchored_end(false);
+        info.set_match_empty(true);
+        if let Anchor::StartText = anchor {
+            info.set_anchored_start(true);
+            info.set_any_anchored_start(true);
+        }
+        if let Anchor::EndText = anchor {
+            info.set_anchored_end(true);
+            info.set_any_anchored_end(true);
+        }
+        Hir {
+            kind: HirKind::Anchor(anchor),
+            info: info,
+        }
+    }
+
+    /// Creates a word boundary assertion HIR expression.
+    pub fn word_boundary(word_boundary: WordBoundary) -> Hir {
+        let mut info = HirInfo::new();
+        info.set_always_utf8(true);
+        info.set_all_assertions(true);
+        info.set_anchored_start(false);
+        info.set_anchored_end(false);
+        info.set_any_anchored_start(false);
+        info.set_any_anchored_end(false);
+        // A negated word boundary matches the empty string, but a normal
+        // word boundary does not!
+        info.set_match_empty(word_boundary.is_negated());
+        // Negated ASCII word boundaries can match invalid UTF-8.
+        if let WordBoundary::AsciiNegate = word_boundary {
+            info.set_always_utf8(false);
+        }
+        Hir {
+            kind: HirKind::WordBoundary(word_boundary),
+            info: info,
+        }
+    }
+
+    /// Creates a repetition HIR expression.
+    pub fn repetition(rep: Repetition) -> Hir {
+        let mut info = HirInfo::new();
+        info.set_always_utf8(rep.hir.is_always_utf8());
+        info.set_all_assertions(rep.hir.is_all_assertions());
+        // If this operator can match the empty string, then it can never
+        // be anchored.
+        info.set_anchored_start(
+            !rep.is_match_empty() && rep.hir.is_anchored_start()
+        );
+        info.set_anchored_end(
+            !rep.is_match_empty() && rep.hir.is_anchored_end()
+        );
+        info.set_any_anchored_start(rep.hir.is_any_anchored_start());
+        info.set_any_anchored_end(rep.hir.is_any_anchored_end());
+        info.set_match_empty(rep.is_match_empty() || rep.hir.is_match_empty());
+        Hir {
+            kind: HirKind::Repetition(rep),
+            info: info,
+        }
+    }
+
+    /// Creates a group HIR expression.
+    pub fn group(group: Group) -> Hir {
+        let mut info = HirInfo::new();
+        info.set_always_utf8(group.hir.is_always_utf8());
+        info.set_all_assertions(group.hir.is_all_assertions());
+        info.set_anchored_start(group.hir.is_anchored_start());
+        info.set_anchored_end(group.hir.is_anchored_end());
+        info.set_any_anchored_start(group.hir.is_any_anchored_start());
+        info.set_any_anchored_end(group.hir.is_any_anchored_end());
+        info.set_match_empty(group.hir.is_match_empty());
+        Hir {
+            kind: HirKind::Group(group),
+            info: info,
+        }
+    }
+
+    /// Returns the concatenation of the given expressions.
+    ///
+    /// This flattens the concatenation as appropriate.
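+    ///
+    /// # Example
+    ///
+    /// A minimal sketch of the zero- and one-element cases (crate path
+    /// assumed to be `regex_syntax`):
+    ///
+    /// ```
+    /// use regex_syntax::hir::{Hir, Literal};
+    ///
+    /// // Zero or one sub-expressions collapse instead of producing a
+    /// // degenerate concatenation.
+    /// assert_eq!(Hir::empty(), Hir::concat(vec![]));
+    /// let a = Hir::literal(Literal::Unicode('a'));
+    /// assert_eq!(a.clone(), Hir::concat(vec![a]));
+    /// ```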
+    pub fn concat(mut exprs: Vec<Hir>) -> Hir {
+        match exprs.len() {
+            0 => Hir::empty(),
+            1 => exprs.pop().unwrap(),
+            _ => {
+                let mut info = HirInfo::new();
+                info.set_always_utf8(true);
+                info.set_all_assertions(true);
+                info.set_any_anchored_start(false);
+                info.set_any_anchored_end(false);
+                info.set_match_empty(true);
+
+                // Some attributes require analyzing all sub-expressions.
+                for e in &exprs {
+                    let x = info.is_always_utf8() && e.is_always_utf8();
+                    info.set_always_utf8(x);
+
+                    let x = info.is_all_assertions() && e.is_all_assertions();
+                    info.set_all_assertions(x);
+
+                    let x =
+                        info.is_any_anchored_start()
+                        || e.is_any_anchored_start();
+                    info.set_any_anchored_start(x);
+
+                    let x =
+                        info.is_any_anchored_end()
+                        || e.is_any_anchored_end();
+                    info.set_any_anchored_end(x);
+
+                    let x = info.is_match_empty() && e.is_match_empty();
+                    info.set_match_empty(x);
+                }
+                // Anchored attributes require something slightly more
+                // sophisticated. Normally, WLOG, to determine whether an
+                // expression is anchored to the start, we'd only need to check
+                // the first expression of a concatenation. However,
+                // expressions like `$\b^` are still anchored to the start,
+                // but the first expression in the concatenation *isn't*
+                // anchored to the start. So the "first" expression to look at
+                // is actually one that is either not an assertion or is
+                // specifically the StartText assertion.
+                info.set_anchored_start(
+                    exprs.iter()
+                        .take_while(|e| {
+                            e.is_anchored_start() || e.is_all_assertions()
+                        })
+                        .any(|e| {
+                            e.is_anchored_start()
+                        }));
+                // Similarly for the end anchor, but in reverse.
+                info.set_anchored_end(
+                    exprs.iter()
+                        .rev()
+                        .take_while(|e| {
+                            e.is_anchored_end() || e.is_all_assertions()
+                        })
+                        .any(|e| {
+                            e.is_anchored_end()
+                        }));
+                Hir {
+                    kind: HirKind::Concat(exprs),
+                    info: info,
+                }
+            }
+        }
+    }
+
+    /// Returns the alternation of the given expressions.
+    ///
+    /// This flattens the alternation as appropriate.
+    pub fn alternation(mut exprs: Vec<Hir>) -> Hir {
+        match exprs.len() {
+            0 => Hir::empty(),
+            1 => exprs.pop().unwrap(),
+            _ => {
+                let mut info = HirInfo::new();
+                info.set_always_utf8(true);
+                info.set_all_assertions(true);
+                info.set_anchored_start(true);
+                info.set_anchored_end(true);
+                info.set_any_anchored_start(false);
+                info.set_any_anchored_end(false);
+                info.set_match_empty(false);
+
+                // Some attributes require analyzing all sub-expressions.
+                for e in &exprs {
+                    let x = info.is_always_utf8() && e.is_always_utf8();
+                    info.set_always_utf8(x);
+
+                    let x = info.is_all_assertions() && e.is_all_assertions();
+                    info.set_all_assertions(x);
+
+                    let x = info.is_anchored_start() && e.is_anchored_start();
+                    info.set_anchored_start(x);
+
+                    let x = info.is_anchored_end() && e.is_anchored_end();
+                    info.set_anchored_end(x);
+
+                    let x =
+                        info.is_any_anchored_start()
+                        || e.is_any_anchored_start();
+                    info.set_any_anchored_start(x);
+
+                    let x =
+                        info.is_any_anchored_end()
+                        || e.is_any_anchored_end();
+                    info.set_any_anchored_end(x);
+
+                    let x = info.is_match_empty() || e.is_match_empty();
+                    info.set_match_empty(x);
+                }
+                Hir {
+                    kind: HirKind::Alternation(exprs),
+                    info: info,
+                }
+            }
+        }
+    }
+
+    /// Build an HIR expression for `.`.
+    ///
+    /// A `.` expression matches any character except for `\n`. To build an
+    /// expression that matches any character, including `\n`, use the `any`
+    /// method.
+    ///
+    /// If `bytes` is `true`, then this assumes characters are limited to a
+    /// single byte.
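+    ///
+    /// # Example
+    ///
+    /// A minimal sketch contrasting the Unicode and byte modes (crate path
+    /// assumed to be `regex_syntax`):
+    ///
+    /// ```
+    /// use regex_syntax::hir::Hir;
+    ///
+    /// // In Unicode mode, `.` is a class of scalar values and can only match
+    /// // valid UTF-8; in byte mode it may match arbitrary bytes.
+    /// assert!(Hir::dot(false).is_always_utf8());
+    /// assert!(!Hir::dot(true).is_always_utf8());
+    /// ```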
+    pub fn dot(bytes: bool) -> Hir {
+        if bytes {
+            let mut cls = ClassBytes::empty();
+            cls.push(ClassBytesRange::new(b'\0', b'\x09'));
+            cls.push(ClassBytesRange::new(b'\x0B', b'\xFF'));
+            Hir::class(Class::Bytes(cls))
+        } else {
+            let mut cls = ClassUnicode::empty();
+            cls.push(ClassUnicodeRange::new('\0', '\x09'));
+            cls.push(ClassUnicodeRange::new('\x0B', '\u{10FFFF}'));
+            Hir::class(Class::Unicode(cls))
+        }
+    }
+
+    /// Build an HIR expression for `(?s).`.
+    ///
+    /// A `(?s).` expression matches any character, including `\n`. To build an
+    /// expression that matches any character except for `\n`, use the
+    /// `dot` method.
+    ///
+    /// If `bytes` is `true`, then this assumes characters are limited to a
+    /// single byte.
+    pub fn any(bytes: bool) -> Hir {
+        if bytes {
+            let mut cls = ClassBytes::empty();
+            cls.push(ClassBytesRange::new(b'\0', b'\xFF'));
+            Hir::class(Class::Bytes(cls))
+        } else {
+            let mut cls = ClassUnicode::empty();
+            cls.push(ClassUnicodeRange::new('\0', '\u{10FFFF}'));
+            Hir::class(Class::Unicode(cls))
+        }
+    }
+
+    /// Return true if and only if this HIR will always match valid UTF-8.
+    ///
+    /// When this returns false, then it is possible for this HIR expression
+    /// to match invalid UTF-8.
+    pub fn is_always_utf8(&self) -> bool {
+        self.info.is_always_utf8()
+    }
+
+    /// Returns true if and only if this entire HIR expression is made up of
+    /// zero-width assertions.
+    ///
+    /// This includes expressions like `^$\b\A\z` and even `((\b)+())*^`, but
+    /// not `^a`.
+    pub fn is_all_assertions(&self) -> bool {
+        self.info.is_all_assertions()
+    }
+
+    /// Return true if and only if this HIR is required to match from the
+    /// beginning of text. This includes expressions like `^foo`, `^(foo|bar)`,
+    /// `^foo|^bar` but not `^foo|bar`.
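+    ///
+    /// # Example
+    ///
+    /// A minimal sketch mirroring the `^foo|^bar` versus `^foo|bar` cases
+    /// above (crate path assumed to be `regex_syntax`):
+    ///
+    /// ```
+    /// use regex_syntax::hir::{Anchor, Hir, Literal};
+    ///
+    /// let a = || Hir::literal(Literal::Unicode('a'));
+    /// let b = || Hir::literal(Literal::Unicode('b'));
+    /// let caret = || Hir::anchor(Anchor::StartText);
+    /// // `^a|^b` is anchored at the start; `^a|b` is not, because its second
+    /// // branch can match without the anchor.
+    /// let anchored = Hir::alternation(vec![
+    ///     Hir::concat(vec![caret(), a()]),
+    ///     Hir::concat(vec![caret(), b()]),
+    /// ]);
+    /// let unanchored = Hir::alternation(vec![
+    ///     Hir::concat(vec![caret(), a()]),
+    ///     b(),
+    /// ]);
+    /// assert!(anchored.is_anchored_start());
+    /// assert!(!unanchored.is_anchored_start());
+    /// ```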
+    pub fn is_anchored_start(&self) -> bool {
+        self.info.is_anchored_start()
+    }
+
+    /// Return true if and only if this HIR is required to match at the end
+    /// of text. This includes expressions like `foo$`, `(foo|bar)$`,
+    /// `foo$|bar$` but not `foo$|bar`.
+    pub fn is_anchored_end(&self) -> bool {
+        self.info.is_anchored_end()
+    }
+
+    /// Return true if and only if this HIR contains any sub-expression that
+    /// is required to match at the beginning of text. Specifically, this
+    /// returns true if the `^` symbol (when multiline mode is disabled) or the
+    /// `\A` escape appear anywhere in the regex.
+    pub fn is_any_anchored_start(&self) -> bool {
+        self.info.is_any_anchored_start()
+    }
+
+    /// Return true if and only if this HIR contains any sub-expression that is
+    /// required to match at the end of text. Specifically, this returns true
+    /// if the `$` symbol (when multiline mode is disabled) or the `\z` escape
+    /// appear anywhere in the regex.
+    pub fn is_any_anchored_end(&self) -> bool {
+        self.info.is_any_anchored_end()
+    }
+
+    /// Return true if and only if the empty string is part of the language
+    /// matched by this regular expression.
+    ///
+    /// This includes `a*`, `a?b*`, `a{0}`, `()`, `()+`, `^$`, `a|b?`, `\B`,
+    /// but not `a`, `a+` or `\b`.
+    pub fn is_match_empty(&self) -> bool {
+        self.info.is_match_empty()
+    }
+}
+
+impl HirKind {
+    /// Return true if and only if this HIR is the empty regular expression.
+    ///
+    /// Note that this is not defined inductively. That is, it only tests if
+    /// this kind is the `Empty` variant. To get the inductive definition,
+    /// use the `is_match_empty` method on [`Hir`](struct.Hir.html).
+    pub fn is_empty(&self) -> bool {
+        match *self {
+            HirKind::Empty => true,
+            _ => false,
+        }
+    }
+
+    /// Returns true if and only if this kind has any (including possibly
+    /// empty) subexpressions.
+    pub fn has_subexprs(&self) -> bool {
+        match *self {
+            HirKind::Empty
+            | HirKind::Literal(_)
+            | HirKind::Class(_)
+            | HirKind::Anchor(_)
+            | HirKind::WordBoundary(_) => false,
+            HirKind::Group(_)
+            | HirKind::Repetition(_)
+            | HirKind::Concat(_)
+            | HirKind::Alternation(_) => true,
+        }
+    }
+}
+
+/// Print a display representation of this Hir.
+///
+/// The result of this is a valid regular expression pattern string.
+///
+/// This implementation uses constant stack space and heap space proportional
+/// to the size of the `Hir`.
+impl fmt::Display for Hir {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        use hir::print::Printer;
+        Printer::new().print(self, f)
+    }
+}
+
+/// The high-level intermediate representation of a literal.
+///
+/// A literal corresponds to a single character, where a character is either
+/// defined by a Unicode scalar value or an arbitrary byte. Unicode characters
+/// are preferred whenever possible. In particular, a `Byte` variant is only
+/// ever produced when it could match invalid UTF-8.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum Literal {
+    /// A single character represented by a Unicode scalar value.
+    Unicode(char),
+    /// A single character represented by an arbitrary byte.
+    Byte(u8),
+}
+
+impl Literal {
+    /// Returns true if and only if this literal corresponds to a Unicode
+    /// scalar value.
+    pub fn is_unicode(&self) -> bool {
+        match *self {
+            Literal::Unicode(_) => true,
+            Literal::Byte(b) if b <= 0x7F => true,
+            Literal::Byte(_) => false,
+        }
+    }
+}
+
+/// The high-level intermediate representation of a character class.
+///
+/// A character class corresponds to a set of characters. A character is either
+/// defined by a Unicode scalar value or a byte. Unicode characters are used
+/// by default, while bytes are used when Unicode mode (via the `u` flag) is
+/// disabled.
+///
+/// A character class, regardless of its character type, is represented by a
+/// sequence of non-overlapping non-adjacent ranges of characters.
+///
+/// Note that unlike [`Literal`](enum.Literal.html), a `Bytes` variant may
+/// be produced even when it exclusively matches valid UTF-8. This is because
+/// a `Bytes` variant represents an intention by the author of the regular
+/// expression to disable Unicode mode, which in turn impacts the semantics of
+/// case insensitive matching. For example, `(?i)k` and `(?i-u)k` will not
+/// match the same set of strings.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum Class {
+    /// A set of characters represented by Unicode scalar values.
+    Unicode(ClassUnicode),
+    /// A set of characters represented by arbitrary bytes (one byte per
+    /// character).
+    Bytes(ClassBytes),
+}
+
+impl Class {
+    /// Apply Unicode simple case folding to this character class, in place.
+    /// The character class will be expanded to include all simple case folded
+    /// character variants.
+    ///
+    /// If this is a byte oriented character class, then this will be limited
+    /// to the ASCII ranges `A-Z` and `a-z`.
+    pub fn case_fold_simple(&mut self) {
+        match *self {
+            Class::Unicode(ref mut x) => x.case_fold_simple(),
+            Class::Bytes(ref mut x) => x.case_fold_simple(),
+        }
+    }
+
+    /// Negate this character class in place.
+    ///
+    /// After completion, this character class will contain precisely the
+    /// characters that weren't previously in the class.
+    pub fn negate(&mut self) {
+        match *self {
+            Class::Unicode(ref mut x) => x.negate(),
+            Class::Bytes(ref mut x) => x.negate(),
+        }
+    }
+
+    /// Returns true if and only if this character class will only ever match
+    /// valid UTF-8.
+    ///
+    /// A character class can match invalid UTF-8 only when the following
+    /// conditions are met:
+    ///
+    /// 1. The translator was configured to permit generating an expression
+    ///    that can match invalid UTF-8. (By default, this is disabled.)
+    /// 2. Unicode mode (via the `u` flag) was disabled either in the concrete
+    ///    syntax or in the parser builder. By default, Unicode mode is
+    ///    enabled.
+    pub fn is_always_utf8(&self) -> bool {
+        match *self {
+            Class::Unicode(_) => true,
+            Class::Bytes(ref x) => x.is_all_ascii(),
+        }
+    }
+}
+
+/// A set of characters represented by Unicode scalar values.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct ClassUnicode {
+    set: IntervalSet<ClassUnicodeRange>,
+}
+
+impl ClassUnicode {
+    /// Create a new class from a sequence of ranges.
+    ///
+    /// The given ranges do not need to be in any specific order, and ranges
+    /// may overlap.
+    pub fn new<I>(ranges: I) -> ClassUnicode
+    where I: IntoIterator<Item=ClassUnicodeRange>
+    {
+        ClassUnicode { set: IntervalSet::new(ranges) }
+    }
+
+    /// Create a new class with no ranges.
+    pub fn empty() -> ClassUnicode {
+        ClassUnicode::new(vec![])
+    }
+
+    /// Add a new range to this set.
+    pub fn push(&mut self, range: ClassUnicodeRange) {
+        self.set.push(range);
+    }
+
+    /// Return an iterator over all ranges in this class.
+    ///
+    /// The iterator yields ranges in ascending order.
+    pub fn iter(&self) -> ClassUnicodeIter {
+        ClassUnicodeIter(self.set.iter())
+    }
+
+    /// Return the underlying ranges as a slice.
+    pub fn ranges(&self) -> &[ClassUnicodeRange] {
+        self.set.intervals()
+    }
+
+    /// Expand this character class such that it contains all case folded
+    /// characters, according to Unicode's "simple" mapping. For example, if
+    /// this class consists of the range `a-z`, then applying case folding will
+    /// result in the class containing both the ranges `a-z` and `A-Z`.
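+    ///
+    /// # Example
+    ///
+    /// A minimal sketch of folding `k` (crate path assumed to be
+    /// `regex_syntax`):
+    ///
+    /// ```
+    /// use regex_syntax::hir::{ClassUnicode, ClassUnicodeRange};
+    ///
+    /// // Folding `k` also pulls in `K` and the Kelvin sign U+212A.
+    /// let mut cls = ClassUnicode::new(vec![ClassUnicodeRange::new('k', 'k')]);
+    /// cls.case_fold_simple();
+    /// let ranges: Vec<(char, char)> =
+    ///     cls.iter().map(|r| (r.start(), r.end())).collect();
+    /// assert_eq!(vec![('K', 'K'), ('k', 'k'), ('\u{212A}', '\u{212A}')], ranges);
+    /// ```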
+    pub fn case_fold_simple(&mut self) {
+        self.set.case_fold_simple();
+    }
+
+    /// Negate this character class.
+    ///
+    /// For all `c` where `c` is a Unicode scalar value, if `c` was in this
+    /// set, then it will not be in this set after negation.
+    pub fn negate(&mut self) {
+        self.set.negate();
+    }
+
+    /// Union this character class with the given character class, in place.
+    pub fn union(&mut self, other: &ClassUnicode) {
+        self.set.union(&other.set);
+    }
+
+    /// Intersect this character class with the given character class, in
+    /// place.
+    pub fn intersect(&mut self, other: &ClassUnicode) {
+        self.set.intersect(&other.set);
+    }
+
+    /// Subtract the given character class from this character class, in place.
+    pub fn difference(&mut self, other: &ClassUnicode) {
+        self.set.difference(&other.set);
+    }
+
+    /// Compute the symmetric difference of the given character classes, in
+    /// place.
+    ///
+    /// This computes the symmetric difference of two character classes. This
+    /// removes all elements in this class that are also in the given class,
+    /// but also adds all elements from the given class that aren't in this
+    /// class. That is, the class will contain all elements in either class,
+    /// but will not contain any elements that are in both classes.
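+    ///
+    /// # Example
+    ///
+    /// A minimal sketch on two overlapping ranges (crate path assumed to be
+    /// `regex_syntax`):
+    ///
+    /// ```
+    /// use regex_syntax::hir::{ClassUnicode, ClassUnicodeRange};
+    ///
+    /// let mut a = ClassUnicode::new(vec![ClassUnicodeRange::new('a', 'c')]);
+    /// let b = ClassUnicode::new(vec![ClassUnicodeRange::new('b', 'd')]);
+    /// a.symmetric_difference(&b);
+    /// // Only `a` and `d` appear in exactly one of the two classes.
+    /// let ranges: Vec<(char, char)> =
+    ///     a.iter().map(|r| (r.start(), r.end())).collect();
+    /// assert_eq!(vec![('a', 'a'), ('d', 'd')], ranges);
+    /// ```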
+    pub fn symmetric_difference(&mut self, other: &ClassUnicode) {
+        self.set.symmetric_difference(&other.set);
+    }
+}
+
+/// An iterator over all ranges in a Unicode character class.
+///
+/// The lifetime `'a` refers to the lifetime of the underlying class.
+#[derive(Debug)]
+pub struct ClassUnicodeIter<'a>(IntervalSetIter<'a, ClassUnicodeRange>);
+
+impl<'a> Iterator for ClassUnicodeIter<'a> {
+    type Item = &'a ClassUnicodeRange;
+
+    fn next(&mut self) -> Option<&'a ClassUnicodeRange> {
+        self.0.next()
+    }
+}
+
+/// A single range of characters represented by Unicode scalar values.
+///
+/// The range is closed. That is, the start and end of the range are included
+/// in the range.
+#[derive(Clone, Copy, Default, Eq, PartialEq, PartialOrd, Ord)]
+pub struct ClassUnicodeRange {
+    start: char,
+    end: char,
+}
+
+impl fmt::Debug for ClassUnicodeRange {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let start =
+            if !self.start.is_whitespace() && !self.start.is_control() {
+                self.start.to_string()
+            } else {
+                format!("0x{:X}", self.start as u32)
+            };
+        let end =
+            if !self.end.is_whitespace() && !self.end.is_control() {
+                self.end.to_string()
+            } else {
+                format!("0x{:X}", self.end as u32)
+            };
+        f.debug_struct("ClassUnicodeRange")
+         .field("start", &start)
+         .field("end", &end)
+         .finish()
+    }
+}
+
+impl Interval for ClassUnicodeRange {
+    type Bound = char;
+
+    #[inline] fn lower(&self) -> char { self.start }
+    #[inline] fn upper(&self) -> char { self.end }
+    #[inline] fn set_lower(&mut self, bound: char) { self.start = bound; }
+    #[inline] fn set_upper(&mut self, bound: char) { self.end = bound; }
+
+    /// Apply simple case folding to this Unicode scalar value range.
+    ///
+    /// Additional ranges are appended to the given vector. Canonical ordering
+    /// is *not* maintained in the given vector.
+    fn case_fold_simple(&self, ranges: &mut Vec<ClassUnicodeRange>) {
+        if !unicode::contains_simple_case_mapping(self.start, self.end) {
+            return;
+        }
+        let start = self.start as u32;
+        let end = (self.end as u32).saturating_add(1);
+        let mut next_simple_cp = None;
+        for cp in (start..end).filter_map(char::from_u32) {
+            if next_simple_cp.map_or(false, |next| cp < next) {
+                continue;
+            }
+            let it = match unicode::simple_fold(cp) {
+                Ok(it) => it,
+                Err(next) => {
+                    next_simple_cp = next;
+                    continue;
+                }
+            };
+            for cp_folded in it {
+                ranges.push(ClassUnicodeRange::new(cp_folded, cp_folded));
+            }
+        }
+    }
+}
+
+impl ClassUnicodeRange {
+    /// Create a new Unicode scalar value range for a character class.
+    ///
+    /// The returned range is always in a canonical form. That is, the range
+    /// returned always satisfies the invariant that `start <= end`.
+    pub fn new(start: char, end: char) -> ClassUnicodeRange {
+        ClassUnicodeRange::create(start, end)
+    }
+
+    /// Return the start of this range.
+    ///
+    /// The start of a range is always less than or equal to the end of the
+    /// range.
+    pub fn start(&self) -> char {
+        self.start
+    }
+
+    /// Return the end of this range.
+    ///
+    /// The end of a range is always greater than or equal to the start of the
+    /// range.
+    pub fn end(&self) -> char {
+        self.end
+    }
+}
+
+/// A set of characters represented by arbitrary bytes (where one byte
+/// corresponds to one character).
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct ClassBytes {
+    set: IntervalSet<ClassBytesRange>,
+}
+
+impl ClassBytes {
+    /// Create a new class from a sequence of ranges.
+    ///
+    /// The given ranges do not need to be in any specific order, and ranges
+    /// may overlap.
+    pub fn new<I>(ranges: I) -> ClassBytes
+    where I: IntoIterator<Item=ClassBytesRange>
+    {
+        ClassBytes { set: IntervalSet::new(ranges) }
+    }
+
+    /// Create a new class with no ranges.
+    pub fn empty() -> ClassBytes {
+        ClassBytes::new(vec![])
+    }
+
+    /// Add a new range to this set.
+    pub fn push(&mut self, range: ClassBytesRange) {
+        self.set.push(range);
+    }
+
+    /// Return an iterator over all ranges in this class.
+    ///
+    /// The iterator yields ranges in ascending order.
+    pub fn iter(&self) -> ClassBytesIter {
+        ClassBytesIter(self.set.iter())
+    }
+
+    /// Return the underlying ranges as a slice.
+    pub fn ranges(&self) -> &[ClassBytesRange] {
+        self.set.intervals()
+    }
+
+    /// Expand this character class such that it contains all case folded
+    /// characters. For example, if this class consists of the range `a-z`,
+    /// then applying case folding will result in the class containing both the
+    /// ranges `a-z` and `A-Z`.
+    ///
+    /// Note that this only applies ASCII case folding, which is limited to the
+    /// characters `a-z` and `A-Z`.
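+    ///
+    /// # Example
+    ///
+    /// A minimal sketch of ASCII-only folding (crate path assumed to be
+    /// `regex_syntax`):
+    ///
+    /// ```
+    /// use regex_syntax::hir::{ClassBytes, ClassBytesRange};
+    ///
+    /// // Only the ASCII mapping applies; unlike the Unicode class, folding
+    /// // `k` does not pull in the Kelvin sign.
+    /// let mut cls = ClassBytes::new(vec![ClassBytesRange::new(b'k', b'k')]);
+    /// cls.case_fold_simple();
+    /// let ranges: Vec<(u8, u8)> =
+    ///     cls.iter().map(|r| (r.start(), r.end())).collect();
+    /// assert_eq!(vec![(b'K', b'K'), (b'k', b'k')], ranges);
+    /// ```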
+    pub fn case_fold_simple(&mut self) {
+        self.set.case_fold_simple();
+    }
+
+    /// Negate this byte class.
+    ///
+    /// For all `b` where `b` is any byte, if `b` was in this set, then it
+    /// will not be in this set after negation.
+    pub fn negate(&mut self) {
+        self.set.negate();
+    }
+
+    /// Union this byte class with the given byte class, in place.
+    pub fn union(&mut self, other: &ClassBytes) {
+        self.set.union(&other.set);
+    }
+
+    /// Intersect this byte class with the given byte class, in place.
+    pub fn intersect(&mut self, other: &ClassBytes) {
+        self.set.intersect(&other.set);
+    }
+
+    /// Subtract the given byte class from this byte class, in place.
+    pub fn difference(&mut self, other: &ClassBytes) {
+        self.set.difference(&other.set);
+    }
+
+    /// Compute the symmetric difference of the given byte classes, in place.
+    ///
+    /// This computes the symmetric difference of two byte classes. This
+    /// removes all elements in this class that are also in the given class,
+    /// but also adds all elements from the given class that aren't in this
+    /// class. That is, the class will contain all elements in either class,
+    /// but will not contain any elements that are in both classes.
+    pub fn symmetric_difference(&mut self, other: &ClassBytes) {
+        self.set.symmetric_difference(&other.set);
+    }
+
+    /// Returns true if and only if this character class will either match
+    /// nothing or only ASCII bytes. Stated differently, this returns false
+    /// if and only if this class contains a non-ASCII byte.
+    pub fn is_all_ascii(&self) -> bool {
+        self.set.intervals().last().map_or(true, |r| r.end <= 0x7F)
+    }
+}
+
+/// An iterator over all ranges in a byte character class.
+///
+/// The lifetime `'a` refers to the lifetime of the underlying class.
+#[derive(Debug)]
+pub struct ClassBytesIter<'a>(IntervalSetIter<'a, ClassBytesRange>);
+
+impl<'a> Iterator for ClassBytesIter<'a> {
+    type Item = &'a ClassBytesRange;
+
+    fn next(&mut self) -> Option<&'a ClassBytesRange> {
+        self.0.next()
+    }
+}
+
+/// A single range of characters represented by arbitrary bytes.
+///
+/// The range is closed. That is, the start and end of the range are included
+/// in the range.
+#[derive(Clone, Copy, Default, Eq, PartialEq, PartialOrd, Ord)]
+pub struct ClassBytesRange {
+    start: u8,
+    end: u8,
+}
+
+impl Interval for ClassBytesRange {
+    type Bound = u8;
+
+    #[inline] fn lower(&self) -> u8 { self.start }
+    #[inline] fn upper(&self) -> u8 { self.end }
+    #[inline] fn set_lower(&mut self, bound: u8) { self.start = bound; }
+    #[inline] fn set_upper(&mut self, bound: u8) { self.end = bound; }
+
+    /// Apply simple case folding to this byte range. Only ASCII case mappings
+    /// (for `A-Z` and `a-z`) are applied.
+    ///
+    /// Additional ranges are appended to the given vector. Canonical ordering
+    /// is *not* maintained in the given vector.
+    fn case_fold_simple(&self, ranges: &mut Vec<ClassBytesRange>) {
+        if !ClassBytesRange::new(b'a', b'z').is_intersection_empty(self) {
+            let lower = cmp::max(self.start, b'a');
+            let upper = cmp::min(self.end, b'z');
+            ranges.push(ClassBytesRange::new(lower - 32, upper - 32));
+        }
+        if !ClassBytesRange::new(b'A', b'Z').is_intersection_empty(self) {
+            let lower = cmp::max(self.start, b'A');
+            let upper = cmp::min(self.end, b'Z');
+            ranges.push(ClassBytesRange::new(lower + 32, upper + 32));
+        }
+    }
+}
+
+impl ClassBytesRange {
+    /// Create a new byte range for a character class.
+    ///
+    /// The returned range is always in a canonical form. That is, the range
+    /// returned always satisfies the invariant that `start <= end`.
+    pub fn new(start: u8, end: u8) -> ClassBytesRange {
+        ClassBytesRange::create(start, end)
+    }
+
+    /// Return the start of this range.
+    ///
+    /// The start of a range is always less than or equal to the end of the
+    /// range.
+    pub fn start(&self) -> u8 {
+        self.start
+    }
+
+    /// Return the end of this range.
+    ///
+    /// The end of a range is always greater than or equal to the start of the
+    /// range.
+    pub fn end(&self) -> u8 {
+        self.end
+    }
+}
+
+impl fmt::Debug for ClassBytesRange {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let mut debug = f.debug_struct("ClassBytesRange");
+        if self.start <= 0x7F {
+            debug.field("start", &(self.start as char));
+        } else {
+            debug.field("start", &self.start);
+        }
+        if self.end <= 0x7F {
+            debug.field("end", &(self.end as char));
+        } else {
+            debug.field("end", &self.end);
+        }
+        debug.finish()
+    }
+}
+
+/// The high-level intermediate representation for an anchor assertion.
+///
+/// A matching anchor assertion is always zero-length.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum Anchor {
+    /// Match the beginning of a line or the beginning of text. Specifically,
+    /// this matches at the starting position of the input, or at the position
+    /// immediately following a `\n` character.
+    StartLine,
+    /// Match the end of a line or the end of text. Specifically,
+    /// this matches at the end position of the input, or at the position
+    /// immediately preceding a `\n` character.
+    EndLine,
+    /// Match the beginning of text. Specifically, this matches at the starting
+    /// position of the input.
+    StartText,
+    /// Match the end of text. Specifically, this matches at the ending
+    /// position of the input.
+    EndText,
+}
+
+/// The high-level intermediate representation for a word-boundary assertion.
+///
+/// A matching word boundary assertion is always zero-length.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum WordBoundary {
+    /// Match a Unicode-aware word boundary. That is, this matches a position
+    /// where the left adjacent character and right adjacent character
+    /// correspond to a word and non-word or a non-word and word character.
+    Unicode,
+    /// Match a Unicode-aware negation of a word boundary.
+    UnicodeNegate,
+    /// Match an ASCII-only word boundary. That is, this matches a position
+    /// where the left adjacent character and right adjacent character
+    /// correspond to a word and non-word or a non-word and word character.
+    Ascii,
+    /// Match an ASCII-only negation of a word boundary.
+    AsciiNegate,
+}
+
+impl WordBoundary {
+    /// Returns true if and only if this word boundary assertion is negated.
+    pub fn is_negated(&self) -> bool {
+        match *self {
+            WordBoundary::Unicode | WordBoundary::Ascii => false,
+            WordBoundary::UnicodeNegate | WordBoundary::AsciiNegate => true,
+        }
+    }
+}
+
+/// The high-level intermediate representation for a group.
+///
+/// This represents one of three possible group types:
+///
+/// 1. A non-capturing group (e.g., `(?:expr)`).
+/// 2. A capturing group (e.g., `(expr)`).
+/// 3. A named capturing group (e.g., `(?P<name>expr)`).
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Group {
+    /// The kind of this group. If it is a capturing group, then the kind
+    /// contains the capture group index (and the name, if it is a named
+    /// group).
+    pub kind: GroupKind,
+    /// The expression inside the group, which may be empty.
+    pub hir: Box<Hir>,
+}
+
+/// The kind of group.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum GroupKind {
+    /// A normal unnamed capturing group.
+    ///
+    /// The value is the capture index of the group.
+    CaptureIndex(u32),
+    /// A named capturing group.
+    CaptureName {
+        /// The name of the group.
+        name: String,
+        /// The capture index of the group.
+        index: u32,
+    },
+    /// A non-capturing group.
+    NonCapturing,
+}
+
+/// The high-level intermediate representation of a repetition operator.
+///
+/// A repetition operator permits the repetition of an arbitrary
+/// sub-expression.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct Repetition {
+    /// The kind of this repetition operator.
+    pub kind: RepetitionKind,
+    /// Whether this repetition operator is greedy or not. A greedy operator
+    /// will match as much as it can. A non-greedy operator will match as
+    /// little as it can.
+    ///
+    /// Typically, operators are greedy by default and are only non-greedy when
+    /// a `?` suffix is used, e.g., `(expr)*` is greedy while `(expr)*?` is
+    /// not. However, this can be inverted via the `U` "ungreedy" flag.
+    pub greedy: bool,
+    /// The expression being repeated.
+    pub hir: Box<Hir>,
+}
+
+impl Repetition {
+    /// Returns true if and only if this repetition operator makes it possible
+    /// to match the empty string.
+    ///
+    /// Note that this is not defined inductively. For example, while `a*`
+    /// will report `true`, `()+` will not, even though `()` matches the empty
+    /// string and one or more occurrences of something that matches the empty
+    /// string will always match the empty string. In order to get the
+    /// inductive definition, see the corresponding method on
+    /// [`Hir`](struct.Hir.html).
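+    ///
+    /// # Example
+    ///
+    /// A minimal sketch built directly from the public fields (crate path
+    /// assumed to be `regex_syntax`):
+    ///
+    /// ```
+    /// use regex_syntax::hir::{Hir, Repetition, RepetitionKind};
+    ///
+    /// // A `*` repetition can match the empty string no matter what it
+    /// // repeats; a `+` repetition reports false here, even when the inner
+    /// // expression itself matches the empty string.
+    /// let star = Repetition {
+    ///     kind: RepetitionKind::ZeroOrMore,
+    ///     greedy: true,
+    ///     hir: Box::new(Hir::empty()),
+    /// };
+    /// assert!(star.is_match_empty());
+    /// let plus = Repetition { kind: RepetitionKind::OneOrMore, ..star };
+    /// assert!(!plus.is_match_empty());
+    /// ```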
+    pub fn is_match_empty(&self) -> bool {
+        match self.kind {
+            RepetitionKind::ZeroOrOne => true,
+            RepetitionKind::ZeroOrMore => true,
+            RepetitionKind::OneOrMore => false,
+            RepetitionKind::Range(RepetitionRange::Exactly(m)) => m == 0,
+            RepetitionKind::Range(RepetitionRange::AtLeast(m)) => m == 0,
+            RepetitionKind::Range(RepetitionRange::Bounded(m, _)) => m == 0,
+        }
+    }
+}
+
+/// The kind of a repetition operator.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum RepetitionKind {
+    /// Matches a sub-expression zero or one times.
+    ZeroOrOne,
+    /// Matches a sub-expression zero or more times.
+    ZeroOrMore,
+    /// Matches a sub-expression one or more times.
+    OneOrMore,
+    /// Matches a sub-expression within a bounded range of times.
+    Range(RepetitionRange),
+}
+
+/// The kind of a counted repetition operator.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum RepetitionRange {
+    /// Matches a sub-expression exactly this many times.
+    Exactly(u32),
+    /// Matches a sub-expression at least this many times.
+    AtLeast(u32),
+    /// Matches a sub-expression at least `m` times and at most `n` times.
+    Bounded(u32, u32),
+}
+
+/// A custom `Drop` impl is used for `Hir` such that it uses constant stack
+/// space but heap space proportional to the depth of the total `Hir`.
+impl Drop for Hir {
+    fn drop(&mut self) {
+        use std::mem;
+
+        match *self.kind() {
+            HirKind::Empty
+            | HirKind::Literal(_)
+            | HirKind::Class(_)
+            | HirKind::Anchor(_)
+            | HirKind::WordBoundary(_) => return,
+            HirKind::Group(ref x) if !x.hir.kind.has_subexprs() => return,
+            HirKind::Repetition(ref x) if !x.hir.kind.has_subexprs() => return,
+            HirKind::Concat(ref x) if x.is_empty() => return,
+            HirKind::Alternation(ref x) if x.is_empty() => return,
+            _ => {}
+        }
+
+        let mut stack = vec![mem::replace(self, Hir::empty())];
+        while let Some(mut expr) = stack.pop() {
+            match expr.kind {
+                HirKind::Empty
+                | HirKind::Literal(_)
+                | HirKind::Class(_)
+                | HirKind::Anchor(_)
+                | HirKind::WordBoundary(_) => {}
+                HirKind::Group(ref mut x) => {
+                    stack.push(mem::replace(&mut x.hir, Hir::empty()));
+                }
+                HirKind::Repetition(ref mut x) => {
+                    stack.push(mem::replace(&mut x.hir, Hir::empty()));
+                }
+                HirKind::Concat(ref mut x) => {
+                    stack.extend(x.drain(..));
+                }
+                HirKind::Alternation(ref mut x) => {
+                    stack.extend(x.drain(..));
+                }
+            }
+        }
+    }
+}
+
+/// A type that documents various attributes of an HIR expression.
+///
+/// These attributes are typically defined inductively on the HIR.
+#[derive(Clone, Debug, Eq, PartialEq)]
+struct HirInfo {
+    /// Represent yes/no questions by a bitfield to conserve space, since
+    /// this is included in every HIR expression.
+    ///
+    /// If more attributes need to be added, it is OK to increase the size of
+    /// this as appropriate.
+    bools: u8,
+}
+
+// A simple macro for defining bitfield accessors/mutators.
+macro_rules! define_bool {
+    ($bit:expr, $is_fn_name:ident, $set_fn_name:ident) => {
+        fn $is_fn_name(&self) -> bool {
+            self.bools & (0b1 << $bit) > 0
+        }
+
+        fn $set_fn_name(&mut self, yes: bool) {
+            if yes {
+                self.bools |= 1 << $bit;
+            } else {
+                self.bools &= !(1 << $bit);
+            }
+        }
+    }
+}
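+
+// For reference, a sketch of what a single invocation such as
+// `define_bool!(0, is_always_utf8, set_always_utf8)` expands to, with bit 0
+// of `bools` holding the flag:
+//
+//     fn is_always_utf8(&self) -> bool {
+//         self.bools & (0b1 << 0) > 0
+//     }
+//
+//     fn set_always_utf8(&mut self, yes: bool) {
+//         if yes { self.bools |= 1 << 0; } else { self.bools &= !(1 << 0); }
+//     }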
+
+impl HirInfo {
+    fn new() -> HirInfo {
+        HirInfo {
+            bools: 0,
+        }
+    }
+
+    define_bool!(0, is_always_utf8, set_always_utf8);
+    define_bool!(1, is_all_assertions, set_all_assertions);
+    define_bool!(2, is_anchored_start, set_anchored_start);
+    define_bool!(3, is_anchored_end, set_anchored_end);
+    define_bool!(4, is_any_anchored_start, set_any_anchored_start);
+    define_bool!(5, is_any_anchored_end, set_any_anchored_end);
+    define_bool!(6, is_match_empty, set_match_empty);
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn uclass(ranges: &[(char, char)]) -> ClassUnicode {
+        let ranges: Vec<ClassUnicodeRange> = ranges
+            .iter()
+            .map(|&(s, e)| ClassUnicodeRange::new(s, e))
+            .collect();
+        ClassUnicode::new(ranges)
+    }
+
+    fn bclass(ranges: &[(u8, u8)]) -> ClassBytes {
+        let ranges: Vec<ClassBytesRange> = ranges
+            .iter()
+            .map(|&(s, e)| ClassBytesRange::new(s, e))
+            .collect();
+        ClassBytes::new(ranges)
+    }
+
+    fn uranges(cls: &ClassUnicode) -> Vec<(char, char)> {
+        cls.iter().map(|x| (x.start(), x.end())).collect()
+    }
+
+    fn ucasefold(cls: &ClassUnicode) -> ClassUnicode {
+        let mut cls_ = cls.clone();
+        cls_.case_fold_simple();
+        cls_
+    }
+
+    fn uunion(cls1: &ClassUnicode, cls2: &ClassUnicode) -> ClassUnicode {
+        let mut cls_ = cls1.clone();
+        cls_.union(cls2);
+        cls_
+    }
+
+    fn uintersect(cls1: &ClassUnicode, cls2: &ClassUnicode) -> ClassUnicode {
+        let mut cls_ = cls1.clone();
+        cls_.intersect(cls2);
+        cls_
+    }
+
+    fn udifference(cls1: &ClassUnicode, cls2: &ClassUnicode) -> ClassUnicode {
+        let mut cls_ = cls1.clone();
+        cls_.difference(cls2);
+        cls_
+    }
+
+    fn usymdifference(cls1: &ClassUnicode, cls2: &ClassUnicode) -> ClassUnicode {
+        let mut cls_ = cls1.clone();
+        cls_.symmetric_difference(cls2);
+        cls_
+    }
+
+    fn unegate(cls: &ClassUnicode) -> ClassUnicode {
+        let mut cls_ = cls.clone();
+        cls_.negate();
+        cls_
+    }
+
+    fn branges(cls: &ClassBytes) -> Vec<(u8, u8)> {
+        cls.iter().map(|x| (x.start(), x.end())).collect()
+    }
+
+    fn bcasefold(cls: &ClassBytes) -> ClassBytes {
+        let mut cls_ = cls.clone();
+        cls_.case_fold_simple();
+        cls_
+    }
+
+    fn bunion(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes {
+        let mut cls_ = cls1.clone();
+        cls_.union(cls2);
+        cls_
+    }
+
+    fn bintersect(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes {
+        let mut cls_ = cls1.clone();
+        cls_.intersect(cls2);
+        cls_
+    }
+
+    fn bdifference(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes {
+        let mut cls_ = cls1.clone();
+        cls_.difference(cls2);
+        cls_
+    }
+
+    fn bsymdifference(cls1: &ClassBytes, cls2: &ClassBytes) -> ClassBytes {
+        let mut cls_ = cls1.clone();
+        cls_.symmetric_difference(cls2);
+        cls_
+    }
+
+    fn bnegate(cls: &ClassBytes) -> ClassBytes {
+        let mut cls_ = cls.clone();
+        cls_.negate();
+        cls_
+    }
+
+    #[test]
+    fn class_range_canonical_unicode() {
+        let range = ClassUnicodeRange::new('\u{00FF}', '\0');
+        assert_eq!('\0', range.start());
+        assert_eq!('\u{00FF}', range.end());
+    }
+
+    #[test]
+    fn class_range_canonical_bytes() {
+        let range = ClassBytesRange::new(b'\xFF', b'\0');
+        assert_eq!(b'\0', range.start());
+        assert_eq!(b'\xFF', range.end());
+    }
+
+    #[test]
+    fn class_canonicalize_unicode() {
+        let cls = uclass(&[('a', 'c'), ('x', 'z')]);
+        let expected = vec![('a', 'c'), ('x', 'z')];
+        assert_eq!(expected, uranges(&cls));
+
+        let cls = uclass(&[('x', 'z'), ('a', 'c')]);
+        let expected = vec![('a', 'c'), ('x', 'z')];
+        assert_eq!(expected, uranges(&cls));
+
+        let cls = uclass(&[('x', 'z'), ('w', 'y')]);
+        let expected = vec![('w', 'z')];
+        assert_eq!(expected, uranges(&cls));
+
+        let cls = uclass(&[
+            ('c', 'f'), ('a', 'g'), ('d', 'j'), ('a', 'c'),
+            ('m', 'p'), ('l', 's'),
+        ]);
+        let expected = vec![('a', 'j'), ('l', 's')];
+        assert_eq!(expected, uranges(&cls));
+
+        let cls = uclass(&[('x', 'z'), ('u', 'w')]);
+        let expected = vec![('u', 'z')];
+        assert_eq!(expected, uranges(&cls));
+
+        let cls = uclass(&[('\x00', '\u{10FFFF}'), ('\x00', '\u{10FFFF}')]);
+        let expected = vec![('\x00', '\u{10FFFF}')];
+        assert_eq!(expected, uranges(&cls));
+
+        let cls = uclass(&[('a', 'a'), ('b', 'b')]);
+        let expected = vec![('a', 'b')];
+        assert_eq!(expected, uranges(&cls));
+    }
+
+    #[test]
+    fn class_canonicalize_bytes() {
+        let cls = bclass(&[(b'a', b'c'), (b'x', b'z')]);
+        let expected = vec![(b'a', b'c'), (b'x', b'z')];
+        assert_eq!(expected, branges(&cls));
+
+        let cls = bclass(&[(b'x', b'z'), (b'a', b'c')]);
+        let expected = vec![(b'a', b'c'), (b'x', b'z')];
+        assert_eq!(expected, branges(&cls));
+
+        let cls = bclass(&[(b'x', b'z'), (b'w', b'y')]);
+        let expected = vec![(b'w', b'z')];
+        assert_eq!(expected, branges(&cls));
+
+        let cls = bclass(&[
+            (b'c', b'f'), (b'a', b'g'), (b'd', b'j'), (b'a', b'c'),
+            (b'm', b'p'), (b'l', b's'),
+        ]);
+        let expected = vec![(b'a', b'j'), (b'l', b's')];
+        assert_eq!(expected, branges(&cls));
+
+        let cls = bclass(&[(b'x', b'z'), (b'u', b'w')]);
+        let expected = vec![(b'u', b'z')];
+        assert_eq!(expected, branges(&cls));
+
+        let cls = bclass(&[(b'\x00', b'\xFF'), (b'\x00', b'\xFF')]);
+        let expected = vec![(b'\x00', b'\xFF')];
+        assert_eq!(expected, branges(&cls));
+
+        let cls = bclass(&[(b'a', b'a'), (b'b', b'b')]);
+        let expected = vec![(b'a', b'b')];
+        assert_eq!(expected, branges(&cls));
+    }
+
+    #[test]
+    fn class_case_fold_unicode() {
+        let cls = uclass(&[
+            ('C', 'F'), ('A', 'G'), ('D', 'J'), ('A', 'C'),
+            ('M', 'P'), ('L', 'S'), ('c', 'f'),
+        ]);
+        let expected = uclass(&[
+            ('A', 'J'), ('L', 'S'),
+            ('a', 'j'), ('l', 's'),
+            ('\u{17F}', '\u{17F}'),
+        ]);
+        assert_eq!(expected, ucasefold(&cls));
+
+        let cls = uclass(&[('A', 'Z')]);
+        let expected = uclass(&[
+            ('A', 'Z'), ('a', 'z'),
+            ('\u{17F}', '\u{17F}'),
+            ('\u{212A}', '\u{212A}'),
+        ]);
+        assert_eq!(expected, ucasefold(&cls));
+
+        let cls = uclass(&[('a', 'z')]);
+        let expected = uclass(&[
+            ('A', 'Z'), ('a', 'z'),
+            ('\u{17F}', '\u{17F}'),
+            ('\u{212A}', '\u{212A}'),
+        ]);
+        assert_eq!(expected, ucasefold(&cls));
+
+        let cls = uclass(&[('A', 'A'), ('_', '_')]);
+        let expected = uclass(&[('A', 'A'), ('_', '_'), ('a', 'a')]);
+        assert_eq!(expected, ucasefold(&cls));
+
+        let cls = uclass(&[('A', 'A'), ('=', '=')]);
+        let expected = uclass(&[('=', '='), ('A', 'A'), ('a', 'a')]);
+        assert_eq!(expected, ucasefold(&cls));
+
+        let cls = uclass(&[('\x00', '\x10')]);
+        assert_eq!(cls, ucasefold(&cls));
+
+        let cls = uclass(&[('k', 'k')]);
+        let expected = uclass(&[
+            ('K', 'K'), ('k', 'k'), ('\u{212A}', '\u{212A}'),
+        ]);
+        assert_eq!(expected, ucasefold(&cls));
+
+        let cls = uclass(&[('@', '@')]);
+        assert_eq!(cls, ucasefold(&cls));
+    }
+
+    #[test]
+    fn class_case_fold_bytes() {
+        let cls = bclass(&[
+            (b'C', b'F'), (b'A', b'G'), (b'D', b'J'), (b'A', b'C'),
+            (b'M', b'P'), (b'L', b'S'), (b'c', b'f'),
+        ]);
+        let expected = bclass(&[
+            (b'A', b'J'), (b'L', b'S'),
+            (b'a', b'j'), (b'l', b's'),
+        ]);
+        assert_eq!(expected, bcasefold(&cls));
+
+        let cls = bclass(&[(b'A', b'Z')]);
+        let expected = bclass(&[(b'A', b'Z'), (b'a', b'z')]);
+        assert_eq!(expected, bcasefold(&cls));
+
+        let cls = bclass(&[(b'a', b'z')]);
+        let expected = bclass(&[(b'A', b'Z'), (b'a', b'z')]);
+        assert_eq!(expected, bcasefold(&cls));
+
+        let cls = bclass(&[(b'A', b'A'), (b'_', b'_')]);
+        let expected = bclass(&[(b'A', b'A'), (b'_', b'_'), (b'a', b'a')]);
+        assert_eq!(expected, bcasefold(&cls));
+
+        let cls = bclass(&[(b'A', b'A'), (b'=', b'=')]);
+        let expected = bclass(&[(b'=', b'='), (b'A', b'A'), (b'a', b'a')]);
+        assert_eq!(expected, bcasefold(&cls));
+
+        let cls = bclass(&[(b'\x00', b'\x10')]);
+        assert_eq!(cls, bcasefold(&cls));
+
+        let cls = bclass(&[(b'k', b'k')]);
+        let expected = bclass(&[(b'K', b'K'), (b'k', b'k')]);
+        assert_eq!(expected, bcasefold(&cls));
+
+        let cls = bclass(&[(b'@', b'@')]);
+        assert_eq!(cls, bcasefold(&cls));
+    }
+
+    #[test]
+    fn class_negate_unicode() {
+        let cls = uclass(&[('a', 'a')]);
+        let expected = uclass(&[('\x00', '\x60'), ('\x62', '\u{10FFFF}')]);
+        assert_eq!(expected, unegate(&cls));
+
+        let cls = uclass(&[('a', 'a'), ('b', 'b')]);
+        let expected = uclass(&[('\x00', '\x60'), ('\x63', '\u{10FFFF}')]);
+        assert_eq!(expected, unegate(&cls));
+
+        let cls = uclass(&[('a', 'c'), ('x', 'z')]);
+        let expected = uclass(&[
+            ('\x00', '\x60'), ('\x64', '\x77'), ('\x7B', '\u{10FFFF}'),
+        ]);
+        assert_eq!(expected, unegate(&cls));
+
+        let cls = uclass(&[('\x00', 'a')]);
+        let expected = uclass(&[('\x62', '\u{10FFFF}')]);
+        assert_eq!(expected, unegate(&cls));
+
+        let cls = uclass(&[('a', '\u{10FFFF}')]);
+        let expected = uclass(&[('\x00', '\x60')]);
+        assert_eq!(expected, unegate(&cls));
+
+        let cls = uclass(&[('\x00', '\u{10FFFF}')]);
+        let expected = uclass(&[]);
+        assert_eq!(expected, unegate(&cls));
+
+        let cls = uclass(&[]);
+        let expected = uclass(&[('\x00', '\u{10FFFF}')]);
+        assert_eq!(expected, unegate(&cls));
+
+        let cls = uclass(&[
+            ('\x00', '\u{10FFFD}'), ('\u{10FFFF}', '\u{10FFFF}'),
+        ]);
+        let expected = uclass(&[('\u{10FFFE}', '\u{10FFFE}')]);
+        assert_eq!(expected, unegate(&cls));
+
+        let cls = uclass(&[('\x00', '\u{D7FF}')]);
+        let expected = uclass(&[('\u{E000}', '\u{10FFFF}')]);
+        assert_eq!(expected, unegate(&cls));
+
+        let cls = uclass(&[('\x00', '\u{D7FE}')]);
+        let expected = uclass(&[('\u{D7FF}', '\u{10FFFF}')]);
+        assert_eq!(expected, unegate(&cls));
+
+        let cls = uclass(&[('\u{E000}', '\u{10FFFF}')]);
+        let expected = uclass(&[('\x00', '\u{D7FF}')]);
+        assert_eq!(expected, unegate(&cls));
+
+        let cls = uclass(&[('\u{E001}', '\u{10FFFF}')]);
+        let expected = uclass(&[('\x00', '\u{E000}')]);
+        assert_eq!(expected, unegate(&cls));
+    }
+
+    #[test]
+    fn class_negate_bytes() {
+        let cls = bclass(&[(b'a', b'a')]);
+        let expected = bclass(&[(b'\x00', b'\x60'), (b'\x62', b'\xFF')]);
+        assert_eq!(expected, bnegate(&cls));
+
+        let cls = bclass(&[(b'a', b'a'), (b'b', b'b')]);
+        let expected = bclass(&[(b'\x00', b'\x60'), (b'\x63', b'\xFF')]);
+        assert_eq!(expected, bnegate(&cls));
+
+        let cls = bclass(&[(b'a', b'c'), (b'x', b'z')]);
+        let expected = bclass(&[
+            (b'\x00', b'\x60'), (b'\x64', b'\x77'), (b'\x7B', b'\xFF'),
+        ]);
+        assert_eq!(expected, bnegate(&cls));
+
+        let cls = bclass(&[(b'\x00', b'a')]);
+        let expected = bclass(&[(b'\x62', b'\xFF')]);
+        assert_eq!(expected, bnegate(&cls));
+
+        let cls = bclass(&[(b'a', b'\xFF')]);
+        let expected = bclass(&[(b'\x00', b'\x60')]);
+        assert_eq!(expected, bnegate(&cls));
+
+        let cls = bclass(&[(b'\x00', b'\xFF')]);
+        let expected = bclass(&[]);
+        assert_eq!(expected, bnegate(&cls));
+
+        let cls = bclass(&[]);
+        let expected = bclass(&[(b'\x00', b'\xFF')]);
+        assert_eq!(expected, bnegate(&cls));
+
+        let cls = bclass(&[(b'\x00', b'\xFD'), (b'\xFF', b'\xFF')]);
+        let expected = bclass(&[(b'\xFE', b'\xFE')]);
+        assert_eq!(expected, bnegate(&cls));
+    }
+
+    #[test]
+    fn class_union_unicode() {
+        let cls1 = uclass(&[('a', 'g'), ('m', 't'), ('A', 'C')]);
+        let cls2 = uclass(&[('a', 'z')]);
+        let expected = uclass(&[('a', 'z'), ('A', 'C')]);
+        assert_eq!(expected, uunion(&cls1, &cls2));
+    }
+
+    #[test]
+    fn class_union_bytes() {
+        let cls1 = bclass(&[(b'a', b'g'), (b'm', b't'), (b'A', b'C')]);
+        let cls2 = bclass(&[(b'a', b'z')]);
+        let expected = bclass(&[(b'a', b'z'), (b'A', b'C')]);
+        assert_eq!(expected, bunion(&cls1, &cls2));
+    }
+
+    #[test]
+    fn class_intersect_unicode() {
+        let cls1 = uclass(&[]);
+        let cls2 = uclass(&[('a', 'a')]);
+        let expected = uclass(&[]);
+        assert_eq!(expected, uintersect(&cls1, &cls2));
+
+        let cls1 = uclass(&[('a', 'a')]);
+        let cls2 = uclass(&[('a', 'a')]);
+        let expected = uclass(&[('a', 'a')]);
+        assert_eq!(expected, uintersect(&cls1, &cls2));
+
+        let cls1 = uclass(&[('a', 'a')]);
+        let cls2 = uclass(&[('b', 'b')]);
+        let expected = uclass(&[]);
+        assert_eq!(expected, uintersect(&cls1, &cls2));
+
+        let cls1 = uclass(&[('a', 'a')]);
+        let cls2 = uclass(&[('a', 'c')]);
+        let expected = uclass(&[('a', 'a')]);
+        assert_eq!(expected, uintersect(&cls1, &cls2));
+
+        let cls1 = uclass(&[('a', 'b')]);
+        let cls2 = uclass(&[('a', 'c')]);
+        let expected = uclass(&[('a', 'b')]);
+        assert_eq!(expected, uintersect(&cls1, &cls2));
+
+        let cls1 = uclass(&[('a', 'b')]);
+        let cls2 = uclass(&[('b', 'c')]);
+        let expected = uclass(&[('b', 'b')]);
+        assert_eq!(expected, uintersect(&cls1, &cls2));
+
+        let cls1 = uclass(&[('a', 'b')]);
+        let cls2 = uclass(&[('c', 'd')]);
+        let expected = uclass(&[]);
+        assert_eq!(expected, uintersect(&cls1, &cls2));
+
+        let cls1 = uclass(&[('b', 'c')]);
+        let cls2 = uclass(&[('a', 'd')]);
+        let expected = uclass(&[('b', 'c')]);
+        assert_eq!(expected, uintersect(&cls1, &cls2));
+
+        let cls1 = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
+        let cls2 = uclass(&[('a', 'h')]);
+        let expected = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
+        assert_eq!(expected, uintersect(&cls1, &cls2));
+
+        let cls1 = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
+        let cls2 = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
+        let expected = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
+        assert_eq!(expected, uintersect(&cls1, &cls2));
+
+        let cls1 = uclass(&[('a', 'b'), ('g', 'h')]);
+        let cls2 = uclass(&[('d', 'e'), ('k', 'l')]);
+        let expected = uclass(&[]);
+        assert_eq!(expected, uintersect(&cls1, &cls2));
+
+        let cls1 = uclass(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
+        let cls2 = uclass(&[('h', 'h')]);
+        let expected = uclass(&[('h', 'h')]);
+        assert_eq!(expected, uintersect(&cls1, &cls2));
+
+        let cls1 = uclass(&[('a', 'b'), ('e', 'f'), ('i', 'j')]);
+        let cls2 = uclass(&[('c', 'd'), ('g', 'h'), ('k', 'l')]);
+        let expected = uclass(&[]);
+        assert_eq!(expected, uintersect(&cls1, &cls2));
+
+        let cls1 = uclass(&[('a', 'b'), ('c', 'd'), ('e', 'f')]);
+        let cls2 = uclass(&[('b', 'c'), ('d', 'e'), ('f', 'g')]);
+        let expected = uclass(&[('b', 'f')]);
+        assert_eq!(expected, uintersect(&cls1, &cls2));
+    }
+
+    #[test]
+    fn class_intersect_bytes() {
+        let cls1 = bclass(&[]);
+        let cls2 = bclass(&[(b'a', b'a')]);
+        let expected = bclass(&[]);
+        assert_eq!(expected, bintersect(&cls1, &cls2));
+
+        let cls1 = bclass(&[(b'a', b'a')]);
+        let cls2 = bclass(&[(b'a', b'a')]);
+        let expected = bclass(&[(b'a', b'a')]);
+        assert_eq!(expected, bintersect(&cls1, &cls2));
+
+        let cls1 = bclass(&[(b'a', b'a')]);
+        let cls2 = bclass(&[(b'b', b'b')]);
+        let expected = bclass(&[]);
+        assert_eq!(expected, bintersect(&cls1, &cls2));
+
+        let cls1 = bclass(&[(b'a', b'a')]);
+        let cls2 = bclass(&[(b'a', b'c')]);
+        let expected = bclass(&[(b'a', b'a')]);
+        assert_eq!(expected, bintersect(&cls1, &cls2));
+
+        let cls1 = bclass(&[(b'a', b'b')]);
+        let cls2 = bclass(&[(b'a', b'c')]);
+        let expected = bclass(&[(b'a', b'b')]);
+        assert_eq!(expected, bintersect(&cls1, &cls2));
+
+        let cls1 = bclass(&[(b'a', b'b')]);
+        let cls2 = bclass(&[(b'b', b'c')]);
+        let expected = bclass(&[(b'b', b'b')]);
+        assert_eq!(expected, bintersect(&cls1, &cls2));
+
+        let cls1 = bclass(&[(b'a', b'b')]);
+        let cls2 = bclass(&[(b'c', b'd')]);
+        let expected = bclass(&[]);
+        assert_eq!(expected, bintersect(&cls1, &cls2));
+
+        let cls1 = bclass(&[(b'b', b'c')]);
+        let cls2 = bclass(&[(b'a', b'd')]);
+        let expected = bclass(&[(b'b', b'c')]);
+        assert_eq!(expected, bintersect(&cls1, &cls2));
+
+        let cls1 = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]);
+        let cls2 = bclass(&[(b'a', b'h')]);
+        let expected = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]);
+        assert_eq!(expected, bintersect(&cls1, &cls2));
+
+        let cls1 = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]);
+        let cls2 = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]);
+        let expected = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]);
+        assert_eq!(expected, bintersect(&cls1, &cls2));
+
+        let cls1 = bclass(&[(b'a', b'b'), (b'g', b'h')]);
+        let cls2 = bclass(&[(b'd', b'e'), (b'k', b'l')]);
+        let expected = bclass(&[]);
+        assert_eq!(expected, bintersect(&cls1, &cls2));
+
+        let cls1 = bclass(&[(b'a', b'b'), (b'd', b'e'), (b'g', b'h')]);
+        let cls2 = bclass(&[(b'h', b'h')]);
+        let expected = bclass(&[(b'h', b'h')]);
+        assert_eq!(expected, bintersect(&cls1, &cls2));
+
+        let cls1 = bclass(&[(b'a', b'b'), (b'e', b'f'), (b'i', b'j')]);
+        let cls2 = bclass(&[(b'c', b'd'), (b'g', b'h'), (b'k', b'l')]);
+        let expected = bclass(&[]);
+        assert_eq!(expected, bintersect(&cls1, &cls2));
+
+        let cls1 = bclass(&[(b'a', b'b'), (b'c', b'd'), (b'e', b'f')]);
+        let cls2 = bclass(&[(b'b', b'c'), (b'd', b'e'), (b'f', b'g')]);
+        let expected = bclass(&[(b'b', b'f')]);
+        assert_eq!(expected, bintersect(&cls1, &cls2));
+    }
+
+    #[test]
+    fn class_difference_unicode() {
+        let cls1 = uclass(&[('a', 'a')]);
+        let cls2 = uclass(&[('a', 'a')]);
+        let expected = uclass(&[]);
+        assert_eq!(expected, udifference(&cls1, &cls2));
+
+        let cls1 = uclass(&[('a', 'a')]);
+        let cls2 = uclass(&[]);
+        let expected = uclass(&[('a', 'a')]);
+        assert_eq!(expected, udifference(&cls1, &cls2));
+
+        let cls1 = uclass(&[]);
+        let cls2 = uclass(&[('a', 'a')]);
+        let expected = uclass(&[]);
+        assert_eq!(expected, udifference(&cls1, &cls2));
+
+        let cls1 = uclass(&[('a', 'z')]);
+        let cls2 = uclass(&[('a', 'a')]);
+        let expected = uclass(&[('b', 'z')]);
+        assert_eq!(expected, udifference(&cls1, &cls2));
+
+        let cls1 = uclass(&[('a', 'z')]);
+        let cls2 = uclass(&[('z', 'z')]);
+        let expected = uclass(&[('a', 'y')]);
+        assert_eq!(expected, udifference(&cls1, &cls2));
+
+        let cls1 = uclass(&[('a', 'z')]);
+        let cls2 = uclass(&[('m', 'm')]);
+        let expected = uclass(&[('a', 'l'), ('n', 'z')]);
+        assert_eq!(expected, udifference(&cls1, &cls2));
+
+        let cls1 = uclass(&[('a', 'c'), ('g', 'i'), ('r', 't')]);
+        let cls2 = uclass(&[('a', 'z')]);
+        let expected = uclass(&[]);
+        assert_eq!(expected, udifference(&cls1, &cls2));
+
+        let cls1 = uclass(&[('a', 'c'), ('g', 'i'), ('r', 't')]);
+        let cls2 = uclass(&[('d', 'v')]);
+        let expected = uclass(&[('a', 'c')]);
+        assert_eq!(expected, udifference(&cls1, &cls2));
+
+        let cls1 = uclass(&[('a', 'c'), ('g', 'i'), ('r', 't')]);
+        let cls2 = uclass(&[('b', 'g'), ('s', 'u')]);
+        let expected = uclass(&[('a', 'a'), ('h', 'i'), ('r', 'r')]);
+        assert_eq!(expected, udifference(&cls1, &cls2));
+
+        let cls1 = uclass(&[('a', 'c'), ('g', 'i'), ('r', 't')]);
+        let cls2 = uclass(&[('b', 'd'), ('e', 'g'), ('s', 'u')]);
+        let expected = uclass(&[('a', 'a'), ('h', 'i'), ('r', 'r')]);
+        assert_eq!(expected, udifference(&cls1, &cls2));
+
+        let cls1 = uclass(&[('x', 'z')]);
+        let cls2 = uclass(&[('a', 'c'), ('e', 'g'), ('s', 'u')]);
+        let expected = uclass(&[('x', 'z')]);
+        assert_eq!(expected, udifference(&cls1, &cls2));
+
+        let cls1 = uclass(&[('a', 'z')]);
+        let cls2 = uclass(&[('a', 'c'), ('e', 'g'), ('s', 'u')]);
+        let expected = uclass(&[('d', 'd'), ('h', 'r'), ('v', 'z')]);
+        assert_eq!(expected, udifference(&cls1, &cls2));
+    }
+
+    #[test]
+    fn class_difference_bytes() {
+        let cls1 = bclass(&[(b'a', b'a')]);
+        let cls2 = bclass(&[(b'a', b'a')]);
+        let expected = bclass(&[]);
+        assert_eq!(expected, bdifference(&cls1, &cls2));
+
+        let cls1 = bclass(&[(b'a', b'a')]);
+        let cls2 = bclass(&[]);
+        let expected = bclass(&[(b'a', b'a')]);
+        assert_eq!(expected, bdifference(&cls1, &cls2));
+
+        let cls1 = bclass(&[]);
+        let cls2 = bclass(&[(b'a', b'a')]);
+        let expected = bclass(&[]);
+        assert_eq!(expected, bdifference(&cls1, &cls2));
+
+        let cls1 = bclass(&[(b'a', b'z')]);
+        let cls2 = bclass(&[(b'a', b'a')]);
+        let expected = bclass(&[(b'b', b'z')]);
+        assert_eq!(expected, bdifference(&cls1, &cls2));
+
+        let cls1 = bclass(&[(b'a', b'z')]);
+        let cls2 = bclass(&[(b'z', b'z')]);
+        let expected = bclass(&[(b'a', b'y')]);
+        assert_eq!(expected, bdifference(&cls1, &cls2));
+
+        let cls1 = bclass(&[(b'a', b'z')]);
+        let cls2 = bclass(&[(b'm', b'm')]);
+        let expected = bclass(&[(b'a', b'l'), (b'n', b'z')]);
+        assert_eq!(expected, bdifference(&cls1, &cls2));
+
+        let cls1 = bclass(&[(b'a', b'c'), (b'g', b'i'), (b'r', b't')]);
+        let cls2 = bclass(&[(b'a', b'z')]);
+        let expected = bclass(&[]);
+        assert_eq!(expected, bdifference(&cls1, &cls2));
+
+        let cls1 = bclass(&[(b'a', b'c'), (b'g', b'i'), (b'r', b't')]);
+        let cls2 = bclass(&[(b'd', b'v')]);
+        let expected = bclass(&[(b'a', b'c')]);
+        assert_eq!(expected, bdifference(&cls1, &cls2));
+
+        let cls1 = bclass(&[(b'a', b'c'), (b'g', b'i'), (b'r', b't')]);
+        let cls2 = bclass(&[(b'b', b'g'), (b's', b'u')]);
+        let expected = bclass(&[(b'a', b'a'), (b'h', b'i'), (b'r', b'r')]);
+        assert_eq!(expected, bdifference(&cls1, &cls2));
+
+        let cls1 = bclass(&[(b'a', b'c'), (b'g', b'i'), (b'r', b't')]);
+        let cls2 = bclass(&[(b'b', b'd'), (b'e', b'g'), (b's', b'u')]);
+        let expected = bclass(&[(b'a', b'a'), (b'h', b'i'), (b'r', b'r')]);
+        assert_eq!(expected, bdifference(&cls1, &cls2));
+
+        let cls1 = bclass(&[(b'x', b'z')]);
+        let cls2 = bclass(&[(b'a', b'c'), (b'e', b'g'), (b's', b'u')]);
+        let expected = bclass(&[(b'x', b'z')]);
+        assert_eq!(expected, bdifference(&cls1, &cls2));
+
+        let cls1 = bclass(&[(b'a', b'z')]);
+        let cls2 = bclass(&[(b'a', b'c'), (b'e', b'g'), (b's', b'u')]);
+        let expected = bclass(&[(b'd', b'd'), (b'h', b'r'), (b'v', b'z')]);
+        assert_eq!(expected, bdifference(&cls1, &cls2));
+    }
+
+    #[test]
+    fn class_symmetric_difference_unicode() {
+        let cls1 = uclass(&[('a', 'm')]);
+        let cls2 = uclass(&[('g', 't')]);
+        let expected = uclass(&[('a', 'f'), ('n', 't')]);
+        assert_eq!(expected, usymdifference(&cls1, &cls2));
+    }
+
+    #[test]
+    fn class_symmetric_difference_bytes() {
+        let cls1 = bclass(&[(b'a', b'm')]);
+        let cls2 = bclass(&[(b'g', b't')]);
+        let expected = bclass(&[(b'a', b'f'), (b'n', b't')]);
+        assert_eq!(expected, bsymdifference(&cls1, &cls2));
+    }
+
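+    // `Hir::literal` panics when handed an ASCII byte literal: ASCII bytes
+    // must be represented as `Literal::Unicode`, so `Literal::Byte(b'a')`
+    // below is rejected.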
+    #[test]
+    #[should_panic]
+    fn hir_byte_literal_non_ascii() {
+        Hir::literal(Literal::Byte(b'a'));
+    }
+
+    // We use a thread with an explicit stack size to test that our destructor
+    // for Hir can handle arbitrarily sized expressions in constant stack
+    // space. In case we run on a platform without threads (WASM?), we limit
+    // this test to Windows/Unix.
+    #[test]
+    #[cfg(any(unix, windows))]
+    fn no_stack_overflow_on_drop() {
+        use std::thread;
+
+        let run = || {
+            let mut expr = Hir::empty();
+            for _ in 0..100 {
+                expr = Hir::group(Group {
+                    kind: GroupKind::NonCapturing,
+                    hir: Box::new(expr),
+                });
+                expr = Hir::repetition(Repetition {
+                    kind: RepetitionKind::ZeroOrOne,
+                    greedy: true,
+                    hir: Box::new(expr),
+                });
+
+                expr = Hir {
+                    kind: HirKind::Concat(vec![expr]),
+                    info: HirInfo::new(),
+                };
+                expr = Hir {
+                    kind: HirKind::Alternation(vec![expr]),
+                    info: HirInfo::new(),
+                };
+            }
+            assert!(!expr.kind.is_empty());
+        };
+
+        // We run our test on a thread with a small stack size so we can
+        // force the issue more easily.
+        thread::Builder::new()
+            .stack_size(1<<10)
+            .spawn(run)
+            .unwrap()
+            .join()
+            .unwrap();
+    }
+}
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-syntax/src/hir/print.rs
@@ -0,0 +1,359 @@
+/*!
+This module provides a regular expression printer for `Hir`.
+*/
+
+use std::fmt;
+
+use hir::{self, Hir, HirKind};
+use hir::visitor::{self, Visitor};
+use is_meta_character;
+
+/// A builder for constructing a printer.
+///
+/// Note that since a printer doesn't have any configuration knobs, this type
+/// remains unexported.
+#[derive(Clone, Debug)]
+struct PrinterBuilder {
+    _priv: (),
+}
+
+impl Default for PrinterBuilder {
+    fn default() -> PrinterBuilder {
+        PrinterBuilder::new()
+    }
+}
+
+impl PrinterBuilder {
+    fn new() -> PrinterBuilder {
+        PrinterBuilder {
+            _priv: (),
+        }
+    }
+
+    fn build(&self) -> Printer {
+        Printer {
+            _priv: (),
+        }
+    }
+}
+
+/// A printer for a regular expression's high-level intermediate
+/// representation.
+///
+/// A printer converts a high-level intermediate representation (HIR) to a
+/// regular expression pattern string. This particular printer uses constant
+/// stack space and heap space proportional to the size of the HIR.
+///
+/// Since this printer is only using the HIR, the pattern it prints will likely
+/// not resemble the original pattern at all. For example, a pattern like
+/// `\pL` will have its entire class written out.
+///
+/// The purpose of this printer is to provide a means to mutate an HIR and then
+/// build a regular expression from the result of that mutation. (A regex
+/// library could provide a constructor from this HIR explicitly, but that
+/// creates an unnecessary public coupling between the regex library and this
+/// specific HIR representation.)
+#[derive(Debug)]
+pub struct Printer {
+    _priv: (),
+}
+
+impl Printer {
+    /// Create a new printer.
+    pub fn new() -> Printer {
+        PrinterBuilder::new().build()
+    }
+
+    /// Print the given `Hir` to the given writer. The writer must implement
+    /// `fmt::Write`. Typical implementations of `fmt::Write` that can be used
+    /// here are a `fmt::Formatter` (which is available in `fmt::Display`
+    /// implementations) or a `&mut String`.
+    pub fn print<W: fmt::Write>(&mut self, hir: &Hir, wtr: W) -> fmt::Result {
+        visitor::visit(hir, Writer { printer: self, wtr: wtr })
+    }
+}
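+
+// An illustrative usage sketch, relying only on the crate-root `ParserBuilder`
+// and the `Printer` above (both exercised by the tests at the bottom of this
+// file): parse a pattern into an `Hir`, print it, and check that the printed
+// pattern is a fixed point of parse-then-print. The module and test names are
+// illustrative only.
+#[cfg(test)]
+mod usage_sketch {
+    use ParserBuilder;
+    use super::Printer;
+
+    #[test]
+    fn parse_print_fixed_point() {
+        // Parse the original pattern into its HIR.
+        let hir = ParserBuilder::new().build().parse(r"(?i)a|b{2,4}").unwrap();
+
+        // Print the HIR back into an equivalent pattern string.
+        let mut first = String::new();
+        Printer::new().print(&hir, &mut first).unwrap();
+
+        // Re-parsing and re-printing the printed pattern yields the same
+        // string, which is the property the printer is meant to provide.
+        let reparsed = ParserBuilder::new().build().parse(&first).unwrap();
+        let mut second = String::new();
+        Printer::new().print(&reparsed, &mut second).unwrap();
+        assert_eq!(first, second);
+    }
+}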
+
+#[derive(Debug)]
+struct Writer<'p, W> {
+    printer: &'p mut Printer,
+    wtr: W,
+}
+
+impl<'p, W: fmt::Write> Visitor for Writer<'p, W> {
+    type Output = ();
+    type Err = fmt::Error;
+
+    fn finish(self) -> fmt::Result {
+        Ok(())
+    }
+
+    fn visit_pre(&mut self, hir: &Hir) -> fmt::Result {
+        match *hir.kind() {
+            HirKind::Empty
+            | HirKind::Repetition(_)
+            | HirKind::Concat(_)
+            | HirKind::Alternation(_) => {}
+            HirKind::Literal(hir::Literal::Unicode(c)) => {
+                self.write_literal_char(c)?;
+            }
+            HirKind::Literal(hir::Literal::Byte(b)) => {
+                self.write_literal_byte(b)?;
+            }
+            HirKind::Class(hir::Class::Unicode(ref cls)) => {
+                self.wtr.write_str("[")?;
+                for range in cls.iter() {
+                    if range.start() == range.end() {
+                        self.write_literal_char(range.start())?;
+                    } else {
+                        self.write_literal_char(range.start())?;
+                        self.wtr.write_str("-")?;
+                        self.write_literal_char(range.end())?;
+                    }
+                }
+                self.wtr.write_str("]")?;
+            }
+            HirKind::Class(hir::Class::Bytes(ref cls)) => {
+                self.wtr.write_str("(?-u:[")?;
+                for range in cls.iter() {
+                    if range.start() == range.end() {
+                        self.write_literal_class_byte(range.start())?;
+                    } else {
+                        self.write_literal_class_byte(range.start())?;
+                        self.wtr.write_str("-")?;
+                        self.write_literal_class_byte(range.end())?;
+                    }
+                }
+                self.wtr.write_str("])")?;
+            }
+            HirKind::Anchor(hir::Anchor::StartLine) => {
+                self.wtr.write_str("(?m:^)")?;
+            }
+            HirKind::Anchor(hir::Anchor::EndLine) => {
+                self.wtr.write_str("(?m:$)")?;
+            }
+            HirKind::Anchor(hir::Anchor::StartText) => {
+                self.wtr.write_str(r"\A")?;
+            }
+            HirKind::Anchor(hir::Anchor::EndText) => {
+                self.wtr.write_str(r"\z")?;
+            }
+            HirKind::WordBoundary(hir::WordBoundary::Unicode) => {
+                self.wtr.write_str(r"\b")?;
+            }
+            HirKind::WordBoundary(hir::WordBoundary::UnicodeNegate) => {
+                self.wtr.write_str(r"\B")?;
+            }
+            HirKind::WordBoundary(hir::WordBoundary::Ascii) => {
+                self.wtr.write_str(r"(?-u:\b)")?;
+            }
+            HirKind::WordBoundary(hir::WordBoundary::AsciiNegate) => {
+                self.wtr.write_str(r"(?-u:\B)")?;
+            }
+            HirKind::Group(ref x) => {
+                match x.kind {
+                    hir::GroupKind::CaptureIndex(_) => {
+                        self.wtr.write_str("(")?;
+                    }
+                    hir::GroupKind::CaptureName { ref name, .. } => {
+                        write!(self.wtr, "(?P<{}>", name)?;
+                    }
+                    hir::GroupKind::NonCapturing => {
+                        self.wtr.write_str("(?:")?;
+                    }
+                }
+            }
+        }
+        Ok(())
+    }
+
+    fn visit_post(&mut self, hir: &Hir) -> fmt::Result {
+        match *hir.kind() {
+            // Handled during visit_pre
+            HirKind::Empty
+            | HirKind::Literal(_)
+            | HirKind::Class(_)
+            | HirKind::Anchor(_)
+            | HirKind::WordBoundary(_)
+            | HirKind::Concat(_)
+            | HirKind::Alternation(_) => {}
+            HirKind::Repetition(ref x) => {
+                match x.kind {
+                    hir::RepetitionKind::ZeroOrOne => {
+                        self.wtr.write_str("?")?;
+                    }
+                    hir::RepetitionKind::ZeroOrMore => {
+                        self.wtr.write_str("*")?;
+                    }
+                    hir::RepetitionKind::OneOrMore => {
+                        self.wtr.write_str("+")?;
+                    }
+                    hir::RepetitionKind::Range(ref x) => {
+                        match *x {
+                            hir::RepetitionRange::Exactly(m) => {
+                                write!(self.wtr, "{{{}}}", m)?;
+                            }
+                            hir::RepetitionRange::AtLeast(m) => {
+                                write!(self.wtr, "{{{},}}", m)?;
+                            }
+                            hir::RepetitionRange::Bounded(m, n) => {
+                                write!(self.wtr, "{{{},{}}}", m, n)?;
+                            }
+                        }
+                    }
+                }
+                if !x.greedy {
+                    self.wtr.write_str("?")?;
+                }
+            }
+            HirKind::Group(_) => {
+                self.wtr.write_str(")")?;
+            }
+        }
+        Ok(())
+    }
+
+    fn visit_alternation_in(&mut self) -> fmt::Result {
+        self.wtr.write_str("|")
+    }
+}
+
+impl<'p, W: fmt::Write> Writer<'p, W> {
+    fn write_literal_char(&mut self, c: char) -> fmt::Result {
+        if is_meta_character(c) {
+            self.wtr.write_str("\\")?;
+        }
+        self.wtr.write_char(c)
+    }
+
+    fn write_literal_byte(&mut self, b: u8) -> fmt::Result {
+        let c = b as char;
+        if c <= 0x7F as char && !c.is_control() && !c.is_whitespace() {
+            self.wtr.write_char(c)
+        } else {
+            write!(self.wtr, "(?-u:\\x{:02X})", b)
+        }
+    }
+
+    fn write_literal_class_byte(&mut self, b: u8) -> fmt::Result {
+        let c = b as char;
+        if c <= 0x7F as char && !c.is_control() && !c.is_whitespace() {
+            self.wtr.write_char(c)
+        } else {
+            write!(self.wtr, "\\x{:02X}", b)
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use ParserBuilder;
+    use super::Printer;
+
+    fn roundtrip(given: &str, expected: &str) {
+        roundtrip_with(|b| b, given, expected);
+    }
+
+    fn roundtrip_bytes(given: &str, expected: &str) {
+        roundtrip_with(|b| b.allow_invalid_utf8(true), given, expected);
+    }
+
+    fn roundtrip_with<F>(mut f: F, given: &str, expected: &str)
+    where F: FnMut(&mut ParserBuilder) -> &mut ParserBuilder
+    {
+        let mut builder = ParserBuilder::new();
+        f(&mut builder);
+        let hir = builder.build().parse(given).unwrap();
+
+        let mut printer = Printer::new();
+        let mut dst = String::new();
+        printer.print(&hir, &mut dst).unwrap();
+        assert_eq!(expected, dst);
+    }
+
+    #[test]
+    fn print_literal() {
+        roundtrip("a", "a");
+        roundtrip(r"\xff", "\u{FF}");
+        roundtrip_bytes(r"\xff", "\u{FF}");
+        roundtrip_bytes(r"(?-u)\xff", r"(?-u:\xFF)");
+        roundtrip("☃", "☃");
+    }
+
+    #[test]
+    fn print_class() {
+        roundtrip(r"[a]", r"[a]");
+        roundtrip(r"[a-z]", r"[a-z]");
+        roundtrip(r"[a-z--b-c--x-y]", r"[ad-wz]");
+        roundtrip(r"[^\x01-\u{10FFFF}]", "[\u{0}]");
+        roundtrip(r"[-]", r"[\-]");
+        roundtrip(r"[☃-⛄]", r"[☃-⛄]");
+
+        roundtrip(r"(?-u)[a]", r"(?-u:[a])");
+        roundtrip(r"(?-u)[a-z]", r"(?-u:[a-z])");
+        roundtrip_bytes(r"(?-u)[a-\xFF]", r"(?-u:[a-\xFF])");
+    }
+
+    #[test]
+    fn print_anchor() {
+        roundtrip(r"^", r"\A");
+        roundtrip(r"$", r"\z");
+        roundtrip(r"(?m)^", r"(?m:^)");
+        roundtrip(r"(?m)$", r"(?m:$)");
+    }
+
+    #[test]
+    fn print_word_boundary() {
+        roundtrip(r"\b", r"\b");
+        roundtrip(r"\B", r"\B");
+        roundtrip(r"(?-u)\b", r"(?-u:\b)");
+        roundtrip_bytes(r"(?-u)\B", r"(?-u:\B)");
+    }
+
+    #[test]
+    fn print_repetition() {
+        roundtrip("a?", "a?");
+        roundtrip("a??", "a??");
+        roundtrip("(?U)a?", "a??");
+
+        roundtrip("a*", "a*");
+        roundtrip("a*?", "a*?");
+        roundtrip("(?U)a*", "a*?");
+
+        roundtrip("a+", "a+");
+        roundtrip("a+?", "a+?");
+        roundtrip("(?U)a+", "a+?");
+
+        roundtrip("a{1}", "a{1}");
+        roundtrip("a{1,}", "a{1,}");
+        roundtrip("a{1,5}", "a{1,5}");
+        roundtrip("a{1}?", "a{1}?");
+        roundtrip("a{1,}?", "a{1,}?");
+        roundtrip("a{1,5}?", "a{1,5}?");
+        roundtrip("(?U)a{1}", "a{1}?");
+        roundtrip("(?U)a{1,}", "a{1,}?");
+        roundtrip("(?U)a{1,5}", "a{1,5}?");
+    }
+
+    #[test]
+    fn print_group() {
+        roundtrip("()", "()");
+        roundtrip("(?P<foo>)", "(?P<foo>)");
+        roundtrip("(?:)", "(?:)");
+
+        roundtrip("(a)", "(a)");
+        roundtrip("(?P<foo>a)", "(?P<foo>a)");
+        roundtrip("(?:a)", "(?:a)");
+
+        roundtrip("((((a))))", "((((a))))");
+    }
+
+    #[test]
+    fn print_alternation() {
+        roundtrip("|", "|");
+        roundtrip("||", "||");
+
+        roundtrip("a|b", "a|b");
+        roundtrip("a|b|c", "a|b|c");
+        roundtrip("foo|bar|quux", "foo|bar|quux");
+    }
+}
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-syntax/src/hir/translate.rs
@@ -0,0 +1,2532 @@
+// Copyright 2018 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+/*!
+Defines a translator that converts an `Ast` to an `Hir`.
+*/
+
+use std::cell::{Cell, RefCell};
+use std::result;
+
+use ast::{self, Ast, Span, Visitor};
+use hir::{self, Error, ErrorKind, Hir};
+use unicode::{self, ClassQuery};
+
+type Result<T> = result::Result<T, Error>;
+
+/// A builder for constructing an AST->HIR translator.
+#[derive(Clone, Debug)]
+pub struct TranslatorBuilder {
+    allow_invalid_utf8: bool,
+    flags: Flags,
+}
+
+impl Default for TranslatorBuilder {
+    fn default() -> TranslatorBuilder {
+        TranslatorBuilder::new()
+    }
+}
+
+impl TranslatorBuilder {
+    /// Create a new translator builder with a default configuration.
+    pub fn new() -> TranslatorBuilder {
+        TranslatorBuilder {
+            allow_invalid_utf8: false,
+            flags: Flags::default(),
+        }
+    }
+
+    /// Build a translator using the current configuration.
+    pub fn build(&self) -> Translator {
+        Translator {
+            stack: RefCell::new(vec![]),
+            flags: Cell::new(self.flags),
+            allow_invalid_utf8: self.allow_invalid_utf8,
+        }
+    }
+
+    /// When enabled, translation will permit the construction of a regular
+    /// expression that may match invalid UTF-8.
+    ///
+    /// When disabled (the default), the translator is guaranteed to produce
+    /// an expression that will only ever match valid UTF-8 (otherwise, the
+    /// translator will return an error).
+    ///
+    /// Note that currently, even when invalid UTF-8 is banned, the translator
+    /// will permit a negated ASCII word boundary (i.e., `(?-u:\B)`) even
+    /// though it can actually match at invalid UTF-8 boundaries. This bug
+    /// will be fixed on the next semver release.
+    pub fn allow_invalid_utf8(
+        &mut self,
+        yes: bool,
+    ) -> &mut TranslatorBuilder {
+        self.allow_invalid_utf8 = yes;
+        self
+    }
+
+    /// Enable or disable the case insensitive flag (`i`) by default.
+    pub fn case_insensitive(&mut self, yes: bool) -> &mut TranslatorBuilder {
+        self.flags.case_insensitive = if yes { Some(true) } else { None };
+        self
+    }
+
+    /// Enable or disable the multi-line matching flag (`m`) by default.
+    pub fn multi_line(&mut self, yes: bool) -> &mut TranslatorBuilder {
+        self.flags.multi_line = if yes { Some(true) } else { None };
+        self
+    }
+
+    /// Enable or disable the "dot matches any character" flag (`s`) by
+    /// default.
+    pub fn dot_matches_new_line(
+        &mut self,
+        yes: bool,
+    ) -> &mut TranslatorBuilder {
+        self.flags.dot_matches_new_line = if yes { Some(true) } else { None };
+        self
+    }
+
+    /// Enable or disable the "swap greed" flag (`U`) by default.
+    pub fn swap_greed(&mut self, yes: bool) -> &mut TranslatorBuilder {
+        self.flags.swap_greed = if yes { Some(true) } else { None };
+        self
+    }
+
+    /// Enable or disable the Unicode flag (`u`) by default.
+    pub fn unicode(&mut self, yes: bool) -> &mut TranslatorBuilder {
+        self.flags.unicode = if yes { None } else { Some(false) };
+        self
+    }
+}
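+
+// An illustrative configuration sketch. It assumes the crate's
+// `ast::parse::Parser` for producing an `Ast`; the builder methods themselves
+// are the ones documented above. With `allow_invalid_utf8` enabled, a
+// non-ASCII byte literal such as `(?-u)\xFF` translates successfully. The
+// module and test names are illustrative only.
+#[cfg(test)]
+mod builder_usage_sketch {
+    use ast::parse::Parser;
+    use super::TranslatorBuilder;
+
+    #[test]
+    fn configured_translator_accepts_non_ascii_byte() {
+        let mut trans = TranslatorBuilder::new()
+            .allow_invalid_utf8(true)
+            .case_insensitive(true)
+            .build();
+        let pattern = r"(?-u)\xFF";
+        let ast = Parser::new().parse(pattern).unwrap();
+        // The resulting HIR may match invalid UTF-8, which is exactly what
+        // `allow_invalid_utf8(true)` permits.
+        assert!(trans.translate(pattern, &ast).is_ok());
+    }
+}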
+
+/// A translator maps abstract syntax to a high level intermediate
+/// representation.
+///
+/// A translator may benefit from reuse. That is, a translator can translate
+/// many abstract syntax trees.
+///
+/// A `Translator` can be configured in more detail via a
+/// [`TranslatorBuilder`](struct.TranslatorBuilder.html).
+#[derive(Clone, Debug)]
+pub struct Translator {
+    /// Our call stack, but on the heap.
+    stack: RefCell<Vec<HirFrame>>,
+    /// The current flag settings.
+    flags: Cell<Flags>,
+    /// Whether we're allowed to produce HIR that can match arbitrary bytes.
+    allow_invalid_utf8: bool,
+}
+
+impl Translator {
+    /// Create a new translator using the default configuration.
+    pub fn new() -> Translator {
+        TranslatorBuilder::new().build()
+    }
+
+    /// Translate the given abstract syntax tree (AST) into a high level
+    /// intermediate representation (HIR).
+    ///
+    /// If there was a problem doing the translation, then an HIR-specific
+    /// error is returned.
+    ///
+    /// The original pattern string used to produce the `Ast` *must* also be
+    /// provided. The translator does not use the pattern string during any
+    /// correct translation; it is only used for error reporting.
+    pub fn translate(&mut self, pattern: &str, ast: &Ast) -> Result<Hir> {
+        ast::visit(ast, TranslatorI::new(self, pattern))
+    }
+}
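+
+// A minimal reuse sketch (again assuming the crate's `ast::parse::Parser`):
+// a single `Translator` can translate several independently parsed ASTs, as
+// the documentation above states. The module and test names are illustrative
+// only.
+#[cfg(test)]
+mod translator_reuse_sketch {
+    use ast::parse::Parser;
+    use super::Translator;
+
+    #[test]
+    fn one_translator_many_asts() {
+        let mut trans = Translator::new();
+        for &pattern in ["a+b", "(?i)foo|bar"].iter() {
+            let ast = Parser::new().parse(pattern).unwrap();
+            let hir = trans.translate(pattern, &ast).unwrap();
+            assert!(!hir.kind().is_empty());
+        }
+    }
+}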
+
+/// An HirFrame is a single stack frame, represented explicitly, which is
+/// created for each item in the Ast that we traverse.
+///
+/// Note that technically, this type doesn't represent our entire stack
+/// frame. In particular, the Ast visitor represents any state associated with
+/// traversing the Ast itself.
+#[derive(Clone, Debug)]
+enum HirFrame {
+    /// An arbitrary HIR expression. These get pushed whenever we hit a base
+    /// case in the Ast. They get popped after an inductive (i.e., recursive)
+    /// step is complete.
+    Expr(Hir),
+    /// A Unicode character class. This frame is mutated as we descend into
+    /// the Ast of a character class (which is itself its own mini recursive
+    /// structure).
+    ClassUnicode(hir::ClassUnicode),
+    /// A byte-oriented character class. This frame is mutated as we descend
+    /// into the Ast of a character class (which is itself its own mini
+    /// recursive structure).
+    ///
+    /// Byte character classes are created when Unicode mode (`u`) is disabled.
+    /// If `allow_invalid_utf8` is disabled (the default), then a byte
+    /// character class is only permitted to match ASCII text.
+    ClassBytes(hir::ClassBytes),
+    /// This is pushed on to the stack upon first seeing any kind of group,
+    /// indicated by parentheses (including non-capturing groups). It is popped
+    /// upon leaving a group.
+    Group {
+        /// The old active flags, if any, when this group was opened.
+        ///
+        /// If this group sets flags, then the new active flags are set to the
+        /// result of merging the old flags with the flags introduced by this
+        /// group.
+        ///
+        /// When this group is popped, the active flags should be restored to
+        /// the flags set here.
+        ///
+        /// The "active" flags correspond to whatever flags are set in the
+        /// Translator.
+        old_flags: Option<Flags>,
+    },
+    /// This is pushed whenever a concatenation is observed. After visiting
+    /// every sub-expression in the concatenation, the translator's stack is
+    /// popped until it sees a Concat frame.
+    Concat,
+    /// This is pushed whenever an alternation is observed. After visiting
+    /// every sub-expression in the alternation, the translator's stack is
+    /// popped until it sees an Alternation frame.
+    Alternation,
+}
+
+impl HirFrame {
+    /// Assert that the current stack frame is an Hir expression and return it.
+    fn unwrap_expr(self) -> Hir {
+        match self {
+            HirFrame::Expr(expr) => expr,
+            _ => panic!("tried to unwrap expr from HirFrame, got: {:?}", self)
+        }
+    }
+
+    /// Assert that the current stack frame is a Unicode class expression and
+    /// return it.
+    fn unwrap_class_unicode(self) -> hir::ClassUnicode {
+        match self {
+            HirFrame::ClassUnicode(cls) => cls,
+            _ => panic!("tried to unwrap Unicode class \
+                         from HirFrame, got: {:?}", self)
+        }
+    }
+
+    /// Assert that the current stack frame is a byte class expression and
+    /// return it.
+    fn unwrap_class_bytes(self) -> hir::ClassBytes {
+        match self {
+            HirFrame::ClassBytes(cls) => cls,
+            _ => panic!("tried to unwrap byte class \
+                         from HirFrame, got: {:?}", self)
+        }
+    }
+
+    /// Assert that the current stack frame is a group indicator and return
+    /// its corresponding flags (the flags that were active at the time the
+    /// group was entered) if they exist.
+    fn unwrap_group(self) -> Option<Flags> {
+        match self {
+            HirFrame::Group { old_flags } => old_flags,
+            _ => panic!("tried to unwrap group from HirFrame, got: {:?}", self)
+        }
+    }
+}
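+
+// A small sketch of the frame accessors used by the visitor below: each
+// `unwrap_*` helper asserts the frame variant and hands back its payload.
+// The module and test names are illustrative only.
+#[cfg(test)]
+mod hir_frame_sketch {
+    use hir::Hir;
+    use super::HirFrame;
+
+    #[test]
+    fn unwrap_accessors_return_payloads() {
+        // An `Expr` frame yields the wrapped HIR expression.
+        let expr = HirFrame::Expr(Hir::empty()).unwrap_expr();
+        assert!(expr.kind().is_empty());
+        // A `Group` frame yields the flags that were active when it opened.
+        assert!(HirFrame::Group { old_flags: None }.unwrap_group().is_none());
+    }
+}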
+
+impl<'t, 'p> Visitor for TranslatorI<'t, 'p> {
+    type Output = Hir;
+    type Err = Error;
+
+    fn finish(self) -> Result<Hir> {
+        if self.trans().stack.borrow().is_empty() {
+            // This can happen if the given Ast consists of a single set of
+            // flags, e.g., `(?i)`.
+            return Ok(Hir::empty());
+        }
+        // ... otherwise, we should have exactly one HIR on the stack.
+        assert_eq!(self.trans().stack.borrow().len(), 1);
+        Ok(self.pop().unwrap().unwrap_expr())
+    }
+
+    fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
+        match *ast {
+            Ast::Class(ast::Class::Bracketed(_)) => {
+                if self.flags().unicode() {
+                    let cls = hir::ClassUnicode::empty();
+                    self.push(HirFrame::ClassUnicode(cls));
+                } else {
+                    let cls = hir::ClassBytes::empty();
+                    self.push(HirFrame::ClassBytes(cls));
+                }
+            }
+            Ast::Group(ref x) => {
+                let old_flags = x.flags().map(|ast| self.set_flags(ast));
+                self.push(HirFrame::Group {
+                    old_flags: old_flags,
+                });
+            }
+            Ast::Concat(ref x) if x.asts.is_empty() => {}
+            Ast::Concat(_) => {
+                self.push(HirFrame::Concat);
+            }
+            Ast::Alternation(ref x) if x.asts.is_empty() => {}
+            Ast::Alternation(_) => {
+                self.push(HirFrame::Alternation);
+            }
+            _ => {}
+        }
+        Ok(())
+    }
+
+    fn visit_post(&mut self, ast: &Ast) -> Result<()> {
+        match *ast {
+            Ast::Empty(_) => {
+                self.push(HirFrame::Expr(Hir::empty()));
+            }
+            Ast::Flags(ref x) => {
+                self.set_flags(&x.flags);
+            }
+            Ast::Literal(ref x) => {
+                self.push(HirFrame::Expr(self.hir_literal(x)?));
+            }
+            Ast::Dot(span) => {
+                self.push(HirFrame::Expr(self.hir_dot(span)?));
+            }
+            Ast::Assertion(ref x) => {
+                self.push(HirFrame::Expr(self.hir_assertion(x)?));
+            }
+            Ast::Class(ast::Class::Perl(ref x)) => {
+                if self.flags().unicode() {
+                    let cls = self.hir_perl_unicode_class(x);
+                    let hcls = hir::Class::Unicode(cls);
+                    self.push(HirFrame::Expr(Hir::class(hcls)));
+                } else {
+                    let cls = self.hir_perl_byte_class(x);
+                    let hcls = hir::Class::Bytes(cls);
+                    self.push(HirFrame::Expr(Hir::class(hcls)));
+                }
+            }
+            Ast::Class(ast::Class::Unicode(ref x)) => {
+                let cls = hir::Class::Unicode(self.hir_unicode_class(x)?);
+                self.push(HirFrame::Expr(Hir::class(cls)));
+            }
+            Ast::Class(ast::Class::Bracketed(ref ast)) => {
+                if self.flags().unicode() {
+                    let mut cls = self.pop().unwrap().unwrap_class_unicode();
+                    self.unicode_fold_and_negate(ast.negated, &mut cls);
+                    if cls.iter().next().is_none() {
+                        return Err(self.error(
+                            ast.span, ErrorKind::EmptyClassNotAllowed));
+                    }
+                    let expr = Hir::class(hir::Class::Unicode(cls));
+                    self.push(HirFrame::Expr(expr));
+                } else {
+                    let mut cls = self.pop().unwrap().unwrap_class_bytes();
+                    self.bytes_fold_and_negate(
+                        &ast.span, ast.negated, &mut cls)?;
+                    if cls.iter().next().is_none() {
+                        return Err(self.error(
+                            ast.span, ErrorKind::EmptyClassNotAllowed));
+                    }
+
+                    let expr = Hir::class(hir::Class::Bytes(cls));
+                    self.push(HirFrame::Expr(expr));
+                }
+            }
+            Ast::Repetition(ref x) => {
+                let expr = self.pop().unwrap().unwrap_expr();
+                self.push(HirFrame::Expr(self.hir_repetition(x, expr)));
+            }
+            Ast::Group(ref x) => {
+                let expr = self.pop().unwrap().unwrap_expr();
+                if let Some(flags) = self.pop().unwrap().unwrap_group() {
+                    self.trans().flags.set(flags);
+                }
+                self.push(HirFrame::Expr(self.hir_group(x, expr)));
+            }
+            Ast::Concat(_) => {
+                let mut exprs = vec![];
+                while let Some(HirFrame::Expr(expr)) = self.pop() {
+                    if !expr.kind().is_empty() {
+                        exprs.push(expr);
+                    }
+                }
+                exprs.reverse();
+                self.push(HirFrame::Expr(Hir::concat(exprs)));
+            }
+            Ast::Alternation(_) => {
+                let mut exprs = vec![];
+                while let Some(HirFrame::Expr(expr)) = self.pop() {
+                    exprs.push(expr);
+                }
+                exprs.reverse();
+                self.push(HirFrame::Expr(Hir::alternation(exprs)));
+            }
+        }
+        Ok(())
+    }
+
+    fn visit_class_set_item_pre(
+        &mut self,
+        ast: &ast::ClassSetItem,
+    ) -> Result<()> {
+        match *ast {
+            ast::ClassSetItem::Bracketed(_) => {
+                if self.flags().unicode() {
+                    let cls = hir::ClassUnicode::empty();
+                    self.push(HirFrame::ClassUnicode(cls));
+                } else {
+                    let cls = hir::ClassBytes::empty();
+                    self.push(HirFrame::ClassBytes(cls));
+                }
+            }
+            // We needn't handle the Union case here since the visitor will
+            // do it for us.
+            _ => {}
+        }
+        Ok(())
+    }
+
+    fn visit_class_set_item_post(
+        &mut self,
+        ast: &ast::ClassSetItem,
+    ) -> Result<()> {
+        match *ast {
+            ast::ClassSetItem::Empty(_) => {}
+            ast::ClassSetItem::Literal(ref x) => {
+                if self.flags().unicode() {
+                    let mut cls = self.pop().unwrap().unwrap_class_unicode();
+                    cls.push(hir::ClassUnicodeRange::new(x.c, x.c));
+                    self.push(HirFrame::ClassUnicode(cls));
+                } else {
+                    let mut cls = self.pop().unwrap().unwrap_class_bytes();
+                    let byte = self.class_literal_byte(x)?;
+                    cls.push(hir::ClassBytesRange::new(byte, byte));
+                    self.push(HirFrame::ClassBytes(cls));
+                }
+            }
+            ast::ClassSetItem::Range(ref x) => {
+                if self.flags().unicode() {
+                    let mut cls = self.pop().unwrap().unwrap_class_unicode();
+                    cls.push(hir::ClassUnicodeRange::new(x.start.c, x.end.c));
+                    self.push(HirFrame::ClassUnicode(cls));
+                } else {
+                    let mut cls = self.pop().unwrap().unwrap_class_bytes();
+                    let start = self.class_literal_byte(&x.start)?;
+                    let end = self.class_literal_byte(&x.end)?;
+                    cls.push(hir::ClassBytesRange::new(start, end));
+                    self.push(HirFrame::ClassBytes(cls));
+                }
+            }
+            ast::ClassSetItem::Ascii(ref x) => {
+                if self.flags().unicode() {
+                    let mut cls = self.pop().unwrap().unwrap_class_unicode();
+                    for &(s, e) in ascii_class(&x.kind) {
+                        cls.push(hir::ClassUnicodeRange::new(s, e));
+                    }
+                    self.unicode_fold_and_negate(x.negated, &mut cls);
+                    self.push(HirFrame::ClassUnicode(cls));
+                } else {
+                    let mut cls = self.pop().unwrap().unwrap_class_bytes();
+                    for &(s, e) in ascii_class(&x.kind) {
+                        cls.push(hir::ClassBytesRange::new(s as u8, e as u8));
+                    }
+                    self.bytes_fold_and_negate(
+                        &x.span, x.negated, &mut cls)?;
+                    self.push(HirFrame::ClassBytes(cls));
+                }
+            }
+            ast::ClassSetItem::Unicode(ref x) => {
+                let xcls = self.hir_unicode_class(x)?;
+                let mut cls = self.pop().unwrap().unwrap_class_unicode();
+                cls.union(&xcls);
+                self.push(HirFrame::ClassUnicode(cls));
+            }
+            ast::ClassSetItem::Perl(ref x) => {
+                if self.flags().unicode() {
+                    let xcls = self.hir_perl_unicode_class(x);
+                    let mut cls = self.pop().unwrap().unwrap_class_unicode();
+                    cls.union(&xcls);
+                    self.push(HirFrame::ClassUnicode(cls));
+                } else {
+                    let xcls = self.hir_perl_byte_class(x);
+                    let mut cls = self.pop().unwrap().unwrap_class_bytes();
+                    cls.union(&xcls);
+                    self.push(HirFrame::ClassBytes(cls));
+                }
+            }
+            ast::ClassSetItem::Bracketed(ref ast) => {
+                if self.flags().unicode() {
+                    let mut cls1 = self.pop().unwrap().unwrap_class_unicode();
+                    self.unicode_fold_and_negate(ast.negated, &mut cls1);
+
+                    let mut cls2 = self.pop().unwrap().unwrap_class_unicode();
+                    cls2.union(&cls1);
+                    self.push(HirFrame::ClassUnicode(cls2));
+                } else {
+                    let mut cls1 = self.pop().unwrap().unwrap_class_bytes();
+                    self.bytes_fold_and_negate(
+                        &ast.span, ast.negated, &mut cls1)?;
+
+                    let mut cls2 = self.pop().unwrap().unwrap_class_bytes();
+                    cls2.union(&cls1);
+                    self.push(HirFrame::ClassBytes(cls2));
+                }
+            }
+            // This is handled automatically by the visitor.
+            ast::ClassSetItem::Union(_) => {}
+        }
+        Ok(())
+    }
+
+    fn visit_class_set_binary_op_pre(
+        &mut self,
+        _op: &ast::ClassSetBinaryOp,
+    ) -> Result<()> {
+        if self.flags().unicode() {
+            let cls = hir::ClassUnicode::empty();
+            self.push(HirFrame::ClassUnicode(cls));
+        } else {
+            let cls = hir::ClassBytes::empty();
+            self.push(HirFrame::ClassBytes(cls));
+        }
+        Ok(())
+    }
+
+    fn visit_class_set_binary_op_in(
+        &mut self,
+        _op: &ast::ClassSetBinaryOp,
+    ) -> Result<()> {
+        if self.flags().unicode() {
+            let cls = hir::ClassUnicode::empty();
+            self.push(HirFrame::ClassUnicode(cls));
+        } else {
+            let cls = hir::ClassBytes::empty();
+            self.push(HirFrame::ClassBytes(cls));
+        }
+        Ok(())
+    }
+
+    fn visit_class_set_binary_op_post(
+        &mut self,
+        op: &ast::ClassSetBinaryOp,
+    ) -> Result<()> {
+        use ast::ClassSetBinaryOpKind::*;
+
+        if self.flags().unicode() {
+            let mut rhs = self.pop().unwrap().unwrap_class_unicode();
+            let mut lhs = self.pop().unwrap().unwrap_class_unicode();
+            let mut cls = self.pop().unwrap().unwrap_class_unicode();
+            if self.flags().case_insensitive() {
+                rhs.case_fold_simple();
+                lhs.case_fold_simple();
+            }
+            match op.kind {
+                Intersection => lhs.intersect(&rhs),
+                Difference => lhs.difference(&rhs),
+                SymmetricDifference => lhs.symmetric_difference(&rhs),
+            }
+            cls.union(&lhs);
+            self.push(HirFrame::ClassUnicode(cls));
+        } else {
+            let mut rhs = self.pop().unwrap().unwrap_class_bytes();
+            let mut lhs = self.pop().unwrap().unwrap_class_bytes();
+            let mut cls = self.pop().unwrap().unwrap_class_bytes();
+            if self.flags().case_insensitive() {
+                rhs.case_fold_simple();
+                lhs.case_fold_simple();
+            }
+            match op.kind {
+                Intersection => lhs.intersect(&rhs),
+                Difference => lhs.difference(&rhs),
+                SymmetricDifference => lhs.symmetric_difference(&rhs),
+            }
+            cls.union(&lhs);
+            self.push(HirFrame::ClassBytes(cls));
+        }
+        Ok(())
+    }
+}
+
+/// The internal implementation of a translator.
+///
+/// This type is responsible for carrying around the original pattern string,
+/// which is not tied to the internal state of a translator.
+///
+/// A TranslatorI exists for the time it takes to translate a single Ast.
+#[derive(Clone, Debug)]
+struct TranslatorI<'t, 'p> {
+    trans: &'t Translator,
+    pattern: &'p str,
+}
+
+impl<'t, 'p> TranslatorI<'t, 'p> {
+    /// Build a new internal translator.
+    fn new(trans: &'t Translator, pattern: &'p str) -> TranslatorI<'t, 'p> {
+        TranslatorI { trans: trans, pattern: pattern }
+    }
+
+    /// Return a reference to the underlying translator.
+    fn trans(&self) -> &Translator {
+        &self.trans
+    }
+
+    /// Push the given frame on to the call stack.
+    fn push(&self, frame: HirFrame) {
+        self.trans().stack.borrow_mut().push(frame);
+    }
+
+    /// Pop the top of the call stack. If the call stack is empty, return None.
+    fn pop(&self) -> Option<HirFrame> {
+        self.trans().stack.borrow_mut().pop()
+    }
+
+    /// Create a new error with the given span and error type.
+    fn error(&self, span: Span, kind: ErrorKind) -> Error {
+        Error { kind: kind, pattern: self.pattern.to_string(), span: span }
+    }
+
+    /// Return a copy of the active flags.
+    fn flags(&self) -> Flags {
+        self.trans().flags.get()
+    }
+
+    /// Set the flags of this translator from the flags set in the given AST.
+    /// Then, return the old flags.
+    fn set_flags(&self, ast_flags: &ast::Flags) -> Flags {
+        let old_flags = self.flags();
+        let mut new_flags = Flags::from_ast(ast_flags);
+        new_flags.merge(&old_flags);
+        self.trans().flags.set(new_flags);
+        old_flags
+    }
+
+    fn hir_literal(&self, lit: &ast::Literal) -> Result<Hir> {
+        let ch = match self.literal_to_char(lit)? {
+            byte @ hir::Literal::Byte(_) => return Ok(Hir::literal(byte)),
+            hir::Literal::Unicode(ch) => ch,
+        };
+        if self.flags().case_insensitive() {
+            self.hir_from_char_case_insensitive(lit.span, ch)
+        } else {
+            self.hir_from_char(lit.span, ch)
+        }
+    }
+
+    /// Convert an Ast literal to its scalar representation.
+    ///
+    /// When Unicode mode is enabled, then this always succeeds and returns a
+    /// `char` (Unicode scalar value).
+    ///
+    /// When Unicode mode is disabled, then a raw byte is returned. If that
+    /// byte is not ASCII and invalid UTF-8 is not allowed, then this returns
+    /// an error.
+    fn literal_to_char(&self, lit: &ast::Literal) -> Result<hir::Literal> {
+        if self.flags().unicode() {
+            return Ok(hir::Literal::Unicode(lit.c));
+        }
+        let byte = match lit.byte() {
+            None => return Ok(hir::Literal::Unicode(lit.c)),
+            Some(byte) => byte,
+        };
+        if byte <= 0x7F {
+            return Ok(hir::Literal::Unicode(byte as char));
+        }
+        if !self.trans().allow_invalid_utf8 {
+            return Err(self.error(lit.span, ErrorKind::InvalidUtf8));
+        }
+        Ok(hir::Literal::Byte(byte))
+    }
+
+    fn hir_from_char(&self, span: Span, c: char) -> Result<Hir> {
+        if !self.flags().unicode() && c.len_utf8() > 1 {
+            return Err(self.error(span, ErrorKind::UnicodeNotAllowed));
+        }
+        Ok(Hir::literal(hir::Literal::Unicode(c)))
+    }
+
+    fn hir_from_char_case_insensitive(
+        &self,
+        span: Span,
+        c: char,
+    ) -> Result<Hir> {
+        // If case folding won't do anything, then don't bother trying.
+        if !unicode::contains_simple_case_mapping(c, c) {
+            return self.hir_from_char(span, c);
+        }
+        if self.flags().unicode() {
+            let mut cls = hir::ClassUnicode::new(vec![
+                hir::ClassUnicodeRange::new(c, c),
+            ]);
+            cls.case_fold_simple();
+            Ok(Hir::class(hir::Class::Unicode(cls)))
+        } else {
+            if c.len_utf8() > 1 {
+                return Err(self.error(span, ErrorKind::UnicodeNotAllowed));
+            }
+            let mut cls = hir::ClassBytes::new(vec![
+                hir::ClassBytesRange::new(c as u8, c as u8),
+            ]);
+            cls.case_fold_simple();
+            Ok(Hir::class(hir::Class::Bytes(cls)))
+        }
+    }
+
+    fn hir_dot(&self, span: Span) -> Result<Hir> {
+        let unicode = self.flags().unicode();
+        if !unicode && !self.trans().allow_invalid_utf8 {
+            return Err(self.error(span, ErrorKind::InvalidUtf8));
+        }
+        Ok(if self.flags().dot_matches_new_line() {
+            Hir::any(!unicode)
+        } else {
+            Hir::dot(!unicode)
+        })
+    }
+
+    fn hir_assertion(&self, asst: &ast::Assertion) -> Result<Hir> {
+        let unicode = self.flags().unicode();
+        let multi_line = self.flags().multi_line();
+        Ok(match asst.kind {
+            ast::AssertionKind::StartLine => {
+                Hir::anchor(if multi_line {
+                    hir::Anchor::StartLine
+                } else {
+                    hir::Anchor::StartText
+                })
+            }
+            ast::AssertionKind::EndLine => {
+                Hir::anchor(if multi_line {
+                    hir::Anchor::EndLine
+                } else {
+                    hir::Anchor::EndText
+                })
+            }
+            ast::AssertionKind::StartText => {
+                Hir::anchor(hir::Anchor::StartText)
+            }
+            ast::AssertionKind::EndText => {
+                Hir::anchor(hir::Anchor::EndText)
+            }
+            ast::AssertionKind::WordBoundary => {
+                Hir::word_boundary(if unicode {
+                    hir::WordBoundary::Unicode
+                } else {
+                    hir::WordBoundary::Ascii
+                })
+            }
+            ast::AssertionKind::NotWordBoundary => {
+                Hir::word_boundary(if unicode {
+                    hir::WordBoundary::UnicodeNegate
+                } else {
+                    // It is possible for negated ASCII word boundaries to
+                    // match at invalid UTF-8 boundaries, even when searching
+                    // valid UTF-8.
+                    if !self.trans().allow_invalid_utf8 {
+                        return Err(self.error(
+                            asst.span, ErrorKind::InvalidUtf8));
+                    }
+                    hir::WordBoundary::AsciiNegate
+                })
+            }
+        })
+    }
+
+    fn hir_group(&self, group: &ast::Group, expr: Hir) -> Hir {
+        let kind = match group.kind {
+            ast::GroupKind::CaptureIndex(idx) => {
+                hir::GroupKind::CaptureIndex(idx)
+            }
+            ast::GroupKind::CaptureName(ref capname) => {
+                hir::GroupKind::CaptureName {
+                    name: capname.name.clone(),
+                    index: capname.index,
+                }
+            }
+            ast::GroupKind::NonCapturing(_) => hir::GroupKind::NonCapturing,
+        };
+        Hir::group(hir::Group {
+            kind: kind,
+            hir: Box::new(expr),
+        })
+    }
+
+    fn hir_repetition(&self, rep: &ast::Repetition, expr: Hir) -> Hir {
+        let kind = match rep.op.kind {
+            ast::RepetitionKind::ZeroOrOne => hir::RepetitionKind::ZeroOrOne,
+            ast::RepetitionKind::ZeroOrMore => hir::RepetitionKind::ZeroOrMore,
+            ast::RepetitionKind::OneOrMore => hir::RepetitionKind::OneOrMore,
+            ast::RepetitionKind::Range(ast::RepetitionRange::Exactly(m)) => {
+                hir::RepetitionKind::Range(hir::RepetitionRange::Exactly(m))
+            }
+            ast::RepetitionKind::Range(ast::RepetitionRange::AtLeast(m)) => {
+                hir::RepetitionKind::Range(hir::RepetitionRange::AtLeast(m))
+            }
+            ast::RepetitionKind::Range(ast::RepetitionRange::Bounded(m,n)) => {
+                hir::RepetitionKind::Range(hir::RepetitionRange::Bounded(m, n))
+            }
+        };
+        let greedy =
+            if self.flags().swap_greed() {
+                !rep.greedy
+            } else {
+                rep.greedy
+            };
+        Hir::repetition(hir::Repetition {
+            kind: kind,
+            greedy: greedy,
+            hir: Box::new(expr),
+        })
+    }
+
+    fn hir_unicode_class(
+        &self,
+        ast_class: &ast::ClassUnicode,
+    ) -> Result<hir::ClassUnicode> {
+        use ast::ClassUnicodeKind::*;
+
+        if !self.flags().unicode() {
+            return Err(self.error(
+                ast_class.span,
+                ErrorKind::UnicodeNotAllowed,
+            ));
+        }
+        let query = match ast_class.kind {
+            OneLetter(name) => ClassQuery::OneLetter(name),
+            Named(ref name) => ClassQuery::Binary(name),
+            NamedValue { ref name, ref value, .. } => {
+                ClassQuery::ByValue {
+                    property_name: name,
+                    property_value: value,
+                }
+            }
+        };
+        match unicode::class(query) {
+            Ok(mut class) => {
+                self.unicode_fold_and_negate(ast_class.negated, &mut class);
+                Ok(class)
+            }
+            Err(unicode::Error::PropertyNotFound) => {
+                Err(self.error(
+                    ast_class.span,
+                    ErrorKind::UnicodePropertyNotFound,
+                ))
+            }
+            Err(unicode::Error::PropertyValueNotFound) => {
+                Err(self.error(
+                    ast_class.span,
+                    ErrorKind::UnicodePropertyValueNotFound,
+                ))
+            }
+        }
+    }
+
+    fn hir_perl_unicode_class(
+        &self,
+        ast_class: &ast::ClassPerl,
+    ) -> hir::ClassUnicode {
+        use ast::ClassPerlKind::*;
+        use unicode_tables::perl_word::PERL_WORD;
+
+        assert!(self.flags().unicode());
+        let mut class = match ast_class.kind {
+            Digit => {
+                let query = ClassQuery::Binary("Decimal_Number");
+                unicode::class(query).unwrap()
+            }
+            Space => {
+                let query = ClassQuery::Binary("Whitespace");
+                unicode::class(query).unwrap()
+            }
+            Word => unicode::hir_class(PERL_WORD),
+        };
+        // We needn't apply case folding here because the Perl Unicode classes
+        // are already closed under Unicode simple case folding.
+        if ast_class.negated {
+            class.negate();
+        }
+        class
+    }
+
+    fn hir_perl_byte_class(
+        &self,
+        ast_class: &ast::ClassPerl,
+    ) -> hir::ClassBytes {
+        use ast::ClassPerlKind::*;
+
+        assert!(!self.flags().unicode());
+        let mut class = match ast_class.kind {
+            Digit => hir_ascii_class_bytes(&ast::ClassAsciiKind::Digit),
+            Space => hir_ascii_class_bytes(&ast::ClassAsciiKind::Space),
+            Word => hir_ascii_class_bytes(&ast::ClassAsciiKind::Word),
+        };
+        // We needn't apply case folding here because the Perl ASCII classes
+        // are already closed (under ASCII case folding).
+        if ast_class.negated {
+            class.negate();
+        }
+        class
+    }
+
+    fn unicode_fold_and_negate(
+        &self,
+        negated: bool,
+        class: &mut hir::ClassUnicode,
+    ) {
+        // Note that we must apply case folding before negation!
+        // Consider `(?i)[^x]`. If we applied negation first, then
+        // the result would be the character class that matched any
+        // Unicode scalar value.
+        if self.flags().case_insensitive() {
+            class.case_fold_simple();
+        }
+        if negated {
+            class.negate();
+        }
+    }
+
+    fn bytes_fold_and_negate(
+        &self,
+        span: &Span,
+        negated: bool,
+        class: &mut hir::ClassBytes,
+    ) -> Result<()> {
+        // Note that we must apply case folding before negation!
+        // Consider `(?i)[^x]`. If we applied negation first, then
+        // the result would be the character class that matched any
+        // Unicode scalar value.
+        if self.flags().case_insensitive() {
+            class.case_fold_simple();
+        }
+        if negated {
+            class.negate();
+        }
+        if !self.trans().allow_invalid_utf8 && !class.is_all_ascii() {
+            return Err(self.error(span.clone(), ErrorKind::InvalidUtf8));
+        }
+        Ok(())
+    }
+
+    /// Return a scalar byte value suitable for use as a literal in a byte
+    /// character class.
+    fn class_literal_byte(&self, ast: &ast::Literal) -> Result<u8> {
+        match self.literal_to_char(ast)? {
+            hir::Literal::Byte(byte) => Ok(byte),
+            hir::Literal::Unicode(ch) => {
+                if ch <= 0x7F as char {
+                    Ok(ch as u8)
+                } else {
+                    // We can't feasibly support Unicode in
+                    // byte-oriented classes. Byte classes don't
+                    // do Unicode case folding.
+                    Err(self.error(ast.span, ErrorKind::UnicodeNotAllowed))
+                }
+            }
+        }
+    }
+}
+
+/// A translator's representation of a regular expression's flags at any given
+/// moment in time.
+///
+/// Each flag can be in one of three states: absent, present but disabled, or
+/// present but enabled.
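+///
+/// For example, given the inline flags `(?i-m)`, `case_insensitive` is
+/// `Some(true)`, `multi_line` is `Some(false)` and all remaining flags are
+/// `None`.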
+#[derive(Clone, Copy, Debug, Default)]
+struct Flags {
+    case_insensitive: Option<bool>,
+    multi_line: Option<bool>,
+    dot_matches_new_line: Option<bool>,
+    swap_greed: Option<bool>,
+    unicode: Option<bool>,
+    // Note that `ignore_whitespace` is omitted here because it is handled
+    // entirely in the parser.
+}
+
+impl Flags {
+    fn from_ast(ast: &ast::Flags) -> Flags {
+        let mut flags = Flags::default();
+        let mut enable = true;
+        for item in &ast.items {
+            match item.kind {
+                ast::FlagsItemKind::Negation => {
+                    enable = false;
+                }
+                ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive) => {
+                    flags.case_insensitive = Some(enable);
+                }
+                ast::FlagsItemKind::Flag(ast::Flag::MultiLine) => {
+                    flags.multi_line = Some(enable);
+                }
+                ast::FlagsItemKind::Flag(ast::Flag::DotMatchesNewLine) => {
+                    flags.dot_matches_new_line = Some(enable);
+                }
+                ast::FlagsItemKind::Flag(ast::Flag::SwapGreed) => {
+                    flags.swap_greed = Some(enable);
+                }
+                ast::FlagsItemKind::Flag(ast::Flag::Unicode) => {
+                    flags.unicode = Some(enable);
+                }
+                ast::FlagsItemKind::Flag(ast::Flag::IgnoreWhitespace) => {}
+            }
+        }
+        flags
+    }
+
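+    /// Fill in any flag that is unset on `self` with the corresponding value
+    /// from `previous`, leaving flags that are already set untouched.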
+    fn merge(&mut self, previous: &Flags) {
+        if self.case_insensitive.is_none() {
+            self.case_insensitive = previous.case_insensitive;
+        }
+        if self.multi_line.is_none() {
+            self.multi_line = previous.multi_line;
+        }
+        if self.dot_matches_new_line.is_none() {
+            self.dot_matches_new_line = previous.dot_matches_new_line;
+        }
+        if self.swap_greed.is_none() {
+            self.swap_greed = previous.swap_greed;
+        }
+        if self.unicode.is_none() {
+            self.unicode = previous.unicode;
+        }
+    }
+
+    fn case_insensitive(&self) -> bool {
+        self.case_insensitive.unwrap_or(false)
+    }
+
+    fn multi_line(&self) -> bool {
+        self.multi_line.unwrap_or(false)
+    }
+
+    fn dot_matches_new_line(&self) -> bool {
+        self.dot_matches_new_line.unwrap_or(false)
+    }
+
+    fn swap_greed(&self) -> bool {
+        self.swap_greed.unwrap_or(false)
+    }
+
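+    /// Unlike the other flags, Unicode mode defaults to enabled when the
+    /// flag is absent.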
+    fn unicode(&self) -> bool {
+        self.unicode.unwrap_or(true)
+    }
+}
+
+fn hir_ascii_class_bytes(kind: &ast::ClassAsciiKind) -> hir::ClassBytes {
+    let ranges: Vec<_> = ascii_class(kind).iter().cloned().map(|(s, e)| {
+        hir::ClassBytesRange::new(s as u8, e as u8)
+    }).collect();
+    hir::ClassBytes::new(ranges)
+}
+
+fn ascii_class(kind: &ast::ClassAsciiKind) -> &'static [(char, char)] {
+    use ast::ClassAsciiKind::*;
+
+    // The contortions below with `const` appear necessary for older versions
+    // of Rust.
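+    // (The explicit `const` guarantees a `'static` borrow; compilers that
+    // predate rvalue static promotion will not promote a bare `&[...]`
+    // literal in a match arm to `'static` on their own.)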
+    type T = &'static [(char, char)];
+    match *kind {
+        Alnum => {
+            const X: T = &[('0', '9'), ('A', 'Z'), ('a', 'z')];
+            X
+        }
+        Alpha => {
+            const X: T = &[('A', 'Z'), ('a', 'z')];
+            X
+        }
+        Ascii => {
+            const X: T = &[('\x00', '\x7F')];
+            X
+        }
+        Blank => {
+            const X: T = &[(' ', '\t')];
+            X
+        }
+        Cntrl => {
+            const X: T = &[('\x00', '\x1F'), ('\x7F', '\x7F')];
+            X
+        }
+        Digit => {
+            const X: T = &[('0', '9')];
+            X
+        }
+        Graph => {
+            const X: T = &[('!', '~')];
+            X
+        }
+        Lower => {
+            const X: T = &[('a', 'z')];
+            X
+        }
+        Print => {
+            const X: T = &[(' ', '~')];
+            X
+        }
+        Punct => {
+            const X: T = &[('!', '/'), (':', '@'), ('[', '`'), ('{', '~')];
+            X
+        }
+        Space => {
+            const X: T = &[
+                ('\t', '\t'), ('\n', '\n'), ('\x0B', '\x0B'), ('\x0C', '\x0C'),
+                ('\r', '\r'), (' ', ' '),
+            ];
+            X
+        }
+        Upper => {
+            const X: T = &[('A', 'Z')];
+            X
+        }
+        Word => {
+            const X: T = &[('0', '9'), ('A', 'Z'), ('_', '_'), ('a', 'z')];
+            X
+        }
+        Xdigit => {
+            const X: T = &[('0', '9'), ('A', 'F'), ('a', 'f')];
+            X
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use ast::{self, Ast, Position, Span};
+    use ast::parse::ParserBuilder;
+    use hir::{self, Hir, HirKind};
+    use unicode::{self, ClassQuery};
+
+    use super::{TranslatorBuilder, ascii_class};
+
+    // We create these errors to compare with real hir::Errors in the tests.
+    // We define equality between TestError and hir::Error to disregard the
+    // pattern string in hir::Error, which is annoying to provide in tests.
+    #[derive(Clone, Debug)]
+    struct TestError {
+        span: Span,
+        kind: hir::ErrorKind,
+    }
+
+    impl PartialEq<hir::Error> for TestError {
+        fn eq(&self, other: &hir::Error) -> bool {
+            self.span == other.span && self.kind == other.kind
+        }
+    }
+
+    impl PartialEq<TestError> for hir::Error {
+        fn eq(&self, other: &TestError) -> bool {
+            self.span == other.span && self.kind == other.kind
+        }
+    }
+
+    fn parse(pattern: &str) -> Ast {
+        ParserBuilder::new().octal(true).build().parse(pattern).unwrap()
+    }
+
+    fn t(pattern: &str) -> Hir {
+        TranslatorBuilder::new()
+            .allow_invalid_utf8(false)
+            .build()
+            .translate(pattern, &parse(pattern))
+            .unwrap()
+    }
+
+    fn t_err(pattern: &str) -> hir::Error {
+        TranslatorBuilder::new()
+            .allow_invalid_utf8(false)
+            .build()
+            .translate(pattern, &parse(pattern))
+            .unwrap_err()
+    }
+
+    fn t_bytes(pattern: &str) -> Hir {
+        TranslatorBuilder::new()
+            .allow_invalid_utf8(true)
+            .build()
+            .translate(pattern, &parse(pattern))
+            .unwrap()
+    }
+
+    fn hir_lit(s: &str) -> Hir {
+        match s.len() {
+            0 => Hir::empty(),
+            _ => {
+                let lits = s
+                    .chars()
+                    .map(hir::Literal::Unicode)
+                    .map(Hir::literal)
+                    .collect();
+                Hir::concat(lits)
+            }
+        }
+    }
+
+    fn hir_blit(s: &[u8]) -> Hir {
+        match s.len() {
+            0 => Hir::empty(),
+            1 => Hir::literal(hir::Literal::Byte(s[0])),
+            _ => {
+                let lits = s
+                    .iter()
+                    .cloned()
+                    .map(hir::Literal::Byte)
+                    .map(Hir::literal)
+                    .collect();
+                Hir::concat(lits)
+            }
+        }
+    }
+
+    fn hir_group(i: u32, expr: Hir) -> Hir {
+        Hir::group(hir::Group {
+            kind: hir::GroupKind::CaptureIndex(i),
+            hir: Box::new(expr),
+        })
+    }
+
+    fn hir_group_name(i: u32, name: &str, expr: Hir) -> Hir {
+        Hir::group(hir::Group {
+            kind: hir::GroupKind::CaptureName {
+                name: name.to_string(),
+                index: i,
+            },
+            hir: Box::new(expr),
+        })
+    }
+
+    fn hir_group_nocap(expr: Hir) -> Hir {
+        Hir::group(hir::Group {
+            kind: hir::GroupKind::NonCapturing,
+            hir: Box::new(expr),
+        })
+    }
+
+    fn hir_quest(greedy: bool, expr: Hir) -> Hir {
+        Hir::repetition(hir::Repetition {
+            kind: hir::RepetitionKind::ZeroOrOne,
+            greedy: greedy,
+            hir: Box::new(expr),
+        })
+    }
+
+    fn hir_star(greedy: bool, expr: Hir) -> Hir {
+        Hir::repetition(hir::Repetition {
+            kind: hir::RepetitionKind::ZeroOrMore,
+            greedy: greedy,
+            hir: Box::new(expr),
+        })
+    }
+
+    fn hir_plus(greedy: bool, expr: Hir) -> Hir {
+        Hir::repetition(hir::Repetition {
+            kind: hir::RepetitionKind::OneOrMore,
+            greedy: greedy,
+            hir: Box::new(expr),
+        })
+    }
+
+    fn hir_range(greedy: bool, range: hir::RepetitionRange, expr: Hir) -> Hir {
+        Hir::repetition(hir::Repetition {
+            kind: hir::RepetitionKind::Range(range),
+            greedy: greedy,
+            hir: Box::new(expr),
+        })
+    }
+
+    fn hir_alt(alts: Vec<Hir>) -> Hir {
+        Hir::alternation(alts)
+    }
+
+    fn hir_cat(exprs: Vec<Hir>) -> Hir {
+        Hir::concat(exprs)
+    }
+
+    fn hir_uclass_query(query: ClassQuery) -> Hir {
+        Hir::class(hir::Class::Unicode(unicode::class(query).unwrap()))
+    }
+
+    fn hir_uclass_perl_word() -> Hir {
+        use unicode_tables::perl_word::PERL_WORD;
+        Hir::class(hir::Class::Unicode(unicode::hir_class(PERL_WORD)))
+    }
+
+    fn hir_uclass(ranges: &[(char, char)]) -> Hir {
+        let ranges: Vec<hir::ClassUnicodeRange> = ranges
+            .iter()
+            .map(|&(s, e)| hir::ClassUnicodeRange::new(s, e))
+            .collect();
+        Hir::class(hir::Class::Unicode(hir::ClassUnicode::new(ranges)))
+    }
+
+    fn hir_bclass(ranges: &[(u8, u8)]) -> Hir {
+        let ranges: Vec<hir::ClassBytesRange> = ranges
+            .iter()
+            .map(|&(s, e)| hir::ClassBytesRange::new(s, e))
+            .collect();
+        Hir::class(hir::Class::Bytes(hir::ClassBytes::new(ranges)))
+    }
+
+    fn hir_bclass_from_char(ranges: &[(char, char)]) -> Hir {
+        let ranges: Vec<hir::ClassBytesRange> = ranges
+            .iter()
+            .map(|&(s, e)| {
+                assert!(s as u32 <= 0x7F);
+                assert!(e as u32 <= 0x7F);
+                hir::ClassBytesRange::new(s as u8, e as u8)
+            })
+            .collect();
+        Hir::class(hir::Class::Bytes(hir::ClassBytes::new(ranges)))
+    }
+
+    fn hir_case_fold(expr: Hir) -> Hir {
+        match expr.into_kind() {
+            HirKind::Class(mut cls) => {
+                cls.case_fold_simple();
+                Hir::class(cls)
+            }
+            _ => panic!("cannot case fold non-class Hir expr"),
+        }
+    }
+
+    fn hir_negate(expr: Hir) -> Hir {
+        match expr.into_kind() {
+            HirKind::Class(mut cls) => {
+                cls.negate();
+                Hir::class(cls)
+            }
+            _ => panic!("cannot negate non-class Hir expr"),
+        }
+    }
+
+    fn hir_union(expr1: Hir, expr2: Hir) -> Hir {
+        use hir::Class::{Bytes, Unicode};
+
+        match (expr1.into_kind(), expr2.into_kind()) {
+            (
+                HirKind::Class(Unicode(mut c1)),
+                HirKind::Class(Unicode(c2)),
+            ) => {
+                c1.union(&c2);
+                Hir::class(hir::Class::Unicode(c1))
+            }
+            (
+                HirKind::Class(Bytes(mut c1)),
+                HirKind::Class(Bytes(c2)),
+            ) => {
+                c1.union(&c2);
+                Hir::class(hir::Class::Bytes(c1))
+            }
+            _ => panic!("cannot union non-class Hir exprs"),
+        }
+    }
+
+    fn hir_difference(expr1: Hir, expr2: Hir) -> Hir {
+        use hir::Class::{Bytes, Unicode};
+
+        match (expr1.into_kind(), expr2.into_kind()) {
+            (
+                HirKind::Class(Unicode(mut c1)),
+                HirKind::Class(Unicode(c2)),
+            ) => {
+                c1.difference(&c2);
+                Hir::class(hir::Class::Unicode(c1))
+            }
+            (
+                HirKind::Class(Bytes(mut c1)),
+                HirKind::Class(Bytes(c2)),
+            ) => {
+                c1.difference(&c2);
+                Hir::class(hir::Class::Bytes(c1))
+            }
+            _ => panic!("cannot difference non-class Hir exprs"),
+        }
+    }
+
+    fn hir_anchor(anchor: hir::Anchor) -> Hir {
+        Hir::anchor(anchor)
+    }
+
+    fn hir_word(wb: hir::WordBoundary) -> Hir {
+        Hir::word_boundary(wb)
+    }
+
+    #[test]
+    fn empty() {
+        assert_eq!(t(""), Hir::empty());
+        assert_eq!(t("(?i)"), Hir::empty());
+        assert_eq!(t("()"), hir_group(1, Hir::empty()));
+        assert_eq!(t("(?:)"), hir_group_nocap(Hir::empty()));
+        assert_eq!(t("(?P<wat>)"), hir_group_name(1, "wat", Hir::empty()));
+        assert_eq!(t("|"), hir_alt(vec![Hir::empty(), Hir::empty()]));
+        assert_eq!(t("()|()"), hir_alt(vec![
+            hir_group(1, Hir::empty()),
+            hir_group(2, Hir::empty()),
+        ]));
+        assert_eq!(t("(|b)"), hir_group(1, hir_alt(vec![
+            Hir::empty(),
+            hir_lit("b"),
+        ])));
+        assert_eq!(t("(a|)"), hir_group(1, hir_alt(vec![
+            hir_lit("a"),
+            Hir::empty(),
+        ])));
+        assert_eq!(t("(a||c)"), hir_group(1, hir_alt(vec![
+            hir_lit("a"),
+            Hir::empty(),
+            hir_lit("c"),
+        ])));
+        assert_eq!(t("(||)"), hir_group(1, hir_alt(vec![
+            Hir::empty(),
+            Hir::empty(),
+            Hir::empty(),
+        ])));
+    }
+
+    #[test]
+    fn literal() {
+        assert_eq!(t("a"), hir_lit("a"));
+        assert_eq!(t("(?-u)a"), hir_lit("a"));
+        assert_eq!(t("☃"), hir_lit("☃"));
+        assert_eq!(t("abcd"), hir_lit("abcd"));
+
+        assert_eq!(t_bytes("(?-u)a"), hir_lit("a"));
+        assert_eq!(t_bytes("(?-u)\x61"), hir_lit("a"));
+        assert_eq!(t_bytes(r"(?-u)\x61"), hir_lit("a"));
+        assert_eq!(t_bytes(r"(?-u)\xFF"), hir_blit(b"\xFF"));
+
+        assert_eq!(t_err("(?-u)☃"), TestError {
+            kind: hir::ErrorKind::UnicodeNotAllowed,
+            span: Span::new(Position::new(5, 1, 6), Position::new(8, 1, 7)),
+        });
+        assert_eq!(t_err(r"(?-u)\xFF"), TestError {
+            kind: hir::ErrorKind::InvalidUtf8,
+            span: Span::new(Position::new(5, 1, 6), Position::new(9, 1, 10)),
+        });
+    }
+
+    #[test]
+    fn literal_case_insensitive() {
+        assert_eq!(t("(?i)a"), hir_uclass(&[
+            ('A', 'A'), ('a', 'a'),
+        ]));
+        assert_eq!(t("(?i:a)"), hir_group_nocap(hir_uclass(&[
+            ('A', 'A'), ('a', 'a')],
+        )));
+        assert_eq!(t("a(?i)a(?-i)a"), hir_cat(vec![
+            hir_lit("a"),
+            hir_uclass(&[('A', 'A'), ('a', 'a')]),
+            hir_lit("a"),
+        ]));
+        assert_eq!(t("(?i)ab@c"), hir_cat(vec![
+            hir_uclass(&[('A', 'A'), ('a', 'a')]),
+            hir_uclass(&[('B', 'B'), ('b', 'b')]),
+            hir_lit("@"),
+            hir_uclass(&[('C', 'C'), ('c', 'c')]),
+        ]));
+        assert_eq!(t("(?i)β"), hir_uclass(&[
+            ('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ'),
+        ]));
+
+        assert_eq!(t("(?i-u)a"), hir_bclass(&[
+            (b'A', b'A'), (b'a', b'a'),
+        ]));
+        assert_eq!(t("(?-u)a(?i)a(?-i)a"), hir_cat(vec![
+            hir_lit("a"),
+            hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
+            hir_lit("a"),
+        ]));
+        assert_eq!(t("(?i-u)ab@c"), hir_cat(vec![
+            hir_bclass(&[(b'A', b'A'), (b'a', b'a')]),
+            hir_bclass(&[(b'B', b'B'), (b'b', b'b')]),
+            hir_lit("@"),
+            hir_bclass(&[(b'C', b'C'), (b'c', b'c')]),
+        ]));
+
+        assert_eq!(t_bytes("(?i-u)a"), hir_bclass(&[
+            (b'A', b'A'), (b'a', b'a'),
+        ]));
+        assert_eq!(t_bytes("(?i-u)\x61"), hir_bclass(&[
+            (b'A', b'A'), (b'a', b'a'),
+        ]));
+        assert_eq!(t_bytes(r"(?i-u)\x61"), hir_bclass(&[
+            (b'A', b'A'), (b'a', b'a'),
+        ]));
+        assert_eq!(t_bytes(r"(?i-u)\xFF"), hir_blit(b"\xFF"));
+
+        assert_eq!(t_err("(?i-u)β"), TestError {
+            kind: hir::ErrorKind::UnicodeNotAllowed,
+            span: Span::new(
+                Position::new(6, 1, 7),
+                Position::new(8, 1, 8),
+            ),
+        });
+    }
+
+    #[test]
+    fn dot() {
+        assert_eq!(t("."), hir_uclass(&[
+            ('\0', '\t'),
+            ('\x0B', '\u{10FFFF}'),
+        ]));
+        assert_eq!(t("(?s)."), hir_uclass(&[
+            ('\0', '\u{10FFFF}'),
+        ]));
+        assert_eq!(t_bytes("(?-u)."), hir_bclass(&[
+            (b'\0', b'\t'),
+            (b'\x0B', b'\xFF'),
+        ]));
+        assert_eq!(t_bytes("(?s-u)."), hir_bclass(&[
+            (b'\0', b'\xFF'),
+        ]));
+
+        // If invalid UTF-8 isn't allowed, then non-Unicode `.` isn't allowed.
+        assert_eq!(t_err("(?-u)."), TestError {
+            kind: hir::ErrorKind::InvalidUtf8,
+            span: Span::new(Position::new(5, 1, 6), Position::new(6, 1, 7)),
+        });
+        assert_eq!(t_err("(?s-u)."), TestError {
+            kind: hir::ErrorKind::InvalidUtf8,
+            span: Span::new(Position::new(6, 1, 7), Position::new(7, 1, 8)),
+        });
+    }
+
+    #[test]
+    fn assertions() {
+        assert_eq!(t("^"), hir_anchor(hir::Anchor::StartText));
+        assert_eq!(t("$"), hir_anchor(hir::Anchor::EndText));
+        assert_eq!(t(r"\A"), hir_anchor(hir::Anchor::StartText));
+        assert_eq!(t(r"\z"), hir_anchor(hir::Anchor::EndText));
+        assert_eq!(t("(?m)^"), hir_anchor(hir::Anchor::StartLine));
+        assert_eq!(t("(?m)$"), hir_anchor(hir::Anchor::EndLine));
+        assert_eq!(t(r"(?m)\A"), hir_anchor(hir::Anchor::StartText));
+        assert_eq!(t(r"(?m)\z"), hir_anchor(hir::Anchor::EndText));
+
+        assert_eq!(t(r"\b"), hir_word(hir::WordBoundary::Unicode));
+        assert_eq!(t(r"\B"), hir_word(hir::WordBoundary::UnicodeNegate));
+        assert_eq!(t(r"(?-u)\b"), hir_word(hir::WordBoundary::Ascii));
+        assert_eq!(
+            t_bytes(r"(?-u)\B"),
+            hir_word(hir::WordBoundary::AsciiNegate));
+
+        assert_eq!(t_err(r"(?-u)\B"), TestError {
+            kind: hir::ErrorKind::InvalidUtf8,
+            span: Span::new(Position::new(5, 1, 6), Position::new(7, 1, 8)),
+        });
+    }
+
+    #[test]
+    fn group() {
+        assert_eq!(t("(a)"), hir_group(1, hir_lit("a")));
+        assert_eq!(t("(a)(b)"), hir_cat(vec![
+            hir_group(1, hir_lit("a")),
+            hir_group(2, hir_lit("b")),
+        ]));
+        assert_eq!(t("(a)|(b)"), hir_alt(vec![
+            hir_group(1, hir_lit("a")),
+            hir_group(2, hir_lit("b")),
+        ]));
+        assert_eq!(t("(?P<foo>)"), hir_group_name(1, "foo", Hir::empty()));
+        assert_eq!(t("(?P<foo>a)"), hir_group_name(1, "foo", hir_lit("a")));
+        assert_eq!(t("(?P<foo>a)(?P<bar>b)"), hir_cat(vec![
+            hir_group_name(1, "foo", hir_lit("a")),
+            hir_group_name(2, "bar", hir_lit("b")),
+        ]));
+        assert_eq!(t("(?:)"), hir_group_nocap(Hir::empty()));
+        assert_eq!(t("(?:a)"), hir_group_nocap(hir_lit("a")));
+        assert_eq!(t("(?:a)(b)"), hir_cat(vec![
+            hir_group_nocap(hir_lit("a")),
+            hir_group(1, hir_lit("b")),
+        ]));
+        assert_eq!(t("(a)(?:b)(c)"), hir_cat(vec![
+            hir_group(1, hir_lit("a")),
+            hir_group_nocap(hir_lit("b")),
+            hir_group(2, hir_lit("c")),
+        ]));
+        assert_eq!(t("(a)(?P<foo>b)(c)"), hir_cat(vec![
+            hir_group(1, hir_lit("a")),
+            hir_group_name(2, "foo", hir_lit("b")),
+            hir_group(3, hir_lit("c")),
+        ]));
+    }
+
+    #[test]
+    fn flags() {
+        assert_eq!(t("(?i:a)a"), hir_cat(vec![
+            hir_group_nocap(hir_uclass(&[('A', 'A'), ('a', 'a')])),
+            hir_lit("a"),
+        ]));
+        assert_eq!(t("(?i-u:a)β"), hir_cat(vec![
+            hir_group_nocap(hir_bclass(&[(b'A', b'A'), (b'a', b'a')])),
+            hir_lit("β"),
+        ]));
+        assert_eq!(t("(?i)(?-i:a)a"), hir_cat(vec![
+            hir_group_nocap(hir_lit("a")),
+            hir_uclass(&[('A', 'A'), ('a', 'a')]),
+        ]));
+        assert_eq!(t("(?im)a^"), hir_cat(vec![
+            hir_uclass(&[('A', 'A'), ('a', 'a')]),
+            hir_anchor(hir::Anchor::StartLine),
+        ]));
+        assert_eq!(t("(?im)a^(?i-m)a^"), hir_cat(vec![
+            hir_uclass(&[('A', 'A'), ('a', 'a')]),
+            hir_anchor(hir::Anchor::StartLine),
+            hir_uclass(&[('A', 'A'), ('a', 'a')]),
+            hir_anchor(hir::Anchor::StartText),
+        ]));
+        assert_eq!(t("(?U)a*a*?(?-U)a*a*?"), hir_cat(vec![
+            hir_star(false, hir_lit("a")),
+            hir_star(true, hir_lit("a")),
+            hir_star(true, hir_lit("a")),
+            hir_star(false, hir_lit("a")),
+        ]));
+        assert_eq!(t("(?:a(?i)a)a"), hir_cat(vec![
+            hir_group_nocap(hir_cat(vec![
+                hir_lit("a"),
+                hir_uclass(&[('A', 'A'), ('a', 'a')]),
+            ])),
+            hir_lit("a"),
+        ]));
+        assert_eq!(t("(?i)(?:a(?-i)a)a"), hir_cat(vec![
+            hir_group_nocap(hir_cat(vec![
+                hir_uclass(&[('A', 'A'), ('a', 'a')]),
+                hir_lit("a"),
+            ])),
+            hir_uclass(&[('A', 'A'), ('a', 'a')]),
+        ]));
+    }
+
+    #[test]
+    fn escape() {
+        assert_eq!(
+            t(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#"),
+            hir_lit(r"\.+*?()|[]{}^$#"));
+    }
+
+    #[test]
+    fn repetition() {
+        assert_eq!(t("a?"), hir_quest(true, hir_lit("a")));
+        assert_eq!(t("a*"), hir_star(true, hir_lit("a")));
+        assert_eq!(t("a+"), hir_plus(true, hir_lit("a")));
+        assert_eq!(t("a??"), hir_quest(false, hir_lit("a")));
+        assert_eq!(t("a*?"), hir_star(false, hir_lit("a")));
+        assert_eq!(t("a+?"), hir_plus(false, hir_lit("a")));
+
+        assert_eq!(
+            t("a{1}"),
+            hir_range(
+                true,
+                hir::RepetitionRange::Exactly(1),
+                hir_lit("a"),
+            ));
+        assert_eq!(
+            t("a{1,}"),
+            hir_range(
+                true,
+                hir::RepetitionRange::AtLeast(1),
+                hir_lit("a"),
+            ));
+        assert_eq!(
+            t("a{1,2}"),
+            hir_range(
+                true,
+                hir::RepetitionRange::Bounded(1, 2),
+                hir_lit("a"),
+            ));
+        assert_eq!(
+            t("a{1}?"),
+            hir_range(
+                false,
+                hir::RepetitionRange::Exactly(1),
+                hir_lit("a"),
+            ));
+        assert_eq!(
+            t("a{1,}?"),
+            hir_range(
+                false,
+                hir::RepetitionRange::AtLeast(1),
+                hir_lit("a"),
+            ));
+        assert_eq!(
+            t("a{1,2}?"),
+            hir_range(
+                false,
+                hir::RepetitionRange::Bounded(1, 2),
+                hir_lit("a"),
+            ));
+
+        assert_eq!(t("ab?"), hir_cat(vec![
+            hir_lit("a"),
+            hir_quest(true, hir_lit("b")),
+        ]));
+        assert_eq!(t("(ab)?"), hir_quest(true, hir_group(1, hir_cat(vec![
+            hir_lit("a"),
+            hir_lit("b"),
+        ]))));
+        assert_eq!(t("a|b?"), hir_alt(vec![
+            hir_lit("a"),
+            hir_quest(true, hir_lit("b")),
+        ]));
+    }
+
+    #[test]
+    fn cat_alt() {
+        assert_eq!(t("(ab)"), hir_group(1, hir_cat(vec![
+            hir_lit("a"),
+            hir_lit("b"),
+        ])));
+        assert_eq!(t("a|b"), hir_alt(vec![
+            hir_lit("a"),
+            hir_lit("b"),
+        ]));
+        assert_eq!(t("a|b|c"), hir_alt(vec![
+            hir_lit("a"),
+            hir_lit("b"),
+            hir_lit("c"),
+        ]));
+        assert_eq!(t("ab|bc|cd"), hir_alt(vec![
+            hir_lit("ab"),
+            hir_lit("bc"),
+            hir_lit("cd"),
+        ]));
+        assert_eq!(t("(a|b)"), hir_group(1, hir_alt(vec![
+            hir_lit("a"),
+            hir_lit("b"),
+        ])));
+        assert_eq!(t("(a|b|c)"), hir_group(1, hir_alt(vec![
+            hir_lit("a"),
+            hir_lit("b"),
+            hir_lit("c"),
+        ])));
+        assert_eq!(t("(ab|bc|cd)"), hir_group(1, hir_alt(vec![
+            hir_lit("ab"),
+            hir_lit("bc"),
+            hir_lit("cd"),
+        ])));
+        assert_eq!(t("(ab|(bc|(cd)))"), hir_group(1, hir_alt(vec![
+            hir_lit("ab"),
+            hir_group(2, hir_alt(vec![
+                hir_lit("bc"),
+                hir_group(3, hir_lit("cd")),
+            ])),
+        ])));
+    }
+
+    #[test]
+    fn class_ascii() {
+        assert_eq!(
+            t("[[:alnum:]]"),
+            hir_uclass(ascii_class(&ast::ClassAsciiKind::Alnum)));
+        assert_eq!(
+            t("[[:alpha:]]"),
+            hir_uclass(ascii_class(&ast::ClassAsciiKind::Alpha)));
+        assert_eq!(
+            t("[[:ascii:]]"),
+            hir_uclass(ascii_class(&ast::ClassAsciiKind::Ascii)));
+        assert_eq!(
+            t("[[:blank:]]"),
+            hir_uclass(ascii_class(&ast::ClassAsciiKind::Blank)));
+        assert_eq!(
+            t("[[:cntrl:]]"),
+            hir_uclass(ascii_class(&ast::ClassAsciiKind::Cntrl)));
+        assert_eq!(
+            t("[[:digit:]]"),
+            hir_uclass(ascii_class(&ast::ClassAsciiKind::Digit)));
+        assert_eq!(
+            t("[[:graph:]]"),
+            hir_uclass(ascii_class(&ast::ClassAsciiKind::Graph)));
+        assert_eq!(
+            t("[[:lower:]]"),
+            hir_uclass(ascii_class(&ast::ClassAsciiKind::Lower)));
+        assert_eq!(
+            t("[[:print:]]"),
+            hir_uclass(ascii_class(&ast::ClassAsciiKind::Print)));
+        assert_eq!(
+            t("[[:punct:]]"),
+            hir_uclass(ascii_class(&ast::ClassAsciiKind::Punct)));
+        assert_eq!(
+            t("[[:space:]]"),
+            hir_uclass(ascii_class(&ast::ClassAsciiKind::Space)));
+        assert_eq!(
+            t("[[:upper:]]"),
+            hir_uclass(ascii_class(&ast::ClassAsciiKind::Upper)));
+        assert_eq!(
+            t("[[:word:]]"),
+            hir_uclass(ascii_class(&ast::ClassAsciiKind::Word)));
+        assert_eq!(
+            t("[[:xdigit:]]"),
+            hir_uclass(ascii_class(&ast::ClassAsciiKind::Xdigit)));
+
+        assert_eq!(
+            t("[[:^lower:]]"),
+            hir_negate(hir_uclass(ascii_class(&ast::ClassAsciiKind::Lower))));
+        assert_eq!(
+            t("(?i)[[:lower:]]"),
+            hir_uclass(&[
+                ('A', 'Z'), ('a', 'z'),
+                ('\u{17F}', '\u{17F}'),
+                ('\u{212A}', '\u{212A}'),
+            ]));
+
+        assert_eq!(
+            t("(?-u)[[:lower:]]"),
+            hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Lower)));
+        assert_eq!(
+            t("(?i-u)[[:lower:]]"),
+            hir_case_fold(hir_bclass_from_char(ascii_class(
+                &ast::ClassAsciiKind::Lower))));
+
+        assert_eq!(t_err("(?-u)[[:^lower:]]"), TestError {
+            kind: hir::ErrorKind::InvalidUtf8,
+            span: Span::new(Position::new(6, 1, 7), Position::new(16, 1, 17)),
+        });
+        assert_eq!(t_err("(?i-u)[[:^lower:]]"), TestError {
+            kind: hir::ErrorKind::InvalidUtf8,
+            span: Span::new(Position::new(7, 1, 8), Position::new(17, 1, 18)),
+        });
+    }
+
+    #[test]
+    fn class_perl() {
+        // Unicode
+        assert_eq!(
+            t(r"\d"),
+            hir_uclass_query(ClassQuery::Binary("digit")));
+        assert_eq!(
+            t(r"\s"),
+            hir_uclass_query(ClassQuery::Binary("space")));
+        assert_eq!(
+            t(r"\w"),
+            hir_uclass_perl_word());
+        assert_eq!(
+            t(r"(?i)\d"),
+            hir_uclass_query(ClassQuery::Binary("digit")));
+        assert_eq!(
+            t(r"(?i)\s"),
+            hir_uclass_query(ClassQuery::Binary("space")));
+        assert_eq!(
+            t(r"(?i)\w"),
+            hir_uclass_perl_word());
+
+        // Unicode, negated
+        assert_eq!(
+            t(r"\D"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("digit"))));
+        assert_eq!(
+            t(r"\S"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("space"))));
+        assert_eq!(
+            t(r"\W"),
+            hir_negate(hir_uclass_perl_word()));
+        assert_eq!(
+            t(r"(?i)\D"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("digit"))));
+        assert_eq!(
+            t(r"(?i)\S"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("space"))));
+        assert_eq!(
+            t(r"(?i)\W"),
+            hir_negate(hir_uclass_perl_word()));
+
+        // ASCII only
+        assert_eq!(
+            t(r"(?-u)\d"),
+            hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit)));
+        assert_eq!(
+            t(r"(?-u)\s"),
+            hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Space)));
+        assert_eq!(
+            t(r"(?-u)\w"),
+            hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Word)));
+        assert_eq!(
+            t(r"(?i-u)\d"),
+            hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Digit)));
+        assert_eq!(
+            t(r"(?i-u)\s"),
+            hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Space)));
+        assert_eq!(
+            t(r"(?i-u)\w"),
+            hir_bclass_from_char(ascii_class(&ast::ClassAsciiKind::Word)));
+
+        // ASCII only, negated
+        assert_eq!(
+            t(r"(?-u)\D"),
+            hir_negate(hir_bclass_from_char(ascii_class(
+                &ast::ClassAsciiKind::Digit))));
+        assert_eq!(
+            t(r"(?-u)\S"),
+            hir_negate(hir_bclass_from_char(ascii_class(
+                &ast::ClassAsciiKind::Space))));
+        assert_eq!(
+            t(r"(?-u)\W"),
+            hir_negate(hir_bclass_from_char(ascii_class(
+                &ast::ClassAsciiKind::Word))));
+        assert_eq!(
+            t(r"(?i-u)\D"),
+            hir_negate(hir_bclass_from_char(ascii_class(
+                &ast::ClassAsciiKind::Digit))));
+        assert_eq!(
+            t(r"(?i-u)\S"),
+            hir_negate(hir_bclass_from_char(ascii_class(
+                &ast::ClassAsciiKind::Space))));
+        assert_eq!(
+            t(r"(?i-u)\W"),
+            hir_negate(hir_bclass_from_char(ascii_class(
+                &ast::ClassAsciiKind::Word))));
+    }
+
+    #[test]
+    fn class_unicode() {
+        assert_eq!(
+            t(r"\pZ"),
+            hir_uclass_query(ClassQuery::Binary("Z")));
+        assert_eq!(
+            t(r"\pz"),
+            hir_uclass_query(ClassQuery::Binary("Z")));
+        assert_eq!(
+            t(r"\p{Separator}"),
+            hir_uclass_query(ClassQuery::Binary("Z")));
+        assert_eq!(
+            t(r"\p{se      PaRa ToR}"),
+            hir_uclass_query(ClassQuery::Binary("Z")));
+        assert_eq!(
+            t(r"\p{gc:Separator}"),
+            hir_uclass_query(ClassQuery::Binary("Z")));
+        assert_eq!(
+            t(r"\p{gc=Separator}"),
+            hir_uclass_query(ClassQuery::Binary("Z")));
+        assert_eq!(
+            t(r"\p{Other}"),
+            hir_uclass_query(ClassQuery::Binary("Other")));
+        assert_eq!(
+            t(r"\pC"),
+            hir_uclass_query(ClassQuery::Binary("Other")));
+
+        assert_eq!(
+            t(r"\PZ"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("Z"))));
+        assert_eq!(
+            t(r"\P{separator}"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("Z"))));
+        assert_eq!(
+            t(r"\P{gc!=separator}"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("Z"))));
+
+        assert_eq!(
+            t(r"\p{Greek}"),
+            hir_uclass_query(ClassQuery::Binary("Greek")));
+        assert_eq!(
+            t(r"(?i)\p{Greek}"),
+            hir_case_fold(hir_uclass_query(ClassQuery::Binary("Greek"))));
+        assert_eq!(
+            t(r"(?i)\P{Greek}"),
+            hir_negate(hir_case_fold(hir_uclass_query(
+                ClassQuery::Binary("Greek")))));
+
+        assert_eq!(
+            t(r"\p{any}"),
+            hir_uclass_query(ClassQuery::Binary("Any")));
+        assert_eq!(
+            t(r"\p{assigned}"),
+            hir_uclass_query(ClassQuery::Binary("Assigned")));
+        assert_eq!(
+            t(r"\p{ascii}"),
+            hir_uclass_query(ClassQuery::Binary("ASCII")));
+        assert_eq!(
+            t(r"\p{gc:any}"),
+            hir_uclass_query(ClassQuery::Binary("Any")));
+        assert_eq!(
+            t(r"\p{gc:assigned}"),
+            hir_uclass_query(ClassQuery::Binary("Assigned")));
+        assert_eq!(
+            t(r"\p{gc:ascii}"),
+            hir_uclass_query(ClassQuery::Binary("ASCII")));
+
+        assert_eq!(t_err(r"(?-u)\pZ"), TestError {
+            kind: hir::ErrorKind::UnicodeNotAllowed,
+            span: Span::new(Position::new(5, 1, 6), Position::new(8, 1, 9)),
+        });
+        assert_eq!(t_err(r"(?-u)\p{Separator}"), TestError {
+            kind: hir::ErrorKind::UnicodeNotAllowed,
+            span: Span::new(Position::new(5, 1, 6), Position::new(18, 1, 19)),
+        });
+        assert_eq!(t_err(r"\pE"), TestError {
+            kind: hir::ErrorKind::UnicodePropertyNotFound,
+            span: Span::new(Position::new(0, 1, 1), Position::new(3, 1, 4)),
+        });
+        assert_eq!(t_err(r"\p{Foo}"), TestError {
+            kind: hir::ErrorKind::UnicodePropertyNotFound,
+            span: Span::new(Position::new(0, 1, 1), Position::new(7, 1, 8)),
+        });
+        assert_eq!(t_err(r"\p{gc:Foo}"), TestError {
+            kind: hir::ErrorKind::UnicodePropertyValueNotFound,
+            span: Span::new(Position::new(0, 1, 1), Position::new(10, 1, 11)),
+        });
+        assert_eq!(t_err(r"\p{sc:Foo}"), TestError {
+            kind: hir::ErrorKind::UnicodePropertyValueNotFound,
+            span: Span::new(Position::new(0, 1, 1), Position::new(10, 1, 11)),
+        });
+        assert_eq!(t_err(r"\p{scx:Foo}"), TestError {
+            kind: hir::ErrorKind::UnicodePropertyValueNotFound,
+            span: Span::new(Position::new(0, 1, 1), Position::new(11, 1, 12)),
+        });
+        assert_eq!(t_err(r"\p{age:Foo}"), TestError {
+            kind: hir::ErrorKind::UnicodePropertyValueNotFound,
+            span: Span::new(Position::new(0, 1, 1), Position::new(11, 1, 12)),
+        });
+    }
+
+    #[test]
+    fn class_bracketed() {
+        assert_eq!(t("[a]"), hir_uclass(&[('a', 'a')]));
+        assert_eq!(t("[^[a]]"), hir_negate(hir_uclass(&[('a', 'a')])));
+        assert_eq!(t("[a-z]"), hir_uclass(&[('a', 'z')]));
+        assert_eq!(t("[a-fd-h]"), hir_uclass(&[('a', 'h')]));
+        assert_eq!(t("[a-fg-m]"), hir_uclass(&[('a', 'm')]));
+        assert_eq!(t(r"[\x00]"), hir_uclass(&[('\0', '\0')]));
+        assert_eq!(t(r"[\n]"), hir_uclass(&[('\n', '\n')]));
+        assert_eq!(t("[\n]"), hir_uclass(&[('\n', '\n')]));
+        assert_eq!(
+            t(r"[\d]"),
+            hir_uclass_query(ClassQuery::Binary("digit")));
+        assert_eq!(
+            t(r"[\pZ]"),
+            hir_uclass_query(ClassQuery::Binary("separator")));
+        assert_eq!(
+            t(r"[\p{separator}]"),
+            hir_uclass_query(ClassQuery::Binary("separator")));
+        assert_eq!(
+            t(r"[^\D]"),
+            hir_uclass_query(ClassQuery::Binary("digit")));
+        assert_eq!(
+            t(r"[^\PZ]"),
+            hir_uclass_query(ClassQuery::Binary("separator")));
+        assert_eq!(
+            t(r"[^\P{separator}]"),
+            hir_uclass_query(ClassQuery::Binary("separator")));
+        assert_eq!(
+            t(r"(?i)[^\D]"),
+            hir_uclass_query(ClassQuery::Binary("digit")));
+        assert_eq!(
+            t(r"(?i)[^\P{greek}]"),
+            hir_case_fold(hir_uclass_query(ClassQuery::Binary("greek"))));
+
+        assert_eq!(t("(?-u)[a]"), hir_bclass(&[(b'a', b'a')]));
+        assert_eq!(t(r"(?-u)[\x00]"), hir_bclass(&[(b'\0', b'\0')]));
+        assert_eq!(t_bytes(r"(?-u)[\xFF]"), hir_bclass(&[(b'\xFF', b'\xFF')]));
+
+        assert_eq!(t("(?i)[a]"), hir_uclass(&[('A', 'A'), ('a', 'a')]));
+        assert_eq!(t("(?i)[k]"), hir_uclass(&[
+            ('K', 'K'), ('k', 'k'), ('\u{212A}', '\u{212A}'),
+        ]));
+        assert_eq!(t("(?i)[β]"), hir_uclass(&[
+            ('Β', 'Β'), ('β', 'β'), ('ϐ', 'ϐ'),
+        ]));
+        assert_eq!(t("(?i-u)[k]"), hir_bclass(&[
+            (b'K', b'K'), (b'k', b'k'),
+        ]));
+
+        assert_eq!(t("[^a]"), hir_negate(hir_uclass(&[('a', 'a')])));
+        assert_eq!(t(r"[^\x00]"), hir_negate(hir_uclass(&[('\0', '\0')])));
+        assert_eq!(
+            t_bytes("(?-u)[^a]"),
+            hir_negate(hir_bclass(&[(b'a', b'a')])));
+        assert_eq!(
+            t(r"[^\d]"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("digit"))));
+        assert_eq!(
+            t(r"[^\pZ]"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("separator"))));
+        assert_eq!(
+            t(r"[^\p{separator}]"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("separator"))));
+        assert_eq!(
+            t(r"(?i)[^\p{greek}]"),
+            hir_negate(hir_case_fold(hir_uclass_query(
+                ClassQuery::Binary("greek")))));
+        assert_eq!(
+            t(r"(?i)[\P{greek}]"),
+            hir_negate(hir_case_fold(hir_uclass_query(
+                ClassQuery::Binary("greek")))));
+
+        // Test some weird cases.
+        assert_eq!(t(r"[\[]"), hir_uclass(&[('[', '[')]));
+
+        assert_eq!(t(r"[&]"), hir_uclass(&[('&', '&')]));
+        assert_eq!(t(r"[\&]"), hir_uclass(&[('&', '&')]));
+        assert_eq!(t(r"[\&\&]"), hir_uclass(&[('&', '&')]));
+        assert_eq!(t(r"[\x00-&]"), hir_uclass(&[('\0', '&')]));
+        assert_eq!(t(r"[&-\xFF]"), hir_uclass(&[('&', '\u{FF}')]));
+
+        assert_eq!(t(r"[~]"), hir_uclass(&[('~', '~')]));
+        assert_eq!(t(r"[\~]"), hir_uclass(&[('~', '~')]));
+        assert_eq!(t(r"[\~\~]"), hir_uclass(&[('~', '~')]));
+        assert_eq!(t(r"[\x00-~]"), hir_uclass(&[('\0', '~')]));
+        assert_eq!(t(r"[~-\xFF]"), hir_uclass(&[('~', '\u{FF}')]));
+
+        assert_eq!(t(r"[-]"), hir_uclass(&[('-', '-')]));
+        assert_eq!(t(r"[\-]"), hir_uclass(&[('-', '-')]));
+        assert_eq!(t(r"[\-\-]"), hir_uclass(&[('-', '-')]));
+        assert_eq!(t(r"[\x00-\-]"), hir_uclass(&[('\0', '-')]));
+        assert_eq!(t(r"[\--\xFF]"), hir_uclass(&[('-', '\u{FF}')]));
+
+        assert_eq!(t_err("(?-u)[^a]"), TestError {
+            kind: hir::ErrorKind::InvalidUtf8,
+            span: Span::new(Position::new(5, 1, 6), Position::new(9, 1, 10)),
+        });
+        assert_eq!(t_err(r"[^\s\S]"), TestError {
+            kind: hir::ErrorKind::EmptyClassNotAllowed,
+            span: Span::new(Position::new(0, 1, 1), Position::new(7, 1, 8)),
+        });
+        assert_eq!(t_err(r"(?-u)[^\s\S]"), TestError {
+            kind: hir::ErrorKind::EmptyClassNotAllowed,
+            span: Span::new(Position::new(5, 1, 6), Position::new(12, 1, 13)),
+        });
+    }
+
+    #[test]
+    fn class_bracketed_union() {
+        assert_eq!(
+            t("[a-zA-Z]"),
+            hir_uclass(&[('A', 'Z'), ('a', 'z')]));
+        assert_eq!(
+            t(r"[a\pZb]"),
+            hir_union(
+                hir_uclass(&[('a', 'b')]),
+                hir_uclass_query(ClassQuery::Binary("separator"))));
+        assert_eq!(
+            t(r"[\pZ\p{Greek}]"),
+            hir_union(
+                hir_uclass_query(ClassQuery::Binary("greek")),
+                hir_uclass_query(ClassQuery::Binary("separator"))));
+        assert_eq!(
+            t(r"[\p{age:3.0}\pZ\p{Greek}]"),
+            hir_union(
+                hir_uclass_query(ClassQuery::ByValue {
+                    property_name: "age",
+                    property_value: "3.0",
+                }),
+                hir_union(
+                    hir_uclass_query(ClassQuery::Binary("greek")),
+                    hir_uclass_query(ClassQuery::Binary("separator")))));
+        assert_eq!(
+            t(r"[[[\p{age:3.0}\pZ]\p{Greek}][\p{Cyrillic}]]"),
+            hir_union(
+                hir_uclass_query(ClassQuery::ByValue {
+                    property_name: "age",
+                    property_value: "3.0",
+                }),
+                hir_union(
+                    hir_uclass_query(ClassQuery::Binary("cyrillic")),
+                    hir_union(
+                        hir_uclass_query(ClassQuery::Binary("greek")),
+                        hir_uclass_query(ClassQuery::Binary("separator"))))));
+
+        assert_eq!(
+            t(r"(?i)[\p{age:3.0}\pZ\p{Greek}]"),
+            hir_case_fold(hir_union(
+                hir_uclass_query(ClassQuery::ByValue {
+                    property_name: "age",
+                    property_value: "3.0",
+                }),
+                hir_union(
+                    hir_uclass_query(ClassQuery::Binary("greek")),
+                    hir_uclass_query(ClassQuery::Binary("separator"))))));
+        assert_eq!(
+            t(r"[^\p{age:3.0}\pZ\p{Greek}]"),
+            hir_negate(hir_union(
+                hir_uclass_query(ClassQuery::ByValue {
+                    property_name: "age",
+                    property_value: "3.0",
+                }),
+                hir_union(
+                    hir_uclass_query(ClassQuery::Binary("greek")),
+                    hir_uclass_query(ClassQuery::Binary("separator"))))));
+        assert_eq!(
+            t(r"(?i)[^\p{age:3.0}\pZ\p{Greek}]"),
+            hir_negate(hir_case_fold(hir_union(
+                hir_uclass_query(ClassQuery::ByValue {
+                    property_name: "age",
+                    property_value: "3.0",
+                }),
+                hir_union(
+                    hir_uclass_query(ClassQuery::Binary("greek")),
+                    hir_uclass_query(ClassQuery::Binary("separator")))))));
+    }
+
+    #[test]
+    fn class_bracketed_nested() {
+        assert_eq!(
+            t(r"[a[^c]]"),
+            hir_negate(hir_uclass(&[('c', 'c')])));
+        assert_eq!(
+            t(r"[a-b[^c]]"),
+            hir_negate(hir_uclass(&[('c', 'c')])));
+        assert_eq!(
+            t(r"[a-c[^c]]"),
+            hir_negate(hir_uclass(&[])));
+
+        assert_eq!(
+            t(r"[^a[^c]]"),
+            hir_uclass(&[('c', 'c')]));
+        assert_eq!(
+            t(r"[^a-b[^c]]"),
+            hir_uclass(&[('c', 'c')]));
+
+        assert_eq!(
+            t(r"(?i)[a[^c]]"),
+            hir_negate(hir_case_fold(hir_uclass(&[('c', 'c')]))));
+        assert_eq!(
+            t(r"(?i)[a-b[^c]]"),
+            hir_negate(hir_case_fold(hir_uclass(&[('c', 'c')]))));
+
+        assert_eq!(
+            t(r"(?i)[^a[^c]]"),
+            hir_uclass(&[('C', 'C'), ('c', 'c')]));
+        assert_eq!(
+            t(r"(?i)[^a-b[^c]]"),
+            hir_uclass(&[('C', 'C'), ('c', 'c')]));
+
+        assert_eq!(t_err(r"[^a-c[^c]]"), TestError {
+            kind: hir::ErrorKind::EmptyClassNotAllowed,
+            span: Span::new(Position::new(0, 1, 1), Position::new(10, 1, 11)),
+        });
+        assert_eq!(t_err(r"(?i)[^a-c[^c]]"), TestError {
+            kind: hir::ErrorKind::EmptyClassNotAllowed,
+            span: Span::new(Position::new(4, 1, 5), Position::new(14, 1, 15)),
+        });
+    }
+
+    #[test]
+    fn class_bracketed_intersect() {
+        assert_eq!(t("[abc&&b-c]"), hir_uclass(&[('b', 'c')]));
+        assert_eq!(t("[abc&&[b-c]]"), hir_uclass(&[('b', 'c')]));
+        assert_eq!(t("[[abc]&&[b-c]]"), hir_uclass(&[('b', 'c')]));
+        assert_eq!(t("[a-z&&b-y&&c-x]"), hir_uclass(&[('c', 'x')]));
+        assert_eq!(t("[c-da-b&&a-d]"), hir_uclass(&[('a', 'd')]));
+        assert_eq!(t("[a-d&&c-da-b]"), hir_uclass(&[('a', 'd')]));
+        assert_eq!(t(r"[a-z&&a-c]"), hir_uclass(&[('a', 'c')]));
+        assert_eq!(t(r"[[a-z&&a-c]]"), hir_uclass(&[('a', 'c')]));
+        assert_eq!(t(r"[^[a-z&&a-c]]"), hir_negate(hir_uclass(&[('a', 'c')])));
+
+        assert_eq!(t("(?-u)[abc&&b-c]"), hir_bclass(&[(b'b', b'c')]));
+        assert_eq!(t("(?-u)[abc&&[b-c]]"), hir_bclass(&[(b'b', b'c')]));
+        assert_eq!(t("(?-u)[[abc]&&[b-c]]"), hir_bclass(&[(b'b', b'c')]));
+        assert_eq!(t("(?-u)[a-z&&b-y&&c-x]"), hir_bclass(&[(b'c', b'x')]));
+        assert_eq!(t("(?-u)[c-da-b&&a-d]"), hir_bclass(&[(b'a', b'd')]));
+        assert_eq!(t("(?-u)[a-d&&c-da-b]"), hir_bclass(&[(b'a', b'd')]));
+
+        assert_eq!(
+            t("(?i)[abc&&b-c]"),
+            hir_case_fold(hir_uclass(&[('b', 'c')])));
+        assert_eq!(
+            t("(?i)[abc&&[b-c]]"),
+            hir_case_fold(hir_uclass(&[('b', 'c')])));
+        assert_eq!(
+            t("(?i)[[abc]&&[b-c]]"),
+            hir_case_fold(hir_uclass(&[('b', 'c')])));
+        assert_eq!(
+            t("(?i)[a-z&&b-y&&c-x]"),
+            hir_case_fold(hir_uclass(&[('c', 'x')])));
+        assert_eq!(
+            t("(?i)[c-da-b&&a-d]"),
+            hir_case_fold(hir_uclass(&[('a', 'd')])));
+        assert_eq!(
+            t("(?i)[a-d&&c-da-b]"),
+            hir_case_fold(hir_uclass(&[('a', 'd')])));
+
+        assert_eq!(
+            t("(?i-u)[abc&&b-c]"),
+            hir_case_fold(hir_bclass(&[(b'b', b'c')])));
+        assert_eq!(
+            t("(?i-u)[abc&&[b-c]]"),
+            hir_case_fold(hir_bclass(&[(b'b', b'c')])));
+        assert_eq!(
+            t("(?i-u)[[abc]&&[b-c]]"),
+            hir_case_fold(hir_bclass(&[(b'b', b'c')])));
+        assert_eq!(
+            t("(?i-u)[a-z&&b-y&&c-x]"),
+            hir_case_fold(hir_bclass(&[(b'c', b'x')])));
+        assert_eq!(
+            t("(?i-u)[c-da-b&&a-d]"),
+            hir_case_fold(hir_bclass(&[(b'a', b'd')])));
+        assert_eq!(
+            t("(?i-u)[a-d&&c-da-b]"),
+            hir_case_fold(hir_bclass(&[(b'a', b'd')])));
+
+        // In `[a^]`, `^` does not need to be escaped, so it makes sense that
+        // `^` is also allowed to be unescaped after `&&`.
+        assert_eq!(t(r"[\^&&^]"), hir_uclass(&[('^', '^')]));
+        // `]` needs to be escaped after `&&` since it's not at start of class.
+        assert_eq!(t(r"[]&&\]]"), hir_uclass(&[(']', ']')]));
+        assert_eq!(t(r"[-&&-]"), hir_uclass(&[('-', '-')]));
+        assert_eq!(t(r"[\&&&&]"), hir_uclass(&[('&', '&')]));
+        assert_eq!(t(r"[\&&&\&]"), hir_uclass(&[('&', '&')]));
+        // Test precedence.
+        assert_eq!(
+            t(r"[a-w&&[^c-g]z]"),
+            hir_uclass(&[('a', 'b'), ('h', 'w')]));
+    }
+
+    #[test]
+    fn class_bracketed_intersect_negate() {
+        assert_eq!(
+            t(r"[^\w&&\d]"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("digit"))));
+        assert_eq!(
+            t(r"[^[a-z&&a-c]]"),
+            hir_negate(hir_uclass(&[('a', 'c')])));
+        assert_eq!(
+            t(r"[^[\w&&\d]]"),
+            hir_negate(hir_uclass_query(ClassQuery::Binary("digit"))));
+        assert_eq!(
+            t(r"[^[^\w&&\d]]"),
+            hir_uclass_query(ClassQuery::Binary("digit")));
+        assert_eq!(
+            t(r"[[[^\w]&&[^\d]]]"),
+            hir_negate(hir_uclass_perl_word()));
+
+        assert_eq!(
+            t_bytes(r"(?-u)[^\w&&\d]"),
+            hir_negate(hir_bclass_from_char(ascii_class(
+                &ast::ClassAsciiKind::Digit))));
+        assert_eq!(
+            t_bytes(r"(?-u)[^[a-z&&a-c]]"),
+            hir_negate(hir_bclass(&[(b'a', b'c')])));
+        assert_eq!(
+            t_bytes(r"(?-u)[^[\w&&\d]]"),
+            hir_negate(hir_bclass_from_char(ascii_class(
+                &ast::ClassAsciiKind::Digit))));
+        assert_eq!(
+            t_bytes(r"(?-u)[^[^\w&&\d]]"),
+            hir_bclass_from_char(ascii_class(
+                &ast::ClassAsciiKind::Digit)));
+        assert_eq!(
+            t_bytes(r"(?-u)[[[^\w]&&[^\d]]]"),
+            hir_negate(hir_bclass_from_char(ascii_class(
+                &ast::ClassAsciiKind::Word))));
+    }
+
+    #[test]
+    fn class_bracketed_difference() {
+        assert_eq!(
+            t(r"[\pL--[:ascii:]]"),
+            hir_difference(
+                hir_uclass_query(ClassQuery::Binary("letter")),
+                hir_uclass(&[('\0', '\x7F')])));
+
+        assert_eq!(
+            t(r"(?-u)[[:alpha:]--[:lower:]]"),
+            hir_bclass(&[(b'A', b'Z')]));
+    }
+
+    #[test]
+    fn class_bracketed_symmetric_difference() {
+        assert_eq!(
+            t(r"[\p{sc:Greek}~~\p{scx:Greek}]"),
+            hir_uclass(&[
+                ('\u{0342}', '\u{0342}'),
+                ('\u{0345}', '\u{0345}'),
+                ('\u{1DC0}', '\u{1DC1}'),
+            ]));
+        assert_eq!(
+            t(r"[a-g~~c-j]"),
+            hir_uclass(&[('a', 'b'), ('h', 'j')]));
+
+        assert_eq!(
+            t(r"(?-u)[a-g~~c-j]"),
+            hir_bclass(&[(b'a', b'b'), (b'h', b'j')]));
+    }
+
+    #[test]
+    fn ignore_whitespace() {
+        assert_eq!(t(r"(?x)\12 3"), hir_lit("\n3"));
+        assert_eq!(t(r"(?x)\x { 53 }"), hir_lit("S"));
+        assert_eq!(t(r"(?x)\x # comment
+{ # comment
+    53 # comment
+} #comment"), hir_lit("S"));
+
+        assert_eq!(t(r"(?x)\x 53"), hir_lit("S"));
+        assert_eq!(t(r"(?x)\x # comment
+        53 # comment"), hir_lit("S"));
+        assert_eq!(t(r"(?x)\x5 3"), hir_lit("S"));
+
+        assert_eq!(t(r"(?x)\p # comment
+{ # comment
+    Separator # comment
+} # comment"), hir_uclass_query(ClassQuery::Binary("separator")));
+
+        assert_eq!(t(r"(?x)a # comment
+{ # comment
+    5 # comment
+    , # comment
+    10 # comment
+} # comment"),
+            hir_range(
+                true, hir::RepetitionRange::Bounded(5, 10), hir_lit("a")));
+
+        assert_eq!(t(r"(?x)a\  # hi there"), hir_lit("a "));
+    }
+
+    #[test]
+    fn analysis_is_always_utf8() {
+        // Positive examples.
+        assert!(t_bytes(r"a").is_always_utf8());
+        assert!(t_bytes(r"ab").is_always_utf8());
+        assert!(t_bytes(r"(?-u)a").is_always_utf8());
+        assert!(t_bytes(r"(?-u)ab").is_always_utf8());
+        assert!(t_bytes(r"\xFF").is_always_utf8());
+        assert!(t_bytes(r"\xFF\xFF").is_always_utf8());
+        assert!(t_bytes(r"[^a]").is_always_utf8());
+        assert!(t_bytes(r"[^a][^a]").is_always_utf8());
+        assert!(t_bytes(r"\b").is_always_utf8());
+        assert!(t_bytes(r"\B").is_always_utf8());
+        assert!(t_bytes(r"(?-u)\b").is_always_utf8());
+
+        // Negative examples.
+        assert!(!t_bytes(r"(?-u)\xFF").is_always_utf8());
+        assert!(!t_bytes(r"(?-u)\xFF\xFF").is_always_utf8());
+        assert!(!t_bytes(r"(?-u)[^a]").is_always_utf8());
+        assert!(!t_bytes(r"(?-u)[^a][^a]").is_always_utf8());
+        assert!(!t_bytes(r"(?-u)\B").is_always_utf8());
+    }
+
+    #[test]
+    fn analysis_is_all_assertions() {
+        // Positive examples.
+        assert!(t(r"\b").is_all_assertions());
+        assert!(t(r"\B").is_all_assertions());
+        assert!(t(r"^").is_all_assertions());
+        assert!(t(r"$").is_all_assertions());
+        assert!(t(r"\A").is_all_assertions());
+        assert!(t(r"\z").is_all_assertions());
+        assert!(t(r"$^\z\A\b\B").is_all_assertions());
+        assert!(t(r"$|^|\z|\A|\b|\B").is_all_assertions());
+        assert!(t(r"^$|$^").is_all_assertions());
+        assert!(t(r"((\b)+())*^").is_all_assertions());
+
+        // Negative examples.
+        assert!(!t(r"^a").is_all_assertions());
+    }
+
+    #[test]
+    fn analysis_is_anchored() {
+        // Positive examples.
+        assert!(t(r"^").is_anchored_start());
+        assert!(t(r"$").is_anchored_end());
+
+        assert!(t(r"^^").is_anchored_start());
+        assert!(t(r"$$").is_anchored_end());
+
+        assert!(t(r"^$").is_anchored_start());
+        assert!(t(r"^$").is_anchored_end());
+
+        assert!(t(r"^foo").is_anchored_start());
+        assert!(t(r"foo$").is_anchored_end());
+
+        assert!(t(r"^foo|^bar").is_anchored_start());
+        assert!(t(r"foo$|bar$").is_anchored_end());
+
+        assert!(t(r"^(foo|bar)").is_anchored_start());
+        assert!(t(r"(foo|bar)$").is_anchored_end());
+
+        assert!(t(r"^+").is_anchored_start());
+        assert!(t(r"$+").is_anchored_end());
+        assert!(t(r"^++").is_anchored_start());
+        assert!(t(r"$++").is_anchored_end());
+        assert!(t(r"(^)+").is_anchored_start());
+        assert!(t(r"($)+").is_anchored_end());
+
+        assert!(t(r"$^").is_anchored_start());
+        assert!(t(r"$^").is_anchored_end());
+        assert!(t(r"$^|^$").is_anchored_start());
+        assert!(t(r"$^|^$").is_anchored_end());
+
+        assert!(t(r"\b^").is_anchored_start());
+        assert!(t(r"$\b").is_anchored_end());
+        assert!(t(r"^(?m:^)").is_anchored_start());
+        assert!(t(r"(?m:$)$").is_anchored_end());
+        assert!(t(r"(?m:^)^").is_anchored_start());
+        assert!(t(r"$(?m:$)").is_anchored_end());
+
+        // Negative examples.
+        assert!(!t(r"(?m)^").is_anchored_start());
+        assert!(!t(r"(?m)$").is_anchored_end());
+        assert!(!t(r"(?m:^$)|$^").is_anchored_start());
+        assert!(!t(r"(?m:^$)|$^").is_anchored_end());
+        assert!(!t(r"$^|(?m:^$)").is_anchored_start());
+        assert!(!t(r"$^|(?m:^$)").is_anchored_end());
+
+        assert!(!t(r"a^").is_anchored_start());
+        assert!(!t(r"$a").is_anchored_start());
+
+        assert!(!t(r"a^").is_anchored_start());
+        assert!(!t(r"$a").is_anchored_start());
+
+        assert!(!t(r"^foo|bar").is_anchored_start());
+        assert!(!t(r"foo|bar$").is_anchored_end());
+
+        assert!(!t(r"^*").is_anchored_start());
+        assert!(!t(r"$*").is_anchored_end());
+        assert!(!t(r"^*+").is_anchored_start());
+        assert!(!t(r"$*+").is_anchored_end());
+        assert!(!t(r"^+*").is_anchored_start());
+        assert!(!t(r"$+*").is_anchored_end());
+        assert!(!t(r"(^)*").is_anchored_start());
+        assert!(!t(r"($)*").is_anchored_end());
+    }
+
+    #[test]
+    fn analysis_is_any_anchored() {
+        // Positive examples.
+        assert!(t(r"^").is_any_anchored_start());
+        assert!(t(r"$").is_any_anchored_end());
+        assert!(t(r"\A").is_any_anchored_start());
+        assert!(t(r"\z").is_any_anchored_end());
+
+        // Negative examples.
+        assert!(!t(r"(?m)^").is_any_anchored_start());
+        assert!(!t(r"(?m)$").is_any_anchored_end());
+        assert!(!t(r"$").is_any_anchored_start());
+        assert!(!t(r"^").is_any_anchored_end());
+    }
+
+    #[test]
+    fn analysis_is_match_empty() {
+        // Positive examples.
+        assert!(t(r"").is_match_empty());
+        assert!(t(r"()").is_match_empty());
+        assert!(t(r"()*").is_match_empty());
+        assert!(t(r"()+").is_match_empty());
+        assert!(t(r"()?").is_match_empty());
+        assert!(t(r"a*").is_match_empty());
+        assert!(t(r"a?").is_match_empty());
+        assert!(t(r"a{0}").is_match_empty());
+        assert!(t(r"a{0,}").is_match_empty());
+        assert!(t(r"a{0,1}").is_match_empty());
+        assert!(t(r"a{0,10}").is_match_empty());
+        assert!(t(r"\pL*").is_match_empty());
+        assert!(t(r"a*|b").is_match_empty());
+        assert!(t(r"b|a*").is_match_empty());
+        assert!(t(r"a*a?(abcd)*").is_match_empty());
+        assert!(t(r"^").is_match_empty());
+        assert!(t(r"$").is_match_empty());
+        assert!(t(r"(?m)^").is_match_empty());
+        assert!(t(r"(?m)$").is_match_empty());
+        assert!(t(r"\A").is_match_empty());
+        assert!(t(r"\z").is_match_empty());
+        assert!(t(r"\B").is_match_empty());
+        assert!(t_bytes(r"(?-u)\B").is_match_empty());
+
+        // Negative examples.
+        assert!(!t(r"a+").is_match_empty());
+        assert!(!t(r"a{1}").is_match_empty());
+        assert!(!t(r"a{1,}").is_match_empty());
+        assert!(!t(r"a{1,2}").is_match_empty());
+        assert!(!t(r"a{1,10}").is_match_empty());
+        assert!(!t(r"b|a").is_match_empty());
+        assert!(!t(r"a*a+(abcd)*").is_match_empty());
+        assert!(!t(r"\b").is_match_empty());
+        assert!(!t(r"(?-u)\b").is_match_empty());
+    }
+}
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-syntax/src/hir/visitor.rs
@@ -0,0 +1,222 @@
+// Copyright 2018 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use hir::{self, Hir, HirKind};
+
+/// A trait for visiting the high-level IR (HIR) in depth-first order.
+///
+/// The principal aim of this trait is to enable callers to perform case
+/// analysis on a high-level intermediate representation of a regular
+/// expression without necessarily using recursion. In particular, this permits
+/// callers to do case analysis with constant stack usage, which can be
+/// important since the size of an HIR may be proportional to end user input.
+///
+/// Typical usage of this trait involves providing an implementation and then
+/// running it using the [`visit`](fn.visit.html) function.
+pub trait Visitor {
+    /// The result of visiting an HIR.
+    type Output;
+    /// An error that visiting an HIR might return.
+    type Err;
+
+    /// All implementors of `Visitor` must provide a `finish` method, which
+    /// yields the result of visiting the HIR or an error.
+    fn finish(self) -> Result<Self::Output, Self::Err>;
+
+    /// This method is called before beginning traversal of the HIR.
+    fn start(&mut self) {}
+
+    /// This method is called on an `Hir` before descending into child `Hir`
+    /// nodes.
+    fn visit_pre(&mut self, _hir: &Hir) -> Result<(), Self::Err> {
+        Ok(())
+    }
+
+    /// This method is called on an `Hir` after descending all of its child
+    /// `Hir` nodes.
+    fn visit_post(&mut self, _hir: &Hir) -> Result<(), Self::Err> {
+        Ok(())
+    }
+
+    /// This method is called between child nodes of an alternation.
+    fn visit_alternation_in(&mut self) -> Result<(), Self::Err> {
+        Ok(())
+    }
+}
+
+/// Executes an implementation of `Visitor` in constant stack space.
+///
+/// This function will visit every node in the given `Hir` while calling
+/// appropriate methods provided by the
+/// [`Visitor`](trait.Visitor.html) trait.
+///
+/// The primary use case for this method is when one wants to perform case
+/// analysis over an `Hir` without using a stack size proportional to the depth
+/// of the `Hir`. Namely, this method will instead use constant stack space,
+/// but will use heap space proportional to the size of the `Hir`. This may be
+/// desirable in cases where the size of `Hir` is proportional to end user
+/// input.
+///
+/// If the visitor returns an error at any point, then visiting is stopped and
+/// the error is returned.
+pub fn visit<V: Visitor>(hir: &Hir, visitor: V) -> Result<V::Output, V::Err> {
+    HeapVisitor::new().visit(hir, visitor)
+}
+
+/// HeapVisitor visits every item in an `Hir` using constant stack space and
+/// heap space proportional to the size of the `Hir` (i.e., without recursion).
+struct HeapVisitor<'a> {
+    /// A stack of `Hir` nodes. This is roughly analogous to the call stack
+    /// used in a typical recursive visitor.
+    stack: Vec<(&'a Hir, Frame<'a>)>,
+}
+
+/// Represents a single stack frame while performing structural induction over
+/// an `Hir`.
+enum Frame<'a> {
+    /// A stack frame allocated just before descending into a repetition
+    /// operator's child node.
+    Repetition(&'a hir::Repetition),
+    /// A stack frame allocated just before descending into a group's child
+    /// node.
+    Group(&'a hir::Group),
+    /// The stack frame used while visiting every child node of a concatenation
+    /// of expressions.
+    Concat {
+        /// The child node we are currently visiting.
+        head: &'a Hir,
+        /// The remaining child nodes to visit (which may be empty).
+        tail: &'a [Hir],
+    },
+    /// The stack frame used while visiting every child node of an alternation
+    /// of expressions.
+    Alternation {
+        /// The child node we are currently visiting.
+        head: &'a Hir,
+        /// The remaining child nodes to visit (which may be empty).
+        tail: &'a [Hir],
+    },
+}
+
+impl<'a> HeapVisitor<'a> {
+    fn new() -> HeapVisitor<'a> {
+        HeapVisitor { stack: vec![] }
+    }
+
+    fn visit<V: Visitor>(
+        &mut self,
+        mut hir: &'a Hir,
+        mut visitor: V,
+    ) -> Result<V::Output, V::Err> {
+        self.stack.clear();
+
+        visitor.start();
+        loop {
+            visitor.visit_pre(hir)?;
+            if let Some(x) = self.induct(hir) {
+                let child = x.child();
+                self.stack.push((hir, x));
+                hir = child;
+                continue;
+            }
+            // No induction means we have a base case, so we can post visit
+            // it now.
+            visitor.visit_post(hir)?;
+
+            // At this point, we now try to pop our call stack until it is
+            // either empty or we hit another inductive case.
+            loop {
+                let (post_hir, frame) = match self.stack.pop() {
+                    None => return visitor.finish(),
+                    Some((post_hir, frame)) => (post_hir, frame),
+                };
+                // If this is a concat/alternate, then we might have additional
+                // inductive steps to process.
+                if let Some(x) = self.pop(frame) {
+                    if let Frame::Alternation {..} = x {
+                        visitor.visit_alternation_in()?;
+                    }
+                    hir = x.child();
+                    self.stack.push((post_hir, x));
+                    break;
+                }
+                // Otherwise, we've finished visiting all the child nodes for
+                // this HIR, so we can post visit it now.
+                visitor.visit_post(post_hir)?;
+            }
+        }
+    }
+
+    /// Build a stack frame for the given HIR if one is needed (which occurs if
+    /// and only if there are child nodes in the HIR). Otherwise, return None.
+    fn induct(&mut self, hir: &'a Hir) -> Option<Frame<'a>> {
+        match *hir.kind() {
+            HirKind::Repetition(ref x) => Some(Frame::Repetition(x)),
+            HirKind::Group(ref x) => Some(Frame::Group(x)),
+            HirKind::Concat(ref x) if x.is_empty() => None,
+            HirKind::Concat(ref x) => {
+                Some(Frame::Concat {
+                    head: &x[0],
+                    tail: &x[1..],
+                })
+            }
+            HirKind::Alternation(ref x) if x.is_empty() => None,
+            HirKind::Alternation(ref x) => {
+                Some(Frame::Alternation {
+                    head: &x[0],
+                    tail: &x[1..],
+                })
+            }
+            _ => None,
+        }
+    }
+
+    /// Pops the given frame. If the frame has an additional inductive step,
+    /// then return it, otherwise return `None`.
+    fn pop(&self, induct: Frame<'a>) -> Option<Frame<'a>> {
+        match induct {
+            Frame::Repetition(_) => None,
+            Frame::Group(_) => None,
+            Frame::Concat { tail, .. } => {
+                if tail.is_empty() {
+                    None
+                } else {
+                    Some(Frame::Concat {
+                        head: &tail[0],
+                        tail: &tail[1..],
+                    })
+                }
+            }
+            Frame::Alternation { tail, .. } => {
+                if tail.is_empty() {
+                    None
+                } else {
+                    Some(Frame::Alternation {
+                        head: &tail[0],
+                        tail: &tail[1..],
+                    })
+                }
+            }
+        }
+    }
+}
+
+impl<'a> Frame<'a> {
+    /// Perform the next inductive step on this frame and return the next
+    /// child HIR node to visit.
+    fn child(&self) -> &'a Hir {
+        match *self {
+            Frame::Repetition(rep) => &rep.hir,
+            Frame::Group(group) => &group.hir,
+            Frame::Concat { head, .. } => head,
+            Frame::Alternation { head, .. } => head,
+        }
+    }
+}
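To make the traversal machinery in visitor.rs concrete, here is a minimal sketch of a `Visitor` implementation that counts literal nodes in an `Hir` and runs it with `visit`. It assumes that `visit` and `Visitor` are re-exported from the `hir` module; the pattern and the `main` wrapper are purely illustrative.

```rust
extern crate regex_syntax;

use regex_syntax::Parser;
// Assumes `visit` and `Visitor` are re-exported at `regex_syntax::hir`.
use regex_syntax::hir::{self, Hir, HirKind};

/// Counts how many literal nodes appear in an `Hir`, without recursion.
struct CountLiterals(usize);

impl hir::Visitor for CountLiterals {
    type Output = usize;
    type Err = ();

    fn finish(self) -> Result<usize, ()> {
        Ok(self.0)
    }

    fn visit_pre(&mut self, hir: &Hir) -> Result<(), ()> {
        if let HirKind::Literal(_) = *hir.kind() {
            self.0 += 1;
        }
        Ok(())
    }
}

fn main() {
    let hir = Parser::new().parse(r"a(b|cd)+").unwrap();
    let count = hir::visit(&hir, CountLiterals(0)).unwrap();
    // Each character is its own literal node in the HIR, so this should
    // print 4 (for `a`, `b`, `c` and `d`).
    println!("literal nodes: {}", count);
}
```

Because `visit` drives the traversal with an explicit heap-allocated stack, this works even for deeply nested patterns without risking stack overflow inside the visitor itself.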
--- a/third_party/rust/regex-syntax/src/lib.rs
+++ b/third_party/rust/regex-syntax/src/lib.rs
@@ -1,2214 +1,221 @@
-// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
+// Copyright 2018 The Rust Project Developers. See the COPYRIGHT
 // file at the top-level directory of this distribution and at
 // http://rust-lang.org/COPYRIGHT.
 //
 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
 /*!
-This crate provides a regular expression parser and an abstract syntax for
-regular expressions. The abstract syntax is defined by the `Expr` type. The
-concrete syntax is enumerated in the
-[`regex`](../regex/index.html#syntax)
-crate documentation.
+This crate provides a robust regular expression parser.
+
+This crate defines two primary types:
 
-Note that since this crate is first and foremost an implementation detail for
-the `regex` crate, it may experience more frequent breaking changes. It is
-exposed as a separate crate so that others may use it to do analysis on regular
-expressions or even build their own matching engine.
+* [`Ast`](ast/enum.Ast.html) is the abstract syntax of a regular expression.
+  An abstract syntax corresponds to a *structured representation* of the
+  concrete syntax of a regular expression, where the concrete syntax is the
+  pattern string itself (e.g., `foo(bar)+`). Given some abstract syntax, it
+  can be converted back to the original concrete syntax (modulo some details,
+  like whitespace). To a first approximation, the abstract syntax is complex
+  and difficult to analyze.
+* [`Hir`](hir/struct.Hir.html) is the high-level intermediate representation
+  ("HIR" or "high-level IR" for short) of a regular expression. It corresponds to
+  an intermediate state of a regular expression that sits between the abstract
+  syntax and the low level compiled opcodes that are eventually responsible for
+  executing a regular expression search. Given some high-level IR, it is not
+  possible to produce the original concrete syntax (although it is possible to
+  produce an equivalent conrete syntax, but it will likely scarcely resemble
+  the original pattern). To a first approximation, the high-level IR is simple
+  and easy to analyze.
+
+These two types come with conversion routines:
 
-# Example: parsing an expression
+* An [`ast::parse::Parser`](ast/parse/struct.Parser.html) converts concrete
+  syntax (a `&str`) to an [`Ast`](ast/enum.Ast.html).
+* A [`hir::translate::Translator`](hir/translate/struct.Translator.html)
+  converts an [`Ast`](ast/enum.Ast.html) to a [`Hir`](hir/struct.Hir.html).
 
-Parsing a regular expression can be done with the `Expr::parse` function.
+As a convenience, the above two conversion routines are combined into one via
+the top-level [`Parser`](struct.Parser.html) type. This `Parser` will first
+convert your pattern to an `Ast` and then convert the `Ast` to an `Hir`.
+
 
-```rust
-use regex_syntax::Expr;
+# Example
+
+This example shows how to parse a pattern string into its HIR:
 
-assert_eq!(Expr::parse(r"ab|yz").unwrap(), Expr::Alternate(vec![
-    Expr::Literal { chars: vec!['a', 'b'], casei: false },
-    Expr::Literal { chars: vec!['y', 'z'], casei: false },
+```
+use regex_syntax::Parser;
+use regex_syntax::hir::{self, Hir};
+
+let hir = Parser::new().parse("a|b").unwrap();
+assert_eq!(hir, Hir::alternation(vec![
+    Hir::literal(hir::Literal::Unicode('a')),
+    Hir::literal(hir::Literal::Unicode('b')),
 ]));
 ```
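The conversion routines listed above can also be run separately when intermediate access to the `Ast` is useful. A minimal sketch, assuming the `ast::parse::Parser` and `hir::translate::Translator` entry points named in the bullet list (the exact `translate` signature is taken on trust from their module docs):

```rust
extern crate regex_syntax;

use regex_syntax::ast::parse::Parser as AstParser;
use regex_syntax::hir::translate::Translator;

fn main() {
    let pattern = r"foo(bar)+";

    // Step 1: concrete syntax (&str) -> Ast.
    let ast = AstParser::new().parse(pattern).unwrap();

    // Step 2: Ast -> Hir. The original pattern is passed along so that
    // translation errors can report spans into the concrete syntax.
    let hir = Translator::new().translate(pattern, &ast).unwrap();

    println!("{:?}", hir);
}
```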
 
-# Example: inspecting an error
+
+# Concrete syntax supported
+
+The concrete syntax is documented as part of the public API of the
+[`regex` crate](https://docs.rs/regex/%2A/regex/#syntax).
 
-The parser in this crate provides very detailed error values. For example,
-if an invalid character class range is given:
+
+# Input safety
 
-```rust
-use regex_syntax::{Expr, ErrorKind};
+A key feature of this library is that it is safe to use with end user facing
+input. This plays a significant role in the internal implementation. In
+particular:
 
-let err = Expr::parse(r"[z-a]").unwrap_err();
-assert_eq!(err.position(), 4);
-assert_eq!(err.kind(), &ErrorKind::InvalidClassRange {
-    start: 'z',
-    end: 'a',
-});
-```
+1. Parsers provide a `nest_limit` option that permits callers to control how
+   deeply nested a regular expression is allowed to be. This makes it possible
+   to do case analysis over an `Ast` or an `Hir` using recursion without
+   worrying about stack overflow.
+2. Since relying on a particular stack size is brittle, this crate goes to
+   great lengths to ensure that all interactions with both the `Ast` and the
+   `Hir` do not use recursion. Namely, they use constant stack space and heap
+   space proportional to the size of the original pattern string (in bytes).
+   This includes the type's corresponding destructors. (One exception to this
+   is literal extraction, but this will eventually get fixed.)
+
+
+# Error reporting
 
-Or unbalanced parentheses:
+The `Display` implementations on all `Error` types exposed in this library
+provide nice human readable errors that are suitable for showing to end users
+in a monospace font.
 
-```rust
-use regex_syntax::{Expr, ErrorKind};
+
+# Literal extraction
 
-let err = Expr::parse(r"ab(cd").unwrap_err();
-assert_eq!(err.position(), 2);
-assert_eq!(err.kind(), &ErrorKind::UnclosedParen);
-```
+This crate provides limited support for
+[literal extraction from `Hir` values](hir/literal/struct.Literals.html).
+Be warned that literal extraction currently uses recursion, and therefore,
+stack size proportional to the size of the `Hir`.
+
+The purpose of literal extraction is to speed up searches. That is, if you
+know a regular expression must match a prefix or suffix literal, then it is
+often quicker to search for instances of that literal, and then confirm or deny
+the match using the full regular expression engine. These optimizations are
+done automatically in the `regex` crate.
 */
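The `nest_limit` option and the literal extraction support described above can be combined. A minimal sketch, assuming `ParserBuilder::nest_limit` and `hir::literal::Literals::prefixes` as exposed by this crate (the limit value and pattern are arbitrary):

```rust
extern crate regex_syntax;

use regex_syntax::ParserBuilder;
use regex_syntax::hir::literal::Literals;

fn main() {
    // Reject patterns nested more deeply than 50 levels so that recursive
    // case analysis over the resulting Hir cannot overflow the stack.
    let mut parser = ParserBuilder::new()
        .nest_limit(50)
        .build();
    let hir = parser.parse("(foo|foobar)+baz").unwrap();

    // Extract prefix literals; the regex crate uses these to speed up
    // searches before falling back to the full matching engine.
    let prefixes = Literals::prefixes(&hir);
    println!("{:?}", prefixes.literals());
}
```

Note that, as the module docs warn, literal extraction itself still uses recursion, so the `nest_limit` also serves as a safeguard for that step.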
 
 #![deny(missing_docs)]
-#![cfg_attr(test, deny(warnings))]
+
+extern crate ucd_util;
 
-#[cfg(test)] extern crate quickcheck;
-#[cfg(test)] extern crate rand;
+pub use error::{Error, Result};
+pub use parser::{Parser, ParserBuilder};
 
-mod literals;
+pub mod ast;
+mod either;
+mod error;
+pub mod hir;
 mod parser;
 mod unicode;
-
-use std::ascii;
-use std::char;
-use std::cmp::{Ordering, max, min};
-use std::fmt;
-use std::iter::IntoIterator;
-use std::ops::Deref;
-use std::result;
-use std::slice;
-use std::u8;
-use std::vec;
-
-use unicode::case_folding;
-
-use self::Expr::*;
-use self::Repeater::*;
-
-use parser::{Flags, Parser};
-
-pub use literals::{Literals, Lit};
-
-/// A regular expression abstract syntax tree.
-///
-/// An `Expr` represents the abstract syntax of a regular expression.
-#[derive(Clone, Debug, PartialEq, Eq)]
-pub enum Expr {
-    /// An empty regex (which never matches any text).
-    Empty,
-    /// A sequence of one or more literal characters to be matched.
-    Literal {
-        /// The characters.
-        chars: Vec<char>,
-        /// Whether to match case insensitively.
-        casei: bool,
-    },
-    /// A sequence of one or more literal bytes to be matched.
-    LiteralBytes {
-        /// The bytes.
-        bytes: Vec<u8>,
-        /// Whether to match case insensitively.
-        ///
-        /// The interpretation of "case insensitive" in this context is
-        /// ambiguous since `bytes` can be arbitrary. However, a good heuristic
-        /// is to assume that the bytes are ASCII-compatible and do simple
-        /// ASCII case folding.
-        casei: bool,
-    },
-    /// Match any character.
-    AnyChar,
-    /// Match any character, excluding new line (`0xA`).
-    AnyCharNoNL,
-    /// Match any byte.
-    AnyByte,
-    /// Match any byte, excluding new line (`0xA`).
-    AnyByteNoNL,
-    /// A character class.
-    Class(CharClass),
-    /// A character class with byte ranges only.
-    ClassBytes(ByteClass),
-    /// Match the start of a line or beginning of input.
-    StartLine,
-    /// Match the end of a line or end of input.
-    EndLine,
-    /// Match the beginning of input.
-    StartText,
-    /// Match the end of input.
-    EndText,
-    /// Match a word boundary (word character on one side and a non-word
-    /// character on the other).
-    WordBoundary,
-    /// Match a position that is not a word boundary (word or non-word
-    /// characters on both sides).
-    NotWordBoundary,
-    /// Match an ASCII word boundary.
-    WordBoundaryAscii,
-    /// Match a position that is not an ASCII word boundary.
-    NotWordBoundaryAscii,
-    /// A group, possibly non-capturing.
-    Group {
-        /// The expression inside the group.
-        e: Box<Expr>,
-        /// The capture index (starting at `1`) only for capturing groups.
-        i: Option<usize>,
-        /// The capture name, only for capturing named groups.
-        name: Option<String>,
-    },
-    /// A repeat operator (`?`, `*`, `+` or `{m,n}`).
-    Repeat {
-        /// The expression to be repeated. Limited to literals, `.`, classes
-        /// or grouped expressions.
-        e: Box<Expr>,
-        /// The type of repeat operator used.
-        r: Repeater,
-        /// Whether the repeat is greedy (match the most) or not (match the
-        /// least).
-        greedy: bool,
-    },
-    /// A concatenation of expressions. Must be matched one after the other.
-    ///
-    /// N.B. A concat expression can only appear at the top-level or
-    /// immediately inside a group expression.
-    Concat(Vec<Expr>),
-    /// An alternation of expressions. Only one must match.
-    ///
-    /// N.B. An alternate expression can only appear at the top-level or
-    /// immediately inside a group expression.
-    Alternate(Vec<Expr>),
-}
-
-type CaptureIndex = Option<usize>;
-
-type CaptureName = Option<String>;
-
-/// The type of a repeat operator expression.
-#[derive(Clone, Copy, Debug, PartialEq, Eq)]
-pub enum Repeater {
-    /// Match zero or one (`?`).
-    ZeroOrOne,
-    /// Match zero or more (`*`).
-    ZeroOrMore,
-    /// Match one or more (`+`).
-    OneOrMore,
-    /// Match for at least `min` and at most `max` (`{m,n}`).
-    ///
-    /// When `max` is `None`, there is no upper bound on the number of matches.
-    Range {
-        /// Lower bound on the number of matches.
-        min: u32,
-        /// Optional upper bound on the number of matches.
-        max: Option<u32>,
-    },
-}
-
-impl Repeater {
-    /// Returns true if and only if this repetition can match the empty string.
-    fn matches_empty(&self) -> bool {
-        use self::Repeater::*;
-        match *self {
-            ZeroOrOne => true,
-            ZeroOrMore => true,
-            OneOrMore => false,
-            Range { min, .. } => min == 0,
-        }
-    }
-}
-
-/// A character class.
-///
-/// A character class has a canonical format that the parser guarantees. Its
-/// canonical format is defined by the following invariants:
-///
-/// 1. Given any Unicode scalar value, it is matched by *at most* one character
-///    range in a canonical character class.
-/// 2. Every adjacent character range is separated by at least one Unicode
-///    scalar value.
-/// 3. Given any pair of character ranges `r1` and `r2`, if
-///    `r1.end < r2.start`, then `r1` comes before `r2` in a canonical
-///    character class.
-///
-/// In sum, any `CharClass` produced by this crate's parser is a sorted
-/// sequence of non-overlapping ranges. This makes it possible to test whether
-/// a character is matched by a class with a binary search.
-///
-/// If the case insensitive flag was set when parsing a character class, then
-/// simple case folding is done automatically. For example, `(?i)[a-c]` is
-/// automatically translated to `[a-cA-C]`.
-#[derive(Clone, Debug, PartialEq, Eq)]
-pub struct CharClass {
-    ranges: Vec<ClassRange>,
-}
-
-/// A single inclusive range in a character class.
-///
-/// Since range boundaries are defined by Unicode scalar values, the boundaries
-/// can never be in the open interval `(0xD7FF, 0xE000)`. However, a range may
-/// *cover* codepoints that are not scalar values.
-///
-/// Note that this has a few convenient impls on `PartialEq` and `PartialOrd`
-/// for testing whether a character is contained inside a given range.
-#[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Eq, Ord)]
-pub struct ClassRange {
-    /// The start character of the range.
-    ///
-    /// This must be less than or equal to `end`.
-    pub start: char,
-
-    /// The end character of the range.
-    ///
-    /// This must be greater than or equal to `start`.
-    pub end: char,
-}
-
-/// A byte class for byte ranges only.
-///
-/// A byte class has a canonical format that the parser guarantees. Its
-/// canonical format is defined by the following invariants:
-///
-/// 1. Given any byte, it is matched by *at most* one byte range in a canonical
-///    character class.
-/// 2. Every adjacent byte range is separated by at least one byte.
-/// 3. Given any pair of byte ranges `r1` and `r2`, if
-///    `r1.end < r2.start`, then `r1` comes before `r2` in a canonical
-///    character class.
-///
-/// In sum, any `ByteClass` produced by this crate's parser is a sorted
-/// sequence of non-overlapping ranges. This makes it possible to test whether
-/// a byte is matched by a class with a binary search.
-///
-/// If the case insensitive flag was set when parsing a character class,
-/// then simple ASCII-only case folding is done automatically. For example,
-/// `(?i)[a-c]` is automatically translated to `[a-cA-C]`.
-#[derive(Clone, Debug, PartialEq, Eq)]
-pub struct ByteClass {
-    ranges: Vec<ByteRange>,
-}
-
-/// A single inclusive range in a byte class.
-///
-/// Note that this has a few convenient impls on `PartialEq` and `PartialOrd`
-/// for testing whether a byte is contained inside a given range.
-#[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Eq, Ord)]
-pub struct ByteRange {
-    /// The start byte of the range.
-    ///
-    /// This must be less than or equal to `end`.
-    pub start: u8,
-
-    /// The end byte of the range.
-    ///
-    /// This must be greater than or equal to `start`.
-    pub end: u8,
-}
-
-/// A builder for configuring regular expression parsing.
-///
-/// This allows setting the default values of flags and other options, such
-/// as the maximum nesting depth.
-#[derive(Clone, Debug)]
-pub struct ExprBuilder {
-    flags: Flags,
-    nest_limit: usize,
-}
-
-impl ExprBuilder {
-    /// Create a new builder for configuring expression parsing.
-    ///
-    /// Note that all flags are disabled by default.
-    pub fn new() -> ExprBuilder {
-        ExprBuilder {
-            flags: Flags::default(),
-            nest_limit: 200,
-        }
-    }
-
-    /// Set the default value for the case insensitive (`i`) flag.
-    pub fn case_insensitive(mut self, yes: bool) -> ExprBuilder {
-        self.flags.casei = yes;
-        self
-    }
-
-    /// Set the default value for the multi-line matching (`m`) flag.
-    pub fn multi_line(mut self, yes: bool) -> ExprBuilder {
-        self.flags.multi = yes;
-        self
-    }
-
-    /// Set the default value for the any character (`s`) flag.
-    pub fn dot_matches_new_line(mut self, yes: bool) -> ExprBuilder {
-        self.flags.dotnl = yes;
-        self
-    }
-
-    /// Set the default value for the greedy swap (`U`) flag.
-    pub fn swap_greed(mut self, yes: bool) -> ExprBuilder {
-        self.flags.swap_greed = yes;
-        self
-    }
-
-    /// Set the default value for the ignore whitespace (`x`) flag.
-    pub fn ignore_whitespace(mut self, yes: bool) -> ExprBuilder {
-        self.flags.ignore_space = yes;
-        self
-    }
-
-    /// Set the default value for the Unicode (`u`) flag.
-    ///
-    /// If `yes` is false, then `allow_bytes` is set to true.
-    pub fn unicode(mut self, yes: bool) -> ExprBuilder {
-        self.flags.unicode = yes;
-        if !yes {
-            self.allow_bytes(true)
-        } else {
-            self
-        }
-    }
-
-    /// Whether the parser allows matching arbitrary bytes or not.
-    ///
-    /// When the `u` flag is disabled (either with this builder or in the
-    /// expression itself), the parser switches to interpreting the expression
-    /// as matching arbitrary bytes instead of Unicode codepoints. For example,
-    /// the expression `(?u:\xFF)` matches the *codepoint* `\xFF`, which
-    /// corresponds to the UTF-8 byte sequence `\xCE\xBF`. Conversely,
-    /// `(?-u:\xFF)` matches the *byte* `\xFF`, which is not valid UTF-8.
-    ///
-    /// When `allow_bytes` is disabled (the default), an expression like
-    /// `(?-u:\xFF)` will cause the parser to return an error, since it would
-    /// otherwise match invalid UTF-8. When enabled, it will be allowed.
-    pub fn allow_bytes(mut self, yes: bool) -> ExprBuilder {
-        self.flags.allow_bytes = yes;
-        self
-    }
-
-    /// Set the nesting limit for regular expression parsing.
-    ///
-    /// Regular expressions that nest more than this limit will result in a
-    /// `StackExhausted` error.
-    pub fn nest_limit(mut self, limit: usize) -> ExprBuilder {
-        self.nest_limit = limit;
-        self
-    }
-
-    /// Parse a string as a regular expression using the current configuration.
-    pub fn parse(self, s: &str) -> Result<Expr> {
-        Parser::parse(s, self.flags).and_then(|e| e.simplify(self.nest_limit))
-    }
-}
-
-impl Expr {
-    /// Parses a string in a regular expression syntax tree.
-    ///
-    /// This is a convenience method for parsing an expression using the
-    /// default configuration. To tweak parsing options (such as which flags
-    /// are enabled by default), use the `ExprBuilder` type.
-    pub fn parse(s: &str) -> Result<Expr> {
-        ExprBuilder::new().parse(s)
-    }
-
-    /// Returns true iff the expression can be repeated by a quantifier.
-    fn can_repeat(&self) -> bool {
-        match *self {
-            Literal{..} | LiteralBytes{..}
-            | AnyChar | AnyCharNoNL | AnyByte | AnyByteNoNL
-            | Class(_) | ClassBytes(_)
-            | StartLine | EndLine | StartText | EndText
-            | WordBoundary | NotWordBoundary
-            | WordBoundaryAscii | NotWordBoundaryAscii
-            | Group{..}
-            => true,
-            _ => false,
-        }
-    }
-
-    fn simplify(self, nest_limit: usize) -> Result<Expr> {
-        fn combine_literals(es: &mut Vec<Expr>, e: Expr) {
-            match (es.pop(), e) {
-                (None, e) => es.push(e),
-                (Some(Literal { chars: mut chars1, casei: casei1 }),
-                      Literal { chars: chars2, casei: casei2 }) => {
-                    if casei1 == casei2 {
-                        chars1.extend(chars2);
-                        es.push(Literal { chars: chars1, casei: casei1 });
-                    } else {
-                        es.push(Literal { chars: chars1, casei: casei1 });
-                        es.push(Literal { chars: chars2, casei: casei2 });
-                    }
-                }
-                (Some(LiteralBytes { bytes: mut bytes1, casei: casei1 }),
-                      LiteralBytes { bytes: bytes2, casei: casei2 }) => {
-                    if casei1 == casei2 {
-                        bytes1.extend(bytes2);
-                        es.push(LiteralBytes { bytes: bytes1, casei: casei1 });
-                    } else {
-                        es.push(LiteralBytes { bytes: bytes1, casei: casei1 });
-                        es.push(LiteralBytes { bytes: bytes2, casei: casei2 });
-                    }
-                }
-                (Some(e1), e2) => {
-                    es.push(e1);
-                    es.push(e2);
-                }
-            }
-        }
-        fn simp(expr: Expr, recurse: usize, limit: usize) -> Result<Expr> {
-            if recurse > limit {
-                return Err(Error {
-                    pos: 0,
-                    surround: "".to_owned(),
-                    kind: ErrorKind::StackExhausted,
-                });
-            }
-            let simplify = |e| simp(e, recurse + 1, limit);
-            Ok(match expr {
-                Repeat { e, r, greedy } => Repeat {
-                    e: Box::new(try!(simplify(*e))),
-                    r: r,
-                    greedy: greedy,
-                },
-                Group { e, i, name } => {
-                    let e = try!(simplify(*e));
-                    if i.is_none() && name.is_none() && e.can_repeat() {
-                        e
-                    } else {
-                        Group { e: Box::new(e), i: i, name: name }
-                    }
-                }
-                Concat(es) => {
-                    let mut new_es = Vec::with_capacity(es.len());
-                    for e in es {
-                        combine_literals(&mut new_es, try!(simplify(e)));
-                    }
-                    if new_es.len() == 1 {
-                        new_es.pop().unwrap()
-                    } else {
-                        Concat(new_es)
-                    }
-                }
-                Alternate(es) => {
-                    let mut new_es = Vec::with_capacity(es.len());
-                    for e in es {
-                        new_es.push(try!(simplify(e)));
-                    }
-                    Alternate(new_es)
-                }
-                e => e,
-            })
-        }
-        simp(self, 0, nest_limit)
-    }
-
-    /// Returns a set of literal prefixes extracted from this expression.
-    pub fn prefixes(&self) -> Literals {
-        let mut lits = Literals::empty();
-        lits.union_prefixes(self);
-        lits
-    }
-
-    /// Returns a set of literal suffixes extracted from this expression.
-    pub fn suffixes(&self) -> Literals {
-        let mut lits = Literals::empty();
-        lits.union_suffixes(self);
-        lits
-    }
-
-    /// Returns true if and only if the expression is required to match from
-    /// the beginning of text.
-    pub fn is_anchored_start(&self) -> bool {
-        match *self {
-            Repeat { ref e, r, .. } => {
-                !r.matches_empty() && e.is_anchored_start()
-            }
-            Group { ref e, .. } => e.is_anchored_start(),
-            Concat(ref es) => es[0].is_anchored_start(),
-            Alternate(ref es) => es.iter().all(|e| e.is_anchored_start()),
-            StartText => true,
-            _ => false,
-        }
-    }
-
-    /// Returns true if and only if the expression has at least one matchable
-    /// sub-expression that must match the beginning of text.
-    pub fn has_anchored_start(&self) -> bool {
-        match *self {
-            Repeat { ref e, r, .. } => {
-                !r.matches_empty() && e.has_anchored_start()
-            }
-            Group { ref e, .. } => e.has_anchored_start(),
-            Concat(ref es) => es[0].has_anchored_start(),
-            Alternate(ref es) => es.iter().any(|e| e.has_anchored_start()),
-            StartText => true,
-            _ => false,
-        }
-    }
-
-    /// Returns true if and only if the expression is required to match at the
-    /// end of the text.
-    pub fn is_anchored_end(&self) -> bool {
-        match *self {
-            Repeat { ref e, r, .. } => {
-                !r.matches_empty() && e.is_anchored_end()
-            }
-            Group { ref e, .. } => e.is_anchored_end(),
-            Concat(ref es) => es[es.len() - 1].is_anchored_end(),
-            Alternate(ref es) => es.iter().all(|e| e.is_anchored_end()),
-            EndText => true,
-            _ => false,
-        }
-    }
-
-    /// Returns true if and only if the expression has at least one matchable
-    /// sub-expression that must match the end of the text.
-    pub fn has_anchored_end(&self) -> bool {
-        match *self {
-            Repeat { ref e, r, .. } => {
-                !r.matches_empty() && e.has_anchored_end()
-            }
-            Group { ref e, .. } => e.has_anchored_end(),
-            Concat(ref es) => es[es.len() - 1].has_anchored_end(),
-            Alternate(ref es) => es.iter().any(|e| e.has_anchored_end()),
-            EndText => true,
-            _ => false,
-        }
-    }
-
-    /// Returns true if and only if the expression contains sub-expressions
-    /// that can match arbitrary bytes.
-    pub fn has_bytes(&self) -> bool {
-        match *self {
-            Repeat { ref e, .. } => e.has_bytes(),
-            Group { ref e, .. } => e.has_bytes(),
-            Concat(ref es) => es.iter().any(|e| e.has_bytes()),
-            Alternate(ref es) => es.iter().any(|e| e.has_bytes()),
-            LiteralBytes{..} => true,
-            AnyByte | AnyByteNoNL => true,
-            ClassBytes(_) => true,
-            WordBoundaryAscii | NotWordBoundaryAscii => true,
-            _ => false,
-        }
-    }
-}
-
-impl Deref for CharClass {
-    type Target = Vec<ClassRange>;
-    fn deref(&self) -> &Vec<ClassRange> { &self.ranges }
-}
-
-impl IntoIterator for CharClass {
-    type Item = ClassRange;
-    type IntoIter = vec::IntoIter<ClassRange>;
-    fn into_iter(self) -> vec::IntoIter<ClassRange> { self.ranges.into_iter() }
-}
-
-impl<'a> IntoIterator for &'a CharClass {
-    type Item = &'a ClassRange;
-    type IntoIter = slice::Iter<'a, ClassRange>;
-    fn into_iter(self) -> slice::Iter<'a, ClassRange> { self.iter() }
-}
-
-impl CharClass {
-    /// Create a new class from an existing set of ranges.
-    pub fn new(ranges: Vec<ClassRange>) -> CharClass {
-        CharClass { ranges: ranges }
-    }
-
-    /// Create an empty class.
-    fn empty() -> CharClass {
-        CharClass::new(Vec::new())
-    }
-
-    /// Returns true if `c` is matched by this character class.
-    pub fn matches(&self, c: char) -> bool {
-        self.binary_search_by(|range| c.partial_cmp(range).unwrap()).is_ok()
-    }
-
-    /// Removes the given character from the class if it exists.
-    ///
-    /// Note that this takes `O(n)` time in the number of ranges.
-    pub fn remove(&mut self, c: char) {
-        let mut i = match self.binary_search_by(|r| c.partial_cmp(r).unwrap()) {
-            Ok(i) => i,
-            Err(_) => return,
-        };
-        let mut r = self.ranges.remove(i);
-        if r.start == c {
-            r.start = inc_char(c);
-            if r.start > r.end || c == char::MAX {
-                return;
-            }
-            self.ranges.insert(i, r);
-        } else if r.end == c {
-            r.end = dec_char(c);
-            if r.end < r.start || c == '\x00' {
-                return;
-            }
-            self.ranges.insert(0, r);
-        } else {
-            let (mut r1, mut r2) = (r.clone(), r.clone());
-            r1.end = dec_char(c);
-            if r1.start <= r1.end {
-                self.ranges.insert(i, r1);
-                i += 1;
-            }
-            r2.start = inc_char(c);
-            if r2.start <= r2.end {
-                self.ranges.insert(i, r2);
-            }
-        }
-    }
-
-    /// Create a new empty class from this one.
-    fn to_empty(&self) -> CharClass {
-        CharClass { ranges: Vec::with_capacity(self.len()) }
-    }
-
-    /// Create a byte class from this character class.
-    ///
-    /// Codepoints above 0xFF are removed.
-    fn to_byte_class(self) -> ByteClass {
-        ByteClass::new(
-            self.ranges.into_iter()
-                       .filter_map(|r| r.to_byte_range())
-                       .collect()).canonicalize()
-    }
-
-    /// Merge two classes and canonicalize them.
-    #[cfg(test)]
-    fn merge(mut self, other: CharClass) -> CharClass {
-        self.ranges.extend(other);
-        self.canonicalize()
-    }
-
-    /// Canonicalize any sequence of ranges.
-    ///
-    /// This is responsible for enforcing the canonical format invariants
-    /// as described on the docs for the `CharClass` type.
-    fn canonicalize(mut self) -> CharClass {
-        // TODO: Save some cycles here by checking if already canonicalized.
-        self.ranges.sort();
-        let mut ordered = self.to_empty(); // TODO: Do this in place?
-        for candidate in self {
-            // If the candidate overlaps with an existing range, then it must
-            // be the most recent range added because we process the candidates
-            // in order.
-            if let Some(or) = ordered.ranges.last_mut() {
-                if or.overlapping(candidate) {
-                    *or = or.merge(candidate);
-                    continue;
-                }
-            }
-            ordered.ranges.push(candidate);
-        }
-        ordered
-    }
-
-    /// Calculate the intersection of two canonical character classes.
-    ///
-    /// The returned intersection is canonical.
-    fn intersection(&self, other: &CharClass) -> CharClass {
-        if self.ranges.is_empty() || other.ranges.is_empty() {
-            return CharClass::empty();
-        }
-
-        let mut intersection = CharClass::empty();
-
-        let mut iter_a = self.ranges.iter();
-        let mut iter_b = other.ranges.iter();
-        let mut a = iter_a.next().unwrap();
-        let mut b = iter_b.next().unwrap();
-        loop {
-            if let Some(i) = a.intersection(&b) {
-                intersection.ranges.push(i);
-            }
-
-            // If the range with the smaller end didn't match this time,
-            // it won't ever match, so move on to the next one.
-            let (iter, item) = if a.end < b.end {
-                (&mut iter_a, &mut a)
-            } else {
-                (&mut iter_b, &mut b)
-            };
-            match iter.next() {
-                Some(v) => *item = v,
-                None => break, // no more ranges to check, done
-            }
-        }
-
-        intersection.canonicalize()
-    }
-
-    /// Negates the character class.
-    ///
-    /// For all `c` where `c` is a Unicode scalar value, `c` matches `self`
-    /// if and only if `c` does not match `self.negate()`.
-    pub fn negate(mut self) -> CharClass {
-        fn range(s: char, e: char) -> ClassRange { ClassRange::new(s, e) }
-
-        if self.is_empty() {
-            // Inverting an empty range yields all of Unicode.
-            return CharClass {
-                ranges: vec![ClassRange { start: '\x00', end: '\u{10ffff}' }],
-            };
-        }
-        self = self.canonicalize();
-        let mut inv = self.to_empty();
-        if self[0].start > '\x00' {
-            inv.ranges.push(range('\x00', dec_char(self[0].start)));
-        }
-        for win in self.windows(2) {
-            inv.ranges.push(range(inc_char(win[0].end),
-                                  dec_char(win[1].start)));
-        }
-        if self[self.len() - 1].end < char::MAX {
-            inv.ranges.push(range(inc_char(self[self.len() - 1].end),
-                                  char::MAX));
-        }
-        inv
-    }
-
-    /// Apply case folding to this character class.
-    ///
-    /// N.B. Applying case folding to a negated character class probably
-    /// won't produce the expected result. e.g., `(?i)[^x]` really should
-    /// match any character sans `x` and `X`, but if `[^x]` is negated
-    /// before being case folded, you'll end up matching any character.
-    pub fn case_fold(self) -> CharClass {
-        let mut folded = self.to_empty();
-        for r in self {
-            // Applying case folding to a range is expensive because *every*
-            // character needs to be examined. Thus, we avoid that drudgery
-            // if no character in the current range is in our case folding
-            // table.
-            if r.needs_case_folding() {
-                folded.ranges.extend(r.case_fold());
-            }
-            folded.ranges.push(r);
-        }
-        folded.canonicalize()
-    }
-
-    /// Returns the number of characters that match this class.
-    fn num_chars(&self) -> usize {
-        self.ranges.iter()
-            .map(|&r| 1 + (r.end as u32) - (r.start as u32))
-            .fold(0, |acc, len| acc + len)
-            as usize
-    }
-}
-
-impl ClassRange {
-    /// Create a new class range.
-    ///
-    /// If `end < start`, then the two values are swapped so that
-    /// the invariant `start <= end` is preserved.
-    fn new(start: char, end: char) -> ClassRange {
-        if start <= end {
-            ClassRange { start: start, end: end }
-        } else {
-            ClassRange { start: end, end: start }
-        }
-    }
-
-    /// Translate this to a byte class.
-    ///
-    /// If the start codepoint exceeds 0xFF, then this returns `None`.
-    ///
-    /// If the end codepoint exceeds 0xFF, then it is set to 0xFF.
-    fn to_byte_range(self) -> Option<ByteRange> {
-        if self.start > '\u{FF}' {
-            None
-        } else {
-            let s = self.start as u8;
-            let e = min('\u{FF}', self.end) as u8;
-            Some(ByteRange::new(s, e))
-        }
-    }
-
-    /// Create a range of one character.
-    fn one(c: char) -> ClassRange {
-        ClassRange { start: c, end: c }
-    }
-
-    /// Returns true if and only if the two ranges are overlapping. Note that
-    /// since ranges are inclusive, `a-c` and `d-f` are overlapping!
-    fn overlapping(self, other: ClassRange) -> bool {
-        max(self.start, other.start) <= inc_char(min(self.end, other.end))
-    }
-
-    /// Returns the intersection of the two ranges if they have common
-    /// characters, `None` otherwise.
-    fn intersection(&self, other: &ClassRange) -> Option<ClassRange> {
-        let start = max(self.start, other.start);
-        let end = min(self.end, other.end);
-        if start <= end {
-            Some(ClassRange::new(start, end))
-        } else {
-            None
-        }
-    }
-
-    /// Creates a new range representing the union of `self` and `other`.
-    fn merge(self, other: ClassRange) -> ClassRange {
-        ClassRange {
-            start: min(self.start, other.start),
-            end: max(self.end, other.end),
-        }
-    }
-
-    /// Returns true if and only if this range contains a character that is
-    /// in the case folding table.
-    fn needs_case_folding(self) -> bool {
-        case_folding::C_plus_S_both_table
-        .binary_search_by(|&(c, _)| self.partial_cmp(&c).unwrap()).is_ok()
-    }
-
-    /// Apply case folding to this range.
-    ///
-    /// Since case folding might add characters such that the range is no
-    /// longer contiguous, this returns multiple class ranges. They are in
-    /// canonical order.
-    fn case_fold(self) -> Vec<ClassRange> {
-        let table = &case_folding::C_plus_S_both_table;
-        let (s, e) = (self.start as u32, self.end as u32 + 1);
-        let mut start = self.start;
-        let mut end = start;
-        let mut next_case_fold = '\x00';
-        let mut ranges = Vec::with_capacity(10);
-        for mut c in (s..e).filter_map(char::from_u32) {
-            if c >= next_case_fold {
-                c = match simple_case_fold_both_result(c) {
-                    Ok(i) => {
-                        for &(c1, c2) in &table[i..] {
-                            if c1 != c {
-                                break;
-                            }
-                            if c2 != inc_char(end) {
-                                ranges.push(ClassRange::new(start, end));
-                                start = c2;
-                            }
-                            end = c2;
-                        }
-                        continue;
-                    }
-                    Err(i) => {
-                        if i < table.len() {
-                            next_case_fold = table[i].0;
-                        } else {
-                            next_case_fold = '\u{10FFFF}';
-                        }
-                        c
-                    }
-                };
-            }
-            // The fast path. We know this character doesn't have an entry
-            // in the case folding table.
-            if c != inc_char(end) {
-                ranges.push(ClassRange::new(start, end));
-                start = c;
-            }
-            end = c;
-        }
-        ranges.push(ClassRange::new(start, end));
-        ranges
-    }
-}
-
-impl PartialEq<char> for ClassRange {
-    #[inline]
-    fn eq(&self, other: &char) -> bool {
-        self.start <= *other && *other <= self.end
-    }
-}
-
-impl PartialEq<ClassRange> for char {
-    #[inline]
-    fn eq(&self, other: &ClassRange) -> bool {
-        other.eq(self)
-    }
-}
-
-impl PartialOrd<char> for ClassRange {
-    #[inline]
-    fn partial_cmp(&self, other: &char) -> Option<Ordering> {
-        Some(if self == other {
-            Ordering::Equal
-        } else if *other > self.end {
-            Ordering::Greater
-        } else {
-            Ordering::Less
-        })
-    }
-}
-
-impl PartialOrd<ClassRange> for char {
-    #[inline]
-    fn partial_cmp(&self, other: &ClassRange) -> Option<Ordering> {
-        other.partial_cmp(self).map(|o| o.reverse())
-    }
-}
-
-impl ByteClass {
-    /// Create a new class from an existing set of ranges.
-    pub fn new(ranges: Vec<ByteRange>) -> ByteClass {
-        ByteClass { ranges: ranges }
-    }
-
-    /// Returns true if `b` is matched by this byte class.
-    pub fn matches(&self, b: u8) -> bool {
-        self.binary_search_by(|range| b.partial_cmp(range).unwrap()).is_ok()
-    }
-
-    /// Removes the given byte from the class if it exists.
-    ///
-    /// Note that this takes `O(n)` time in the number of ranges.
-    pub fn remove(&mut self, b: u8) {
-        let mut i = match self.binary_search_by(|r| b.partial_cmp(r).unwrap()) {
-            Ok(i) => i,
-            Err(_) => return,
-        };
-        let mut r = self.ranges.remove(i);
-        if r.start == b {
-            r.start = b.saturating_add(1);
-            if r.start > r.end || b == u8::MAX {
-                return;
-            }
-            self.ranges.insert(i, r);
-        } else if r.end == b {
-            r.end = b.saturating_sub(1);
-            if r.end < r.start || b == b'\x00' {
-                return;
-            }
-            self.ranges.insert(0, r);
-        } else {
-            let (mut r1, mut r2) = (r.clone(), r.clone());
-            r1.end = b.saturating_sub(1);
-            if r1.start <= r1.end {
-                self.ranges.insert(i, r1);
-                i += 1;
-            }
-            r2.start = b.saturating_add(1);
-            if r2.start <= r2.end {
-                self.ranges.insert(i, r2);
-            }
-        }
-    }
-
-    /// Create a new empty class from this one.
-    fn to_empty(&self) -> ByteClass {
-        ByteClass { ranges: Vec::with_capacity(self.len()) }
-    }
-
-    /// Canonicalize any sequence of ranges.
-    ///
-    /// This is responsible for enforcing the canonical format invariants
-    /// as described on the docs for the `ByteClass` type.
-    fn canonicalize(mut self) -> ByteClass {
-        // TODO: Save some cycles here by checking if already canonicalized.
-        self.ranges.sort();
-        let mut ordered = self.to_empty(); // TODO: Do this in place?
-        for candidate in self {
-            // If the candidate overlaps with an existing range, then it must
-            // be the most recent range added because we process the candidates
-            // in order.
-            if let Some(or) = ordered.ranges.last_mut() {
-                if or.overlapping(candidate) {
-                    *or = or.merge(candidate);
-                    continue;
-                }
-            }
-            ordered.ranges.push(candidate);
-        }
-        ordered
-    }
-
-    /// Negates the byte class.
-    ///
-    /// For all `b` where `b` is a byte, `b` matches `self` if and only if `b`
-    /// does not match `self.negate()`.
-    pub fn negate(mut self) -> ByteClass {
-        fn range(s: u8, e: u8) -> ByteRange { ByteRange::new(s, e) }
-
-        if self.is_empty() {
-            // Inverting an empty range yields all bytes.
-            return ByteClass {
-                ranges: vec![ByteRange { start: b'\x00', end: b'\xff' }],
-            };
-        }
-        self = self.canonicalize();
-        let mut inv = self.to_empty();
-        if self[0].start > b'\x00' {
-            inv.ranges.push(range(b'\x00', self[0].start.saturating_sub(1)));
-        }
-        for win in self.windows(2) {
-            inv.ranges.push(range(win[0].end.saturating_add(1),
-                                  win[1].start.saturating_sub(1)));
-        }
-        if self[self.len() - 1].end < u8::MAX {
-            inv.ranges.push(range(self[self.len() - 1].end.saturating_add(1),
-                                  u8::MAX));
-        }
-        inv
-    }
-
-    /// Apply case folding to this byte class.
-    ///
-    /// This assumes that the bytes in the ranges are ASCII compatible.
-    ///
-    /// N.B. Applying case folding to a negated character class probably
-    /// won't produce the expected result. e.g., `(?i)[^x]` really should
-    /// match any character sans `x` and `X`, but if `[^x]` is negated
-    /// before being case folded, you'll end up matching any character.
-    pub fn case_fold(self) -> ByteClass {
-        let mut folded = self.to_empty();
-        for r in self {
-            folded.ranges.extend(r.case_fold());
-        }
-        folded.canonicalize()
-    }
-
-    /// Returns the number of bytes that match this class.
-    fn num_bytes(&self) -> usize {
-        self.ranges.iter()
-            .map(|&r| 1 + (r.end as u32) - (r.start as u32))
-            .fold(0, |acc, len| acc + len)
-            as usize
-    }
-}
-
-impl ByteRange {
-    /// Create a new class range.
-    ///
-    /// If `end < start`, then the two values are swapped so that
-    /// the invariant `start <= end` is preserved.
-    fn new(start: u8, end: u8) -> ByteRange {
-        if start <= end {
-            ByteRange { start: start, end: end }
-        } else {
-            ByteRange { start: end, end: start }
-        }
-    }
-
-    /// Returns true if and only if the two ranges are overlapping. Note that
-    /// since ranges are inclusive, `a-c` and `d-f` are overlapping!
-    fn overlapping(self, other: ByteRange) -> bool {
-        max(self.start, other.start)
-        <= min(self.end, other.end).saturating_add(1)
-    }
-
-    /// Returns true if and only if the intersection of `self` and `other` is
-    /// empty.
-    fn is_intersect_empty(self, other: ByteRange) -> bool {
-        max(self.start, other.start) > min(self.end, other.end)
-    }
-
-    /// Creates a new range representing the union of `self` and `other`.
-    fn merge(self, other: ByteRange) -> ByteRange {
-        ByteRange {
-            start: min(self.start, other.start),
-            end: max(self.end, other.end),
-        }
-    }
-
-    /// Apply case folding to this range.
-    ///
-    /// Since case folding might add bytes such that the range is no
-    /// longer contiguous, this returns multiple byte ranges.
-    ///
-    /// This assumes that the bytes in this range are ASCII compatible.
-    fn case_fold(self) -> Vec<ByteRange> {
-        // So much easier than Unicode case folding!
-        let mut ranges = vec![self];
-        if !ByteRange::new(b'a', b'z').is_intersect_empty(self) {
-            let lower = max(self.start, b'a');
-            let upper = min(self.end, b'z');
-            ranges.push(ByteRange::new(lower - 32, upper - 32));
-        }
-        if !ByteRange::new(b'A', b'Z').is_intersect_empty(self) {
-            let lower = max(self.start, b'A');
-            let upper = min(self.end, b'Z');
-            ranges.push(ByteRange::new(lower + 32, upper + 32));
-        }
-        ranges
-    }
-}
-
-impl Deref for ByteClass {
-    type Target = Vec<ByteRange>;
-    fn deref(&self) -> &Vec<ByteRange> { &self.ranges }
-}
-
-impl IntoIterator for ByteClass {
-    type Item = ByteRange;
-    type IntoIter = vec::IntoIter<ByteRange>;
-    fn into_iter(self) -> vec::IntoIter<ByteRange> { self.ranges.into_iter() }
-}
-
-impl<'a> IntoIterator for &'a ByteClass {
-    type Item = &'a ByteRange;
-    type IntoIter = slice::Iter<'a, ByteRange>;
-    fn into_iter(self) -> slice::Iter<'a, ByteRange> { self.iter() }
-}
-
-impl PartialEq<u8> for ByteRange {
-    #[inline]
-    fn eq(&self, other: &u8) -> bool {
-        self.start <= *other && *other <= self.end
-    }
-}
-
-impl PartialEq<ByteRange> for u8 {
-    #[inline]
-    fn eq(&self, other: &ByteRange) -> bool {
-        other.eq(self)
-    }
-}
-
-impl PartialOrd<u8> for ByteRange {
-    #[inline]
-    fn partial_cmp(&self, other: &u8) -> Option<Ordering> {
-        Some(if self == other {
-            Ordering::Equal
-        } else if *other > self.end {
-            Ordering::Greater
-        } else {
-            Ordering::Less
-        })
-    }
-}
-
-impl PartialOrd<ByteRange> for u8 {
-    #[inline]
-    fn partial_cmp(&self, other: &ByteRange) -> Option<Ordering> {
-        other.partial_cmp(self).map(|o| o.reverse())
-    }
-}
-
-/// This implementation of `Display` will write a regular expression from the
-/// syntax tree. It does not write the original string parsed.
-impl fmt::Display for Expr {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        match *self {
-            Empty => write!(f, ""),
-            Literal { ref chars, casei } => {
-                if casei {
-                    try!(write!(f, "(?iu:"));
-                } else {
-                    try!(write!(f, "(?u:"));
-                }
-                for &c in chars {
-                    try!(write!(f, "{}", quote_char(c)));
-                }
-                try!(write!(f, ")"));
-                Ok(())
-            }
-            LiteralBytes { ref bytes, casei } => {
-                if casei {
-                    try!(write!(f, "(?i-u:"));
-                } else {
-                    try!(write!(f, "(?-u:"));
-                }
-                for &b in bytes {
-                    try!(write!(f, "{}", quote_byte(b)));
-                }
-                try!(write!(f, ")"));
-                Ok(())
-            }
-            AnyChar => write!(f, "(?su:.)"),
-            AnyCharNoNL => write!(f, "(?u:.)"),
-            AnyByte => write!(f, "(?s-u:.)"),
-            AnyByteNoNL => write!(f, "(?-u:.)"),
-            Class(ref cls) => write!(f, "{}", cls),
-            ClassBytes(ref cls) => write!(f, "{}", cls),
-            StartLine => write!(f, "(?m:^)"),
-            EndLine => write!(f, "(?m:$)"),
-            StartText => write!(f, r"^"),
-            EndText => write!(f, r"$"),
-            WordBoundary => write!(f, r"(?u:\b)"),
-            NotWordBoundary => write!(f, r"(?u:\B)"),
-            WordBoundaryAscii => write!(f, r"(?-u:\b)"),
-            NotWordBoundaryAscii => write!(f, r"(?-u:\B)"),
-            Group { ref e, i: None, name: None } => write!(f, "(?:{})", e),
-            Group { ref e, name: None, .. } => write!(f, "({})", e),
-            Group { ref e, name: Some(ref n), .. } => {
-                write!(f, "(?P<{}>{})", n, e)
-            }
-            Repeat { ref e, r, greedy } => {
-                match &**e {
-                    &Literal { ref chars, .. } if chars.len() > 1 => {
-                        try!(write!(f, "(?:{}){}", e, r))
-                    }
-                    _ => try!(write!(f, "{}{}", e, r)),
-                }
-                if !greedy { try!(write!(f, "?")); }
-                Ok(())
-            }
-            Concat(ref es) => {
-                for e in es {
-                    try!(write!(f, "{}", e));
-                }
-                Ok(())
-            }
-            Alternate(ref es) => {
-                for (i, e) in es.iter().enumerate() {
-                    if i > 0 { try!(write!(f, "|")); }
-                    try!(write!(f, "{}", e));
-                }
-                Ok(())
-            }
-        }
-    }
-}
-
-impl fmt::Display for Repeater {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        match *self {
-            ZeroOrOne => write!(f, "?"),
-            ZeroOrMore => write!(f, "*"),
-            OneOrMore => write!(f, "+"),
-            Range { min: s, max: None } => write!(f, "{{{},}}", s),
-            Range { min: s, max: Some(e) } if s == e => write!(f, "{{{}}}", s),
-            Range { min: s, max: Some(e) } => write!(f, "{{{}, {}}}", s, e),
-        }
-    }
-}
-
-impl fmt::Display for CharClass {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        try!(write!(f, "(?u:["));
-        for range in self.iter() {
-            if range.start == '-' || range.end == '-' {
-                try!(write!(f, "-"));
-                break;
-            }
-        }
-        for range in self.iter() {
-            let mut range = *range;
-            if range.start == '-' {
-                range.start = ((range.start as u8) + 1) as char;
-            }
-            if range.end == '-' {
-                range.end = ((range.end as u8) - 1) as char;
-            }
-            if range.start > range.end {
-                continue;
-            }
-            try!(write!(f, "{}", range));
-        }
-        try!(write!(f, "])"));
-        Ok(())
-    }
-}
-
-impl fmt::Display for ClassRange {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "{}-{}", quote_char(self.start), quote_char(self.end))
-    }
-}
-
-impl fmt::Display for ByteClass {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        try!(write!(f, "(?-u:["));
-        for range in self.iter() {
-            if range.start == b'-' || range.end == b'-' {
-                try!(write!(f, "-"));
-                break;
-            }
-        }
-        for range in self.iter() {
-            let mut range = *range;
-            if range.start == b'-' {
-                range.start += 1;
-            }
-            if range.end == b'-' {
-                range.end -= 1;
-            }
-            if range.start > range.end {
-                continue;
-            }
-            try!(write!(f, "{}", range));
-        }
-        try!(write!(f, "])"));
-        Ok(())
-    }
-}
-
-impl fmt::Display for ByteRange {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "{}-{}", quote_byte(self.start), quote_byte(self.end))
-    }
-}
-
-/// An alias for computations that can return an `Error`.
-pub type Result<T> = ::std::result::Result<T, Error>;
-
-/// A parse error.
-///
-/// This includes details about the specific type of error and a rough
-/// approximation of where it occurred.
-#[derive(Clone, Debug, PartialEq)]
-pub struct Error {
-    pos: usize,
-    surround: String,
-    kind: ErrorKind,
-}
-
-/// The specific type of parse error that can occur.
-#[derive(Clone, Debug, PartialEq)]
-pub enum ErrorKind {
-    /// A negation symbol is used twice in flag settings.
-    /// e.g., `(?-i-s)`.
-    DoubleFlagNegation,
-    /// The same capture name was used more than once.
-    /// e.g., `(?P<a>.)(?P<a>.)`.
-    DuplicateCaptureName(String),
-    /// An alternate is empty. e.g., `(|a)`.
-    EmptyAlternate,
-    /// A capture group name is empty. e.g., `(?P<>a)`.
-    EmptyCaptureName,
-    /// A negation symbol was not preceded by any flags. e.g., `(?i-)`.
-    EmptyFlagNegation,
-    /// A group is empty. e.g., `()`.
-    EmptyGroup,
-    /// An invalid number was used in a counted repetition. e.g., `a{b}`.
-    InvalidBase10(String),
-    /// An invalid hexadecimal number was used in an escape sequence.
-    /// e.g., `\xAG`.
-    InvalidBase16(String),
-    /// An invalid capture name was used. e.g., `(?P<0a>b)`.
-    InvalidCaptureName(String),
-    /// An invalid class range was given. Specifically, when the start of the
-    /// range is greater than the end. e.g., `[z-a]`.
-    InvalidClassRange {
-        /// The first character specified in the range.
-        start: char,
-        /// The second character specified in the range.
-        end: char,
-    },
-    /// An escape sequence was used in a character class where it is not
-    /// allowed. e.g., `[a-\pN]` or `[\A]`.
-    InvalidClassEscape(Expr),
-    /// An invalid counted repetition min/max was given. e.g., `a{2,1}`.
-    InvalidRepeatRange {
-        /// The first number specified in the repetition.
-        min: u32,
-        /// The second number specified in the repetition.
-        max: u32,
-    },
-    /// An invalid Unicode scalar value was used in a long hexadecimal
-    /// sequence. e.g., `\x{D800}`.
-    InvalidScalarValue(u32),
-    /// An empty counted repetition operator. e.g., `a{}`.
-    MissingBase10,
-    /// A repetition operator was not applied to an expression. e.g., `*`.
-    RepeaterExpectsExpr,
-    /// A repetition operator was applied to an expression that cannot be
-    /// repeated. e.g., `a+*` or `a|*`.
-    RepeaterUnexpectedExpr(Expr),
-    /// A capture group name that is never closed. e.g., `(?P<a`.
-    UnclosedCaptureName(String),
-    /// An unclosed hexadecimal literal. e.g., `\x{a`.
-    UnclosedHex,
-    /// An unclosed parenthesis. e.g., `(a`.
-    UnclosedParen,
-    /// An unclosed counted repetition operator. e.g., `a{2`.
-    UnclosedRepeat,
-    /// An unclosed named Unicode class. e.g., `\p{Yi`.
-    UnclosedUnicodeName,
-    /// Saw end of regex before class was closed. e.g., `[a`.
-    UnexpectedClassEof,
-    /// Saw end of regex before escape sequence was closed. e.g., `\`.
-    UnexpectedEscapeEof,
-    /// Saw end of regex before flags were closed. e.g., `(?i`.
-    UnexpectedFlagEof,
-    /// Saw end of regex before two hexadecimal digits were seen. e.g., `\xA`.
-    UnexpectedTwoDigitHexEof,
-    /// Unopened parenthesis. e.g., `)`.
-    UnopenedParen,
-    /// Unrecognized escape sequence. e.g., `\q`.
-    UnrecognizedEscape(char),
-    /// Unrecognized flag. e.g., `(?a)`.
-    UnrecognizedFlag(char),
-    /// Unrecognized named Unicode class. e.g., `\p{Foo}`.
-    UnrecognizedUnicodeClass(String),
-    /// Indicates that the regex uses too much nesting.
-    ///
-    /// (N.B. This error exists because traversing the Expr is recursive and
-    /// an explicit heap allocated stack is not (yet?) used. Regardless, some
-    /// sort of limit must be applied to avoid unbounded memory growth.)
-    StackExhausted,
-    /// A disallowed flag was found (e.g., `u`).
-    FlagNotAllowed(char),
-    /// A Unicode class was used when the Unicode (`u`) flag was disabled.
-    UnicodeNotAllowed,
-    /// InvalidUtf8 indicates that the expression may match non-UTF-8 bytes.
-    /// This is never returned if the parser is permitted to allow expressions
-    /// that match arbitrary bytes.
-    InvalidUtf8,
-    /// A character class was constructed such that it is empty.
-    /// e.g., `[^\d\D]`.
-    EmptyClass,
-    /// Indicates that unsupported notation was used in a character class.
-    ///
-    /// The char in this error corresponds to the illegal character.
-    ///
-    /// The intent of this error is to carve a path to support set notation
-    /// as described in UTS#18 RL1.3. We do this by rejecting regexes that
-    /// would use the notation.
-    ///
-    /// The workaround for end users is to escape the character included in
-    /// this error message.
-    UnsupportedClassChar(char),
-    /// Hints that destructuring should not be exhaustive.
-    ///
-    /// This enum may grow additional variants, so this makes sure clients
-    /// don't count on exhaustive matching. (Otherwise, adding a new variant
-    /// could break existing code.)
-    #[doc(hidden)]
-    __Nonexhaustive,
-}
-
-impl Error {
-    /// Returns an approximate *character* offset at which the error occurred.
-    ///
-    /// The character offset may be equal to the number of characters in the
-    /// string, in which case it should be interpreted as pointing to the end
-    /// of the regex.
-    pub fn position(&self) -> usize {
-        self.pos
-    }
-
-    /// Returns the type of the regex parse error.
-    pub fn kind(&self) -> &ErrorKind {
-        &self.kind
-    }
-}
-
-impl ErrorKind {
-    fn description(&self) -> &str {
-        use ErrorKind::*;
-        match *self {
-            DoubleFlagNegation => "double flag negation",
-            DuplicateCaptureName(_) => "duplicate capture name",
-            EmptyAlternate => "empty alternate",
-            EmptyCaptureName => "empty capture name",
-            EmptyFlagNegation => "flag negation without any flags",
-            EmptyGroup => "empty group (e.g., '()')",
-            InvalidBase10(_) => "invalid base 10 number",
-            InvalidBase16(_) => "invalid base 16 number",
-            InvalidCaptureName(_) => "invalid capture name",
-            InvalidClassRange{..} => "invalid character class range",
-            InvalidClassEscape(_) => "invalid escape sequence in class",
-            InvalidRepeatRange{..} => "invalid counted repetition range",
-            InvalidScalarValue(_) => "invalid Unicode scalar value",
-            MissingBase10 => "missing count in repetition operator",
-            RepeaterExpectsExpr => "repetition operator missing expression",
-            RepeaterUnexpectedExpr(_) => "expression cannot be repeated",
-            UnclosedCaptureName(_) => "unclosed capture group name",
-            UnclosedHex => "unclosed hexadecimal literal",
-            UnclosedParen => "unclosed parenthesis",
-            UnclosedRepeat => "unclosed counted repetition operator",
-            UnclosedUnicodeName => "unclosed Unicode class literal",
-            UnexpectedClassEof => "unexpected EOF in character class",
-            UnexpectedEscapeEof => "unexpected EOF in escape sequence",
-            UnexpectedFlagEof => "unexpected EOF in flags",
-            UnexpectedTwoDigitHexEof => "unexpected EOF in hex literal",
-            UnopenedParen => "unopened parenthesis",
-            UnrecognizedEscape(_) => "unrecognized escape sequence",
-            UnrecognizedFlag(_) => "unrecognized flag",
-            UnrecognizedUnicodeClass(_) => "unrecognized Unicode class name",
-            StackExhausted => "stack exhausted, too much nesting",
-            FlagNotAllowed(_) => "flag not allowed",
-            UnicodeNotAllowed => "Unicode features not allowed",
-            InvalidUtf8 => "matching arbitrary bytes is not allowed",
-            EmptyClass => "empty character class",
-            UnsupportedClassChar(_) => "unsupported class notation",
-            __Nonexhaustive => unreachable!(),
-        }
-    }
-}
-
-impl ::std::error::Error for Error {
-    fn description(&self) -> &str {
-        self.kind.description()
-    }
-}
-
-impl fmt::Display for Error {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        if let ErrorKind::StackExhausted = self.kind {
-            write!(f, "Error parsing regex: {}", self.kind)
-        } else {
-            write!(
-                f, "Error parsing regex near '{}' at character offset {}: {}",
-                self.surround, self.pos, self.kind)
-        }
-    }
-}
-
-impl fmt::Display for ErrorKind {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        use ErrorKind::*;
-        match *self {
-            DoubleFlagNegation =>
-                write!(f, "Only one negation symbol is allowed in flags."),
-            DuplicateCaptureName(ref s) =>
-                write!(f, "Capture name '{}' is used more than once.", s),
-            EmptyAlternate =>
-                write!(f, "Alternations cannot be empty."),
-            EmptyCaptureName =>
-                write!(f, "Capture names cannot be empty."),
-            EmptyFlagNegation =>
-                write!(f, "Flag negation requires setting at least one flag."),
-            EmptyGroup =>
-                write!(f, "Empty regex groups (e.g., '()') are not allowed."),
-            InvalidBase10(ref s) =>
-                write!(f, "Not a valid base 10 number: '{}'", s),
-            InvalidBase16(ref s) =>
-                write!(f, "Not a valid base 16 number: '{}'", s),
-            InvalidCaptureName(ref s) =>
-                write!(f, "Invalid capture name: '{}'. Capture names must \
-                           consist of [_a-zA-Z0-9] and are not allowed to \
-                           start with a number.", s),
-            InvalidClassRange { start, end } =>
-                write!(f, "Invalid character class range '{}-{}'. \
-                           Character class ranges must start with the smaller \
-                           character, but {} > {}", start, end, start, end),
-            InvalidClassEscape(ref e) =>
-                write!(f, "Invalid escape sequence in character \
-                           class: '{}'.", e),
-            InvalidRepeatRange { min, max } =>
-                write!(f, "Invalid counted repetition range: {{{}, {}}}. \
-                           Counted repetition ranges must start with the \
-                           minimum, but {} > {}", min, max, min, max),
-            InvalidScalarValue(c) =>
-                write!(f, "Number does not correspond to a Unicode scalar \
-                           value: '{}'.", c),
-            MissingBase10 =>
-                write!(f, "Missing maximum in counted \
-                           repetition operator."),
-            RepeaterExpectsExpr =>
-                write!(f, "Missing expression for repetition operator."),
-            RepeaterUnexpectedExpr(ref e) =>
-                write!(f, "Invalid application of repetition operator to: \
-                          '{}'.", e),
-            UnclosedCaptureName(ref s) =>
-                write!(f, "Capture name group for '{}' is not closed. \
-                           (Missing a '>'.)", s),
-            UnclosedHex =>
-                write!(f, "Unclosed hexadecimal literal (missing a '}}')."),
-            UnclosedParen =>
-                write!(f, "Unclosed parenthesis."),
-            UnclosedRepeat =>
-                write!(f, "Unclosed counted repetition (missing a '}}')."),
-            UnclosedUnicodeName =>
-                write!(f, "Unclosed Unicode literal (missing a '}}')."),
-            UnexpectedClassEof =>
-                write!(f, "Character class was not closed before the end of \
-                           the regex (missing a ']')."),
-            UnexpectedEscapeEof =>
-                write!(f, "Started an escape sequence that didn't finish \
-                           before the end of the regex."),
-            UnexpectedFlagEof =>
-                write!(f, "Inline flag settings were not closed before the end \
-                           of the regex (missing a ')' or ':')."),
-            UnexpectedTwoDigitHexEof =>
-                write!(f, "Unexpected end of two digit hexadecimal literal."),
-            UnopenedParen =>
-                write!(f, "Unopened parenthesis."),
-            UnrecognizedEscape(c) =>
-                write!(f, "Unrecognized escape sequence: '\\{}'.", c),
-            UnrecognizedFlag(c) =>
-                write!(f, "Unrecognized flag: '{}'. \
-                           (Allowed flags: i, m, s, U, u, x.)", c),
-            UnrecognizedUnicodeClass(ref s) =>
-                write!(f, "Unrecognized Unicode class name: '{}'.", s),
-            StackExhausted =>
-                write!(f, "Exhausted space required to parse regex with too \
-                           much nesting."),
-            FlagNotAllowed(flag) =>
-                write!(f, "Use of the flag '{}' is not allowed.", flag),
-            UnicodeNotAllowed =>
-                write!(f, "Unicode features are not allowed when the Unicode \
-                           (u) flag is not set."),
-            InvalidUtf8 =>
-                write!(f, "Matching arbitrary bytes is not allowed."),
-            EmptyClass =>
-                write!(f, "Empty character classes are not allowed."),
-            UnsupportedClassChar(c) =>
-                write!(f, "Use of unescaped '{}' in character class is \
-                           not allowed.", c),
-            __Nonexhaustive => unreachable!(),
-        }
-    }
-}
-
-/// The result of binary search on the simple case folding table.
-///
-/// Note that this binary search is done on the "both" table, such that
-/// the index returned corresponds to the *first* location of `c1` in the
-/// table. The table can then be scanned linearly starting from the position
-/// returned to find other case mappings for `c1`.
-fn simple_case_fold_both_result(c1: char) -> result::Result<usize, usize> {
-    let table = &case_folding::C_plus_S_both_table;
-    let i = binary_search(table, |&(c2, _)| c1 <= c2);
-    if i >= table.len() || table[i].0 != c1 {
-        Err(i)
-    } else {
-        Ok(i)
-    }
-}
-
-/// Binary search to find first element such that `pred(T) == true`.
-///
-/// Assumes that if `pred(xs[i]) == true` then `pred(xs[i+1]) == true`.
-///
-/// If all elements yield `pred(T) == false`, then `xs.len()` is returned.
-fn binary_search<T, F>(xs: &[T], mut pred: F) -> usize
-        where F: FnMut(&T) -> bool {
-    let (mut left, mut right) = (0, xs.len());
-    while left < right {
-        let mid = (left + right) / 2;
-        if pred(&xs[mid]) {
-            right = mid;
-        } else {
-            left = mid + 1;
-        }
-    }
-    left
-}
+mod unicode_tables;
 
 /// Escapes all regular expression meta characters in `text`.
 ///
 /// The string returned may be safely used as a literal in a regular
 /// expression.
 pub fn escape(text: &str) -> String {
     let mut quoted = String::with_capacity(text.len());
-    for c in text.chars() {
-        if parser::is_punct(c) {
-            quoted.push('\\');
-        }
-        quoted.push(c);
-    }
+    escape_into(text, &mut quoted);
     quoted
 }
 
-fn quote_char(c: char) -> String {
-    let mut s = String::new();
-    if parser::is_punct(c) {
-        s.push('\\');
-    }
-    s.push(c);
-    s
-}
-
-fn quote_byte(b: u8) -> String {
-    if parser::is_punct(b as char) || b == b'\'' || b == b'"' {
-        quote_char(b as char)
-    } else {
-        let escaped: Vec<u8> = ascii::escape_default(b).collect();
-        String::from_utf8(escaped).unwrap()
+/// Escapes all meta characters in `text` and writes the result into `buf`.
+///
+/// This will append escape characters into the given buffer. The characters
+/// that are appended are safe to use as a literal in a regular expression.
+pub fn escape_into(text: &str, buf: &mut String) {
+    for c in text.chars() {
+        if is_meta_character(c) {
+            buf.push('\\');
+        }
+        buf.push(c);
     }
 }
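+
+// A minimal usage sketch of `escape` and `escape_into` (an illustrative
+// addition, not part of the upstream crate); it relies only on the two
+// functions defined above:
+//
+//     let mut buf = String::new();
+//     escape_into("a.b", &mut buf);
+//     assert_eq!(buf, r"a\.b");
+//     assert_eq!(escape("1+1"), r"1\+1");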
 
-fn inc_char(c: char) -> char {
+/// Returns true if the given character has significance in a regex.
+///
+/// These are the only characters that are allowed to be escaped, with one
+/// exception: an ASCII space character may be escaped when extended mode (with
+/// the `x` flag) is enabled. In particular, `is_meta_character(' ')` returns
+/// `false`.
+///
+/// Note that the set of characters for which this function returns `true` or
+/// `false` is fixed and won't change in a semver compatible release.
+pub fn is_meta_character(c: char) -> bool {
     match c {
-        char::MAX => char::MAX,
-        '\u{D7FF}' => '\u{E000}',
-        c => char::from_u32(c as u32 + 1).unwrap(),
+        '\\' | '.' | '+' | '*' | '?' | '(' | ')' | '|' |
+        '[' | ']' | '{' | '}' | '^' | '$' | '#' | '&' | '-' | '~' => true,
+        _ => false,
     }
 }
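+
+// Illustrative cases derived from the match arms above (a sketch, not
+// upstream documentation):
+//
+//     assert!(is_meta_character('.'));
+//     assert!(is_meta_character('['));
+//     assert!(!is_meta_character(' ')); // see the extended-mode note above
+//     assert!(!is_meta_character('a'));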
 
-fn dec_char(c: char) -> char {
-    match c {
-        '\x00' => '\x00',
-        '\u{E000}' => '\u{D7FF}',
-        c => char::from_u32(c as u32 - 1).unwrap(),
-    }
-}
+/// Returns true if and only if the given character is a Unicode word
+/// character.
+///
+/// A Unicode word character is defined by
+/// [UTS#18 Annex C](http://unicode.org/reports/tr18/#Compatibility_Properties).
+/// In particular, a character
+/// is considered a word character if it is in either of the `Alphabetic` or
+/// `Join_Control` properties, or is in one of the `Decimal_Number`, `Mark`
+/// or `Connector_Punctuation` general categories.
+pub fn is_word_character(c: char) -> bool {
+    use std::cmp::Ordering;
+    use unicode_tables::perl_word::PERL_WORD;
 
-/// Returns true if and only if `c` is a word character.
-#[doc(hidden)]
-pub fn is_word_char(c: char) -> bool {
-    match c {
-        '_' | '0' ... '9' | 'a' ... 'z' | 'A' ... 'Z'  => true,
-        _ => ::unicode::regex::PERLW.binary_search_by(|&(start, end)| {
-            if c >= start && c <= end {
+    if c <= 0x7F as char && is_word_byte(c as u8) {
+        return true;
+    }
+    PERL_WORD
+        .binary_search_by(|&(start, end)| {
+            if start <= c && c <= end {
                 Ordering::Equal
             } else if start > c {
                 Ordering::Greater
             } else {
                 Ordering::Less
             }
-        }).is_ok(),
-    }
+        }).is_ok()
 }
 
-/// Returns true if and only if `c` is an ASCII word byte.
-#[doc(hidden)]
-pub fn is_word_byte(b: u8) -> bool {
-    match b {
+/// Returns true if and only if the given character is an ASCII word character.
+///
+/// An ASCII word character is defined by the following character class:
+/// `[_0-9a-zA-Z]`.
+pub fn is_word_byte(c: u8) -> bool {
+    match c {
         b'_' | b'0' ... b'9' | b'a' ... b'z' | b'A' ... b'Z'  => true,
         _ => false,
     }
 }
 
 #[cfg(test)]
-mod properties;
-
-#[cfg(test)]
 mod tests {
-    use {CharClass, ClassRange, ByteClass, ByteRange, Expr};
-
-    fn class(ranges: &[(char, char)]) -> CharClass {
-        let ranges = ranges.iter().cloned()
-                           .map(|(c1, c2)| ClassRange::new(c1, c2)).collect();
-        CharClass::new(ranges)
-    }
-
-    fn bclass(ranges: &[(u8, u8)]) -> ByteClass {
-        let ranges = ranges.iter().cloned()
-                           .map(|(c1, c2)| ByteRange::new(c1, c2)).collect();
-        ByteClass::new(ranges)
-    }
-
-    fn e(re: &str) -> Expr { Expr::parse(re).unwrap() }
-
-    #[test]
-    fn stack_exhaustion() {
-        use std::iter::repeat;
-
-        let open: String = repeat('(').take(200).collect();
-        let close: String = repeat(')').take(200).collect();
-        assert!(Expr::parse(&format!("{}a{}", open, close)).is_ok());
-
-        let open: String = repeat('(').take(200 + 1).collect();
-        let close: String = repeat(')').take(200 + 1).collect();
-        assert!(Expr::parse(&format!("{}a{}", open, close)).is_err());
-    }
-
-    #[test]
-    fn anchored_start() {
-        assert!(e("^a").is_anchored_start());
-        assert!(e("(^a)").is_anchored_start());
-        assert!(e("^a|^b").is_anchored_start());
-        assert!(e("(^a)|(^b)").is_anchored_start());
-        assert!(e("(^(a|b))").is_anchored_start());
-
-        assert!(!e("^a|b").is_anchored_start());
-        assert!(!e("a|^b").is_anchored_start());
-    }
-
-    #[test]
-    fn anchored_end() {
-        assert!(e("a$").is_anchored_end());
-        assert!(e("(a$)").is_anchored_end());
-        assert!(e("a$|b$").is_anchored_end());
-        assert!(e("(a$)|(b$)").is_anchored_end());
-        assert!(e("((a|b)$)").is_anchored_end());
-
-        assert!(!e("a$|b").is_anchored_end());
-        assert!(!e("a|b$").is_anchored_end());
-    }
-
-    #[test]
-    fn class_canon_no_change() {
-        let cls = class(&[('a', 'c'), ('x', 'z')]);
-        assert_eq!(cls.clone().canonicalize(), cls);
-    }
-
-    #[test]
-    fn class_canon_unordered() {
-        let cls = class(&[('x', 'z'), ('a', 'c')]);
-        assert_eq!(cls.canonicalize(), class(&[
-            ('a', 'c'), ('x', 'z'),
-        ]));
-    }
-
-    #[test]
-    fn class_canon_overlap() {
-        let cls = class(&[('x', 'z'), ('w', 'y')]);
-        assert_eq!(cls.canonicalize(), class(&[
-            ('w', 'z'),
-        ]));
-    }
-
-    #[test]
-    fn class_canon_overlap_many() {
-        let cls = class(&[
-            ('c', 'f'), ('a', 'g'), ('d', 'j'), ('a', 'c'),
-            ('m', 'p'), ('l', 's'),
-        ]);
-        assert_eq!(cls.clone().canonicalize(), class(&[
-            ('a', 'j'), ('l', 's'),
-        ]));
-    }
-
-    #[test]
-    fn class_canon_overlap_boundary() {
-        let cls = class(&[('x', 'z'), ('u', 'w')]);
-        assert_eq!(cls.canonicalize(), class(&[
-            ('u', 'z'),
-        ]));
-    }
-
-    #[test]
-    fn class_canon_extreme_edge_case() {
-        let cls = class(&[('\x00', '\u{10FFFF}'), ('\x00', '\u{10FFFF}')]);
-        assert_eq!(cls.canonicalize(), class(&[
-            ('\x00', '\u{10FFFF}'),
-        ]));
-    }
-
-    #[test]
-    fn class_canon_singles() {
-        let cls = class(&[('a', 'a'), ('b', 'b')]);
-        assert_eq!(cls.canonicalize(), class(&[('a', 'b')]));
-    }
+    use super::*;
 
     #[test]
-    fn class_negate_single() {
-        let cls = class(&[('a', 'a')]);
-        assert_eq!(cls.negate(), class(&[
-            ('\x00', '\x60'), ('\x62', '\u{10FFFF}'),
-        ]));
-    }
-
-    #[test]
-    fn class_negate_singles() {
-        let cls = class(&[('a', 'a'), ('b', 'b')]);
-        assert_eq!(cls.negate(), class(&[
-            ('\x00', '\x60'), ('\x63', '\u{10FFFF}'),
-        ]));
-    }
-
-    #[test]
-    fn class_negate_multiples() {
-        let cls = class(&[('a', 'c'), ('x', 'z')]);
-        assert_eq!(cls.negate(), class(&[
-            ('\x00', '\x60'), ('\x64', '\x77'), ('\x7b', '\u{10FFFF}'),
-        ]));
-    }
-
-    #[test]
-    fn class_negate_min_scalar() {
-        let cls = class(&[('\x00', 'a')]);
-        assert_eq!(cls.negate(), class(&[
-            ('\x62', '\u{10FFFF}'),
-        ]));
-    }
-
-    #[test]
-    fn class_negate_max_scalar() {
-        let cls = class(&[('a', '\u{10FFFF}')]);
-        assert_eq!(cls.negate(), class(&[
-            ('\x00', '\x60'),
-        ]));
-    }
-
-    #[test]
-    fn class_negate_everything() {
-        let cls = class(&[('\x00', '\u{10FFFF}')]);
-        assert_eq!(cls.negate(), class(&[]));
-    }
-
-    #[test]
-    fn class_negate_everything_sans_one() {
-        let cls = class(&[
-            ('\x00', '\u{10FFFD}'), ('\u{10FFFF}', '\u{10FFFF}')
-        ]);
-        assert_eq!(cls.negate(), class(&[
-            ('\u{10FFFE}', '\u{10FFFE}'),
-        ]));
-    }
-
-    #[test]
-    fn class_negate_surrogates_min() {
-        let cls = class(&[('\x00', '\u{D7FF}')]);
-        assert_eq!(cls.negate(), class(&[
-            ('\u{E000}', '\u{10FFFF}'),
-        ]));
-    }
-
-    #[test]
-    fn class_negate_surrogates_min_edge() {
-        let cls = class(&[('\x00', '\u{D7FE}')]);
-        assert_eq!(cls.negate(), class(&[
-            ('\u{D7FF}', '\u{10FFFF}'),
-        ]));
-    }
-
-    #[test]
-    fn class_negate_surrogates_max() {
-        let cls = class(&[('\u{E000}', '\u{10FFFF}')]);
-        assert_eq!(cls.negate(), class(&[
-            ('\x00', '\u{D7FF}'),
-        ]));
-    }
-
-    #[test]
-    fn class_negate_surrogates_max_edge() {
-        let cls = class(&[('\u{E001}', '\u{10FFFF}')]);
-        assert_eq!(cls.negate(), class(&[
-            ('\x00', '\u{E000}'),
-        ]));
-    }
-
-    #[test]
-    fn class_intersection_empty() {
-        let cls1 = class(&[]);
-        let cls2 = class(&[('a', 'a')]);
-        assert_intersection(cls1, cls2, class(&[]));
-    }
-
-    #[test]
-    fn class_intersection_single_equal() {
-        let cls1 = class(&[('a', 'a')]);
-        let cls2 = class(&[('a', 'a')]);
-        assert_intersection(cls1, cls2, class(&[('a', 'a')]));
-    }
-
-    #[test]
-    fn class_intersection_single_unequal() {
-        let cls1 = class(&[('a', 'a')]);
-        let cls2 = class(&[('b', 'b')]);
-        assert_intersection(cls1, cls2, class(&[]));
-    }
-
-    #[test]
-    fn class_intersection_single_in_other() {
-        let cls1 = class(&[('a', 'a')]);
-        let cls2 = class(&[('a', 'c')]);
-        assert_intersection(cls1, cls2, class(&[('a', 'a')]));
+    fn escape_meta() {
+        assert_eq!(
+            escape(r"\.+*?()|[]{}^$#&-~"),
+            r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#\&\-\~".to_string());
     }
 
     #[test]
-    fn class_intersection_range_in_other() {
-        let cls1 = class(&[('a', 'b')]);
-        let cls2 = class(&[('a', 'c')]);
-        assert_intersection(cls1, cls2, class(&[('a', 'b')]));
-    }
-
-    #[test]
-    fn class_intersection_range_intersection() {
-        let cls1 = class(&[('a', 'b')]);
-        let cls2 = class(&[('b', 'c')]);
-        assert_intersection(cls1, cls2, class(&[('b', 'b')]));
-    }
-
-    #[test]
-    fn class_intersection_only_adjacent() {
-        let cls1 = class(&[('a', 'b')]);
-        let cls2 = class(&[('c', 'd')]);
-        assert_intersection(cls1, cls2, class(&[]));
-    }
-
-    #[test]
-    fn class_intersection_range_subset() {
-        let cls1 = class(&[('b', 'c')]);
-        let cls2 = class(&[('a', 'd')]);
-        assert_intersection(cls1, cls2, class(&[('b', 'c')]));
-    }
-
-    #[test]
-    fn class_intersection_many_ranges_in_one_big() {
-        let cls1 = class(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
-        let cls2 = class(&[('a', 'h')]);
-        assert_intersection(cls1, cls2, class(&[
-            ('a', 'b'), ('d', 'e'), ('g', 'h')
-        ]));
-    }
-
-    #[test]
-    fn class_intersection_many_ranges_same() {
-        let cls1 = class(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
-        let cls2 = class(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
-        assert_intersection(cls1, cls2, class(&[
-            ('a', 'b'), ('d', 'e'), ('g', 'h')
-        ]));
-    }
-
-    #[test]
-    fn class_intersection_multiple_non_intersecting() {
-        let cls1 = class(&[('a', 'b'), ('g', 'h')]);
-        let cls2 = class(&[('d', 'e'), ('k', 'l')]);
-        assert_intersection(cls1, cls2, class(&[]));
-    }
-
-    #[test]
-    fn class_intersection_non_intersecting_then_intersecting() {
-        let cls1 = class(&[('a', 'b'), ('d', 'e'), ('g', 'h')]);
-        let cls2 = class(&[('h', 'h')]);
-        assert_intersection(cls1, cls2, class(&[('h', 'h')]));
-    }
-
-    #[test]
-    fn class_intersection_adjacent_alternating() {
-        let cls1 = class(&[('a', 'b'), ('e', 'f'), ('i', 'j')]);
-        let cls2 = class(&[('c', 'd'), ('g', 'h'), ('k', 'l')]);
-        assert_intersection(cls1, cls2, class(&[]));
-    }
-
-    #[test]
-    fn class_intersection_overlapping_alternating() {
-        let cls1 = class(&[('a', 'b'), ('c', 'd'), ('e', 'f')]);
-        let cls2 = class(&[('b', 'c'), ('d', 'e'), ('f', 'g')]);
-        assert_intersection(cls1, cls2, class(&[('b', 'f')]));
-    }
-
-    #[test]
-    fn class_canon_overlap_many_case_fold() {
-        let cls = class(&[
-            ('C', 'F'), ('A', 'G'), ('D', 'J'), ('A', 'C'),
-            ('M', 'P'), ('L', 'S'), ('c', 'f'),
-        ]);
-        assert_eq!(cls.case_fold(), class(&[
-            ('A', 'J'), ('L', 'S'),
-            ('a', 'j'), ('l', 's'),
-            ('\u{17F}', '\u{17F}'),
-        ]));
-
-        let cls = bclass(&[
-            (b'C', b'F'), (b'A', b'G'), (b'D', b'J'), (b'A', b'C'),
-            (b'M', b'P'), (b'L', b'S'), (b'c', b'f'),
-        ]);
-        assert_eq!(cls.case_fold(), bclass(&[
-            (b'A', b'J'), (b'L', b'S'),
-            (b'a', b'j'), (b'l', b's'),
-        ]));
-    }
-
-    #[test]
-    fn class_fold_az() {
-        let cls = class(&[('A', 'Z')]);
-        assert_eq!(cls.case_fold(), class(&[
-            ('A', 'Z'), ('a', 'z'),
-            ('\u{17F}', '\u{17F}'),
-            ('\u{212A}', '\u{212A}'),
-        ]));
-        let cls = class(&[('a', 'z')]);
-        assert_eq!(cls.case_fold(), class(&[
-            ('A', 'Z'), ('a', 'z'),
-            ('\u{17F}', '\u{17F}'),
-            ('\u{212A}', '\u{212A}'),
-        ]));
+    fn word() {
+        assert!(is_word_byte(b'a'));
+        assert!(!is_word_byte(b'-'));
 
-        let cls = bclass(&[(b'A', b'Z')]);
-        assert_eq!(cls.case_fold(), bclass(&[
-            (b'A', b'Z'), (b'a', b'z'),
-        ]));
-        let cls = bclass(&[(b'a', b'z')]);
-        assert_eq!(cls.case_fold(), bclass(&[
-            (b'A', b'Z'), (b'a', b'z'),
-        ]));
-    }
-
-    #[test]
-    fn class_fold_a_underscore() {
-        let cls = class(&[('A', 'A'), ('_', '_')]);
-        assert_eq!(cls.clone().canonicalize(), class(&[
-            ('A', 'A'), ('_', '_'),
-        ]));
-        assert_eq!(cls.case_fold(), class(&[
-            ('A', 'A'), ('_', '_'), ('a', 'a'),
-        ]));
-
-        let cls = bclass(&[(b'A', b'A'), (b'_', b'_')]);
-        assert_eq!(cls.clone().canonicalize(), bclass(&[
-            (b'A', b'A'), (b'_', b'_'),
-        ]));
-        assert_eq!(cls.case_fold(), bclass(&[
-            (b'A', b'A'), (b'_', b'_'), (b'a', b'a'),
-        ]));
-    }
-
-    #[test]
-    fn class_fold_a_equals() {
-        let cls = class(&[('A', 'A'), ('=', '=')]);
-        assert_eq!(cls.clone().canonicalize(), class(&[
-            ('=', '='), ('A', 'A'),
-        ]));
-        assert_eq!(cls.case_fold(), class(&[
-            ('=', '='), ('A', 'A'), ('a', 'a'),
-        ]));
-
-        let cls = bclass(&[(b'A', b'A'), (b'=', b'=')]);
-        assert_eq!(cls.clone().canonicalize(), bclass(&[
-            (b'=', b'='), (b'A', b'A'),
-        ]));
-        assert_eq!(cls.case_fold(), bclass(&[
-            (b'=', b'='), (b'A', b'A'), (b'a', b'a'),
-        ]));
-    }
-
-    #[test]
-    fn class_fold_no_folding_needed() {
-        let cls = class(&[('\x00', '\x10')]);
-        assert_eq!(cls.case_fold(), class(&[
-            ('\x00', '\x10'),
-        ]));
-
-        let cls = bclass(&[(b'\x00', b'\x10')]);
-        assert_eq!(cls.case_fold(), bclass(&[
-            (b'\x00', b'\x10'),
-        ]));
-    }
-
-    #[test]
-    fn class_fold_negated() {
-        let cls = class(&[('x', 'x')]);
-        assert_eq!(cls.clone().case_fold(), class(&[
-            ('X', 'X'), ('x', 'x'),
-        ]));
-        assert_eq!(cls.case_fold().negate(), class(&[
-            ('\x00', 'W'), ('Y', 'w'), ('y', '\u{10FFFF}'),
-        ]));
-
-        let cls = bclass(&[(b'x', b'x')]);
-        assert_eq!(cls.clone().case_fold(), bclass(&[
-            (b'X', b'X'), (b'x', b'x'),
-        ]));
-        assert_eq!(cls.case_fold().negate(), bclass(&[
-            (b'\x00', b'W'), (b'Y', b'w'), (b'y', b'\xff'),
-        ]));
-    }
-
-    #[test]
-    fn class_fold_single_to_multiple() {
-        let cls = class(&[('k', 'k')]);
-        assert_eq!(cls.case_fold(), class(&[
-            ('K', 'K'), ('k', 'k'), ('\u{212A}', '\u{212A}'),
-        ]));
-
-        let cls = bclass(&[(b'k', b'k')]);
-        assert_eq!(cls.case_fold(), bclass(&[
-            (b'K', b'K'), (b'k', b'k'),
-        ]));
-    }
-
-    #[test]
-    fn class_fold_at() {
-        let cls = class(&[('@', '@')]);
-        assert_eq!(cls.clone().canonicalize(), class(&[('@', '@')]));
-        assert_eq!(cls.case_fold(), class(&[('@', '@')]));
-
-        let cls = bclass(&[(b'@', b'@')]);
-        assert_eq!(cls.clone().canonicalize(), bclass(&[(b'@', b'@')]));
-        assert_eq!(cls.case_fold(), bclass(&[(b'@', b'@')]));
-    }
-
-    #[test]
-    fn roundtrip_class_hyphen() {
-        let expr = e("[-./]");
-        assert_eq!("(?u:[-\\.-/])", expr.to_string());
-
-        let expr = e("(?-u)[-./]");
-        assert_eq!("(?-u:[-\\.-/])", expr.to_string());
-    }
-
-    fn assert_intersection(cls1: CharClass, cls2: CharClass, expected: CharClass) {
-        // intersection operation should be commutative
-        assert_eq!(cls1.intersection(&cls2), expected);
-        assert_eq!(cls2.intersection(&cls1), expected);
+        assert!(is_word_character('a'));
+        assert!(is_word_character('β'));
+        assert!(!is_word_character('-'));
+        assert!(!is_word_character('☃'));
     }
 }
--- a/third_party/rust/regex-syntax/src/parser.rs
+++ b/third_party/rust/regex-syntax/src/parser.rs
@@ -1,3311 +1,206 @@
-// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
-// file at the top-level directory of this distribution and at
-// http://rust-lang.org/COPYRIGHT.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-
-use std::cmp::{max, min};
-use std::u8;
-
-use unicode::regex::UNICODE_CLASSES;
+use ast;
+use hir;
 
-use {
-    Expr, Repeater, CharClass, ClassRange,
-    CaptureIndex, CaptureName,
-    Error, ErrorKind, Result,
-};
-
-/// Parser state.
-///
-/// Keeps the entire input in memory and maintains a cursor (char offset).
-///
-/// It also keeps an expression stack, which is responsible for managing
-/// grouped expressions and flag state.
-#[derive(Debug)]
-pub struct Parser {
-    chars: Vec<char>,
-    chari: usize,
-    stack: Vec<Build>,
-    caps: usize,
-    names: Vec<String>, // to check for duplicates
-    flags: Flags,
-}
+use Result;
 
-/// Flag state used in the parser.
-#[derive(Clone, Copy, Debug)]
-pub struct Flags {
-    /// i
-    pub casei: bool,
-    /// m
-    pub multi: bool,
-    /// s
-    pub dotnl: bool,
-    /// U
-    pub swap_greed: bool,
-    /// x
-    pub ignore_space: bool,
-    /// u
-    pub unicode: bool,
-    /// Not actually a flag, but when disabled, every regex that may not match
-    /// UTF-8 exclusively will cause the parser to return an error.
-    pub allow_bytes: bool,
-}
-
-impl Default for Flags {
-    fn default() -> Self {
-        Flags {
-            casei: false,
-            multi: false,
-            dotnl: false,
-            swap_greed: false,
-            ignore_space: false,
-            unicode: true,
-            allow_bytes: false,
-        }
-    }
-}
-
-/// An ephemeral type for representing the expression stack.
+/// A builder for a regular expression parser.
+///
+/// This builder permits modifying configuration options for the parser.
 ///
-/// Everything on the stack is either a regular expression or a marker
-/// indicating the opening of a group (possibly non-capturing). The opening
-/// of a group copies the current flag state, which is reset on the parser
-/// state once the group closes.
-#[derive(Debug)]
-enum Build {
-    Expr(Expr),
-    LeftParen {
-        i: CaptureIndex,
-        name: CaptureName,
-        chari: usize,
-        old_flags: Flags,
-    },
-}
-
-/// A type for representing the elements of a bracket stack used for parsing
-/// character classes.
-///
-/// This is for parsing nested character classes without recursion.
-#[derive(Debug)]
-enum Bracket {
-    /// The opening of a character class (possibly negated)
-    LeftBracket {
-        negated: bool,
-    },
-    /// A set of characters within a character class, e.g., `a-z`
-    Set(CharClass),
-    /// An intersection operator (`&&`)
-    Intersection,
+/// This type combines the builder options for both the
+/// [AST `ParserBuilder`](ast/parse/struct.ParserBuilder.html)
+/// and the
+/// [HIR `TranslatorBuilder`](hir/translate/struct.TranslatorBuilder.html).
+#[derive(Clone, Debug, Default)]
+pub struct ParserBuilder {
+    ast: ast::parse::ParserBuilder,
+    hir: hir::translate::TranslatorBuilder,
 }
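+
+// A hedged usage sketch: the `new`, `build`, and `Parser::parse` names are
+// assumptions here (their definitions fall outside this hunk); the intent is
+// only to illustrate driving an AST parse plus HIR translation through the
+// combined builder:
+//
+//     let hir = ParserBuilder::new().build().parse(r"a|b")?;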
 
-// Primary expression parsing routines.
-impl Parser {
-    pub fn parse(s: &str, flags: Flags) -> Result<Expr> {
-        Parser {
-            chars: s.chars().collect(),
-            chari: 0,
-            stack: vec![],
-            caps: 0,
-            names: vec![],
-            flags: flags,
-        }.parse_expr()
-    }
-
-    // Top-level expression parser.
-    //
-    // Starts at the beginning of the input and consumes until either the end
-    // of input or an error.
-    fn parse_expr(mut self) -> Result<Expr> {
-        loop {
-            self.ignore_space();
-            if self.eof() {
-                break;
-            }
-            let build_expr = match self.cur() {
-                '\\' => try!(self.parse_escape()),
-                '|' => { let e = try!(self.alternate()); self.bump(); e }
-                '?' => try!(self.parse_simple_repeat(Repeater::ZeroOrOne)),
-                '*' => try!(self.parse_simple_repeat(Repeater::ZeroOrMore)),
-                '+' => try!(self.parse_simple_repeat(Repeater::OneOrMore)),
-                '{' => try!(self.parse_counted_repeat()),
-                '[' => try!(self.parse_class()),
-                '^' => {
-                    if self.flags.multi {
-                        self.parse_one(Expr::StartLine)
-                    } else {
-                        self.parse_one(Expr::StartText)
-                    }
-                }
-                '$' => {
-                    if self.flags.multi {
-                        self.parse_one(Expr::EndLine)
-                    } else {
-                        self.parse_one(Expr::EndText)
-                    }
-                }
-                '.' => {
-                    if self.flags.dotnl {
-                        if self.flags.unicode {
-                            self.parse_one(Expr::AnyChar)
-                        } else {
-                            if !self.flags.allow_bytes {
-                                return Err(self.err(ErrorKind::InvalidUtf8));
-                            }
-                            self.parse_one(Expr::AnyByte)
-                        }
-                    } else {
-                        if self.flags.unicode {
-                            self.parse_one(Expr::AnyCharNoNL)
-                        } else {
-                            if !self.flags.allow_bytes {
-                                return Err(self.err(ErrorKind::InvalidUtf8));
-                            }
-                            self.parse_one(Expr::AnyByteNoNL)
-                        }
-                    }
-                }
-                '(' => try!(self.parse_group()),
-                ')' => {
-                    let (old_flags, e) = try!(self.close_paren());
-                    self.bump();
-                    self.flags = old_flags;
-                    e
-                }
-                _ => {
-                    let c = self.bump();
-                    try!(self.lit(c))
-                }
-            };
-            if !build_expr.is_empty() {
-                self.stack.push(build_expr);
-            }
-        }
-        self.finish_concat()
-    }
-
-    // Parses an escape sequence, e.g., \Ax
-    //
-    // Start: `\`
-    // End:   `x`
-    fn parse_escape(&mut self) -> Result<Build> {
-        self.bump();
-        if self.eof() {
-            return Err(self.err(ErrorKind::UnexpectedEscapeEof));
-        }
-        let c = self.cur();
-        if is_punct(c) || (self.flags.ignore_space && c.is_whitespace()) {
-            let c = self.bump();
-            return Ok(try!(self.lit(c)));
-        }
-        match c {
-            'a' => { self.bump(); Ok(try!(self.lit('\x07'))) }
-            'f' => { self.bump(); Ok(try!(self.lit('\x0C'))) }
-            't' => { self.bump(); Ok(try!(self.lit('\t'))) }
-            'n' => { self.bump(); Ok(try!(self.lit('\n'))) }
-            'r' => { self.bump(); Ok(try!(self.lit('\r'))) }
-            'v' => { self.bump(); Ok(try!(self.lit('\x0B'))) }
-            'A' => { self.bump(); Ok(Build::Expr(Expr::StartText)) }
-            'z' => { self.bump(); Ok(Build::Expr(Expr::EndText)) }
-            'b' => {
-                self.bump();
-                Ok(Build::Expr(if self.flags.unicode {
-                    Expr::WordBoundary
-                } else {
-                    Expr::WordBoundaryAscii
-                }))
-            }
-            'B' => {
-                self.bump();
-                Ok(Build::Expr(if self.flags.unicode {
-                    Expr::NotWordBoundary
-                } else {
-                    Expr::NotWordBoundaryAscii
-                }))
-            }
-            '0'|'1'|'2'|'3'|'4'|'5'|'6'|'7' => self.parse_octal(),
-            'x' => { self.bump(); self.parse_hex() }
-            'p'|'P' => {
-                self.bump();
-                self.parse_unicode_class(c == 'P')
-                    .map(|cls| Build::Expr(Expr::Class(cls)))
-            }
-            'd'|'s'|'w'|'D'|'S'|'W' => {
-                self.bump();
-                Ok(Build::Expr(Expr::Class(self.parse_perl_class(c))))
-            }
-            c => Err(self.err(ErrorKind::UnrecognizedEscape(c))),
-        }
-    }
-
-    // Parses a group, e.g., `(abc)`.
-    //
-    // Start: `(`
-    // End:   `a`
-    //
-    // A more interesting example, `(?P<foo>abc)`.
-    //
-    // Start: `(`
-    // End:   `a`
-    fn parse_group(&mut self) -> Result<Build> {
-        let chari = self.chari;
-        let mut name: CaptureName = None;
-        self.bump();
-        self.ignore_space();
-        if self.bump_if("?P<") {
-            let n = try!(self.parse_group_name());
-            if self.names.iter().any(|n2| n2 == &n) {
-                return Err(self.err(ErrorKind::DuplicateCaptureName(n)));
-            }
-            self.names.push(n.clone());
-            name = Some(n);
-        } else if self.bump_if("?") {
-            // This can never be capturing. It's either setting flags for
-            // the current group, or it's opening a non-capturing group or
-            // it's opening a group with a specific set of flags (which is
-            // also non-capturing).
-            // Anything else is an error.
-            return self.parse_group_flags(chari);
-        }
-        self.caps = checkadd(self.caps, 1);
-        Ok(Build::LeftParen {
-            i: Some(self.caps),
-            name: name,
-            chari: chari,
-            old_flags: self.flags, // no flags changed if we're here
-        })
-    }
-
-    // Parses flags (inline or grouped), e.g., `(?s-i:abc)`.
-    //
-    // Start: `s`
-    // End:   `a`
-    //
-    // Another example, `(?s-i)a`.
-    //
-    // Start: `s`
-    // End:   `a`
-    fn parse_group_flags(&mut self, opening_chari: usize) -> Result<Build> {
-        let old_flags = self.flags;
-        let mut sign = true;
-        let mut saw_flag = false;
-        loop {
-            if self.eof() {
-                // e.g., (?i
-                return Err(self.err(ErrorKind::UnexpectedFlagEof));
-            }
-            match self.cur() {
-                'i' => { self.flags.casei = sign; saw_flag = true }
-                'm' => { self.flags.multi = sign; saw_flag = true }
-                's' => { self.flags.dotnl = sign; saw_flag = true }
-                'U' => { self.flags.swap_greed = sign; saw_flag = true }
-                'x' => { self.flags.ignore_space = sign; saw_flag = true }
-                'u' => { self.flags.unicode = sign; saw_flag = true }
-                '-' => {
-                    if !sign {
-                        // e.g., (?-i-s)
-                        return Err(self.err(ErrorKind::DoubleFlagNegation));
-                    }
-                    sign = false;
-                    saw_flag = false;
-                }
-                ')' => {
-                    if !saw_flag {
-                        // e.g., (?)
-                        return Err(self.err(ErrorKind::EmptyFlagNegation));
-                    }
-                    // At this point, we're just changing the flags inside
-                    // the current group, which means the old flags have
-                    // been saved elsewhere. Our modifications in place are
-                    // okey dokey!
-                    //
-                    // This particular flag expression only has a stateful
-                    // impact on a regex's AST, so nothing gets explicitly
-                    // added.
-                    self.bump();
-                    return Ok(Build::Expr(Expr::Empty));
-                }
-                ':' => {
-                    if !sign && !saw_flag {
-                        // e.g., (?i-:a)
-                        // Note that if there's no negation, it's OK not
-                        // to see flag, because you end up with a regular
-                        // non-capturing group: `(?:a)`.
-                        return Err(self.err(ErrorKind::EmptyFlagNegation));
-                    }
-                    self.bump();
-                    return Ok(Build::LeftParen {
-                        i: None,
-                        name: None,
-                        chari: opening_chari,
-                        old_flags: old_flags,
-                    });
-                }
-                // e.g., (?z:a)
-                c => return Err(self.err(ErrorKind::UnrecognizedFlag(c))),
-            }
-            self.bump();
-        }
-    }
-
-    // Parses a group name, e.g., `foo` in `(?P<foo>abc)`.
-    //
-    // Start: `f`
-    // End:   `a`
-    fn parse_group_name(&mut self) -> Result<String> {
-        let mut name = String::new();
-        while !self.eof() && !self.peek_is('>') {
-            name.push(self.bump());
-        }
-        if self.eof() {
-            // e.g., (?P<a
-            return Err(self.err(ErrorKind::UnclosedCaptureName(name)));
-        }
-        let all_valid = name.chars().all(is_valid_capture_char);
-        match name.chars().next() {
-            // e.g., (?P<>a)
-            None => Err(self.err(ErrorKind::EmptyCaptureName)),
-            Some(c) if (c >= '0' && c <= '9') || !all_valid => {
-                // e.g., (?P<a#>x)
-                // e.g., (?P<1a>x)
-                Err(self.err(ErrorKind::InvalidCaptureName(name)))
-            }
-            _ => {
-                self.bump(); // for `>`
-                Ok(name)
-            }
-        }
-    }
-
-    // Parses a counted repetition operator, e.g., `a{2,4}?z`.
-    //
-    // Start: `{`
-    // End:   `z`
-    fn parse_counted_repeat(&mut self) -> Result<Build> {
-        let e = try!(self.pop(ErrorKind::RepeaterExpectsExpr)); // e.g., ({5}
-        if !e.can_repeat() {
-            // e.g., a*{5}
-            return Err(self.err(ErrorKind::RepeaterUnexpectedExpr(e)));
-        }
-        self.bump();
-        self.ignore_space();
-        let min = try!(self.parse_decimal());
-        let mut max_opt = Some(min);
-        self.ignore_space();
-        if self.bump_if(',') {
-            self.ignore_space();
-            if self.peek_is('}') {
-                max_opt = None;
-            } else {
-                let max = try!(self.parse_decimal());
-                if min > max {
-                    // e.g., a{2,1}
-                    return Err(self.err(ErrorKind::InvalidRepeatRange {
-                        min: min,
-                        max: max,
-                    }));
-                }
-                max_opt = Some(max);
-            }
-        }
-        self.ignore_space();
-        if !self.bump_if('}') {
-            Err(self.err(ErrorKind::UnclosedRepeat))
-        } else {
-            Ok(Build::Expr(Expr::Repeat {
-                e: Box::new(e),
-                r: Repeater::Range { min: min, max: max_opt },
-                greedy: !self.bump_if('?') ^ self.flags.swap_greed,
-            }))
-        }
-    }
-
-    // Parses a simple repetition operator, e.g., `a+?z`.
-    //
-    // Start: `+`
-    // End:   `z`
-    //
-    // N.B. "simple" in this context means "not min/max repetition",
-    // e.g., `a{1,2}`.
-    fn parse_simple_repeat(&mut self, rep: Repeater) -> Result<Build> {
-        let e = try!(self.pop(ErrorKind::RepeaterExpectsExpr)); // e.g., (*
-        if !e.can_repeat() {
-            // e.g., a**
-            return Err(self.err(ErrorKind::RepeaterUnexpectedExpr(e)));
-        }
-        self.bump();
-        Ok(Build::Expr(Expr::Repeat {
-            e: Box::new(e),
-            r: rep,
-            greedy: !self.bump_if('?') ^ self.flags.swap_greed,
-        }))
-    }
-
-    // Parses a decimal number until the given character, e.g., `a{123,456}`.
-    //
-    // Start: `1`
-    // End:   `,` (where `until == ','`)
-    fn parse_decimal(&mut self) -> Result<u32> {
-        match self.bump_get(|c| is_ascii_word(c) || c.is_whitespace()) {
-            // e.g., a{}
-            None => Err(self.err(ErrorKind::MissingBase10)),
-            Some(n) => {
-                // e.g., a{xyz
-                // e.g., a{9999999999}
-                let n = n.trim();
-                u32::from_str_radix(n, 10)
-                    .map_err(|_| self.err(ErrorKind::InvalidBase10(n.into())))
-            }
-        }
-    }
-
-    // Parses an octal number, up to 3 digits, e.g., `a\123b`
-    //
-    // Start: `1`
-    // End:   `b`
-    fn parse_octal(&mut self) -> Result<Build> {
-        use std::char;
-        let mut i = 0; // counter for limiting octal to 3 digits.
-        let n = self.bump_get(|c| { i += 1; i <= 3 && c >= '0' && c <= '7' })
-                    .expect("octal string"); // guaranteed at least 1 digit
-        // I think both of the following unwraps are impossible to fail.
-        // We limit it to a three digit octal number, which maxes out at
-        // `0777` or `511` in decimal. Since all digits are in `0...7`, we'll
-        // always have a valid `u32` number. Moreover, since all numbers in
-        // the range `0...511` are valid Unicode scalar values, it will always
-        // be a valid `char`.
-        //
-        // Hence, we `unwrap` with reckless abandon.
-        let n = u32::from_str_radix(&n, 8).ok().expect("valid octal number");
-        if !self.flags.unicode {
-            return Ok(try!(self.u32_to_one_byte(n)));
-        }
-        let c = char::from_u32(n).expect("Unicode scalar value");
-        Ok(try!(self.lit(c)))
-    }
-
-    // Parses a hex number, e.g., `a\x5ab`.
-    //
-    // Start: `5`
-    // End:   `b`
-    //
-    // And also, `a\x{2603}b`.
-    //
-    // Start: `{`
-    // End:   `b`
-    fn parse_hex(&mut self) -> Result<Build> {
-        self.ignore_space();
-        if self.bump_if('{') {
-            self.parse_hex_many_digits()
-        } else {
-            self.parse_hex_two_digits()
-        }
-    }
-
-    // Parses a many-digit hex number, e.g., `a\x{2603}b`.
-    //
-    // Start: `2`
-    // End:   `b`
-    fn parse_hex_many_digits(&mut self) -> Result<Build> {
-        use std::char;
-
-        self.ignore_space();
-        let s = self.bump_get(is_ascii_word).unwrap_or("".into());
-        let n = try!(u32::from_str_radix(&s, 16)
-                         .map_err(|_| self.err(ErrorKind::InvalidBase16(s))));
-        self.ignore_space();
-        if !self.bump_if('}') {
-            // e.g., a\x{d
-            return Err(self.err(ErrorKind::UnclosedHex));
-        }
-        if !self.flags.unicode {
-            return Ok(try!(self.u32_to_one_byte(n)));
-        }
-        let c = try!(char::from_u32(n)
-                          .ok_or(self.err(ErrorKind::InvalidScalarValue(n))));
-        Ok(try!(self.lit(c)))
-    }
-
-    // Parses a two-digit hex number, e.g., `a\x5ab`.
-    //
-    // Start: `5`
-    // End:   `b`
-    fn parse_hex_two_digits(&mut self) -> Result<Build> {
-        use std::char;
-
-        let mut i = 0;
-        let s = self.bump_get(|_| { i += 1; i <= 2 }).unwrap_or("".into());
-        if s.len() < 2 {
-            // e.g., a\x
-            // e.g., a\xf
-            return Err(self.err(ErrorKind::UnexpectedTwoDigitHexEof));
-        }
-        let n = try!(u32::from_str_radix(&s, 16)
-                         .map_err(|_| self.err(ErrorKind::InvalidBase16(s))));
-        if !self.flags.unicode {
-            return Ok(try!(self.u32_to_one_byte(n)));
-        }
-        let c = char::from_u32(n).expect("Unicode scalar value");
-        Ok(try!(self.lit(c)))
-    }
-
-    // Parses a character class, e.g., `[^a-zA-Z0-9]+`.
-    //
-    // If the Unicode flag is enabled, the class is returned as a `CharClass`,
-    // otherwise it is converted to a `ByteClass`.
-    //
-    // Start: `[`
-    // End:   `+`
-    fn parse_class(&mut self) -> Result<Build> {
-        let class = try!(self.parse_class_as_chars());
-        Ok(Build::Expr(if self.flags.unicode {
-            Expr::Class(class)
-        } else {
-            let byte_class = class.to_byte_class();
-
-            // If `class` was only non-empty due to multibyte characters, the
-            // corresponding byte class will now be empty.
-            //
-            // See https://github.com/rust-lang/regex/issues/303
-            if byte_class.is_empty() {
-                // e.g., (?-u)[^\x00-\xFF]
-                return Err(self.err(ErrorKind::EmptyClass));
-            }
-
-            Expr::ClassBytes(byte_class)
-        }))
-    }
-
-    // Parses a character class as a `CharClass`, e.g., `[^a-zA-Z0-9]+`.
-    //
-    // Start: `[`
-    // End:   `+`
-    fn parse_class_as_chars(&mut self) -> Result<CharClass> {
-        let mut bracket_stack = vec![];
-        bracket_stack.extend(self.parse_open_bracket());
-        loop {
-            self.ignore_space();
-            if self.eof() {
-                // e.g., [a
-                return Err(self.err(ErrorKind::UnexpectedClassEof));
-            }
-            match self.cur() {
-                '[' => {
-                    if let Some(class) = self.maybe_parse_ascii() {
-                        // e.g. `[:alnum:]`
-                        bracket_stack.push(Bracket::Set(class));
-                    } else {
-                        // nested set, e.g. `[c-d]` in `[a-b[c-d]]`
-                        bracket_stack.extend(self.parse_open_bracket());
-                    }
-                }
-                ']' => {
-                    self.bump();
-                    let class = try!(self.close_bracket(&mut bracket_stack));
-                    if bracket_stack.is_empty() {
-                        // That was the outermost class, so stop now
-                        return Ok(class);
-                    }
-                    bracket_stack.push(Bracket::Set(class));
-                }
-                '\\' => {
-                    let class = try!(self.parse_class_escape());
-                    bracket_stack.push(Bracket::Set(class));
-                }
-                '&' if self.peek_is("&&") => {
-                    self.bump();
-                    self.bump();
-                    bracket_stack.push(Bracket::Intersection);
-                }
-                start => {
-                    if !self.flags.unicode {
-                        let _ = try!(self.codepoint_to_one_byte(start));
-                    }
-                    self.bump();
-                    match start {
-                        '~'|'-' => {
-                            // Only report an error if we see ~~ or --.
-                            if self.peek_is(start) {
-                                return Err(self.err(
-                                    ErrorKind::UnsupportedClassChar(start)));
-                            }
-                        }
-                        _ => {}
-                    }
-                    let class = try!(self.parse_class_range(start));
-                    bracket_stack.push(Bracket::Set(class));
-                }
-            }
-        }
-    }
-
-    // Parses the start of a character class or a nested character class.
-    // That includes negation using `^` and unescaped `-` and `]` allowed at
-    // the start of the class.
-    //
-    // e.g., `[^a]` or `[-a]` or `[]a]`
-    //
-    // Start: `[`
-    // End:   `a`
-    fn parse_open_bracket(&mut self) -> Vec<Bracket> {
-        self.bump();
-        self.ignore_space();
-        let negated = self.bump_if('^');
-        self.ignore_space();
-
-        let mut class = CharClass::empty();
-        while self.bump_if('-') {
-            class.ranges.push(ClassRange::one('-'));
-            self.ignore_space();
-        }
-        if class.is_empty() {
-            if self.bump_if(']') {
-                class.ranges.push(ClassRange::one(']'));
-                self.ignore_space();
-            }
-        }
-
-        let bracket = Bracket::LeftBracket { negated: negated };
-        if class.is_empty() {
-            vec![bracket]
-        } else {
-            vec![bracket, Bracket::Set(class)]
-        }
-    }
-
-    // Parses an escape in a character class.
-    //
-    // This is a helper for `parse_class`. It returns the character class
-    // corresponding to the escape, or an error.
-    //
-    // e.g., `\wx`
-    //
-    // Start: `\`
-    // End:   `x`
-    fn parse_class_escape(&mut self) -> Result<CharClass> {
-        match try!(self.parse_escape()) {
-            Build::Expr(Expr::Class(class)) => {
-                Ok(class)
-            }
-            Build::Expr(Expr::ClassBytes(class2)) => {
-                let mut class = CharClass::empty();
-                for byte_range in class2 {
-                    let s = byte_range.start as char;
-                    let e = byte_range.end as char;
-                    class.ranges.push(ClassRange::new(s, e));
-                }
-                Ok(class)
-            }
-            Build::Expr(Expr::Literal { chars, .. }) => {
-                self.parse_class_range(chars[0])
-            }
-            Build::Expr(Expr::LiteralBytes { bytes, .. }) => {
-                let start = bytes[0] as char;
-                self.parse_class_range(start)
-            }
-            Build::Expr(e) => {
-                let err = ErrorKind::InvalidClassEscape(e);
-                Err(self.err(err))
-            }
-            // Because `parse_escape` can never return `LeftParen`.
-            _ => unreachable!(),
-        }
+impl ParserBuilder {
+    /// Create a new parser builder with a default configuration.
+    pub fn new() -> ParserBuilder {
+        ParserBuilder::default()
     }
 
-    // Parses a single range in a character class.
-    //
-    // e.g., `[a-z]`
-    //
-    // Start: `-` (with start == `a`)
-    // End:   `]`
-    fn parse_class_range(&mut self, start: char) -> Result<CharClass> {
-        self.ignore_space();
-        if !self.bump_if('-') {
-            // Not a range, so just return a singleton range.
-            return Ok(CharClass::new(vec![ClassRange::one(start)]));
-        }
-        self.ignore_space();
-        if self.eof() {
-            // e.g., [a-
-            return Err(self.err(ErrorKind::UnexpectedClassEof));
-        }
-        if self.peek_is(']') {
-            // This is the end of the class, so we permit use of `-` as a
-            // regular char (just like we do in the beginning).
-            return Ok(CharClass::new(vec![ClassRange::one(start), ClassRange::one('-')]));
-        }
-
-        // We have a real range. We just need to parse the ending literal and
-        // make sure it forms a valid range.
-        let end = match self.cur() {
-            '\\' => match try!(self.parse_escape()) {
-                Build::Expr(Expr::Literal { chars, .. }) => {
-                    chars[0]
-                }
-                Build::Expr(Expr::LiteralBytes { bytes, .. }) => {
-                    bytes[0] as char
-                }
-                Build::Expr(e) => {
-                    return Err(self.err(ErrorKind::InvalidClassEscape(e)));
-                }
-                // Because `parse_escape` can never return `LeftParen`.
-                _ => unreachable!(),
-            },
-            c => {
-                self.bump();
-                if c == '-' {
-                    return Err(self.err(ErrorKind::UnsupportedClassChar('-')));
-                }
-                if !self.flags.unicode {
-                    let _ = try!(self.codepoint_to_one_byte(c));
-                }
-                c
-            }
-        };
-        if end < start {
-            // e.g., [z-a]
-            return Err(self.err(ErrorKind::InvalidClassRange {
-                start: start,
-                end: end,
-            }));
-        }
-        Ok(CharClass::new(vec![ClassRange::new(start, end)]))
-    }
-
-    // Parses an ASCII class, e.g., `[:alnum:]+`.
-    //
-    // Start: `[`
-    // End:   `+`
-    //
-    // Also supports negation, e.g., `[:^alnum:]`.
-    //
-    // This parsing routine is distinct from the others in that it doesn't
-    // actually report any errors. Namely, if it fails, then the parser should
-    // fall back to parsing a regular class.
-    //
-    // This method will only make progress in the parser if it succeeds.
-    // Otherwise, the input remains where it started.
-    fn maybe_parse_ascii(&mut self) -> Option<CharClass> {
-        fn parse(p: &mut Parser) -> Option<CharClass> {
-            p.bump(); // the `[`
-            if !p.bump_if(':') { return None; }
-            let negate = p.bump_if('^');
-            let name = match p.bump_get(|c| c != ':') {
-                None => return None,
-                Some(name) => name,
-            };
-            if !p.bump_if(":]") { return None; }
-            ascii_class(&name).map(|cls| p.class_transform(negate, cls))
-        }
-        let start = self.chari;
-        match parse(self) {
-            None => { self.chari = start; None }
-            result => result,
-        }
-    }
-
-    // Parses a Unicode class name, e.g., `a\pLb`.
-    //
-    // Start: `L`
-    // End:   `b`
-    //
-    // And also, `a\p{Greek}b`.
-    //
-    // Start: `{`
-    // End:   `b`
-    //
-    // `negate` is true when the class name is used with `\P`.
-    fn parse_unicode_class(&mut self, neg: bool) -> Result<CharClass> {
-        self.ignore_space();
-        let name =
-            if self.bump_if('{') {
-                self.ignore_space();
-                let n = self.bump_get(is_ascii_word).unwrap_or("".into());
-                self.ignore_space();
-                if n.is_empty() || !self.bump_if('}') {
-                    // e.g., \p{Greek
-                    return Err(self.err(ErrorKind::UnclosedUnicodeName));
-                }
-                n
-            } else {
-                if self.eof() {
-                    // e.g., \p
-                    return Err(self.err(ErrorKind::UnexpectedEscapeEof));
-                }
-                self.bump().to_string()
-            };
-        match unicode_class(&name) {
-            None => Err(self.err(ErrorKind::UnrecognizedUnicodeClass(name))),
-            Some(cls) => {
-                if self.flags.unicode {
-                    Ok(self.class_transform(neg, cls))
-                } else {
-                    Err(self.err(ErrorKind::UnicodeNotAllowed))
-                }
-            }
-        }
-    }
-
-    // Parses a perl character class with Unicode support.
-    //
-    // `name` must be one of d, s, w, D, S, W. If not, this function panics.
-    //
-    // No parser state is changed.
-    fn parse_perl_class(&mut self, name: char) -> CharClass {
-        use unicode::regex::{PERLD, PERLS, PERLW};
-        let (cls, negate) = match (self.flags.unicode, name) {
-            (true, 'd') => (raw_class_to_expr(PERLD), false),
-            (true, 'D') => (raw_class_to_expr(PERLD), true),
-            (true, 's') => (raw_class_to_expr(PERLS), false),
-            (true, 'S') => (raw_class_to_expr(PERLS), true),
-            (true, 'w') => (raw_class_to_expr(PERLW), false),
-            (true, 'W') => (raw_class_to_expr(PERLW), true),
-            (false, 'd') => (ascii_class("digit").unwrap(), false),
-            (false, 'D') => (ascii_class("digit").unwrap(), true),
-            (false, 's') => (ascii_class("space").unwrap(), false),
-            (false, 'S') => (ascii_class("space").unwrap(), true),
-            (false, 'w') => (ascii_class("word").unwrap(), false),
-            (false, 'W') => (ascii_class("word").unwrap(), true),
-            _ => unreachable!(),
-        };
-        self.class_transform(negate, cls)
-    }
-
-    // Always bump to the next input and return the given expression as a
-    // `Build`.
-    //
-    // This is mostly for convenience when the surrounding context implies
-    // that the next character corresponds to the given expression.
-    fn parse_one(&mut self, e: Expr) -> Build {
-        self.bump();
-        Build::Expr(e)
-    }
-}
-
-// Auxiliary helper methods.
-impl Parser {
-    fn chars(&self) -> Chars {
-        Chars::new(&self.chars[self.chari..])
-    }
-
-    fn ignore_space(&mut self) {
-        if !self.flags.ignore_space {
-            return;
-        }
-        while !self.eof() {
-            match self.cur() {
-                '#' => {
-                    self.bump();
-                    while !self.eof() {
-                        match self.bump() {
-                            '\n' => break,
-                            _ => continue,
-                        }
-                    }
-                },
-                c => if !c.is_whitespace() {
-                    return;
-                } else {
-                    self.bump();
-                }
-            }
-        }
-    }
-
-    fn bump(&mut self) -> char {
-        let c = self.cur();
-        self.chari = checkadd(self.chari, self.chars().next_count());
-        c
-    }
-
-    fn cur(&self) -> char { self.chars().next().unwrap() }
-
-    fn eof(&self) -> bool { self.chars().next().is_none() }
-
-    fn bump_get<B: Bumpable>(&mut self, s: B) -> Option<String> {
-        let n = s.match_end(self);
-        if n == 0 {
-            None
-        } else {
-            let end = checkadd(self.chari, n);
-            let s = self.chars[self.chari..end]
-                        .iter().cloned().collect::<String>();
-            self.chari = end;
-            Some(s)
-        }
-    }
-
-    fn bump_if<B: Bumpable>(&mut self, s: B) -> bool {
-        let n = s.match_end(self);
-        if n == 0 {
-            false
-        } else {
-            self.chari = checkadd(self.chari, n);
-            true
-        }
-    }
-
-    fn peek_is<B: Bumpable>(&self, s: B) -> bool {
-        s.match_end(self) > 0
-    }
-
-    fn err(&self, kind: ErrorKind) -> Error {
-        self.errat(self.chari, kind)
-    }
-
-    fn errat(&self, pos: usize, kind: ErrorKind) -> Error {
-        Error { pos: pos, surround: self.windowat(pos), kind: kind }
-    }
-
-    fn windowat(&self, pos: usize) -> String {
-        let s = max(5, pos) - 5;
-        let e = min(self.chars.len(), checkadd(pos, 5));
-        self.chars[s..e].iter().cloned().collect()
-    }
-
-    fn pop(&mut self, expected: ErrorKind) -> Result<Expr> {
-        match self.stack.pop() {
-            None | Some(Build::LeftParen{..}) => Err(self.err(expected)),
-            Some(Build::Expr(e)) => Ok(e),
-        }
-    }
-
-    // If the current context calls for case insensitivity, then apply
-    // case folding. Similarly, if `negate` is `true`, then negate the
-    // class. (Negation is always applied after case folding.)
-    fn class_transform(&self, negate: bool, mut cls: CharClass) -> CharClass {
-        if self.flags.casei {
-            cls = cls.case_fold();
-        }
-        if negate {
-            cls = cls.negate();
-        }
-        cls
-    }
-
-    // Translates a Unicode codepoint into a single UTF-8 byte, and returns an
-    // error if it's not possible.
-    //
-    // This will panic if self.flags.unicode == true.
-    fn codepoint_to_one_byte(&self, c: char) -> Result<u8> {
-        assert!(!self.flags.unicode);
-        let bytes = c.to_string().as_bytes().to_owned();
-        if bytes.len() > 1 {
-            return Err(self.err(ErrorKind::UnicodeNotAllowed));
-        }
-        Ok(bytes[0])
-    }
-
-    // Creates a new byte literal from a single byte.
-    //
-    // If the given number can't fit into a single byte, then it is assumed
-    // to be a Unicode codepoint and an error is returned.
-    //
-    // This should only be called when the bytes flag is enabled.
-    fn u32_to_one_byte(&self, b: u32) -> Result<Build> {
-        assert!(!self.flags.unicode);
-        if b > u8::MAX as u32 {
-            Err(self.err(ErrorKind::UnicodeNotAllowed))
-        } else if !self.flags.allow_bytes && b > 0x7F {
-            Err(self.err(ErrorKind::InvalidUtf8))
-        } else {
-            Ok(Build::Expr(Expr::LiteralBytes {
-                bytes: vec![b as u8],
-                casei: self.flags.casei,
-            }))
-        }
-    }
-
-    // Creates a new literal expr from a Unicode codepoint.
-    //
-    // Creates a byte literal if the `bytes` flag is set.
-    fn lit(&self, c: char) -> Result<Build> {
-        Ok(Build::Expr(if self.flags.unicode {
-            Expr::Literal {
-                chars: vec![c],
-                casei: self.flags.casei,
-            }
-        } else {
-            Expr::LiteralBytes {
-                bytes: vec![try!(self.codepoint_to_one_byte(c))],
-                casei: self.flags.casei,
-            }
-        }))
-    }
-}
-
-struct Chars<'a> {
-    chars: &'a [char],
-    cur: usize,
-}
-
-impl<'a> Iterator for Chars<'a> {
-    type Item = char;
-    fn next(&mut self) -> Option<char> {
-        let x = self.c();
-        self.advance();
-        return x;
-    }
-}
-
-impl<'a> Chars<'a> {
-    fn new(chars: &[char]) -> Chars {
-        Chars {
-            chars: chars,
-            cur: 0,
-        }
-    }
-
-    fn c(&self) -> Option<char> {
-        self.chars.get(self.cur).map(|&c| c)
-    }
-
-    fn advance(&mut self) {
-        self.cur = checkadd(self.cur, 1);
-    }
-
-    fn next_count(&mut self) -> usize {
-        self.next();
-        self.cur
-    }
-}
-
-// Auxiliary methods for manipulating the expression stack.
-impl Parser {
-    // Called whenever an alternate (`|`) is found.
-    //
-    // This pops the expression stack until:
-    //
-    //  1. The stack is empty. Pushes an alternation with one arm.
-    //  2. An opening parenthesis is found. Leave the parenthesis
-    //     on the stack and push an alternation with one arm.
-    //  3. An alternate (`|`) is found. Pop the existing alternation,
-    //     add an arm and push the modified alternation.
-    //
-    // Each "arm" in the above corresponds to the concatenation of all
-    // popped expressions.
-    //
-    // In the first two cases, the stack is left in an invalid state
-    // because an alternation with one arm is not allowed. This
-    // particular state will be detected by `finish_concat` and an
-    // error will be reported.
-    //
-    // In none of the cases is an empty arm allowed. If an empty arm
-    // is found, an error is reported.
-    fn alternate(&mut self) -> Result<Build> {
-        let mut concat = vec![];
-        let alts = |es| Ok(Build::Expr(Expr::Alternate(es)));
-        loop {
-            match self.stack.pop() {
-                None => {
-                    if concat.is_empty() {
-                        // e.g., |a
-                        return Err(self.err(ErrorKind::EmptyAlternate));
-                    }
-                    return alts(vec![rev_concat(concat)]);
-                }
-                Some(e @ Build::LeftParen{..}) => {
-                    if concat.is_empty() {
-                        // e.g., (|a)
-                        return Err(self.err(ErrorKind::EmptyAlternate));
-                    }
-                    self.stack.push(e);
-                    return alts(vec![rev_concat(concat)]);
-                }
-                Some(Build::Expr(Expr::Alternate(mut es))) => {
-                    if concat.is_empty() {
-                        // e.g., a||
-                        return Err(self.err(ErrorKind::EmptyAlternate));
-                    }
-                    es.push(rev_concat(concat));
-                    return alts(es);
-                }
-                Some(Build::Expr(e)) => { concat.push(e); }
-            }
-        }
-    }
-
-    // Called whenever a closing parenthesis (`)`) is found.
-    //
-    // This pops the expression stack until:
-    //
-    //  1. The stack is empty. An error is reported because this
-    //     indicates an unopened parenthesis.
-    //  2. An opening parenthesis is found. Pop the opening parenthesis
-    //     and push a `Group` expression.
-    //  3. An alternate (`|`) is found. Pop the existing alternation
-    //     and add an arm to it in place. Pop one more item from the stack.
-    //     If the stack was empty, then report an unopened parenthesis
-    //     error, otherwise assume it is an opening parenthesis and
-    //     push a `Group` expression with the popped alternation.
-    //     (We can assume this is an opening parenthesis because an
-    //     alternation either corresponds to the entire Regex or it
-    //     corresponds to an entire group. This is guaranteed by the
-    //     `alternate` method.)
-    //
-    // Each "arm" in the above corresponds to the concatenation of all
-    // popped expressions.
-    //
-    // Neither empty arms nor empty groups are allowed.
-    fn close_paren(&mut self) -> Result<(Flags, Build)> {
-        let mut concat = vec![];
-        loop {
-            match self.stack.pop() {
-                // e.g., )
-                None => return Err(self.err(ErrorKind::UnopenedParen)),
-                Some(Build::LeftParen { i, name, old_flags, .. }) => {
-                    if concat.is_empty() {
-                        // e.g., ()
-                        return Err(self.err(ErrorKind::EmptyGroup));
-                    }
-                    return Ok((old_flags, Build::Expr(Expr::Group {
-                        e: Box::new(rev_concat(concat)),
-                        i: i,
-                        name: name,
-                    })));
-                }
-                Some(Build::Expr(Expr::Alternate(mut es))) => {
-                    if concat.is_empty() {
-                        // e.g., (a|)
-                        return Err(self.err(ErrorKind::EmptyAlternate));
-                    }
-                    es.push(rev_concat(concat));
-                    match self.stack.pop() {
-                        // e.g., a|b)
-                        None => return Err(self.err(ErrorKind::UnopenedParen)),
-                        Some(Build::Expr(_)) => unreachable!(),
-                        Some(Build::LeftParen { i, name, old_flags, .. }) => {
-                            return Ok((old_flags, Build::Expr(Expr::Group {
-                                e: Box::new(Expr::Alternate(es)),
-                                i: i,
-                                name: name,
-                            })));
-                        }
-                    }
-                }
-                Some(Build::Expr(e)) => { concat.push(e); }
-            }
-        }
-    }
-
-    // Called only when the parser reaches the end of input.
-    //
-    // This pops the expression stack until:
-    //
-    //  1. The stack is empty. Return concatenation of popped
-    //     expressions. This concatenation may be empty!
-    //  2. An alternation is found. Pop the alternation and push
-    //     a new arm. Return the alternation as the entire Regex.
-    //     After this, the stack must be empty, or else there is
-    //     an unclosed paren.
-    //
-    // If an opening parenthesis is popped, then an error is
-    // returned since it indicates an unclosed parenthesis.
-    fn finish_concat(&mut self) -> Result<Expr> {
-        let mut concat = vec![];
-        loop {
-            match self.stack.pop() {
-                None => { return Ok(rev_concat(concat)); }
-                Some(Build::LeftParen{ chari, ..}) => {
-                    // e.g., a(b
-                    return Err(self.errat(chari, ErrorKind::UnclosedParen));
-                }
-                Some(Build::Expr(Expr::Alternate(mut es))) => {
-                    if concat.is_empty() {
-                        // e.g., a|
-                        return Err(self.err(ErrorKind::EmptyAlternate));
-                    }
-                    es.push(rev_concat(concat));
-                    // Make sure there are no opening parens remaining.
-                    match self.stack.pop() {
-                        None => return Ok(Expr::Alternate(es)),
-                        Some(Build::LeftParen{ chari, ..}) => {
-                            // e.g., (a|b
-                            return Err(self.errat(
-                                chari, ErrorKind::UnclosedParen));
-                        }
-                        e => unreachable!("{:?}", e),
-                    }
-                }
-                Some(Build::Expr(e)) => { concat.push(e); }
-            }
-        }
-    }
-}
-
-// Methods for working with the bracket stack used for character class parsing.
-impl Parser {
-
-    // After parsing a closing bracket `]`, process elements of the bracket
-    // stack until finding the corresponding opening bracket `[`, and return
-    // the combined character class. E.g. with `[^b-f&&ab-c]`:
-    //
-    // 1. Adjacent sets are merged into a single union: `ab-c` -> `a-c`
-    // 2. Unions separated by `&&` are intersected: `b-f` and `a-c` -> `b-c`
-    // 3. Negation is applied if necessary: `b-c` -> negation of `b-c`
-    fn close_bracket(&self, stack: &mut Vec<Bracket>) -> Result<CharClass> {
-        let mut union = CharClass::empty();
-        let mut intersect = vec![];
-        loop {
-            match stack.pop() {
-                Some(Bracket::Set(class)) => {
-                    union.ranges.extend(class);
-                }
-                Some(Bracket::Intersection) => {
-                    let class = self.class_union_transform(union);
-                    intersect.push(class);
-                    union = CharClass::empty();
-                }
-                Some(Bracket::LeftBracket { negated }) => {
-                    let mut class = self.class_union_transform(union);
-                    for c in intersect {
-                        class = class.intersection(&c);
-                    }
-                    // negate after combining all sets (`^` has lower precedence than `&&`)
-                    if negated {
-                        class = class.negate();
-                    }
-                    if class.is_empty() {
-                        // e.g., [^\d\D]
-                        return Err(self.err(ErrorKind::EmptyClass));
-                    }
-                    return Ok(class);
-                }
-                // The first element on the stack is a `LeftBracket`
-                None => unreachable!()
-            }
-        }
-    }
-
-    // Apply case folding if requested on the union character class, and
-    // return a canonicalized class.
-    fn class_union_transform(&self, class: CharClass) -> CharClass {
-        if self.flags.casei {
-            // Case folding canonicalizes too
-            class.case_fold()
-        } else {
-            class.canonicalize()
-        }
-    }
-}
-
-impl Build {
-    fn is_empty(&self) -> bool {
-        match *self {
-            Build::Expr(Expr::Empty) => true,
-            _ => false,
-        }
-    }
-}
-
-// Make it ergonomic to conditionally bump the parser.
-// i.e., `bump_if('a')` or `bump_if("abc")`.
-trait Bumpable {
-    fn match_end(self, p: &Parser) -> usize;
-}
-
-impl Bumpable for char {
-    fn match_end(self, p: &Parser) -> usize {
-        let mut chars = p.chars();
-        if chars.next().map(|c| c == self).unwrap_or(false) {
-            chars.cur
-        } else {
-            0
-        }
-    }
-}
-
-impl<'a> Bumpable for &'a str {
-    fn match_end(self, p: &Parser) -> usize {
-        let mut search = self.chars();
-        let mut rest = p.chars();
-        let mut count = 0;
-        loop {
-            match (rest.next(), search.next()) {
-                (Some(c1), Some(c2)) if c1 == c2 => count = rest.cur,
-                (_, None) => return count,
-                _ => return 0,
-            }
-        }
-    }
-}
-
-impl<F: FnMut(char) -> bool> Bumpable for F {
-    fn match_end(mut self, p: &Parser) -> usize {
-        let mut chars = p.chars();
-        let mut count = 0;
-        while let Some(c) = chars.next() {
-            if !self(c) {
-                break
-            }
-            count = chars.cur;
-        }
-        count
-    }
-}
-
-// Turn a sequence of expressions into a concatenation.
-// This only uses `Concat` if there are 2 or more expressions.
-fn rev_concat(mut exprs: Vec<Expr>) -> Expr {
-    if exprs.len() == 0 {
-        Expr::Empty
-    } else if exprs.len() == 1 {
-        exprs.pop().unwrap()
-    } else {
-        exprs.reverse();
-        Expr::Concat(exprs)
-    }
-}
-
-// Returns true if and only if the given character is allowed in a capture
-// name. Note that the first char of a capture name must not be numeric.
-fn is_valid_capture_char(c: char) -> bool {
-    c == '_' || (c >= '0' && c <= '9')
-    || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')
-}
-
-fn is_ascii_word(c: char) -> bool {
-    match c {
-        'a' ... 'z' | 'A' ... 'Z' | '_' | '0' ... '9' => true,
-        _ => false,
-    }
-}
-
-/// Returns true if the given character has significance in a regex.
-pub fn is_punct(c: char) -> bool {
-    match c {
-        '\\' | '.' | '+' | '*' | '?' | '(' | ')' | '|' |
-        '[' | ']' | '{' | '}' | '^' | '$' | '#' | '&' | '-' | '~' => true,
-        _ => false,
-    }
-}
-
-fn checkadd(x: usize, y: usize) -> usize {
-    x.checked_add(y).expect("regex length overflow")
-}
-
-fn unicode_class(name: &str) -> Option<CharClass> {
-    UNICODE_CLASSES.binary_search_by(|&(s, _)| s.cmp(name)).ok().map(|i| {
-        raw_class_to_expr(UNICODE_CLASSES[i].1)
-    })
-}
-
-fn ascii_class(name: &str) -> Option<CharClass> {
-    ASCII_CLASSES.binary_search_by(|&(s, _)| s.cmp(name)).ok().map(|i| {
-        raw_class_to_expr(ASCII_CLASSES[i].1)
-    })
-}
-
-fn raw_class_to_expr(raw: &[(char, char)]) -> CharClass {
-    let range = |&(s, e)| ClassRange { start: s, end: e };
-    CharClass::new(raw.iter().map(range).collect())
-}
-
-type Class = &'static [(char, char)];
-type NamedClasses = &'static [(&'static str, Class)];
-
-const ASCII_CLASSES: NamedClasses = &[
-    // Classes must be in alphabetical order so that bsearch works.
-    // [:alnum:]      alphanumeric (== [0-9A-Za-z])
-    // [:alpha:]      alphabetic (== [A-Za-z])
-    // [:ascii:]      ASCII (== [\x00-\x7F])
-    // [:blank:]      blank (== [\t ])
-    // [:cntrl:]      control (== [\x00-\x1F\x7F])
-    // [:digit:]      digits (== [0-9])
-    // [:graph:]      graphical (== [!-~])
-    // [:lower:]      lower case (== [a-z])
-    // [:print:]      printable (== [ -~] == [ [:graph:]])
-    // [:punct:]      punctuation (== [!-/:-@[-`{-~])
-    // [:space:]      whitespace (== [\t\n\v\f\r ])
-    // [:upper:]      upper case (== [A-Z])
-    // [:word:]       word characters (== [0-9A-Za-z_])
-    // [:xdigit:]     hex digit (== [0-9A-Fa-f])
-    // Taken from: http://golang.org/pkg/regex/syntax/
-    ("alnum", &ALNUM),
-    ("alpha", &ALPHA),
-    ("ascii", &ASCII),
-    ("blank", &BLANK),
-    ("cntrl", &CNTRL),
-    ("digit", &DIGIT),
-    ("graph", &GRAPH),
-    ("lower", &LOWER),
-    ("print", &PRINT),
-    ("punct", &PUNCT),
-    ("space", &SPACE),
-    ("upper", &UPPER),
-    ("word", &WORD),
-    ("xdigit", &XDIGIT),
-];
-
-const ALNUM: Class = &[('0', '9'), ('A', 'Z'), ('a', 'z')];
-const ALPHA: Class = &[('A', 'Z'), ('a', 'z')];
-const ASCII: Class = &[('\x00', '\x7F')];
-const BLANK: Class = &[(' ', ' '), ('\t', '\t')];
-const CNTRL: Class = &[('\x00', '\x1F'), ('\x7F', '\x7F')];
-const DIGIT: Class = &[('0', '9')];
-const GRAPH: Class = &[('!', '~')];
-const LOWER: Class = &[('a', 'z')];
-const PRINT: Class = &[(' ', '~')];
-const PUNCT: Class = &[('!', '/'), (':', '@'), ('[', '`'), ('{', '~')];
-const SPACE: Class = &[('\t', '\t'), ('\n', '\n'), ('\x0B', '\x0B'),
-                       ('\x0C', '\x0C'), ('\r', '\r'), (' ', ' ')];
-const UPPER: Class = &[('A', 'Z')];
-const WORD: Class = &[('0', '9'), ('A', 'Z'), ('_', '_'), ('a', 'z')];
-const XDIGIT: Class = &[('0', '9'), ('A', 'F'), ('a', 'f')];
-
-#[cfg(test)]
-mod tests {
-    use {
-        CharClass, ClassRange, ByteClass, ByteRange,
-        Expr, Repeater,
-        ErrorKind,
-    };
-    use unicode::regex::{PERLD, PERLS, PERLW};
-    use super::{LOWER, UPPER, WORD, Flags, Parser, ascii_class};
-
-    static YI: &'static [(char, char)] = &[
-        ('\u{a000}', '\u{a48c}'), ('\u{a490}', '\u{a4c6}'),
-    ];
-
-    fn p(s: &str) -> Expr { Parser::parse(s, Flags::default()).unwrap() }
-    fn pf(s: &str, flags: Flags) -> Expr { Parser::parse(s, flags).unwrap() }
-    fn lit(c: char) -> Expr { Expr::Literal { chars: vec![c], casei: false } }
-    fn liti(c: char) -> Expr { Expr::Literal { chars: vec![c], casei: true } }
-    fn b<T>(v: T) -> Box<T> { Box::new(v) }
-    fn c(es: &[Expr]) -> Expr { Expr::Concat(es.to_vec()) }
-
-    fn pb(s: &str) -> Expr {
-        let flags = Flags { allow_bytes: true, .. Flags::default() };
-        Parser::parse(s, flags).unwrap()
-    }
-
-    fn blit(b: u8) -> Expr {
-        Expr::LiteralBytes {
-            bytes: vec![b],
-            casei: false,
-        }
-    }
-
-    fn bliti(b: u8) -> Expr {
-        Expr::LiteralBytes {
-            bytes: vec![b],
-            casei: true,
+    /// Build a parser from this configuration.
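+    ///
+    /// A minimal usage sketch (assuming the crate-root re-export
+    /// `regex_syntax::ParserBuilder` and that `Parser::parse` is the entry
+    /// point that takes the pattern and returns a `Result`):
+    ///
+    /// ```
+    /// use regex_syntax::ParserBuilder;
+    ///
+    /// let mut parser = ParserBuilder::new().build();
+    /// // The pattern is supplied to `parse`, not to `build`.
+    /// assert!(parser.parse("a|b").is_ok());
+    /// ```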
+    pub fn build(&self) -> Parser {
+        Parser {
+            ast: self.ast.build(),
+            hir: self.hir.build(),
         }
     }
 
-    fn class(ranges: &[(char, char)]) -> CharClass {
-        let ranges = ranges.iter().cloned()
-                           .map(|(c1, c2)| ClassRange::new(c1, c2)).collect();
-        CharClass::new(ranges)
-    }
-
-    fn classes(classes: &[&[(char, char)]]) -> CharClass {
-        let mut cls = CharClass::empty();
-        for &ranges in classes {
-            cls.ranges.extend(class(ranges));
-        }
-        cls.canonicalize()
-    }
-
-    fn bclass(ranges: &[(u8, u8)]) -> ByteClass {
-        let ranges = ranges.iter().cloned()
-                           .map(|(c1, c2)| ByteRange::new(c1, c2)).collect();
-        ByteClass::new(ranges)
-    }
-
-    fn asciid() -> CharClass {
-        ascii_class("digit").unwrap()
-    }
-
-    fn asciis() -> CharClass {
-        ascii_class("space").unwrap()
-    }
-
-    fn asciiw() -> CharClass {
-        ascii_class("word").unwrap()
-    }
-
-    fn asciid_bytes() -> ByteClass {
-        asciid().to_byte_class()
-    }
-
-    fn asciis_bytes() -> ByteClass {
-        asciis().to_byte_class()
-    }
-
-    fn asciiw_bytes() -> ByteClass {
-        asciiw().to_byte_class()
-    }
-
-    #[test]
-    fn empty() {
-        assert_eq!(p(""), Expr::Empty);
-    }
-
-    #[test]
-    fn literal() {
-        assert_eq!(p("a"), lit('a'));
-        assert_eq!(pb("(?-u)a"), blit(b'a'));
-    }
-
-    #[test]
-    fn literal_string() {
-        assert_eq!(p("ab"), Expr::Concat(vec![lit('a'), lit('b')]));
-        assert_eq!(pb("(?-u)ab"), Expr::Concat(vec![blit(b'a'), blit(b'b')]));
-    }
-
-    #[test]
-    fn start_literal() {
-        assert_eq!(p("^a"), Expr::Concat(vec![
-            Expr::StartText,
-            Expr::Literal { chars: vec!['a'], casei: false },
-        ]));
-    }
-
-    #[test]
-    fn repeat_zero_or_one_greedy() {
-        assert_eq!(p("a?"), Expr::Repeat {
-            e: b(lit('a')),
-            r: Repeater::ZeroOrOne,
-            greedy: true,
-        });
-    }
-
-    #[test]
-    fn repeat_zero_or_one_greedy_concat() {
-        assert_eq!(p("ab?"), Expr::Concat(vec![
-            lit('a'),
-            Expr::Repeat {
-                e: b(lit('b')),
-                r: Repeater::ZeroOrOne,
-                greedy: true,
-            },
-        ]));
-    }
-
-    #[test]
-    fn repeat_zero_or_one_nongreedy() {
-        assert_eq!(p("a??"), Expr::Repeat {
-            e: b(lit('a')),
-            r: Repeater::ZeroOrOne,
-            greedy: false,
-        });
-    }
-
-    #[test]
-    fn repeat_one_or_more_greedy() {
-        assert_eq!(p("a+"), Expr::Repeat {
-            e: b(lit('a')),
-            r: Repeater::OneOrMore,
-            greedy: true,
-        });
-    }
-
-    #[test]
-    fn repeat_one_or_more_nongreedy() {
-        assert_eq!(p("a+?"), Expr::Repeat {
-            e: b(lit('a')),
-            r: Repeater::OneOrMore,
-            greedy: false,
-        });
-    }
-
-    #[test]
-    fn repeat_zero_or_more_greedy() {
-        assert_eq!(p("a*"), Expr::Repeat {
-            e: b(lit('a')),
-            r: Repeater::ZeroOrMore,
-            greedy: true,
-        });
-    }
-
-    #[test]
-    fn repeat_zero_or_more_nongreedy() {
-        assert_eq!(p("a*?"), Expr::Repeat {
-            e: b(lit('a')),
-            r: Repeater::ZeroOrMore,
-            greedy: false,
-        });
-    }
-
-    #[test]
-    fn repeat_counted_exact() {
-        assert_eq!(p("a{5}"), Expr::Repeat {
-            e: b(lit('a')),
-            r: Repeater::Range { min: 5, max: Some(5) },
-            greedy: true,
-        });
-    }
-
-    #[test]
-    fn repeat_counted_min() {
-        assert_eq!(p("a{5,}"), Expr::Repeat {
-            e: b(lit('a')),
-            r: Repeater::Range { min: 5, max: None },
-            greedy: true,
-        });
-    }
-
-    #[test]
-    fn repeat_counted_min_max() {
-        assert_eq!(p("a{5,10}"), Expr::Repeat {
-            e: b(lit('a')),
-            r: Repeater::Range { min: 5, max: Some(10) },
-            greedy: true,
-        });
-    }
-
-    #[test]
-    fn repeat_counted_exact_nongreedy() {
-        assert_eq!(p("a{5}?"), Expr::Repeat {
-            e: b(lit('a')),
-            r: Repeater::Range { min: 5, max: Some(5) },
-            greedy: false,
-        });
-    }
-
-    #[test]
-    fn repeat_counted_min_nongreedy() {
-        assert_eq!(p("a{5,}?"), Expr::Repeat {
-            e: b(lit('a')),
-            r: Repeater::Range { min: 5, max: None },
-            greedy: false,
-        });
-    }
-
-    #[test]
-    fn repeat_counted_min_max_nongreedy() {
-        assert_eq!(p("a{5,10}?"), Expr::Repeat {
-            e: b(lit('a')),
-            r: Repeater::Range { min: 5, max: Some(10) },
-            greedy: false,
-        });
-    }
-
-    #[test]
-    fn repeat_counted_whitespace() {
-        assert_eq!(p("a{ 5   }"), Expr::Repeat {
-            e: b(lit('a')),
-            r: Repeater::Range { min: 5, max: Some(5) },
-            greedy: true,
-        });
-        assert_eq!(p("a{ 5 , 10 }"), Expr::Repeat {
-            e: b(lit('a')),
-            r: Repeater::Range { min: 5, max: Some(10) },
-            greedy: true,
-        });
-    }
-
-    #[test]
-    fn group_literal() {
-        assert_eq!(p("(a)"), Expr::Group {
-            e: b(lit('a')),
-            i: Some(1),
-            name: None,
-        });
-    }
-
-    #[test]
-    fn group_literal_concat() {
-        assert_eq!(p("(ab)"), Expr::Group {
-            e: b(c(&[lit('a'), lit('b')])),
-            i: Some(1),
-            name: None,
-        });
-    }
-
-    #[test]
-    fn alt_two() {
-        assert_eq!(p("a|b"), Expr::Alternate(vec![lit('a'), lit('b')]));
-    }
-
-    #[test]
-    fn alt_many() {
-        assert_eq!(p("a|b|c"), Expr::Alternate(vec![
-            lit('a'), lit('b'), lit('c'),
-        ]));
-    }
-
-    #[test]
-    fn alt_many_concat() {
-        assert_eq!(p("ab|bc|cd"), Expr::Alternate(vec![
-            c(&[lit('a'), lit('b')]),
-            c(&[lit('b'), lit('c')]),
-            c(&[lit('c'), lit('d')]),
-        ]));
-    }
-
-    #[test]
-    fn alt_group_two() {
-        assert_eq!(p("(a|b)"), Expr::Group {
-            e: b(Expr::Alternate(vec![lit('a'), lit('b')])),
-            i: Some(1),
-            name: None,
-        });
-    }
-
-    #[test]
-    fn alt_group_many() {
-        assert_eq!(p("(a|b|c)"), Expr::Group {
-            e: b(Expr::Alternate(vec![lit('a'), lit('b'), lit('c')])),
-            i: Some(1),
-            name: None,
-        });
-    }
-
-    #[test]
-    fn alt_group_many_concat() {
-        assert_eq!(p("(ab|bc|cd)"), Expr::Group {
-            e: b(Expr::Alternate(vec![
-                c(&[lit('a'), lit('b')]),
-                c(&[lit('b'), lit('c')]),
-                c(&[lit('c'), lit('d')]),
-            ])),
-            i: Some(1),
-            name: None,
-        });
-    }
-
-    #[test]
-    fn alt_group_nested() {
-        assert_eq!(p("(ab|(bc|(cd)))"), Expr::Group {
-            e: b(Expr::Alternate(vec![
-                c(&[lit('a'), lit('b')]),
-                Expr::Group {
-                    e: b(Expr::Alternate(vec![
-                        c(&[lit('b'), lit('c')]),
-                        Expr::Group {
-                            e: b(c(&[lit('c'), lit('d')])),
-                            i: Some(3),
-                            name: None,
-                        }
-                    ])),
-                    i: Some(2),
-                    name: None,
-                },
-            ])),
-            i: Some(1),
-            name: None,
-        });
-    }
-
-    #[test]
-    fn group_name() {
-        assert_eq!(p("(?P<foo>a)"), Expr::Group {
-            e: b(lit('a')),
-            i: Some(1),
-            name: Some("foo".into()),
-        });
-    }
-
-    #[test]
-    fn group_no_capture() {
-        assert_eq!(p("(?:a)"), Expr::Group {
-            e: b(lit('a')),
-            i: None,
-            name: None,
-        });
-    }
-
-    #[test]
-    fn group_flags() {
-        assert_eq!(p("(?i:a)"), Expr::Group {
-            e: b(liti('a')),
-            i: None,
-            name: None,
-        });
-        assert_eq!(pb("(?i-u:a)"), Expr::Group {
-            e: b(bliti(b'a')),
-            i: None,
-            name: None,
-        });
-    }
-
-    #[test]
-    fn group_flags_returned() {
-        assert_eq!(p("(?i:a)a"), c(&[
-            Expr::Group {
-                e: b(liti('a')),
-                i: None,
-                name: None,
-            },
-            lit('a'),
-        ]));
-        assert_eq!(pb("(?i-u:a)a"), c(&[
-            Expr::Group {
-                e: b(bliti(b'a')),
-                i: None,
-                name: None,
-            },
-            lit('a'),
-        ]));
-    }
-
-    #[test]
-    fn group_flags_retained() {
-        assert_eq!(p("(?i)(?-i:a)a"), c(&[
-            Expr::Group {
-                e: b(lit('a')),
-                i: None,
-                name: None,
-            },
-            liti('a'),
-        ]));
-        assert_eq!(pb("(?i-u)(?u-i:a)a"), c(&[
-            Expr::Group {
-                e: b(lit('a')),
-                i: None,
-                name: None,
-            },
-            bliti(b'a'),
-        ]));
-    }
-
-    #[test]
-    fn flags_inline() {
-        assert_eq!(p("(?i)a"), liti('a'));
-    }
-
-    #[test]
-    fn flags_inline_multiple() {
-        assert_eq!(p("(?is)a."), c(&[liti('a'), Expr::AnyChar]));
-    }
-
-    #[test]
-    fn flags_inline_multiline() {
-        assert_eq!(p("(?m)^(?-m)$"), c(&[Expr::StartLine, Expr::EndText]));
-    }
-
-    #[test]
-    fn flags_inline_swap_greed() {
-        assert_eq!(p("(?U)a*a*?(?i-U)a*a*?"), c(&[
-            Expr::Repeat {
-                e: b(lit('a')),
-                r: Repeater::ZeroOrMore,
-                greedy: false,
-            },
-            Expr::Repeat {
-                e: b(lit('a')),
-                r: Repeater::ZeroOrMore,
-                greedy: true,
-            },
-            Expr::Repeat {
-                e: b(liti('a')),
-                r: Repeater::ZeroOrMore,
-                greedy: true,
-            },
-            Expr::Repeat {
-                e: b(liti('a')),
-                r: Repeater::ZeroOrMore,
-                greedy: false,
-            },
-        ]));
-    }
-
-    #[test]
-    fn flags_inline_multiple_negate_one() {
-        assert_eq!(p("(?is)a.(?i-s)a."), c(&[
-            liti('a'), Expr::AnyChar, liti('a'), Expr::AnyCharNoNL,
-        ]));
-    }
-
-    #[test]
-    fn any_byte() {
-        assert_eq!(
-            pb("(?-u).(?u)."), c(&[Expr::AnyByteNoNL, Expr::AnyCharNoNL]));
-        assert_eq!(
-            pb("(?s)(?-u).(?u)."), c(&[Expr::AnyByte, Expr::AnyChar]));
-    }
-
-    #[test]
-    fn flags_inline_negate() {
-        assert_eq!(p("(?i)a(?-i)a"), c(&[liti('a'), lit('a')]));
-    }
-
-    #[test]
-    fn flags_group_inline() {
-        assert_eq!(p("(a(?i)a)a"), c(&[
-            Expr::Group {
-                e: b(c(&[lit('a'), liti('a')])),
-                i: Some(1),
-                name: None,
-            },
-            lit('a'),
-        ]));
+    /// Set the nesting limit for this parser.
+    ///
+    /// The nesting limit controls how deep the abstract syntax tree is allowed
+    /// to be. If the AST exceeds the given limit (e.g., with too many nested
+    /// groups), then an error is returned by the parser.
+    ///
+    /// The purpose of this limit is to act as a heuristic to prevent stack
+    /// overflow for consumers that do structural induction on an `Ast` using
+    /// explicit recursion. While this crate never does this (instead using
+    /// constant stack space and moving the call stack to the heap), other
+    /// crates may.
+    ///
+    /// This limit is not checked until the entire Ast is parsed. Therefore,
+    /// if callers want to put a limit on the amount of heap space used, then
+    /// they should impose a limit on the length, in bytes, of the concrete
+    /// pattern string. In particular, this is viable since this parser
+    /// implementation will limit itself to heap space proportional to the
+    /// length of the pattern string.
+    ///
+    /// Note that a nest limit of `0` will return a nest limit error for most
+    /// patterns but not all. For example, a nest limit of `0` permits `a` but
+    /// not `ab`, since `ab` requires a concatenation, which results in a nest
+    /// depth of `1`. In general, a nest limit is not something that manifests
+    /// in an obvious way in the concrete syntax; therefore, it should not be
+    /// used in a granular way.
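+    ///
+    /// A sketch of the behavior described above, under the same assumptions
+    /// as the example on `build`:
+    ///
+    /// ```
+    /// use regex_syntax::ParserBuilder;
+    ///
+    /// let mut parser = ParserBuilder::new().nest_limit(0).build();
+    /// // A lone literal needs no nesting, but a concatenation does.
+    /// assert!(parser.parse("a").is_ok());
+    /// assert!(parser.parse("ab").is_err());
+    /// ```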
+    pub fn nest_limit(&mut self, limit: u32) -> &mut ParserBuilder {
+        self.ast.nest_limit(limit);
+        self
     }
 
-    #[test]
-    fn flags_group_inline_retain() {
-        assert_eq!(p("(?i)((?-i)a)a"), c(&[
-            Expr::Group {
-                e: b(lit('a')),
-                i: Some(1),
-                name: None,
-            },
-            liti('a'),
-        ]));
-    }
-
-    #[test]
-    fn flags_default_casei() {
-        let flags = Flags { casei: true, .. Flags::default() };
-        assert_eq!(pf("a", flags), liti('a'));
-    }
-
-    #[test]
-    fn flags_default_multi() {
-        let flags = Flags { multi: true, .. Flags::default() };
-        assert_eq!(pf("^", flags), Expr::StartLine);
-    }
-
-    #[test]
-    fn flags_default_dotnl() {
-        let flags = Flags { dotnl: true, .. Flags::default() };
-        assert_eq!(pf(".", flags), Expr::AnyChar);
-    }
-
-    #[test]
-    fn flags_default_swap_greed() {
-        let flags = Flags { swap_greed: true, .. Flags::default() };
-        assert_eq!(pf("a+", flags), Expr::Repeat {
-            e: b(lit('a')),
-            r: Repeater::OneOrMore,
-            greedy: false,
-        });
-    }
-
-    #[test]
-    fn flags_default_ignore_space() {
-        let flags = Flags { ignore_space: true, .. Flags::default() };
-        assert_eq!(pf(" a ", flags), lit('a'));
-    }
-
-    #[test]
-    fn escape_simple() {
-        assert_eq!(p(r"\a\f\t\n\r\v"), c(&[
-            lit('\x07'), lit('\x0C'), lit('\t'),
-            lit('\n'), lit('\r'), lit('\x0B'),
-        ]));
-    }
-
-    #[test]
-    fn escape_boundaries() {
-        assert_eq!(p(r"\A\z\b\B"), c(&[
-            Expr::StartText, Expr::EndText,
-            Expr::WordBoundary, Expr::NotWordBoundary,
-        ]));
-        assert_eq!(pb(r"(?-u)\b\B"), c(&[
-            Expr::WordBoundaryAscii, Expr::NotWordBoundaryAscii,
-        ]));
-    }
-
-    #[test]
-    fn escape_punctuation() {
-        assert_eq!(p(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#"), c(&[
-            lit('\\'), lit('.'), lit('+'), lit('*'), lit('?'),
-            lit('('), lit(')'), lit('|'), lit('['), lit(']'),
-            lit('{'), lit('}'), lit('^'), lit('$'), lit('#'),
-        ]));
-    }
-
-    #[test]
-    fn escape_octal() {
-        assert_eq!(p(r"\123"), lit('S'));
-        assert_eq!(p(r"\1234"), c(&[lit('S'), lit('4')]));
-
-        assert_eq!(pb(r"(?-u)\377"), blit(0xFF));
-    }
-
-    #[test]
-    fn escape_hex2() {
-        assert_eq!(p(r"\x53"), lit('S'));
-        assert_eq!(p(r"\x534"), c(&[lit('S'), lit('4')]));
-
-        assert_eq!(pb(r"(?-u)\xff"), blit(0xFF));
-        assert_eq!(pb(r"(?-u)\x00"), blit(0x0));
-        assert_eq!(pb(r"(?-u)[\x00]"),
-                   Expr::ClassBytes(bclass(&[(b'\x00', b'\x00')])));
-        assert_eq!(pb(r"(?-u)[^\x00]"),
-                   Expr::ClassBytes(bclass(&[(b'\x01', b'\xFF')])));
-    }
-
-    #[test]
-    fn escape_hex() {
-        assert_eq!(p(r"\x{53}"), lit('S'));
-        assert_eq!(p(r"\x{53}4"), c(&[lit('S'), lit('4')]));
-        assert_eq!(p(r"\x{2603}"), lit('\u{2603}'));
-
-        assert_eq!(pb(r"(?-u)\x{00FF}"), blit(0xFF));
-    }
-
-    #[test]
-    fn escape_unicode_name() {
-        assert_eq!(p(r"\p{Yi}"), Expr::Class(class(YI)));
-    }
-
-    #[test]
-    fn escape_unicode_letter() {
-        assert_eq!(p(r"\pZ"), Expr::Class(class(&[
-            ('\u{20}', '\u{20}'), ('\u{a0}', '\u{a0}'),
-            ('\u{1680}', '\u{1680}'), ('\u{2000}', '\u{200a}'),
-            ('\u{2028}', '\u{2029}'), ('\u{202f}', '\u{202f}'),
-            ('\u{205f}', '\u{205f}'), ('\u{3000}', '\u{3000}'),
-        ])));
-    }
-
-    #[test]
-    fn escape_unicode_name_case_fold() {
-        assert_eq!(p(r"(?i)\p{Yi}"), Expr::Class(class(YI).case_fold()));
-    }
-
-    #[test]
-    fn escape_unicode_letter_case_fold() {
-        assert_eq!(p(r"(?i)\pZ"), Expr::Class(class(&[
-            ('\u{20}', '\u{20}'), ('\u{a0}', '\u{a0}'),
-            ('\u{1680}', '\u{1680}'), ('\u{2000}', '\u{200a}'),
-            ('\u{2028}', '\u{2029}'), ('\u{202f}', '\u{202f}'),
-            ('\u{205f}', '\u{205f}'), ('\u{3000}', '\u{3000}'),
-        ]).case_fold()));
-    }
-
-    #[test]
-    fn escape_unicode_name_negate() {
-        assert_eq!(p(r"\P{Yi}"), Expr::Class(class(YI).negate()));
-    }
-
-    #[test]
-    fn escape_unicode_letter_negate() {
-        assert_eq!(p(r"\PZ"), Expr::Class(class(&[
-            ('\u{20}', '\u{20}'), ('\u{a0}', '\u{a0}'),
-            ('\u{1680}', '\u{1680}'), ('\u{2000}', '\u{200a}'),
-            ('\u{2028}', '\u{2029}'), ('\u{202f}', '\u{202f}'),
-            ('\u{205f}', '\u{205f}'), ('\u{3000}', '\u{3000}'),
-        ]).negate()));
-    }
-
-    #[test]
-    fn escape_unicode_name_negate_case_fold() {
-        assert_eq!(p(r"(?i)\P{Yi}"),
-                   Expr::Class(class(YI).negate().case_fold()));
-    }
-
-    #[test]
-    fn escape_unicode_letter_negate_case_fold() {
-        assert_eq!(p(r"(?i)\PZ"), Expr::Class(class(&[
-            ('\u{20}', '\u{20}'), ('\u{a0}', '\u{a0}'),
-            ('\u{1680}', '\u{1680}'), ('\u{2000}', '\u{200a}'),
-            ('\u{2028}', '\u{2029}'), ('\u{202f}', '\u{202f}'),
-            ('\u{205f}', '\u{205f}'), ('\u{3000}', '\u{3000}'),
-        ]).negate().case_fold()));
-    }
-
-    #[test]
-    fn escape_perl_d() {
-        assert_eq!(p(r"\d"), Expr::Class(class(PERLD)));
-        assert_eq!(pb(r"(?-u)\d"), Expr::Class(asciid()));
-    }
-
-    #[test]
-    fn escape_perl_s() {
-        assert_eq!(p(r"\s"), Expr::Class(class(PERLS)));
-        assert_eq!(pb(r"(?-u)\s"), Expr::Class(asciis()));
-    }
-
-    #[test]
-    fn escape_perl_w() {
-        assert_eq!(p(r"\w"), Expr::Class(class(PERLW)));
-        assert_eq!(pb(r"(?-u)\w"), Expr::Class(asciiw()));
-    }
-
-    #[test]
-    fn escape_perl_d_negate() {
-        assert_eq!(p(r"\D"), Expr::Class(class(PERLD).negate()));
-        assert_eq!(pb(r"(?-u)\D"), Expr::Class(asciid().negate()));
-    }
-
-    #[test]
-    fn escape_perl_s_negate() {
-        assert_eq!(p(r"\S"), Expr::Class(class(PERLS).negate()));
-        assert_eq!(pb(r"(?-u)\S"), Expr::Class(asciis().negate()));
-    }
-
-    #[test]
-    fn escape_perl_w_negate() {
-        assert_eq!(p(r"\W"), Expr::Class(class(PERLW).negate()));
-        assert_eq!(pb(r"(?-u)\W"), Expr::Class(asciiw().negate()));
-    }
-
-    #[test]
-    fn escape_perl_d_case_fold() {
-        assert_eq!(p(r"(?i)\d"), Expr::Class(class(PERLD).case_fold()));
-        assert_eq!(pb(r"(?i-u)\d"), Expr::Class(asciid().case_fold()));
-    }
-
-    #[test]
-    fn escape_perl_s_case_fold() {
-        assert_eq!(p(r"(?i)\s"), Expr::Class(class(PERLS).case_fold()));
-        assert_eq!(pb(r"(?i-u)\s"), Expr::Class(asciis().case_fold()));
-    }
-
-    #[test]
-    fn escape_perl_w_case_fold() {
-        assert_eq!(p(r"(?i)\w"), Expr::Class(class(PERLW).case_fold()));
-        assert_eq!(pb(r"(?i-u)\w"), Expr::Class(asciiw().case_fold()));
-    }
-
-    #[test]
-    fn escape_perl_d_case_fold_negate() {
-        assert_eq!(p(r"(?i)\D"),
-                   Expr::Class(class(PERLD).case_fold().negate()));
-        let bytes = asciid().case_fold().negate();
-        assert_eq!(pb(r"(?i-u)\D"), Expr::Class(bytes));
+    /// Whether to support octal syntax or not.
+    ///
+    /// Octal syntax is a little-known way of uttering Unicode codepoints in
+    /// a regular expression. For example, `a`, `\x61`, `\u0061` and
+    /// `\141` are all equivalent regular expressions, where the last example
+    /// shows octal syntax.
+    ///
+    /// While supporting octal syntax isn't in and of itself a problem, it does
+    /// make good error messages harder. That is, in PCRE-based regex engines,
+    /// syntax like `\0` invokes a backreference, which is explicitly
+    /// unsupported in Rust's regex engine. However, many users expect it to
+    /// be supported. Therefore, when octal support is disabled, the error
+    /// message will explicitly mention that backreferences aren't supported.
+    ///
+    /// Octal syntax is disabled by default.
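+    ///
+    /// A sketch under the same assumptions as the example on `build`:
+    ///
+    /// ```
+    /// use regex_syntax::ParserBuilder;
+    ///
+    /// // With octal syntax enabled, `\141` is another way to write `a`.
+    /// let mut parser = ParserBuilder::new().octal(true).build();
+    /// assert!(parser.parse(r"\141").is_ok());
+    ///
+    /// // By default, octal syntax is rejected.
+    /// let mut parser = ParserBuilder::new().build();
+    /// assert!(parser.parse(r"\141").is_err());
+    /// ```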
+    pub fn octal(&mut self, yes: bool) -> &mut ParserBuilder {
+        self.ast.octal(yes);
+        self
     }
 
-    #[test]
-    fn escape_perl_s_case_fold_negate() {
-        assert_eq!(p(r"(?i)\S"),
-                   Expr::Class(class(PERLS).case_fold().negate()));
-        let bytes = asciis().case_fold().negate();
-        assert_eq!(pb(r"(?i-u)\S"), Expr::Class(bytes));
-    }
-
-    #[test]
-    fn escape_perl_w_case_fold_negate() {
-        assert_eq!(p(r"(?i)\W"),
-                   Expr::Class(class(PERLW).case_fold().negate()));
-        let bytes = asciiw().case_fold().negate();
-        assert_eq!(pb(r"(?i-u)\W"), Expr::Class(bytes));
-    }
-
-    #[test]
-    fn class_singleton() {
-        assert_eq!(p(r"[a]"), Expr::Class(class(&[('a', 'a')])));
-        assert_eq!(p(r"[\x00]"), Expr::Class(class(&[('\x00', '\x00')])));
-        assert_eq!(p(r"[\n]"), Expr::Class(class(&[('\n', '\n')])));
-        assert_eq!(p("[\n]"), Expr::Class(class(&[('\n', '\n')])));
-
-        assert_eq!(pb(r"(?-u)[a]"), Expr::ClassBytes(bclass(&[(b'a', b'a')])));
-        assert_eq!(pb(r"(?-u)[\x00]"), Expr::ClassBytes(bclass(&[(0, 0)])));
-        assert_eq!(pb(r"(?-u)[\xFF]"),
-                   Expr::ClassBytes(bclass(&[(0xFF, 0xFF)])));
-        assert_eq!(pb("(?-u)[\n]"),
-                   Expr::ClassBytes(bclass(&[(b'\n', b'\n')])));
-        assert_eq!(pb(r"(?-u)[\n]"),
-                   Expr::ClassBytes(bclass(&[(b'\n', b'\n')])));
-    }
-
-    #[test]
-    fn class_singleton_negate() {
-        assert_eq!(p(r"[^a]"), Expr::Class(class(&[
-            ('\x00', '\x60'), ('\x62', '\u{10FFFF}'),
-        ])));
-        assert_eq!(p(r"[^\x00]"), Expr::Class(class(&[
-            ('\x01', '\u{10FFFF}'),
-        ])));
-        assert_eq!(p(r"[^\n]"), Expr::Class(class(&[
-            ('\x00', '\x09'), ('\x0b', '\u{10FFFF}'),
-        ])));
-        assert_eq!(p("[^\n]"), Expr::Class(class(&[
-            ('\x00', '\x09'), ('\x0b', '\u{10FFFF}'),
-        ])));
-
-        assert_eq!(pb(r"(?-u)[^a]"), Expr::ClassBytes(bclass(&[
-            (0x00, 0x60), (0x62, 0xFF),
-        ])));
-        assert_eq!(pb(r"(?-u)[^\x00]"), Expr::ClassBytes(bclass(&[
-            (0x01, 0xFF),
-        ])));
-        assert_eq!(pb(r"(?-u)[^\n]"), Expr::ClassBytes(bclass(&[
-            (0x00, 0x09), (0x0B, 0xFF),
-        ])));
-        assert_eq!(pb("(?-u)[^\n]"), Expr::ClassBytes(bclass(&[
-            (0x00, 0x09), (0x0B, 0xFF),
-        ])));
-    }
-
-    #[test]
-    fn class_singleton_class() {
-        assert_eq!(p(r"[\d]"), Expr::Class(class(PERLD)));
-        assert_eq!(p(r"[\p{Yi}]"), Expr::Class(class(YI)));
-
-        let bytes = class(PERLD).to_byte_class();
-        assert_eq!(pb(r"(?-u)[\d]"), Expr::ClassBytes(bytes));
-    }
-
-    #[test]
-    fn class_singleton_class_negate() {
-        assert_eq!(p(r"[^\d]"), Expr::Class(class(PERLD).negate()));
-        assert_eq!(p(r"[^\w]"), Expr::Class(class(PERLW).negate()));
-        assert_eq!(p(r"[^\s]"), Expr::Class(class(PERLS).negate()));
-
-        let bytes = asciid_bytes().negate();
-        assert_eq!(pb(r"(?-u)[^\d]"), Expr::ClassBytes(bytes));
-        let bytes = asciiw_bytes().negate();
-        assert_eq!(pb(r"(?-u)[^\w]"), Expr::ClassBytes(bytes));
-        let bytes = asciis_bytes().negate();
-        assert_eq!(pb(r"(?-u)[^\s]"), Expr::ClassBytes(bytes));
-    }
-
-    #[test]
-    fn class_singleton_class_negate_negate() {
-        assert_eq!(p(r"[^\D]"), Expr::Class(class(PERLD)));
-        assert_eq!(p(r"[^\W]"), Expr::Class(class(PERLW)));
-        assert_eq!(p(r"[^\S]"), Expr::Class(class(PERLS)));
-
-        assert_eq!(pb(r"(?-u)[^\D]"), Expr::ClassBytes(asciid_bytes()));
-        assert_eq!(pb(r"(?-u)[^\W]"), Expr::ClassBytes(asciiw_bytes()));
-        assert_eq!(pb(r"(?-u)[^\S]"), Expr::ClassBytes(asciis_bytes()));
-    }
-
-    #[test]
-    fn class_singleton_class_casei() {
-        assert_eq!(p(r"(?i)[\d]"), Expr::Class(class(PERLD).case_fold()));
-        assert_eq!(p(r"(?i)[\p{Yi}]"), Expr::Class(class(YI).case_fold()));
-
-        assert_eq!(pb(r"(?i-u)[\d]"),
-                   Expr::ClassBytes(asciid_bytes().case_fold()));
+    /// When enabled, the parser will permit the construction of a regular
+    /// expression that may match invalid UTF-8.
+    ///
+    /// When disabled (the default), the parser is guaranteed to produce
+    /// an expression that will only ever match valid UTF-8 (otherwise, the
+    /// parser will return an error).
+    ///
+    /// Note that currently, even when invalid UTF-8 is banned, the parser
+    /// will permit a negated ASCII word boundary (i.e., `(?-u:\B)`) even
+    /// though it can actually match at invalid UTF-8 boundaries. This bug
+    /// will be fixed on the next semver release.
+    pub fn allow_invalid_utf8(&mut self, yes: bool) -> &mut ParserBuilder {
+        self.hir.allow_invalid_utf8(yes);
+        self
     }
 
-    #[test]
-    fn class_singleton_class_negate_casei() {
-        assert_eq!(p(r"(?i)[^\d]"),
-                   Expr::Class(class(PERLD).case_fold().negate()));
-        assert_eq!(p(r"(?i)[^\w]"),
-                   Expr::Class(class(PERLW).case_fold().negate()));
-        assert_eq!(p(r"(?i)[^\s]"),
-                   Expr::Class(class(PERLS).case_fold().negate()));
-
-        let bytes = asciid_bytes().case_fold().negate();
-        assert_eq!(pb(r"(?i-u)[^\d]"), Expr::ClassBytes(bytes));
-        let bytes = asciiw_bytes().case_fold().negate();
-        assert_eq!(pb(r"(?i-u)[^\w]"), Expr::ClassBytes(bytes));
-        let bytes = asciis_bytes().case_fold().negate();
-        assert_eq!(pb(r"(?i-u)[^\s]"), Expr::ClassBytes(bytes));
-    }
-
-    #[test]
-    fn class_singleton_class_negate_negate_casei() {
-        assert_eq!(p(r"(?i)[^\D]"), Expr::Class(class(PERLD).case_fold()));
-        assert_eq!(p(r"(?i)[^\W]"), Expr::Class(class(PERLW).case_fold()));
-        assert_eq!(p(r"(?i)[^\S]"), Expr::Class(class(PERLS).case_fold()));
-
-        assert_eq!(pb(r"(?i-u)[^\D]"),
-                   Expr::ClassBytes(asciid_bytes().case_fold()));
-        assert_eq!(pb(r"(?i-u)[^\W]"),
-                   Expr::ClassBytes(asciiw_bytes().case_fold()));
-        assert_eq!(pb(r"(?i-u)[^\S]"),
-                   Expr::ClassBytes(asciis_bytes().case_fold()));
-    }
-
-    #[test]
-    fn class_multiple_class() {
-        assert_eq!(p(r"[\d\p{Yi}]"), Expr::Class(classes(&[
-            PERLD, YI,
-        ])));
-    }
-
-    #[test]
-    fn class_multiple_class_negate() {
-        assert_eq!(p(r"[^\d\p{Yi}]"), Expr::Class(classes(&[
-            PERLD, YI,
-        ]).negate()));
-    }
-
-    #[test]
-    fn class_multiple_class_negate_negate() {
-        let nperlw = class(PERLW).negate();
-        let nyi = class(YI).negate();
-        let cls = CharClass::empty().merge(nperlw).merge(nyi);
-        assert_eq!(p(r"[^\W\P{Yi}]"), Expr::Class(cls.negate()));
-    }
-
-    #[test]
-    fn class_multiple_class_casei() {
-        assert_eq!(p(r"(?i)[\d\p{Yi}]"), Expr::Class(classes(&[
-            PERLD, YI,
-        ]).case_fold()));
-    }
-
-    #[test]
-    fn class_multiple_class_negate_casei() {
-        assert_eq!(p(r"(?i)[^\d\p{Yi}]"), Expr::Class(classes(&[
-            PERLD, YI,
-        ]).case_fold().negate()));
-    }
-
-    #[test]
-    fn class_multiple_class_negate_negate_casei() {
-        let nperlw = class(PERLW).negate();
-        let nyi = class(YI).negate();
-        let class = CharClass::empty().merge(nperlw).merge(nyi);
-        assert_eq!(p(r"(?i)[^\W\P{Yi}]"),
-                   Expr::Class(class.case_fold().negate()));
-    }
-
-    #[test]
-    fn class_class_hypen() {
-        assert_eq!(p(r"[\p{Yi}-]"), Expr::Class(classes(&[
-            &[('-', '-')], YI,
-        ])));
-        assert_eq!(p(r"[\p{Yi}-a]"), Expr::Class(classes(&[
-            &[('-', '-')], &[('a', 'a')], YI,
-        ])));
-    }
-
-    #[test]
-    fn class_brackets() {
-        assert_eq!(p(r"[]]"), Expr::Class(class(&[(']', ']')])));
-        assert_eq!(p(r"[]\[]"), Expr::Class(class(&[('[', '['), (']', ']')])));
-        assert_eq!(p(r"[\[]]"), Expr::Concat(vec![
-            Expr::Class(class(&[('[', '[')])),
-            lit(']'),
-        ]));
-    }
-
-    #[test]
-    fn class_brackets_hypen() {
-        assert_eq!(p("[]-]"), Expr::Class(class(&[('-', '-'), (']', ']')])));
-        assert_eq!(p("[-]]"), Expr::Concat(vec![
-            Expr::Class(class(&[('-', '-')])),
-            lit(']'),
-        ]));
-    }
-
-    #[test]
-    fn class_nested_class_union() {
-        assert_eq!(p(r"[c[a-b]]"), Expr::Class(class(&[('a', 'c')])));
-        assert_eq!(p(r"[[a-b]]"), Expr::Class(class(&[('a', 'b')])));
-        assert_eq!(p(r"[[c][a-b]]"), Expr::Class(class(&[('a', 'c')])));
-
-        assert_eq!(pb(r"(?-u)[c[a-b]]"),
-                   Expr::ClassBytes(bclass(&[(b'a', b'c')])));
-        assert_eq!(pb(r"(?-u)[[a-b]]"),
-                   Expr::ClassBytes(bclass(&[(b'a', b'b')])));
-        assert_eq!(pb(r"(?-u)[[c][a-b]]"),
-                   Expr::ClassBytes(bclass(&[(b'a', b'c')])));
-    }
-
-    #[test]
-    fn class_nested_class_union_casei() {
-        assert_eq!(p(r"(?i)[c[a-b]]"),
-                   Expr::Class(class(&[('a', 'c')]).case_fold()));
-        assert_eq!(p(r"(?i)[[a-b]]"),
-                   Expr::Class(class(&[('a', 'b')]).case_fold()));
-        assert_eq!(p(r"(?i)[[c][a-b]]"),
-                   Expr::Class(class(&[('a', 'c')]).case_fold()));
-
-        assert_eq!(pb(r"(?i-u)[[\d]]"),
-                   Expr::ClassBytes(asciid_bytes().case_fold()));
+    /// Enable verbose mode in the regular expression.
+    ///
+    /// When enabled, verbose mode permits insignificant whitespace in many
+    /// places in the regular expression, as well as comments. Comments are
+    /// started using `#` and continue until the end of the line.
+    ///
+    /// By default, this is disabled. It may be selectively enabled in the
+    /// regular expression by using the `x` flag regardless of this setting.
+    pub fn ignore_whitespace(&mut self, yes: bool) -> &mut ParserBuilder {
+        self.ast.ignore_whitespace(yes);
+        self
     }
 
-    #[test]
-    fn class_nested_class_negate() {
-        assert_eq!(p(r"[^[\d]]"), Expr::Class(class(PERLD).negate()));
-        assert_eq!(p(r"[[^\d]]"), Expr::Class(class(PERLD).negate()));
-        assert_eq!(p(r"[^[^\d]]"), Expr::Class(class(PERLD)));
-        assert_eq!(p(r"[^[\w]]"), Expr::Class(class(PERLW).negate()));
-        assert_eq!(p(r"[[^\w]]"), Expr::Class(class(PERLW).negate()));
-        assert_eq!(p(r"[^[^\w]]"), Expr::Class(class(PERLW)));
-        assert_eq!(p(r"[a-b[^c]]"),
-                   Expr::Class(class(&[('\u{0}', 'b'), ('d', '\u{10FFFF}')])));
-
-        assert_eq!(pb(r"(?-u)[^[\d]]"),
-                   Expr::ClassBytes(asciid_bytes().negate()));
-        assert_eq!(pb(r"(?-u)[[^\d]]"),
-                   Expr::ClassBytes(asciid_bytes().negate()));
-        assert_eq!(pb(r"(?-u)[^[^\d]]"),
-                   Expr::ClassBytes(asciid_bytes()));
-        assert_eq!(pb(r"(?-u)[^[\w]]"),
-                   Expr::ClassBytes(asciiw_bytes().negate()));
-        assert_eq!(pb(r"(?-u)[[^\w]]"),
-                   Expr::ClassBytes(asciiw_bytes().negate()));
-        assert_eq!(pb(r"(?-u)[^[^\w]]"),
-                   Expr::ClassBytes(asciiw_bytes()));
-        assert_eq!(pb(r"(?-u)[a-b[^c]]"),
-                   Expr::ClassBytes(bclass(&[(b'\x00', b'b'), (b'd', b'\xFF')])))
-    }
-
-    #[test]
-    fn class_nested_class_negate_casei() {
-        assert_eq!(p(r"(?i)[^[\d]]"),
-                   Expr::Class(class(PERLD).case_fold().negate()));
-        assert_eq!(p(r"(?i)[[^\d]]"),
-                   Expr::Class(class(PERLD).case_fold().negate()));
-        assert_eq!(p(r"(?i)[^[^\d]]"),
-                   Expr::Class(class(PERLD).case_fold()));
-        assert_eq!(p(r"(?i)[^[\w]]"),
-                   Expr::Class(class(PERLW).case_fold().negate()));
-        assert_eq!(p(r"(?i)[[^\w]]"),
-                   Expr::Class(class(PERLW).case_fold().negate()));
-        assert_eq!(p(r"(?i)[^[^\w]]"),
-                   Expr::Class(class(PERLW).case_fold()));
-        let mut cls = CharClass::empty().negate();
-        cls.remove('c');
-        cls.remove('C');
-        assert_eq!(p(r"(?i)[a-b[^c]]"), Expr::Class(cls));
-
-        assert_eq!(pb(r"(?i-u)[^[\d]]"),
-                   Expr::ClassBytes(asciid_bytes().case_fold().negate()));
-        assert_eq!(pb(r"(?i-u)[[^\d]]"),
-                   Expr::ClassBytes(asciid_bytes().case_fold().negate()));
-        assert_eq!(pb(r"(?i-u)[^[^\d]]"),
-                   Expr::ClassBytes(asciid_bytes().case_fold()));
-        assert_eq!(pb(r"(?i-u)[^[\w]]"),
-                   Expr::ClassBytes(asciiw_bytes().case_fold().negate()));
-        assert_eq!(pb(r"(?i-u)[[^\w]]"),
-                   Expr::ClassBytes(asciiw_bytes().case_fold().negate()));
-        assert_eq!(pb(r"(?i-u)[^[^\w]]"),
-                   Expr::ClassBytes(asciiw_bytes().case_fold()));
-        let mut bytes = ByteClass::new(vec![]).negate();
-        bytes.remove(b'c');
-        bytes.remove(b'C');
-        assert_eq!(pb(r"(?i-u)[a-b[^c]]"), Expr::ClassBytes(bytes));
-    }
-
-    #[test]
-    fn class_nested_class_brackets_hyphen() {
-        // This is confusing, but `]` is allowed if first character within a class
-        // It parses as a nested class with the `]` and `-` characters
-        assert_eq!(p(r"[[]-]]"), Expr::Class(class(&[('-', '-'), (']', ']')])));
-        assert_eq!(p(r"[[\[]]"), Expr::Class(class(&[('[', '[')])));
-        assert_eq!(p(r"[[\]]]"), Expr::Class(class(&[(']', ']')])));
-    }
-
-    #[test]
-    fn class_nested_class_deep_nesting() {
-        // Makes sure that implementation can handle deep nesting.
-        // With recursive parsing, this regex would blow the stack size.
-        use std::iter::repeat;
-        let nesting = 10_000;
-        let open: String = repeat("[").take(nesting).collect();
-        let close: String = repeat("]").take(nesting).collect();
-        let s  = format!("{}a{}", open, close);
-        assert_eq!(p(&s), Expr::Class(class(&[('a', 'a')])));
-    }
-
-    #[test]
-    fn class_intersection_ranges() {
-        assert_eq!(p(r"[abc&&b-c]"), Expr::Class(class(&[('b', 'c')])));
-        assert_eq!(p(r"[abc&&[b-c]]"), Expr::Class(class(&[('b', 'c')])));
-        assert_eq!(p(r"[[abc]&&[b-c]]"), Expr::Class(class(&[('b', 'c')])));
-        assert_eq!(p(r"[a-z&&b-y&&c-x]"), Expr::Class(class(&[('c', 'x')])));
-        assert_eq!(p(r"[c-da-b&&a-d]"), Expr::Class(class(&[('a', 'd')])));
-        assert_eq!(p(r"[a-d&&c-da-b]"), Expr::Class(class(&[('a', 'd')])));
-
-        assert_eq!(pb(r"(?-u)[abc&&b-c]"),
-                   Expr::ClassBytes(bclass(&[(b'b', b'c')])));
-        assert_eq!(pb(r"(?-u)[abc&&[b-c]]"),
-                   Expr::ClassBytes(bclass(&[(b'b', b'c')])));
-        assert_eq!(pb(r"(?-u)[[abc]&&[b-c]]"),
-                   Expr::ClassBytes(bclass(&[(b'b', b'c')])));
-        assert_eq!(pb(r"(?-u)[a-z&&b-y&&c-x]"),
-                   Expr::ClassBytes(bclass(&[(b'c', b'x')])));
-        assert_eq!(pb(r"(?-u)[c-da-b&&a-d]"),
-                   Expr::ClassBytes(bclass(&[(b'a', b'd')])));
+    /// Enable or disable the case insensitive flag by default.
+    ///
+    /// By default this is disabled. It may alternatively be selectively
+    /// enabled in the regular expression itself via the `i` flag.
+    pub fn case_insensitive(&mut self, yes: bool) -> &mut ParserBuilder {
+        self.hir.case_insensitive(yes);
+        self
     }
 
-    #[test]
-    fn class_intersection_ranges_casei() {
-        assert_eq!(p(r"(?i)[abc&&b-c]"),
-                   Expr::Class(class(&[('b', 'c')]).case_fold()));
-        assert_eq!(p(r"(?i)[abc&&[b-c]]"),
-                   Expr::Class(class(&[('b', 'c')]).case_fold()));
-        assert_eq!(p(r"(?i)[[abc]&&[b-c]]"),
-                   Expr::Class(class(&[('b', 'c')]).case_fold()));
-        assert_eq!(p(r"(?i)[a-z&&b-y&&c-x]"),
-                   Expr::Class(class(&[('c', 'x')]).case_fold()));
-        assert_eq!(p(r"(?i)[c-da-b&&a-d]"),
-                   Expr::Class(class(&[('a', 'd')]).case_fold()));
-
-        assert_eq!(pb(r"(?i-u)[abc&&b-c]"),
-                   Expr::ClassBytes(bclass(&[(b'b', b'c')]).case_fold()));
-        assert_eq!(pb(r"(?i-u)[abc&&[b-c]]"),
-                   Expr::ClassBytes(bclass(&[(b'b', b'c')]).case_fold()));
-        assert_eq!(pb(r"(?i-u)[[abc]&&[b-c]]"),
-                   Expr::ClassBytes(bclass(&[(b'b', b'c')]).case_fold()));
-        assert_eq!(pb(r"(?i-u)[a-z&&b-y&&c-x]"),
-                   Expr::ClassBytes(bclass(&[(b'c', b'x')]).case_fold()));
-        assert_eq!(pb(r"(?i-u)[c-da-b&&a-d]"),
-                   Expr::ClassBytes(bclass(&[(b'a', b'd')]).case_fold()));
-    }
-
-    #[test]
-    fn class_intersection_classes() {
-        assert_eq!(p(r"[\w&&\d]"), Expr::Class(class(PERLD)));
-        assert_eq!(p(r"[\w&&[[:ascii:]]]"), Expr::Class(asciiw()));
-        assert_eq!(p(r"[\x00-\xFF&&\pZ]"),
-                   Expr::Class(class(&[('\u{20}', '\u{20}'), ('\u{a0}', '\u{a0}')])));
-
-        assert_eq!(pb(r"(?-u)[\w&&\d]"), Expr::ClassBytes(asciid_bytes()));
-        assert_eq!(pb(r"(?-u)[\w&&[[:ascii:]]]"), Expr::ClassBytes(asciiw_bytes()));
-    }
-
-    #[test]
-    fn class_intersection_classes_casei() {
-        assert_eq!(p(r"(?i)[\w&&\d]"), Expr::Class(class(PERLD).case_fold()));
-        assert_eq!(p(r"(?i)[\w&&[[:ascii:]]]"), Expr::Class(asciiw().case_fold()));
-        assert_eq!(p(r"(?i)[\x00-\xFF&&\pZ]"),
-                   Expr::Class(class(&[('\u{20}', '\u{20}'), ('\u{a0}', '\u{a0}')])));
-
-        assert_eq!(pb(r"(?i-u)[\w&&\d]"), Expr::ClassBytes(asciid_bytes().case_fold()));
-        assert_eq!(pb(r"(?i-u)[\w&&[[:ascii:]]]"), Expr::ClassBytes(asciiw_bytes().case_fold()));
-    }
-
-    #[test]
-    fn class_intersection_negate() {
-        assert_eq!(p(r"[^\w&&\d]"), Expr::Class(class(PERLD).negate()));
-        assert_eq!(p(r"[^[\w&&\d]]"), Expr::Class(class(PERLD).negate()));
-        assert_eq!(p(r"[^[^\w&&\d]]"), Expr::Class(class(PERLD)));
-        assert_eq!(p(r"[\w&&[^\d]]"),
-                   Expr::Class(class(PERLW).intersection(&class(PERLD).negate())));
-        assert_eq!(p(r"[[^\w]&&[^\d]]"),
-                   Expr::Class(class(PERLW).negate()));
-
-        assert_eq!(pb(r"(?-u)[^\w&&\d]"),
-                   Expr::ClassBytes(asciid_bytes().negate()));
-        assert_eq!(pb(r"(?-u)[^[\w&&\d]]"),
-                   Expr::ClassBytes(asciid_bytes().negate()));
-        assert_eq!(pb(r"(?-u)[^[^\w&&\d]]"),
-                   Expr::ClassBytes(asciid_bytes()));
-        assert_eq!(pb(r"(?-u)[\w&&[^\d]]"),
-                   Expr::ClassBytes(asciiw().intersection(&asciid().negate()).to_byte_class()));
-        assert_eq!(pb(r"(?-u)[[^\w]&&[^\d]]"),
-                   Expr::ClassBytes(asciiw_bytes().negate()));
-    }
-
-    #[test]
-    fn class_intersection_negate_casei() {
-        assert_eq!(p(r"(?i)[^\w&&a-z]"),
-                   Expr::Class(class(&[('a', 'z')]).case_fold().negate()));
-        assert_eq!(p(r"(?i)[^[\w&&a-z]]"),
-                   Expr::Class(class(&[('a', 'z')]).case_fold().negate()));
-        assert_eq!(p(r"(?i)[^[^\w&&a-z]]"),
-                   Expr::Class(class(&[('a', 'z')]).case_fold()));
-        assert_eq!(p(r"(?i)[\w&&[^a-z]]"),
-                   Expr::Class(
-                       class(PERLW).intersection(&class(&[('a', 'z')])
-                       .case_fold().negate())));
-        assert_eq!(p(r"(?i)[[^\w]&&[^a-z]]"),
-                   Expr::Class(class(PERLW).negate()));
-
-        assert_eq!(pb(r"(?i-u)[^\w&&a-z]"),
-                   Expr::ClassBytes(bclass(&[(b'a', b'z')]).case_fold().negate()));
-        assert_eq!(pb(r"(?i-u)[^[\w&&a-z]]"),
-                   Expr::ClassBytes(bclass(&[(b'a', b'z')]).case_fold().negate()));
-        assert_eq!(pb(r"(?i-u)[^[^\w&&a-z]]"),
-                   Expr::ClassBytes(bclass(&[(b'a', b'z')]).case_fold()));
-        assert_eq!(pb(r"(?i-u)[\w&&[^a-z]]"),
-                   Expr::ClassBytes(bclass(&[(b'0', b'9'), (b'_', b'_')])));
-        assert_eq!(pb(r"(?i-u)[[^\w]&&[^a-z]]"),
-                   Expr::ClassBytes(asciiw_bytes().negate()));
-    }
-
-    #[test]
-    fn class_intersection_caret() {
-        // In `[a^]`, `^` does not need to be escaped, so it makes sense that
-        // `^` is also allowed to be unescaped after `&&`.
-        assert_eq!(p(r"[\^&&^]"), Expr::Class(class(&[('^', '^')])));
-    }
-
-    #[test]
-    fn class_intersection_brackets_hyphen() {
-        // `]` needs to be escaped after `&&` because it is not at the start of the class.
-        assert_eq!(p(r"[]&&\]]"), Expr::Class(class(&[(']', ']')])));
-
-        assert_eq!(p(r"[-&&-]"), Expr::Class(class(&[('-', '-')])));
-    }
-
-    #[test]
-    fn class_intersection_ampersand() {
-        // Unescaped `&` after `&&`
-        assert_eq!(p(r"[\&&&&]"), Expr::Class(class(&[('&', '&')])));
-        assert_eq!(p(r"[\&&&\&]"), Expr::Class(class(&[('&', '&')])));
+    /// Enable or disable the multi-line matching flag by default.
+    ///
+    /// By default this is disabled. It may alternatively be selectively
+    /// enabled in the regular expression itself via the `m` flag.
+    pub fn multi_line(&mut self, yes: bool) -> &mut ParserBuilder {
+        self.hir.multi_line(yes);
+        self
     }
 
-    #[test]
-    fn class_intersection_precedence() {
-        assert_eq!(p(r"[a-w&&[^c-g]z]"), Expr::Class(class(&[('a', 'b'), ('h', 'w')])));
-    }
-
-    #[test]
-    fn class_special_escaped_set_chars() {
-        // These tests ensure that some special characters require escaping
-        // for use in character classes. The intention is to use these
-        // characters to implement sets as described in UTS#18 RL1.3. Once
-        // that's done, these tests should be removed and replaced with others.
-        assert_eq!(p(r"[\[]"), Expr::Class(class(&[('[', '[')])));
-        assert_eq!(p(r"[&]"), Expr::Class(class(&[('&', '&')])));
-        assert_eq!(p(r"[\&]"), Expr::Class(class(&[('&', '&')])));
-        assert_eq!(p(r"[\&\&]"), Expr::Class(class(&[('&', '&')])));
-        assert_eq!(p(r"[\x00-&]"), Expr::Class(class(&[('\u{0}', '&')])));
-        assert_eq!(p(r"[&-\xFF]"), Expr::Class(class(&[('&', '\u{FF}')])));
-
-        assert_eq!(p(r"[~]"), Expr::Class(class(&[('~', '~')])));
-        assert_eq!(p(r"[\~]"), Expr::Class(class(&[('~', '~')])));
-        assert_eq!(p(r"[\~\~]"), Expr::Class(class(&[('~', '~')])));
-        assert_eq!(p(r"[\x00-~]"), Expr::Class(class(&[('\u{0}', '~')])));
-        assert_eq!(p(r"[~-\xFF]"), Expr::Class(class(&[('~', '\u{FF}')])));
-
-        assert_eq!(p(r"[+-\-]"), Expr::Class(class(&[('+', '-')])));
-        assert_eq!(p(r"[a-a\--\xFF]"), Expr::Class(class(&[
-            ('-', '\u{FF}'),
-        ])));
-    }
-
-    #[test]
-    fn class_overlapping() {
-        assert_eq!(p("[a-fd-h]"), Expr::Class(class(&[('a', 'h')])));
-        assert_eq!(p("[a-fg-m]"), Expr::Class(class(&[('a', 'm')])));
-
-        assert_eq!(pb("(?-u)[a-fd-h]"),
-                   Expr::ClassBytes(bclass(&[(b'a', b'h')])));
-        assert_eq!(pb("(?-u)[a-fg-m]"),
-                   Expr::ClassBytes(bclass(&[(b'a', b'm')])));
-    }
-
-    #[test]
-    fn ascii_classes() {
-        assert_eq!(p("[:blank:]"), Expr::Class(class(&[
-            (':', ':'), ('a', 'b'), ('k', 'l'), ('n', 'n'),
-        ])));
-        assert_eq!(p("[[:upper:]]"), Expr::Class(class(UPPER)));
-
-        assert_eq!(pb("(?-u)[[:upper:]]"),
-                   Expr::ClassBytes(class(UPPER).to_byte_class()));
-    }
-
-    #[test]
-    fn ascii_classes_not() {
-        assert_eq!(p("[:abc:]"),
-                   Expr::Class(class(&[(':', ':'), ('a', 'c')])));
-        assert_eq!(pb("(?-u)[:abc:]"),
-                   Expr::ClassBytes(bclass(&[(b':', b':'), (b'a', b'c')])));
-    }
-
-    #[test]
-    fn ascii_classes_multiple() {
-        assert_eq!(p("[[:lower:][:upper:]]"),
-                   Expr::Class(classes(&[UPPER, LOWER])));
-
-        assert_eq!(pb("(?-u)[[:lower:][:upper:]]"),
-                   Expr::ClassBytes(classes(&[UPPER, LOWER]).to_byte_class()));
-    }
-
-    #[test]
-    fn ascii_classes_negate() {
-        assert_eq!(p("[[:^upper:]]"), Expr::Class(class(UPPER).negate()));
-        assert_eq!(p("[^[:^upper:]]"), Expr::Class(class(UPPER)));
-
-        assert_eq!(pb("(?-u)[[:^upper:]]"),
-                   Expr::ClassBytes(class(UPPER).to_byte_class().negate()));
-        assert_eq!(pb("(?-u)[^[:^upper:]]"),
-                   Expr::ClassBytes(class(UPPER).to_byte_class()));
-    }
-
-    #[test]
-    fn ascii_classes_negate_multiple() {
-        let (nlower, nword) = (class(LOWER).negate(), class(WORD).negate());
-        let cls = CharClass::empty().merge(nlower).merge(nword);
-        assert_eq!(p("[[:^lower:][:^word:]]"), Expr::Class(cls.clone()));
-        assert_eq!(p("[^[:^lower:][:^word:]]"), Expr::Class(cls.negate()));
-    }
-
-    #[test]
-    fn ascii_classes_case_fold() {
-        assert_eq!(p("(?i)[[:upper:]]"),
-                   Expr::Class(class(UPPER).case_fold()));
-
-        assert_eq!(pb("(?i-u)[[:upper:]]"),
-                   Expr::ClassBytes(class(UPPER).to_byte_class().case_fold()));
-    }
-
-    #[test]
-    fn ascii_classes_negate_case_fold() {
-        assert_eq!(p("(?i)[[:^upper:]]"),
-                   Expr::Class(class(UPPER).case_fold().negate()));
-        assert_eq!(p("(?i)[^[:^upper:]]"),
-                   Expr::Class(class(UPPER).case_fold()));
-
-        assert_eq!(pb("(?i-u)[[:^upper:]]"),
-                   Expr::ClassBytes(
-                       class(UPPER).to_byte_class().case_fold().negate()));
-        assert_eq!(pb("(?i-u)[^[:^upper:]]"),
-                   Expr::ClassBytes(class(UPPER).to_byte_class().case_fold()));
+    /// Enable or disable the "dot matches any character" flag by default.
+    ///
+    /// By default this is disabled. It may alternatively be selectively
+    /// enabled in the regular expression itself via the `s` flag.
+    pub fn dot_matches_new_line(
+        &mut self,
+        yes: bool,
+    ) -> &mut ParserBuilder {
+        self.hir.dot_matches_new_line(yes);
+        self
     }
 
-    #[test]
-    fn single_class_negate_case_fold() {
-        assert_eq!(p("(?i)[^x]"),
-                   Expr::Class(class(&[('x', 'x')]).case_fold().negate()));
-
-        assert_eq!(pb("(?i-u)[^x]"),
-                   Expr::ClassBytes(
-                       class(&[('x', 'x')])
-                       .to_byte_class().case_fold().negate()));
-    }
-
-    #[test]
-    fn ignore_space_empty() {
-        assert_eq!(p("(?x) "), Expr::Empty);
-    }
-
-    #[test]
-    fn ignore_space_literal() {
-        assert_eq!(p("(?x) a b c"), Expr::Concat(vec![
-            lit('a'), lit('b'), lit('c'),
-        ]));
-    }
-
-    #[test]
-    fn ignore_space_literal_off() {
-        assert_eq!(p("(?x) a b c(?-x) a"), Expr::Concat(vec![
-            lit('a'), lit('b'), lit('c'), lit(' '), lit('a'),
-        ]));
-    }
-
-    #[test]
-    fn ignore_space_class() {
-        assert_eq!(p("(?x)[a
-        - z
-]"), Expr::Class(class(&[('a', 'z')])));
-        assert_eq!(p("(?x)[  ^   a
-        - z
-]"), Expr::Class(class(&[('a', 'z')]).negate()));
-    }
-
-    #[test]
-    fn ignore_space_escape_octal() {
-        assert_eq!(p(r"(?x)\12 3"), Expr::Concat(vec![
-            lit('\n'),
-            lit('3'),
-        ]));
-    }
-
-    #[test]
-    fn ignore_space_escape_hex() {
-        assert_eq!(p(r"(?x)\x { 53 }"), lit('S'));
-        assert_eq!(p(r"(?x)\x # comment
-{ # comment
-    53 # comment
-} # comment"), lit('S'));
-    }
-
-    #[test]
-    fn ignore_space_escape_hex2() {
-        assert_eq!(p(r"(?x)\x 53"), lit('S'));
-        assert_eq!(p(r"(?x)\x # comment
-        53 # comment"), lit('S'));
-    }
-
-    #[test]
-    fn ignore_space_escape_unicode_name() {
-        assert_eq!(p(r"(?x)\p # comment
-{ # comment
-    Yi # comment
-} # comment"), Expr::Class(class(YI)));
-    }
-
-    #[test]
-    fn ignore_space_repeat_counted() {
-        assert_eq!(p("(?x)a # comment
-{ # comment
-    5 # comment
-    , # comment
-    10 # comment
-}"), Expr::Repeat {
-            e: b(lit('a')),
-            r: Repeater::Range { min: 5, max: Some(10) },
-            greedy: true,
-        });
-    }
-
-    #[test]
-    fn ignore_space_comments() {
-        assert_eq!(p(r"(?x)(?P<foo>
-    a # comment 1
-)(?P<bar>
-    z # comment 2
-)"), Expr::Concat(vec![
-        Expr::Group {
-            e: Box::new(lit('a')),
-            i: Some(1),
-            name: Some("foo".into()),
-        },
-        Expr::Group {
-            e: Box::new(lit('z')),
-            i: Some(2),
-            name: Some("bar".into()),
-        },
-    ]));
-    }
-
-    #[test]
-    fn ignore_space_comments_re_enable() {
-        assert_eq!(p(r"(?x)a # hi
-(?-x:#) # sweet"), Expr::Concat(vec![
-            lit('a'),
-            Expr::Group {
-                e: Box::new(lit('#')),
-                i: None,
-                name: None,
-            },
-        ]));
+    /// Enable or disable the "swap greed" flag by default.
+    ///
+    /// By default this is disabled. It may alternatively be selectively
+    /// enabled in the regular expression itself via the `U` flag.
+    pub fn swap_greed(&mut self, yes: bool) -> &mut ParserBuilder {
+        self.hir.swap_greed(yes);
+        self
     }
 
-    #[test]
-    fn ignore_space_escape_punctuation() {
-        assert_eq!(p(r"(?x)\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#"), c(&[
-            lit('\\'), lit('.'), lit('+'), lit('*'), lit('?'),
-            lit('('), lit(')'), lit('|'), lit('['), lit(']'),
-            lit('{'), lit('}'), lit('^'), lit('$'), lit('#'),
-        ]));
-    }
-
-    #[test]
-    fn ignore_space_escape_hash() {
-        assert_eq!(p(r"(?x)a\# # hi there"), Expr::Concat(vec![
-            lit('a'),
-            lit('#'),
-        ]));
-    }
-
-    #[test]
-    fn ignore_space_escape_space() {
-        assert_eq!(p(r"(?x)a\  # hi there"), Expr::Concat(vec![
-            lit('a'),
-            lit(' '),
-        ]));
-    }
-
-    // Test every single possible error case.
-
-    macro_rules! test_err {
-        ($re:expr, $pos:expr, $kind:expr) => {
-            test_err!($re, $pos, $kind, Flags::default());
-        };
-        ($re:expr, $pos:expr, $kind:expr, $flags:expr) => {{
-            let err = Parser::parse($re, $flags).unwrap_err();
-            assert_eq!($pos, err.pos);
-            assert_eq!($kind, err.kind);
-            assert!($re.contains(&err.surround));
-        }}
-    }
-
-    #[test]
-    fn invalid_utf8_not_allowed() {
-        // let flags = Flags { unicode: false, .. Flags::default() };
-        test_err!(r"(?-u)\xFF", 9, ErrorKind::InvalidUtf8);
-        test_err!(r"(?-u).", 5, ErrorKind::InvalidUtf8);
-        test_err!(r"(?-u)(?s).", 9, ErrorKind::InvalidUtf8);
-        test_err!(r"(?-u)[\x00-\x80]", 15, ErrorKind::InvalidUtf8);
-        test_err!(r"(?-u)\222", 9, ErrorKind::InvalidUtf8);
-        test_err!(r"(?-u)\x{0080}", 13, ErrorKind::InvalidUtf8);
+    /// Enable or disable the Unicode flag (`u`) by default.
+    ///
+    /// By default this is **enabled**. It may alternatively be selectively
+    /// disabled in the regular expression itself via the `u` flag.
+    ///
+    /// Note that unless `allow_invalid_utf8` is enabled (it's disabled by
+    /// default), a regular expression will fail to parse if Unicode mode is
+    /// disabled and a sub-expression could possibly match invalid UTF-8.
+    pub fn unicode(&mut self, yes: bool) -> &mut ParserBuilder {
+        self.hir.unicode(yes);
+        self
     }
-
-    #[test]
-    fn unicode_char_not_allowed() {
-        let flags = Flags { allow_bytes: true, .. Flags::default() };
-        test_err!("☃(?-u:☃)", 7, ErrorKind::UnicodeNotAllowed, flags);
-    }
-
-    #[test]
-    fn unicode_class_not_allowed() {
-        let flags = Flags { allow_bytes: true, .. Flags::default() };
-        test_err!(r"☃(?-u:\pL)", 9, ErrorKind::UnicodeNotAllowed, flags);
-    }
-
-    #[test]
-    fn unicode_class_literal_not_allowed() {
-        let flags = Flags { allow_bytes: true, .. Flags::default() };
-        test_err!(r"(?-u)[☃]", 6, ErrorKind::UnicodeNotAllowed, flags);
-        test_err!(r"(?-u)[☃-☃]", 6, ErrorKind::UnicodeNotAllowed, flags);
-    }
-
-    #[test]
-    fn unicode_hex_not_allowed() {
-        let flags = Flags { allow_bytes: true, .. Flags::default() };
-        test_err!(r"(?-u)\x{FFFF}", 13, ErrorKind::UnicodeNotAllowed, flags);
-        test_err!(r"(?-u)\x{100}", 12, ErrorKind::UnicodeNotAllowed, flags);
-    }
-
-    #[test]
-    fn unicode_octal_not_allowed() {
-        let flags = Flags { allow_bytes: true, .. Flags::default() };
-        test_err!(r"(?-u)\400", 9, ErrorKind::UnicodeNotAllowed, flags);
-    }
-
-    #[test]
-    fn error_repeat_no_expr_simple() {
-        test_err!("(*", 1, ErrorKind::RepeaterExpectsExpr);
-    }
-
-    #[test]
-    fn error_repeat_no_expr_counted() {
-        test_err!("({5}", 1, ErrorKind::RepeaterExpectsExpr);
-    }
-
-    #[test]
-    fn error_repeat_beginning_counted() {
-        test_err!("{5}", 0, ErrorKind::RepeaterExpectsExpr);
-    }
-
-    #[test]
-    fn error_repeat_illegal_exprs_simple() {
-        test_err!("a**", 2, ErrorKind::RepeaterUnexpectedExpr(Expr::Repeat {
-            e: b(lit('a')),
-            r: Repeater::ZeroOrMore,
-            greedy: true,
-        }));
-        test_err!("a|*", 2,
-            ErrorKind::RepeaterUnexpectedExpr(Expr::Alternate(vec![lit('a')]))
-        );
-    }
+}
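As a quick, hedged illustration of the builder options documented above (a minimal sketch, assuming the crate is consumed under its vendored name `regex_syntax` and that `ParserBuilder::build` returns a ready-to-use `Parser`, as `Parser::new` further down suggests):

    extern crate regex_syntax;

    use regex_syntax::ParserBuilder;

    fn main() {
        // Octal escapes are off by default; turning them on lets `\141`
        // parse as the codepoint U+0061 ('a'), matching the example in the
        // `octal` documentation above.
        let mut octal = ParserBuilder::new().octal(true).build();
        assert!(octal.parse(r"\141").is_ok());

        // Allowing invalid UTF-8 permits byte-oriented classes such as
        // `(?-u)[\x00-\xFF]`, which the default configuration rejects.
        let mut bytes = ParserBuilder::new().allow_invalid_utf8(true).build();
        assert!(bytes.parse(r"(?-u)[\x00-\xFF]").is_ok());
    }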
 
-    #[test]
-    fn error_repeat_illegal_exprs_counted() {
-        test_err!("a*{5}", 2, ErrorKind::RepeaterUnexpectedExpr(Expr::Repeat {
-            e: b(lit('a')),
-            r: Repeater::ZeroOrMore,
-            greedy: true,
-        }));
-        test_err!("a|{5}", 2,
-            ErrorKind::RepeaterUnexpectedExpr(Expr::Alternate(vec![lit('a')]))
-        );
-    }
-
-    #[test]
-    fn error_repeat_empty_number() {
-        test_err!("a{}", 2, ErrorKind::MissingBase10);
-    }
-
-    #[test]
-    fn error_repeat_eof() {
-        test_err!("a{5", 3, ErrorKind::UnclosedRepeat);
-    }
-
-    #[test]
-    fn error_repeat_empty_number_eof() {
-        test_err!("a{xyz", 5, ErrorKind::InvalidBase10("xyz".into()));
-        test_err!("a{12,xyz", 8, ErrorKind::InvalidBase10("xyz".into()));
-    }
-
-    #[test]
-    fn error_repeat_invalid_number() {
-        test_err!("a{9999999999}", 12,
-                  ErrorKind::InvalidBase10("9999999999".into()));
-        test_err!("a{1,9999999999}", 14,
-                  ErrorKind::InvalidBase10("9999999999".into()));
-    }
-
-    #[test]
-    fn error_repeat_invalid_number_extra() {
-        test_err!("a{12x}", 5, ErrorKind::InvalidBase10("12x".into()));
-        test_err!("a{1,12x}", 7, ErrorKind::InvalidBase10("12x".into()));
-    }
-
-    #[test]
-    fn error_repeat_invalid_range() {
-        test_err!("a{2,1}", 5,
-                  ErrorKind::InvalidRepeatRange { min: 2, max: 1 });
-    }
-
-    #[test]
-    fn error_alternate_empty() {
-        test_err!("|a", 0, ErrorKind::EmptyAlternate);
-    }
-
-    #[test]
-    fn error_alternate_empty_with_group() {
-        test_err!("(|a)", 1, ErrorKind::EmptyAlternate);
-    }
+/// A convenience parser for regular expressions.
+///
+/// This parser takes as input a regular expression pattern string (the
+/// "concrete syntax") and returns a high-level intermediate representation
+/// (the HIR) suitable for most types of analysis. In particular, this parser
+/// hides the intermediate state of producing an AST (the "abstract syntax").
+/// The AST is itself far more complex than the HIR, so this parser serves as a
+/// convenience for never having to deal with it at all.
+///
+/// If callers have more fine-grained use cases that need an AST, then please
+/// see the [`ast::parse`](ast/parse/index.html) module.
+///
+/// A `Parser` can be configured in more detail via a
+/// [`ParserBuilder`](struct.ParserBuilder.html).
+#[derive(Clone, Debug)]
+pub struct Parser {
+    ast: ast::parse::Parser,
+    hir: hir::translate::Translator,
+}
 
-    #[test]
-    fn error_alternate_empty_with_alternate() {
-        test_err!("a||", 2, ErrorKind::EmptyAlternate);
-    }
-
-    #[test]
-    fn error_close_paren_unopened_empty() {
-        test_err!(")", 0, ErrorKind::UnopenedParen);
-    }
-
-    #[test]
-    fn error_close_paren_unopened() {
-        test_err!("ab)", 2, ErrorKind::UnopenedParen);
-    }
-
-    #[test]
-    fn error_close_paren_unopened_with_alt() {
-        test_err!("a|b)", 3, ErrorKind::UnopenedParen);
-    }
-
-    #[test]
-    fn error_close_paren_unclosed_with_alt() {
-        test_err!("(a|b", 0, ErrorKind::UnclosedParen);
-    }
-
-    #[test]
-    fn error_close_paren_empty_alt() {
-        test_err!("(a|)", 3, ErrorKind::EmptyAlternate);
-    }
-
-    #[test]
-    fn error_close_paren_empty_group() {
-        test_err!("()", 1, ErrorKind::EmptyGroup);
-    }
-
-    #[test]
-    fn error_close_paren_empty_group_with_name() {
-        test_err!("(?P<foo>)", 8, ErrorKind::EmptyGroup);
-    }
-
-    #[test]
-    fn error_finish_concat_unclosed() {
-        test_err!("ab(xy", 2, ErrorKind::UnclosedParen);
-    }
-
-    #[test]
-    fn error_finish_concat_empty_alt() {
-        test_err!("a|", 2, ErrorKind::EmptyAlternate);
-    }
-
-    #[test]
-    fn error_group_name_invalid() {
-        test_err!("(?P<a#>x)", 6, ErrorKind::InvalidCaptureName("a#".into()));
-    }
-
-    #[test]
-    fn error_group_name_invalid_leading() {
-        test_err!("(?P<1a>a)", 6, ErrorKind::InvalidCaptureName("1a".into()));
+impl Parser {
+    /// Create a new parser with a default configuration.
+    ///
+    /// The parser can be run with the `parse` method, which returns a
+    /// high-level intermediate representation of the given regular
+    /// expression.
+    ///
+    /// To set configuration options on the parser, use
+    /// [`ParserBuilder`](struct.ParserBuilder.html).
+    pub fn new() -> Parser {
+        ParserBuilder::new().build()
     }
 
-    #[test]
-    fn error_group_name_unexpected_eof() {
-        test_err!("(?P<a", 5, ErrorKind::UnclosedCaptureName("a".into()));
-    }
-
-    #[test]
-    fn error_group_name_empty() {
-        test_err!("(?P<>a)", 4, ErrorKind::EmptyCaptureName);
-    }
-
-    #[test]
-    fn error_group_opts_unrecognized_flag() {
-        test_err!("(?z:a)", 2, ErrorKind::UnrecognizedFlag('z'));
-    }
-
-    #[test]
-    fn error_group_opts_unexpected_eof() {
-        test_err!("(?i", 3, ErrorKind::UnexpectedFlagEof);
-    }
-
-    #[test]
-    fn error_group_opts_double_negation() {
-        test_err!("(?-i-s:a)", 4, ErrorKind::DoubleFlagNegation);
-    }
-
-    #[test]
-    fn error_group_opts_empty_negation() {
-        test_err!("(?i-:a)", 4, ErrorKind::EmptyFlagNegation);
-    }
-
-    #[test]
-    fn error_group_opts_empty() {
-        test_err!("(?)", 2, ErrorKind::EmptyFlagNegation);
-    }
-
-    #[test]
-    fn error_escape_unexpected_eof() {
-        test_err!(r"\", 1, ErrorKind::UnexpectedEscapeEof);
-    }
-
-    #[test]
-    fn error_escape_unrecognized() {
-        test_err!(r"\m", 1, ErrorKind::UnrecognizedEscape('m'));
-    }
-
-    #[test]
-    fn error_escape_hex2_eof0() {
-        test_err!(r"\x", 2, ErrorKind::UnexpectedTwoDigitHexEof);
-    }
-
-    #[test]
-    fn error_escape_hex2_eof1() {
-        test_err!(r"\xA", 3, ErrorKind::UnexpectedTwoDigitHexEof);
-    }
-
-    #[test]
-    fn error_escape_hex2_invalid() {
-        test_err!(r"\xAG", 4, ErrorKind::InvalidBase16("AG".into()));
-    }
-
-    #[test]
-    fn error_escape_hex_eof0() {
-        test_err!(r"\x{", 3, ErrorKind::InvalidBase16("".into()));
-    }
-
-    #[test]
-    fn error_escape_hex_eof1() {
-        test_err!(r"\x{A", 4, ErrorKind::UnclosedHex);
-    }
-
-    #[test]
-    fn error_escape_hex_invalid() {
-        test_err!(r"\x{AG}", 5, ErrorKind::InvalidBase16("AG".into()));
-    }
-
-    #[test]
-    fn error_escape_hex_invalid_scalar_value_surrogate() {
-        test_err!(r"\x{D800}", 8, ErrorKind::InvalidScalarValue(0xD800));
-    }
-
-    #[test]
-    fn error_escape_hex_invalid_scalar_value_high() {
-        test_err!(r"\x{110000}", 10, ErrorKind::InvalidScalarValue(0x110000));
-    }
-
-    #[test]
-    fn error_escape_hex_invalid_u32() {
-        test_err!(r"\x{9999999999}", 13,
-                  ErrorKind::InvalidBase16("9999999999".into()));
-    }
-
-    #[test]
-    fn error_unicode_unclosed() {
-        test_err!(r"\p{", 3, ErrorKind::UnclosedUnicodeName);
-        test_err!(r"\p{Greek", 8, ErrorKind::UnclosedUnicodeName);
-    }
-
-    #[test]
-    fn error_unicode_no_letter() {
-        test_err!(r"\p", 2, ErrorKind::UnexpectedEscapeEof);
-    }
-
-    #[test]
-    fn error_unicode_unknown_letter() {
-        test_err!(r"\pA", 3, ErrorKind::UnrecognizedUnicodeClass("A".into()));
-    }
-
-    #[test]
-    fn error_unicode_unknown_name() {
-        test_err!(r"\p{Yii}", 7,
-                  ErrorKind::UnrecognizedUnicodeClass("Yii".into()));
-    }
-
-    #[test]
-    fn error_class_eof_empty() {
-        test_err!("[", 1, ErrorKind::UnexpectedClassEof);
-        test_err!("[^", 2, ErrorKind::UnexpectedClassEof);
-    }
-
-    #[test]
-    fn error_class_eof_non_empty() {
-        test_err!("[a", 2, ErrorKind::UnexpectedClassEof);
-        test_err!("[^a", 3, ErrorKind::UnexpectedClassEof);
-    }
-
-    #[test]
-    fn error_class_eof_range() {
-        test_err!("[a-", 3, ErrorKind::UnexpectedClassEof);
-        test_err!("[^a-", 4, ErrorKind::UnexpectedClassEof);
-        test_err!("[---", 4, ErrorKind::UnexpectedClassEof);
-    }
-
-    #[test]
-    fn error_class_invalid_escape() {
-        test_err!(r"[\pA]", 4,
-                  ErrorKind::UnrecognizedUnicodeClass("A".into()));
-    }
-
-    #[test]
-    fn error_class_valid_escape_not_allowed() {
-        test_err!(r"[\A]", 3, ErrorKind::InvalidClassEscape(Expr::StartText));
-    }
-
-    #[test]
-    fn error_class_range_valid_escape_not_allowed() {
-        test_err!(r"[a-\d]", 5,
-                  ErrorKind::InvalidClassEscape(Expr::Class(class(PERLD))));
-        test_err!(r"[a-\A]", 5,
-                  ErrorKind::InvalidClassEscape(Expr::StartText));
-        test_err!(r"[\A-a]", 3,
-                  ErrorKind::InvalidClassEscape(Expr::StartText));
-    }
-
-    #[test]
-    fn error_class_invalid_range() {
-        test_err!("[z-a]", 4, ErrorKind::InvalidClassRange {
-            start: 'z',
-            end: 'a',
-        });
-    }
-
-    #[test]
-    fn error_class_empty_range() {
-        test_err!("[]", 2, ErrorKind::UnexpectedClassEof);
-        test_err!("[^]", 3, ErrorKind::UnexpectedClassEof);
-        test_err!(r"[^\d\D]", 7, ErrorKind::EmptyClass);
-
-        let flags = Flags { allow_bytes: true, .. Flags::default() };
-        test_err!(r"(?-u)[^\x00-\xFF]", 17, ErrorKind::EmptyClass, flags);
-    }
-
-    #[test]
-    fn error_class_unsupported_char() {
-        // These tests ensure that some unescaped special characters are
-        // rejected in character classes. The intention is to use these
-        // characters to implement sets as described in UTS#18 RL1.3. Once
-        // that's done, these tests should be removed and replaced with others.
-        test_err!("[~~]", 2, ErrorKind::UnsupportedClassChar('~'));
-        test_err!("[+--]", 4, ErrorKind::UnsupportedClassChar('-'));
-        test_err!(r"[a-a--\xFF]", 5, ErrorKind::UnsupportedClassChar('-'));
-        test_err!(r"[a&&~~]", 5, ErrorKind::UnsupportedClassChar('~'));
-        test_err!(r"[a&&--]", 5, ErrorKind::UnsupportedClassChar('-'));
-    }
-
-    #[test]
-    fn error_class_nested_class() {
-        test_err!(r"[[]]", 4, ErrorKind::UnexpectedClassEof);
-        test_err!(r"[[][]]", 6, ErrorKind::UnexpectedClassEof);
-        test_err!(r"[[^\d\D]]", 8, ErrorKind::EmptyClass);
-        test_err!(r"[[]", 3, ErrorKind::UnexpectedClassEof);
-        test_err!(r"[[^]", 4, ErrorKind::UnexpectedClassEof);
-    }
-
-    #[test]
-    fn error_class_intersection() {
-        test_err!(r"[&&]", 4, ErrorKind::EmptyClass);
-        test_err!(r"[a&&]", 5, ErrorKind::EmptyClass);
-        test_err!(r"[&&&&]", 6, ErrorKind::EmptyClass);
-        // `]` after `&&` is not the same as in (`[]]`), because it's also not
-        // allowed unescaped in `[a]]`.
-        test_err!(r"[]&&]]", 5, ErrorKind::EmptyClass);
-
-        let flags = Flags { allow_bytes: true, .. Flags::default() };
-        test_err!(r"(?-u)[a&&\pZ]", 12, ErrorKind::UnicodeNotAllowed, flags);
-    }
-
-    #[test]
-    fn error_duplicate_capture_name() {
-        test_err!("(?P<a>.)(?P<a>.)", 14,
-                  ErrorKind::DuplicateCaptureName("a".into()));
-    }
-
-    #[test]
-    fn error_ignore_space_escape_hex() {
-        test_err!(r"(?x)\x{ 5 3 }", 10, ErrorKind::UnclosedHex);
-    }
-
-    #[test]
-    fn error_ignore_space_escape_hex2() {
-        test_err!(r"(?x)\x 5 3", 9, ErrorKind::InvalidBase16("5 ".into()));
-    }
-
-    #[test]
-    fn error_ignore_space_escape_unicode_name() {
-        test_err!(r"(?x)\p{Y i}", 9, ErrorKind::UnclosedUnicodeName);
+    /// Parse the regular expression into a high-level intermediate
+    /// representation.
+    pub fn parse(&mut self, pattern: &str) -> Result<hir::Hir> {
+        let ast = self.ast.parse(pattern)?;
+        let hir = self.hir.translate(pattern, &ast)?;
+        Ok(hir)
     }
 }
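For completeness, a minimal sketch of the convenience `Parser` described above (again assuming the vendored crate name `regex_syntax`; the rejected pattern mirrors the `(?-u)\xFF` case from the tests removed in this patch):

    extern crate regex_syntax;

    use regex_syntax::Parser;

    fn main() {
        // A default parser keeps Unicode mode enabled and refuses
        // expressions that could match invalid UTF-8.
        let mut parser = Parser::new();
        assert!(parser.parse(r"\w+").is_ok());
        assert!(parser.parse(r"(?-u)\xFF").is_err());
    }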
--- a/third_party/rust/regex-syntax/src/unicode.rs
+++ b/third_party/rust/regex-syntax/src/unicode.rs
@@ -1,5715 +1,436 @@
-// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
-// file at the top-level directory of this distribution and at
-// http://rust-lang.org/COPYRIGHT.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
+use std::cmp::Ordering;
+use std::result;
 
-// NOTE: The following code was generated by "scripts/unicode.py", do not edit
-// directly
+use ucd_util::{self, PropertyValues};
 
-#![allow(warnings)]
-pub mod general_category {
-    pub const C_table: &'static [(char, char)] = &[
-        ('\u{0}', '\u{1f}'), ('\u{7f}', '\u{9f}'), ('\u{ad}', '\u{ad}'),
-        ('\u{378}', '\u{379}'), ('\u{380}', '\u{383}'), ('\u{38b}', '\u{38b}'),
-        ('\u{38d}', '\u{38d}'), ('\u{3a2}', '\u{3a2}'), ('\u{530}', '\u{530}'),
-        ('\u{557}', '\u{558}'), ('\u{560}', '\u{560}'), ('\u{588}', '\u{588}'),
-        ('\u{58b}', '\u{58c}'), ('\u{590}', '\u{590}'), ('\u{5c8}', '\u{5cf}'),
-        ('\u{5eb}', '\u{5ef}'), ('\u{5f5}', '\u{605}'), ('\u{61c}', '\u{61d}'),
-        ('\u{6dd}', '\u{6dd}'), ('\u{70e}', '\u{70f}'), ('\u{74b}', '\u{74c}'),
-        ('\u{7b2}', '\u{7bf}'), ('\u{7fb}', '\u{7ff}'), ('\u{82e}', '\u{82f}'),
-        ('\u{83f}', '\u{83f}'), ('\u{85c}', '\u{85d}'), ('\u{85f}', '\u{89f}'),
-        ('\u{8b5}', '\u{8e2}'), ('\u{984}', '\u{984}'), ('\u{98d}', '\u{98e}'),
-        ('\u{991}', '\u{992}'), ('\u{9a9}', '\u{9a9}'), ('\u{9b1}', '\u{9b1}'),
-        ('\u{9b3}', '\u{9b5}'), ('\u{9ba}', '\u{9bb}'), ('\u{9c5}', '\u{9c6}'),
-        ('\u{9c9}', '\u{9ca}'), ('\u{9cf}', '\u{9d6}'), ('\u{9d8}', '\u{9db}'),
-        ('\u{9de}', '\u{9de}'), ('\u{9e4}', '\u{9e5}'), ('\u{9fc}', '\u{a00}'),
-        ('\u{a04}', '\u{a04}'), ('\u{a0b}', '\u{a0e}'), ('\u{a11}', '\u{a12}'),
-        ('\u{a29}', '\u{a29}'), ('\u{a31}', '\u{a31}'), ('\u{a34}', '\u{a34}'),
-        ('\u{a37}', '\u{a37}'), ('\u{a3a}', '\u{a3b}'), ('\u{a3d}', '\u{a3d}'),
-        ('\u{a43}', '\u{a46}'), ('\u{a49}', '\u{a4a}'), ('\u{a4e}', '\u{a50}'),
-        ('\u{a52}', '\u{a58}'), ('\u{a5d}', '\u{a5d}'), ('\u{a5f}', '\u{a65}'),
-        ('\u{a76}', '\u{a80}'), ('\u{a84}', '\u{a84}'), ('\u{a8e}', '\u{a8e}'),
-        ('\u{a92}', '\u{a92}'), ('\u{aa9}', '\u{aa9}'), ('\u{ab1}', '\u{ab1}'),
-        ('\u{ab4}', '\u{ab4}'), ('\u{aba}', '\u{abb}'), ('\u{ac6}', '\u{ac6}'),
-        ('\u{aca}', '\u{aca}'), ('\u{ace}', '\u{acf}'), ('\u{ad1}', '\u{adf}'),
-        ('\u{ae4}', '\u{ae5}'), ('\u{af2}', '\u{af8}'), ('\u{afa}', '\u{b00}'),
-        ('\u{b04}', '\u{b04}'), ('\u{b0d}', '\u{b0e}'), ('\u{b11}', '\u{b12}'),
-        ('\u{b29}', '\u{b29}'), ('\u{b31}', '\u{b31}'), ('\u{b34}', '\u{b34}'),
-        ('\u{b3a}', '\u{b3b}'), ('\u{b45}', '\u{b46}'), ('\u{b49}', '\u{b4a}'),
-        ('\u{b4e}', '\u{b55}'), ('\u{b58}', '\u{b5b}'), ('\u{b5e}', '\u{b5e}'),
-        ('\u{b64}', '\u{b65}'), ('\u{b78}', '\u{b81}'), ('\u{b84}', '\u{b84}'),
-        ('\u{b8b}', '\u{b8d}'), ('\u{b91}', '\u{b91}'), ('\u{b96}', '\u{b98}'),
-        ('\u{b9b}', '\u{b9b}'), ('\u{b9d}', '\u{b9d}'), ('\u{ba0}', '\u{ba2}'),
-        ('\u{ba5}', '\u{ba7}'), ('\u{bab}', '\u{bad}'), ('\u{bba}', '\u{bbd}'),
-        ('\u{bc3}', '\u{bc5}'), ('\u{bc9}', '\u{bc9}'), ('\u{bce}', '\u{bcf}'),
-        ('\u{bd1}', '\u{bd6}'), ('\u{bd8}', '\u{be5}'), ('\u{bfb}', '\u{bff}'),
-        ('\u{c04}', '\u{c04}'), ('\u{c0d}', '\u{c0d}'), ('\u{c11}', '\u{c11}'),
-        ('\u{c29}', '\u{c29}'), ('\u{c3a}', '\u{c3c}'), ('\u{c45}', '\u{c45}'),
-        ('\u{c49}', '\u{c49}'), ('\u{c4e}', '\u{c54}'), ('\u{c57}', '\u{c57}'),
-        ('\u{c5b}', '\u{c5f}'), ('\u{c64}', '\u{c65}'), ('\u{c70}', '\u{c77}'),
-        ('\u{c80}', '\u{c80}'), ('\u{c84}', '\u{c84}'), ('\u{c8d}', '\u{c8d}'),
-        ('\u{c91}', '\u{c91}'), ('\u{ca9}', '\u{ca9}'), ('\u{cb4}', '\u{cb4}'),
-        ('\u{cba}', '\u{cbb}'), ('\u{cc5}', '\u{cc5}'), ('\u{cc9}', '\u{cc9}'),
-        ('\u{cce}', '\u{cd4}'), ('\u{cd7}', '\u{cdd}'), ('\u{cdf}', '\u{cdf}'),
-        ('\u{ce4}', '\u{ce5}'), ('\u{cf0}', '\u{cf0}'), ('\u{cf3}', '\u{d00}'),
-        ('\u{d04}', '\u{d04}'), ('\u{d0d}', '\u{d0d}'), ('\u{d11}', '\u{d11}'),
-        ('\u{d3b}', '\u{d3c}'), ('\u{d45}', '\u{d45}'), ('\u{d49}', '\u{d49}'),
-        ('\u{d4f}', '\u{d56}'), ('\u{d58}', '\u{d5e}'), ('\u{d64}', '\u{d65}'),
-        ('\u{d76}', '\u{d78}'), ('\u{d80}', '\u{d81}'), ('\u{d84}', '\u{d84}'),
-        ('\u{d97}', '\u{d99}'), ('\u{db2}', '\u{db2}'), ('\u{dbc}', '\u{dbc}'),
-        ('\u{dbe}', '\u{dbf}'), ('\u{dc7}', '\u{dc9}'), ('\u{dcb}', '\u{dce}'),
-        ('\u{dd5}', '\u{dd5}'), ('\u{dd7}', '\u{dd7}'), ('\u{de0}', '\u{de5}'),
-        ('\u{df0}', '\u{df1}'), ('\u{df5}', '\u{e00}'), ('\u{e3b}', '\u{e3e}'),
-        ('\u{e5c}', '\u{e80}'), ('\u{e83}', '\u{e83}'), ('\u{e85}', '\u{e86}'),
-        ('\u{e89}', '\u{e89}'), ('\u{e8b}', '\u{e8c}'), ('\u{e8e}', '\u{e93}'),
-        ('\u{e98}', '\u{e98}'), ('\u{ea0}', '\u{ea0}'), ('\u{ea4}', '\u{ea4}'),
-        ('\u{ea6}', '\u{ea6}'), ('\u{ea8}', '\u{ea9}'), ('\u{eac}', '\u{eac}'),
-        ('\u{eba}', '\u{eba}'), ('\u{ebe}', '\u{ebf}'), ('\u{ec5}', '\u{ec5}'),
-        ('\u{ec7}', '\u{ec7}'), ('\u{ece}', '\u{ecf}'), ('\u{eda}', '\u{edb}'),
-        ('\u{ee0}', '\u{eff}'), ('\u{f48}', '\u{f48}'), ('\u{f6d}', '\u{f70}'),
-        ('\u{f98}', '\u{f98}'), ('\u{fbd}', '\u{fbd}'), ('\u{fcd}', '\u{fcd}'),
-        ('\u{fdb}', '\u{fff}'), ('\u{10c6}', '\u{10c6}'), ('\u{10c8}',
-        '\u{10cc}'), ('\u{10ce}', '\u{10cf}'), ('\u{1249}', '\u{1249}'),
-        ('\u{124e}', '\u{124f}'), ('\u{1257}', '\u{1257}'), ('\u{1259}',
-        '\u{1259}'), ('\u{125e}', '\u{125f}'), ('\u{1289}', '\u{1289}'),
-        ('\u{128e}', '\u{128f}'), ('\u{12b1}', '\u{12b1}'), ('\u{12b6}',
-        '\u{12b7}'), ('\u{12bf}', '\u{12bf}'), ('\u{12c1}', '\u{12c1}'),
-        ('\u{12c6}', '\u{12c7}'), ('\u{12d7}', '\u{12d7}'), ('\u{1311}',
-        '\u{1311}'), ('\u{1316}', '\u{1317}'), ('\u{135b}', '\u{135c}'),
-        ('\u{137d}', '\u{137f}'), ('\u{139a}', '\u{139f}'), ('\u{13f6}',
-        '\u{13f7}'), ('\u{13fe}', '\u{13ff}'), ('\u{169d}', '\u{169f}'),
-        ('\u{16f9}', '\u{16ff}'), ('\u{170d}', '\u{170d}'), ('\u{1715}',
-        '\u{171f}'), ('\u{1737}', '\u{173f}'), ('\u{1754}', '\u{175f}'),
-        ('\u{176d}', '\u{176d}'), ('\u{1771}', '\u{1771}'), ('\u{1774}',
-        '\u{177f}'), ('\u{17de}', '\u{17df}'), ('\u{17ea}', '\u{17ef}'),
-        ('\u{17fa}', '\u{17ff}'), ('\u{180e}', '\u{180f}'), ('\u{181a}',
-        '\u{181f}'), ('\u{1878}', '\u{187f}'), ('\u{18ab}', '\u{18af}'),
-        ('\u{18f6}', '\u{18ff}'), ('\u{191f}', '\u{191f}'), ('\u{192c}',
-        '\u{192f}'), ('\u{193c}', '\u{193f}'), ('\u{1941}', '\u{1943}'),
-        ('\u{196e}', '\u{196f}'), ('\u{1975}', '\u{197f}'), ('\u{19ac}',
-        '\u{19af}'), ('\u{19ca}', '\u{19cf}'), ('\u{19db}', '\u{19dd}'),
-        ('\u{1a1c}', '\u{1a1d}'), ('\u{1a5f}', '\u{1a5f}'), ('\u{1a7d}',
-        '\u{1a7e}'), ('\u{1a8a}', '\u{1a8f}'), ('\u{1a9a}', '\u{1a9f}'),
-        ('\u{1aae}', '\u{1aaf}'), ('\u{1abf}', '\u{1aff}'), ('\u{1b4c}',
-        '\u{1b4f}'), ('\u{1b7d}', '\u{1b7f}'), ('\u{1bf4}', '\u{1bfb}'),
-        ('\u{1c38}', '\u{1c3a}'), ('\u{1c4a}', '\u{1c4c}'), ('\u{1c80}',
-        '\u{1cbf}'), ('\u{1cc8}', '\u{1ccf}'), ('\u{1cf7}', '\u{1cf7}'),
-        ('\u{1cfa}', '\u{1cff}'), ('\u{1df6}', '\u{1dfb}'), ('\u{1f16}',
-        '\u{1f17}'), ('\u{1f1e}', '\u{1f1f}'), ('\u{1f46}', '\u{1f47}'),
-        ('\u{1f4e}', '\u{1f4f}'), ('\u{1f58}', '\u{1f58}'), ('\u{1f5a}',
-        '\u{1f5a}'), ('\u{1f5c}', '\u{1f5c}'), ('\u{1f5e}', '\u{1f5e}'),
-        ('\u{1f7e}', '\u{1f7f}'), ('\u{1fb5}', '\u{1fb5}'), ('\u{1fc5}',
-        '\u{1fc5}'), ('\u{1fd4}', '\u{1fd5}'), ('\u{1fdc}', '\u{1fdc}'),
-        ('\u{1ff0}', '\u{1ff1}'), ('\u{1ff5}', '\u{1ff5}'), ('\u{1fff}',
-        '\u{1fff}'), ('\u{200b}', '\u{200f}'), ('\u{202a}', '\u{202e}'),
-        ('\u{2060}', '\u{206f}'), ('\u{2072}', '\u{2073}'), ('\u{208f}',
-        '\u{208f}'), ('\u{209d}', '\u{209f}'), ('\u{20bf}', '\u{20cf}'),
-        ('\u{20f1}', '\u{20ff}'), ('\u{218c}', '\u{218f}'), ('\u{23fb}',
-        '\u{23ff}'), ('\u{2427}', '\u{243f}'), ('\u{244b}', '\u{245f}'),
-        ('\u{2b74}', '\u{2b75}'), ('\u{2b96}', '\u{2b97}'), ('\u{2bba}',
-        '\u{2bbc}'), ('\u{2bc9}', '\u{2bc9}'), ('\u{2bd2}', '\u{2beb}'),
-        ('\u{2bf0}', '\u{2bff}'), ('\u{2c2f}', '\u{2c2f}'), ('\u{2c5f}',
-        '\u{2c5f}'), ('\u{2cf4}', '\u{2cf8}'), ('\u{2d26}', '\u{2d26}'),
-        ('\u{2d28}', '\u{2d2c}'), ('\u{2d2e}', '\u{2d2f}'), ('\u{2d68}',
-        '\u{2d6e}'), ('\u{2d71}', '\u{2d7e}'), ('\u{2d97}', '\u{2d9f}'),
-        ('\u{2da7}', '\u{2da7}'), ('\u{2daf}', '\u{2daf}'), ('\u{2db7}',
-        '\u{2db7}'), ('\u{2dbf}', '\u{2dbf}'), ('\u{2dc7}', '\u{2dc7}'),
-        ('\u{2dcf}', '\u{2dcf}'), ('\u{2dd7}', '\u{2dd7}'), ('\u{2ddf}',
-        '\u{2ddf}'), ('\u{2e43}', '\u{2e7f}'), ('\u{2e9a}', '\u{2e9a}'),
-        ('\u{2ef4}', '\u{2eff}'), ('\u{2fd6}', '\u{2fef}'), ('\u{2ffc}',
-        '\u{2fff}'), ('\u{3040}', '\u{3040}'), ('\u{3097}', '\u{3098}'),
-        ('\u{3100}', '\u{3104}'), ('\u{312e}', '\u{3130}'), ('\u{318f}',
-        '\u{318f}'), ('\u{31bb}', '\u{31bf}'), ('\u{31e4}', '\u{31ef}'),
-        ('\u{321f}', '\u{321f}'), ('\u{32ff}', '\u{32ff}'), ('\u{4db6}',
-        '\u{4dbf}'), ('\u{9fd6}', '\u{9fff}'), ('\u{a48d}', '\u{a48f}'),
-        ('\u{a4c7}', '\u{a4cf}'), ('\u{a62c}', '\u{a63f}'), ('\u{a6f8}',
-        '\u{a6ff}'), ('\u{a7ae}', '\u{a7af}'), ('\u{a7b8}', '\u{a7f6}'),
-        ('\u{a82c}', '\u{a82f}'), ('\u{a83a}', '\u{a83f}'), ('\u{a878}',
-        '\u{a87f}'), ('\u{a8c5}', '\u{a8cd}'), ('\u{a8da}', '\u{a8df}'),
-        ('\u{a8fe}', '\u{a8ff}'), ('\u{a954}', '\u{a95e}'), ('\u{a97d}',
-        '\u{a97f}'), ('\u{a9ce}', '\u{a9ce}'), ('\u{a9da}', '\u{a9dd}'),
-        ('\u{a9ff}', '\u{a9ff}'), ('\u{aa37}', '\u{aa3f}'), ('\u{aa4e}',
-        '\u{aa4f}'), ('\u{aa5a}', '\u{aa5b}'), ('\u{aac3}', '\u{aada}'),
-        ('\u{aaf7}', '\u{ab00}'), ('\u{ab07}', '\u{ab08}'), ('\u{ab0f}',
-        '\u{ab10}'), ('\u{ab17}', '\u{ab1f}'), ('\u{ab27}', '\u{ab27}'),
-        ('\u{ab2f}', '\u{ab2f}'), ('\u{ab66}', '\u{ab6f}'), ('\u{abee}',
-        '\u{abef}'), ('\u{abfa}', '\u{abff}'), ('\u{d7a4}', '\u{d7af}'),
-        ('\u{d7c7}', '\u{d7ca}'), ('\u{d7fc}', '\u{d7ff}'), ('\u{e000}',
-        '\u{f8ff}'), ('\u{fa6e}', '\u{fa6f}'), ('\u{fada}', '\u{faff}'),
-        ('\u{fb07}', '\u{fb12}'), ('\u{fb18}', '\u{fb1c}'), ('\u{fb37}',
-        '\u{fb37}'), ('\u{fb3d}', '\u{fb3d}'), ('\u{fb3f}', '\u{fb3f}'),
-        ('\u{fb42}', '\u{fb42}'), ('\u{fb45}', '\u{fb45}'), ('\u{fbc2}',
-        '\u{fbd2}'), ('\u{fd40}', '\u{fd4f}'), ('\u{fd90}', '\u{fd91}'),
-        ('\u{fdc8}', '\u{fdef}'), ('\u{fdfe}', '\u{fdff}'), ('\u{fe1a}',
-        '\u{fe1f}'), ('\u{fe53}', '\u{fe53}'), ('\u{fe67}', '\u{fe67}'),
-        ('\u{fe6c}', '\u{fe6f}'), ('\u{fe75}', '\u{fe75}'), ('\u{fefd}',
-        '\u{ff00}'), ('\u{ffbf}', '\u{ffc1}'), ('\u{ffc8}', '\u{ffc9}'),
-        ('\u{ffd0}', '\u{ffd1}'), ('\u{ffd8}', '\u{ffd9}'), ('\u{ffdd}',
-        '\u{ffdf}'), ('\u{ffe7}', '\u{ffe7}'), ('\u{ffef}', '\u{fffb}'),
-        ('\u{fffe}', '\u{ffff}'), ('\u{1000c}', '\u{1000c}'), ('\u{10027}',
-        '\u{10027}'), ('\u{1003b}', '\u{1003b}'), ('\u{1003e}', '\u{1003e}'),
-        ('\u{1004e}', '\u{1004f}'), ('\u{1005e}', '\u{1007f}'), ('\u{100fb}',
-        '\u{100ff}'), ('\u{10103}', '\u{10106}'), ('\u{10134}', '\u{10136}'),
-        ('\u{1018d}', '\u{1018f}'), ('\u{1019c}', '\u{1019f}'), ('\u{101a1}',
-        '\u{101cf}'), ('\u{101fe}', '\u{1027f}'), ('\u{1029d}', '\u{1029f}'),
-        ('\u{102d1}', '\u{102df}'), ('\u{102fc}', '\u{102ff}'), ('\u{10324}',
-        '\u{1032f}'), ('\u{1034b}', '\u{1034f}'), ('\u{1037b}', '\u{1037f}'),
-        ('\u{1039e}', '\u{1039e}'), ('\u{103c4}', '\u{103c7}'), ('\u{103d6}',
-        '\u{103ff}'), ('\u{1049e}', '\u{1049f}'), ('\u{104aa}', '\u{104ff}'),
-        ('\u{10528}', '\u{1052f}'), ('\u{10564}', '\u{1056e}'), ('\u{10570}',
-        '\u{105ff}'), ('\u{10737}', '\u{1073f}'), ('\u{10756}', '\u{1075f}'),
-        ('\u{10768}', '\u{107ff}'), ('\u{10806}', '\u{10807}'), ('\u{10809}',
-        '\u{10809}'), ('\u{10836}', '\u{10836}'), ('\u{10839}', '\u{1083b}'),
-        ('\u{1083d}', '\u{1083e}'), ('\u{10856}', '\u{10856}'), ('\u{1089f}',
-        '\u{108a6}'), ('\u{108b0}', '\u{108df}'), ('\u{108f3}', '\u{108f3}'),
-        ('\u{108f6}', '\u{108fa}'), ('\u{1091c}', '\u{1091e}'), ('\u{1093a}',
-        '\u{1093e}'), ('\u{10940}', '\u{1097f}'), ('\u{109b8}', '\u{109bb}'),
-        ('\u{109d0}', '\u{109d1}'), ('\u{10a04}', '\u{10a04}'), ('\u{10a07}',
-        '\u{10a0b}'), ('\u{10a14}', '\u{10a14}'), ('\u{10a18}', '\u{10a18}'),
-        ('\u{10a34}', '\u{10a37}'), ('\u{10a3b}', '\u{10a3e}'), ('\u{10a48}',
-        '\u{10a4f}'), ('\u{10a59}', '\u{10a5f}'), ('\u{10aa0}', '\u{10abf}'),
-        ('\u{10ae7}', '\u{10aea}'), ('\u{10af7}', '\u{10aff}'), ('\u{10b36}',
-        '\u{10b38}'), ('\u{10b56}', '\u{10b57}'), ('\u{10b73}', '\u{10b77}'),
-        ('\u{10b92}', '\u{10b98}'), ('\u{10b9d}', '\u{10ba8}'), ('\u{10bb0}',
-        '\u{10bff}'), ('\u{10c49}', '\u{10c7f}'), ('\u{10cb3}', '\u{10cbf}'),
-        ('\u{10cf3}', '\u{10cf9}'), ('\u{10d00}', '\u{10e5f}'), ('\u{10e7f}',
-        '\u{10fff}'), ('\u{1104e}', '\u{11051}'), ('\u{11070}', '\u{1107e}'),
-        ('\u{110bd}', '\u{110bd}'), ('\u{110c2}', '\u{110cf}'), ('\u{110e9}',
-        '\u{110ef}'), ('\u{110fa}', '\u{110ff}'), ('\u{11135}', '\u{11135}'),
-        ('\u{11144}', '\u{1114f}'), ('\u{11177}', '\u{1117f}'), ('\u{111ce}',
-        '\u{111cf}'), ('\u{111e0}', '\u{111e0}'), ('\u{111f5}', '\u{111ff}'),
-        ('\u{11212}', '\u{11212}'), ('\u{1123e}', '\u{1127f}'), ('\u{11287}',
-        '\u{11287}'), ('\u{11289}', '\u{11289}'), ('\u{1128e}', '\u{1128e}'),
-        ('\u{1129e}', '\u{1129e}'), ('\u{112aa}', '\u{112af}'), ('\u{112eb}',
-        '\u{112ef}'), ('\u{112fa}', '\u{112ff}'), ('\u{11304}', '\u{11304}'),
-        ('\u{1130d}', '\u{1130e}'), ('\u{11311}', '\u{11312}'), ('\u{11329}',
-        '\u{11329}'), ('\u{11331}', '\u{11331}'), ('\u{11334}', '\u{11334}'),
-        ('\u{1133a}', '\u{1133b}'), ('\u{11345}', '\u{11346}'), ('\u{11349}',
-        '\u{1134a}'), ('\u{1134e}', '\u{1134f}'), ('\u{11351}', '\u{11356}'),
-        ('\u{11358}', '\u{1135c}'), ('\u{11364}', '\u{11365}'), ('\u{1136d}',
-        '\u{1136f}'), ('\u{11375}', '\u{1147f}'), ('\u{114c8}', '\u{114cf}'),
-        ('\u{114da}', '\u{1157f}'), ('\u{115b6}', '\u{115b7}'), ('\u{115de}',
-        '\u{115ff}'), ('\u{11645}', '\u{1164f}'), ('\u{1165a}', '\u{1167f}'),
-        ('\u{116b8}', '\u{116bf}'), ('\u{116ca}', '\u{116ff}'), ('\u{1171a}',
-        '\u{1171c}'), ('\u{1172c}', '\u{1172f}'), ('\u{11740}', '\u{1189f}'),
-        ('\u{118f3}', '\u{118fe}'), ('\u{11900}', '\u{11abf}'), ('\u{11af9}',
-        '\u{11fff}'), ('\u{1239a}', '\u{123ff}'), ('\u{1246f}', '\u{1246f}'),
-        ('\u{12475}', '\u{1247f}'), ('\u{12544}', '\u{12fff}'), ('\u{1342f}',
-        '\u{143ff}'), ('\u{14647}', '\u{167ff}'), ('\u{16a39}', '\u{16a3f}'),
-        ('\u{16a5f}', '\u{16a5f}'), ('\u{16a6a}', '\u{16a6d}'), ('\u{16a70}',
-        '\u{16acf}'), ('\u{16aee}', '\u{16aef}'), ('\u{16af6}', '\u{16aff}'),
-        ('\u{16b46}', '\u{16b4f}'), ('\u{16b5a}', '\u{16b5a}'), ('\u{16b62}',
-        '\u{16b62}'), ('\u{16b78}', '\u{16b7c}'), ('\u{16b90}', '\u{16eff}'),
-        ('\u{16f45}', '\u{16f4f}'), ('\u{16f7f}', '\u{16f8e}'), ('\u{16fa0}',
-        '\u{1afff}'), ('\u{1b002}', '\u{1bbff}'), ('\u{1bc6b}', '\u{1bc6f}'),
-        ('\u{1bc7d}', '\u{1bc7f}'), ('\u{1bc89}', '\u{1bc8f}'), ('\u{1bc9a}',
-        '\u{1bc9b}'), ('\u{1bca0}', '\u{1cfff}'), ('\u{1d0f6}', '\u{1d0ff}'),
-        ('\u{1d127}', '\u{1d128}'), ('\u{1d173}', '\u{1d17a}'), ('\u{1d1e9}',
-        '\u{1d1ff}'), ('\u{1d246}', '\u{1d2ff}'), ('\u{1d357}', '\u{1d35f}'),
-        ('\u{1d372}', '\u{1d3ff}'), ('\u{1d455}', '\u{1d455}'), ('\u{1d49d}',
-        '\u{1d49d}'), ('\u{1d4a0}', '\u{1d4a1}'), ('\u{1d4a3}', '\u{1d4a4}'),
-        ('\u{1d4a7}', '\u{1d4a8}'), ('\u{1d4ad}', '\u{1d4ad}'), ('\u{1d4ba}',
-        '\u{1d4ba}'), ('\u{1d4bc}', '\u{1d4bc}'), ('\u{1d4c4}', '\u{1d4c4}'),
-        ('\u{1d506}', '\u{1d506}'), ('\u{1d50b}', '\u{1d50c}'), ('\u{1d515}',
-        '\u{1d515}'), ('\u{1d51d}', '\u{1d51d}'), ('\u{1d53a}', '\u{1d53a}'),
-        ('\u{1d53f}', '\u{1d53f}'), ('\u{1d545}', '\u{1d545}'), ('\u{1d547}',
-        '\u{1d549}'), ('\u{1d551}', '\u{1d551}'), ('\u{1d6a6}', '\u{1d6a7}'),
-        ('\u{1d7cc}', '\u{1d7cd}'), ('\u{1da8c}', '\u{1da9a}'), ('\u{1daa0}',
-        '\u{1daa0}'), ('\u{1dab0}', '\u{1e7ff}'), ('\u{1e8c5}', '\u{1e8c6}'),
-        ('\u{1e8d7}', '\u{1edff}'), ('\u{1ee04}', '\u{1ee04}'), ('\u{1ee20}',
-        '\u{1ee20}'), ('\u{1ee23}', '\u{1ee23}'), ('\u{1ee25}', '\u{1ee26}'),
-        ('\u{1ee28}', '\u{1ee28}'), ('\u{1ee33}', '\u{1ee33}'), ('\u{1ee38}',
-        '\u{1ee38}'), ('\u{1ee3a}', '\u{1ee3a}'), ('\u{1ee3c}', '\u{1ee41}'),
-        ('\u{1ee43}', '\u{1ee46}'), ('\u{1ee48}', '\u{1ee48}'), ('\u{1ee4a}',
-        '\u{1ee4a}'), ('\u{1ee4c}', '\u{1ee4c}'), ('\u{1ee50}', '\u{1ee50}'),
-        ('\u{1ee53}', '\u{1ee53}'), ('\u{1ee55}', '\u{1ee56}'), ('\u{1ee58}',
-        '\u{1ee58}'), ('\u{1ee5a}', '\u{1ee5a}'), ('\u{1ee5c}', '\u{1ee5c}'),
-        ('\u{1ee5e}', '\u{1ee5e}'), ('\u{1ee60}', '\u{1ee60}'), ('\u{1ee63}',
-        '\u{1ee63}'), ('\u{1ee65}', '\u{1ee66}'), ('\u{1ee6b}', '\u{1ee6b}'),
-        ('\u{1ee73}', '\u{1ee73}'), ('\u{1ee78}', '\u{1ee78}'), ('\u{1ee7d}',
-        '\u{1ee7d}'), ('\u{1ee7f}', '\u{1ee7f}'), ('\u{1ee8a}', '\u{1ee8a}'),
-        ('\u{1ee9c}', '\u{1eea0}'), ('\u{1eea4}', '\u{1eea4}'), ('\u{1eeaa}',
-        '\u{1eeaa}'), ('\u{1eebc}', '\u{1eeef}'), ('\u{1eef2}', '\u{1efff}'),
-        ('\u{1f02c}', '\u{1f02f}'), ('\u{1f094}', '\u{1f09f}'), ('\u{1f0af}',
-        '\u{1f0b0}'), ('\u{1f0c0}', '\u{1f0c0}'), ('\u{1f0d0}', '\u{1f0d0}'),
-        ('\u{1f0f6}', '\u{1f0ff}'), ('\u{1f10d}', '\u{1f10f}'), ('\u{1f12f}',
-        '\u{1f12f}'), ('\u{1f16c}', '\u{1f16f}'), ('\u{1f19b}', '\u{1f1e5}'),
-        ('\u{1f203}', '\u{1f20f}'), ('\u{1f23b}', '\u{1f23f}'), ('\u{1f249}',
-        '\u{1f24f}'), ('\u{1f252}', '\u{1f2ff}'), ('\u{1f57a}', '\u{1f57a}'),
-        ('\u{1f5a4}', '\u{1f5a4}'), ('\u{1f6d1}', '\u{1f6df}'), ('\u{1f6ed}',
-        '\u{1f6ef}'), ('\u{1f6f4}', '\u{1f6ff}'), ('\u{1f774}', '\u{1f77f}'),
-        ('\u{1f7d5}', '\u{1f7ff}'), ('\u{1f80c}', '\u{1f80f}'), ('\u{1f848}',
-        '\u{1f84f}'), ('\u{1f85a}', '\u{1f85f}'), ('\u{1f888}', '\u{1f88f}'),
-        ('\u{1f8ae}', '\u{1f90f}'), ('\u{1f919}', '\u{1f97f}'), ('\u{1f985}',
-        '\u{1f9bf}'), ('\u{1f9c1}', '\u{1ffff}'), ('\u{2a6d7}', '\u{2a6ff}'),
-        ('\u{2b735}', '\u{2b73f}'), ('\u{2b81e}', '\u{2b81f}'), ('\u{2cea2}',
-        '\u{2f7ff}'), ('\u{2fa1e}', '\u{e00ff}'), ('\u{e01f0}', '\u{10ffff}')
-    ];
+use hir;
+use unicode_tables::age;
+use unicode_tables::case_folding_simple::CASE_FOLDING_SIMPLE;
+use unicode_tables::general_category;
+use unicode_tables::property_bool;
+use unicode_tables::property_names::PROPERTY_NAMES;
+use unicode_tables::property_values::PROPERTY_VALUES;
+use unicode_tables::script;
+use unicode_tables::script_extension;
+
+type Result<T> = result::Result<T, Error>;
 
-    pub const Cc_table: &'static [(char, char)] = &[
-        ('\u{0}', '\u{1f}'), ('\u{7f}', '\u{9f}')
-    ];
-
-    pub const Cf_table: &'static [(char, char)] = &[
-        ('\u{ad}', '\u{ad}'), ('\u{600}', '\u{605}'), ('\u{61c}', '\u{61c}'),
-        ('\u{6dd}', '\u{6dd}'), ('\u{70f}', '\u{70f}'), ('\u{180e}',
-        '\u{180e}'), ('\u{200b}', '\u{200f}'), ('\u{202a}', '\u{202e}'),
-        ('\u{2060}', '\u{2064}'), ('\u{2066}', '\u{206f}'), ('\u{feff}',
-        '\u{feff}'), ('\u{fff9}', '\u{fffb}'), ('\u{110bd}', '\u{110bd}'),
-        ('\u{1bca0}', '\u{1bca3}'), ('\u{1d173}', '\u{1d17a}'), ('\u{e0001}',
-        '\u{e0001}'), ('\u{e0020}', '\u{e007f}')
-    ];
+/// An error that occurs when dealing with Unicode.
+///
+/// We don't impl the Error trait here because these always get converted
+/// into other public errors. (This error type isn't exported.)
+#[derive(Debug)]
+pub enum Error {
+    PropertyNotFound,
+    PropertyValueNotFound,
+}
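For context, a minimal sketch of how these crate-internal errors and the `Result` alias above are meant to be consumed (editorial annotation, not part of the vendored file; `canonical_property_name` is a hypothetical helper, and the sketch only assumes `PROPERTY_NAMES` is a slice of `(normalized name, canonical name)` string pairs):

    // Hypothetical caller, for illustration only: look up the canonical name
    // of a Unicode property, surfacing a crate-internal error on failure.
    fn canonical_property_name(normalized: &str) -> Result<&'static str> {
        PROPERTY_NAMES
            .iter()
            .find(|&&(alias, _)| alias == normalized)
            .map(|&(_, canonical)| canonical)
            .ok_or(Error::PropertyNotFound)
    }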
 
-    pub const Cn_table: &'static [(char, char)] = &[
-        ('\u{378}', '\u{379}'), ('\u{380}', '\u{383}'), ('\u{38b}', '\u{38b}'),
-        ('\u{38d}', '\u{38d}'), ('\u{3a2}', '\u{3a2}'), ('\u{530}', '\u{530}'),
-        ('\u{557}', '\u{558}'), ('\u{560}', '\u{560}'), ('\u{588}', '\u{588}'),
-        ('\u{58b}', '\u{58c}'), ('\u{590}', '\u{590}'), ('\u{5c8}', '\u{5cf}'),
-        ('\u{5eb}', '\u{5ef}'), ('\u{5f5}', '\u{5ff}'), ('\u{61d}', '\u{61d}'),
-        ('\u{70e}', '\u{70e}'), ('\u{74b}', '\u{74c}'), ('\u{7b2}', '\u{7bf}'),
-        ('\u{7fb}', '\u{7ff}'), ('\u{82e}', '\u{82f}'), ('\u{83f}', '\u{83f}'),
-        ('\u{85c}', '\u{85d}'), ('\u{85f}', '\u{89f}'), ('\u{8b5}', '\u{8e2}'),
-        ('\u{984}', '\u{984}'), ('\u{98d}', '\u{98e}'), ('\u{991}', '\u{992}'),
-        ('\u{9a9}', '\u{9a9}'), ('\u{9b1}', '\u{9b1}'), ('\u{9b3}', '\u{9b5}'),
-        ('\u{9ba}', '\u{9bb}'), ('\u{9c5}', '\u{9c6}'), ('\u{9c9}', '\u{9ca}'),
-        ('\u{9cf}', '\u{9d6}'), ('\u{9d8}', '\u{9db}'), ('\u{9de}', '\u{9de}'),
-        ('\u{9e4}', '\u{9e5}'), ('\u{9fc}', '\u{a00}'), ('\u{a04}', '\u{a04}'),
-        ('\u{a0b}', '\u{a0e}'), ('\u{a11}', '\u{a12}'), ('\u{a29}', '\u{a29}'),
-        ('\u{a31}', '\u{a31}'), ('\u{a34}', '\u{a34}'), ('\u{a37}', '\u{a37}'),
-        ('\u{a3a}', '\u{a3b}'), ('\u{a3d}', '\u{a3d}'), ('\u{a43}', '\u{a46}'),
-        ('\u{a49}', '\u{a4a}'), ('\u{a4e}', '\u{a50}'), ('\u{a52}', '\u{a58}'),
-        ('\u{a5d}', '\u{a5d}'), ('\u{a5f}', '\u{a65}'), ('\u{a76}', '\u{a80}'),
-        ('\u{a84}', '\u{a84}'), ('\u{a8e}', '\u{a8e}'), ('\u{a92}', '\u{a92}'),
-        ('\u{aa9}', '\u{aa9}'), ('\u{ab1}', '\u{ab1}'), ('\u{ab4}', '\u{ab4}'),
-        ('\u{aba}', '\u{abb}'), ('\u{ac6}', '\u{ac6}'), ('\u{aca}', '\u{aca}'),
-        ('\u{ace}', '\u{acf}'), ('\u{ad1}', '\u{adf}'), ('\u{ae4}', '\u{ae5}'),
-        ('\u{af2}', '\u{af8}'), ('\u{afa}', '\u{b00}'), ('\u{b04}', '\u{b04}'),
-        ('\u{b0d}', '\u{b0e}'), ('\u{b11}', '\u{b12}'), ('\u{b29}', '\u{b29}'),
-        ('\u{b31}', '\u{b31}'), ('\u{b34}', '\u{b34}'), ('\u{b3a}', '\u{b3b}'),
-        ('\u{b45}', '\u{b46}'), ('\u{b49}', '\u{b4a}'), ('\u{b4e}', '\u{b55}'),
-        ('\u{b58}', '\u{b5b}'), ('\u{b5e}', '\u{b5e}'), ('\u{b64}', '\u{b65}'),
-        ('\u{b78}', '\u{b81}'), ('\u{b84}', '\u{b84}'), ('\u{b8b}', '\u{b8d}'),
-        ('\u{b91}', '\u{b91}'), ('\u{b96}', '\u{b98}'), ('\u{b9b}', '\u{b9b}'),
-        ('\u{b9d}', '\u{b9d}'), ('\u{ba0}', '\u{ba2}'), ('\u{ba5}', '\u{ba7}'),
-        ('\u{bab}', '\u{bad}'), ('\u{bba}', '\u{bbd}'), ('\u{bc3}', '\u{bc5}'),
-        ('\u{bc9}', '\u{bc9}'), ('\u{bce}', '\u{bcf}'), ('\u{bd1}', '\u{bd6}'),
-        ('\u{bd8}', '\u{be5}'), ('\u{bfb}', '\u{bff}'), ('\u{c04}', '\u{c04}'),
-        ('\u{c0d}', '\u{c0d}'), ('\u{c11}', '\u{c11}'), ('\u{c29}', '\u{c29}'),
-        ('\u{c3a}', '\u{c3c}'), ('\u{c45}', '\u{c45}'), ('\u{c49}', '\u{c49}'),
-        ('\u{c4e}', '\u{c54}'), ('\u{c57}', '\u{c57}'), ('\u{c5b}', '\u{c5f}'),
-        ('\u{c64}', '\u{c65}'), ('\u{c70}', '\u{c77}'), ('\u{c80}', '\u{c80}'),
-        ('\u{c84}', '\u{c84}'), ('\u{c8d}', '\u{c8d}'), ('\u{c91}', '\u{c91}'),
-        ('\u{ca9}', '\u{ca9}'), ('\u{cb4}', '\u{cb4}'), ('\u{cba}', '\u{cbb}'),
-        ('\u{cc5}', '\u{cc5}'), ('\u{cc9}', '\u{cc9}'), ('\u{cce}', '\u{cd4}'),
-        ('\u{cd7}', '\u{cdd}'), ('\u{cdf}', '\u{cdf}'), ('\u{ce4}', '\u{ce5}'),
-        ('\u{cf0}', '\u{cf0}'), ('\u{cf3}', '\u{d00}'), ('\u{d04}', '\u{d04}'),
-        ('\u{d0d}', '\u{d0d}'), ('\u{d11}', '\u{d11}'), ('\u{d3b}', '\u{d3c}'),
-        ('\u{d45}', '\u{d45}'), ('\u{d49}', '\u{d49}'), ('\u{d4f}', '\u{d56}'),
-        ('\u{d58}', '\u{d5e}'), ('\u{d64}', '\u{d65}'), ('\u{d76}', '\u{d78}'),
-        ('\u{d80}', '\u{d81}'), ('\u{d84}', '\u{d84}'), ('\u{d97}', '\u{d99}'),
-        ('\u{db2}', '\u{db2}'), ('\u{dbc}', '\u{dbc}'), ('\u{dbe}', '\u{dbf}'),
-        ('\u{dc7}', '\u{dc9}'), ('\u{dcb}', '\u{dce}'), ('\u{dd5}', '\u{dd5}'),
-        ('\u{dd7}', '\u{dd7}'), ('\u{de0}', '\u{de5}'), ('\u{df0}', '\u{df1}'),
-        ('\u{df5}', '\u{e00}'), ('\u{e3b}', '\u{e3e}'), ('\u{e5c}', '\u{e80}'),
-        ('\u{e83}', '\u{e83}'), ('\u{e85}', '\u{e86}'), ('\u{e89}', '\u{e89}'),
-        ('\u{e8b}', '\u{e8c}'), ('\u{e8e}', '\u{e93}'), ('\u{e98}', '\u{e98}'),
-        ('\u{ea0}', '\u{ea0}'), ('\u{ea4}', '\u{ea4}'), ('\u{ea6}', '\u{ea6}'),
-        ('\u{ea8}', '\u{ea9}'), ('\u{eac}', '\u{eac}'), ('\u{eba}', '\u{eba}'),
-        ('\u{ebe}', '\u{ebf}'), ('\u{ec5}', '\u{ec5}'), ('\u{ec7}', '\u{ec7}'),
-        ('\u{ece}', '\u{ecf}'), ('\u{eda}', '\u{edb}'), ('\u{ee0}', '\u{eff}'),
-        ('\u{f48}', '\u{f48}'), ('\u{f6d}', '\u{f70}'), ('\u{f98}', '\u{f98}'),
-        ('\u{fbd}', '\u{fbd}'), ('\u{fcd}', '\u{fcd}'), ('\u{fdb}', '\u{fff}'),
-        ('\u{10c6}', '\u{10c6}'), ('\u{10c8}', '\u{10cc}'), ('\u{10ce}',
-        '\u{10cf}'), ('\u{1249}', '\u{1249}'), ('\u{124e}', '\u{124f}'),
-        ('\u{1257}', '\u{1257}'), ('\u{1259}', '\u{1259}'), ('\u{125e}',
-        '\u{125f}'), ('\u{1289}', '\u{1289}'), ('\u{128e}', '\u{128f}'),
-        ('\u{12b1}', '\u{12b1}'), ('\u{12b6}', '\u{12b7}'), ('\u{12bf}',
-        '\u{12bf}'), ('\u{12c1}', '\u{12c1}'), ('\u{12c6}', '\u{12c7}'),
-        ('\u{12d7}', '\u{12d7}'), ('\u{1311}', '\u{1311}'), ('\u{1316}',
-        '\u{1317}'), ('\u{135b}', '\u{135c}'), ('\u{137d}', '\u{137f}'),
-        ('\u{139a}', '\u{139f}'), ('\u{13f6}', '\u{13f7}'), ('\u{13fe}',
-        '\u{13ff}'), ('\u{169d}', '\u{169f}'), ('\u{16f9}', '\u{16ff}'),
-        ('\u{170d}', '\u{170d}'), ('\u{1715}', '\u{171f}'), ('\u{1737}',
-        '\u{173f}'), ('\u{1754}', '\u{175f}'), ('\u{176d}', '\u{176d}'),
-        ('\u{1771}', '\u{1771}'), ('\u{1774}', '\u{177f}'), ('\u{17de}',
-        '\u{17df}'), ('\u{17ea}', '\u{17ef}'), ('\u{17fa}', '\u{17ff}'),
-        ('\u{180f}', '\u{180f}'), ('\u{181a}', '\u{181f}'), ('\u{1878}',
-        '\u{187f}'), ('\u{18ab}', '\u{18af}'), ('\u{18f6}', '\u{18ff}'),
-        ('\u{191f}', '\u{191f}'), ('\u{192c}', '\u{192f}'), ('\u{193c}',
-        '\u{193f}'), ('\u{1941}', '\u{1943}'), ('\u{196e}', '\u{196f}'),
-        ('\u{1975}', '\u{197f}'), ('\u{19ac}', '\u{19af}'), ('\u{19ca}',
-        '\u{19cf}'), ('\u{19db}', '\u{19dd}'), ('\u{1a1c}', '\u{1a1d}'),
-        ('\u{1a5f}', '\u{1a5f}'), ('\u{1a7d}', '\u{1a7e}'), ('\u{1a8a}',
-        '\u{1a8f}'), ('\u{1a9a}', '\u{1a9f}'), ('\u{1aae}', '\u{1aaf}'),
-        ('\u{1abf}', '\u{1aff}'), ('\u{1b4c}', '\u{1b4f}'), ('\u{1b7d}',
-        '\u{1b7f}'), ('\u{1bf4}', '\u{1bfb}'), ('\u{1c38}', '\u{1c3a}'),
-        ('\u{1c4a}', '\u{1c4c}'), ('\u{1c80}', '\u{1cbf}'), ('\u{1cc8}',
-        '\u{1ccf}'), ('\u{1cf7}', '\u{1cf7}'), ('\u{1cfa}', '\u{1cff}'),
-        ('\u{1df6}', '\u{1dfb}'), ('\u{1f16}', '\u{1f17}'), ('\u{1f1e}',
-        '\u{1f1f}'), ('\u{1f46}', '\u{1f47}'), ('\u{1f4e}', '\u{1f4f}'),
-        ('\u{1f58}', '\u{1f58}'), ('\u{1f5a}', '\u{1f5a}'), ('\u{1f5c}',
-        '\u{1f5c}'), ('\u{1f5e}', '\u{1f5e}'), ('\u{1f7e}', '\u{1f7f}'),
-        ('\u{1fb5}', '\u{1fb5}'), ('\u{1fc5}', '\u{1fc5}'), ('\u{1fd4}',
-        '\u{1fd5}'), ('\u{1fdc}', '\u{1fdc}'), ('\u{1ff0}', '\u{1ff1}'),
-        ('\u{1ff5}', '\u{1ff5}'), ('\u{1fff}', '\u{1fff}'), ('\u{2065}',
-        '\u{2065}'), ('\u{2072}', '\u{2073}'), ('\u{208f}', '\u{208f}'),
-        ('\u{209d}', '\u{209f}'), ('\u{20bf}', '\u{20cf}'), ('\u{20f1}',
-        '\u{20ff}'), ('\u{218c}', '\u{218f}'), ('\u{23fb}', '\u{23ff}'),
-        ('\u{2427}', '\u{243f}'), ('\u{244b}', '\u{245f}'), ('\u{2b74}',
-        '\u{2b75}'), ('\u{2b96}', '\u{2b97}'), ('\u{2bba}', '\u{2bbc}'),
-        ('\u{2bc9}', '\u{2bc9}'), ('\u{2bd2}', '\u{2beb}'), ('\u{2bf0}',
-        '\u{2bff}'), ('\u{2c2f}', '\u{2c2f}'), ('\u{2c5f}', '\u{2c5f}'),
-        ('\u{2cf4}', '\u{2cf8}'), ('\u{2d26}', '\u{2d26}'), ('\u{2d28}',
-        '\u{2d2c}'), ('\u{2d2e}', '\u{2d2f}'), ('\u{2d68}', '\u{2d6e}'),
-        ('\u{2d71}', '\u{2d7e}'), ('\u{2d97}', '\u{2d9f}'), ('\u{2da7}',
-        '\u{2da7}'), ('\u{2daf}', '\u{2daf}'), ('\u{2db7}', '\u{2db7}'),
-        ('\u{2dbf}', '\u{2dbf}'), ('\u{2dc7}', '\u{2dc7}'), ('\u{2dcf}',
-        '\u{2dcf}'), ('\u{2dd7}', '\u{2dd7}'), ('\u{2ddf}', '\u{2ddf}'),
-        ('\u{2e43}', '\u{2e7f}'), ('\u{2e9a}', '\u{2e9a}'), ('\u{2ef4}',
-        '\u{2eff}'), ('\u{2fd6}', '\u{2fef}'), ('\u{2ffc}', '\u{2fff}'),
-        ('\u{3040}', '\u{3040}'), ('\u{3097}', '\u{3098}'), ('\u{3100}',
-        '\u{3104}'), ('\u{312e}', '\u{3130}'), ('\u{318f}', '\u{318f}'),
-        ('\u{31bb}', '\u{31bf}'), ('\u{31e4}', '\u{31ef}'), ('\u{321f}',
-        '\u{321f}'), ('\u{32ff}', '\u{32ff}'), ('\u{4db6}', '\u{4dbf}'),
-        ('\u{9fd6}', '\u{9fff}'), ('\u{a48d}', '\u{a48f}'), ('\u{a4c7}',
-        '\u{a4cf}'), ('\u{a62c}', '\u{a63f}'), ('\u{a6f8}', '\u{a6ff}'),
-        ('\u{a7ae}', '\u{a7af}'), ('\u{a7b8}', '\u{a7f6}'), ('\u{a82c}',
-        '\u{a82f}'), ('\u{a83a}', '\u{a83f}'), ('\u{a878}', '\u{a87f}'),
-        ('\u{a8c5}', '\u{a8cd}'), ('\u{a8da}', '\u{a8df}'), ('\u{a8fe}',
-        '\u{a8ff}'), ('\u{a954}', '\u{a95e}'), ('\u{a97d}', '\u{a97f}'),
-        ('\u{a9ce}', '\u{a9ce}'), ('\u{a9da}', '\u{a9dd}'), ('\u{a9ff}',
-        '\u{a9ff}'), ('\u{aa37}', '\u{aa3f}'), ('\u{aa4e}', '\u{aa4f}'),
-        ('\u{aa5a}', '\u{aa5b}'), ('\u{aac3}', '\u{aada}'), ('\u{aaf7}',
-        '\u{ab00}'), ('\u{ab07}', '\u{ab08}'), ('\u{ab0f}', '\u{ab10}'),
-        ('\u{ab17}', '\u{ab1f}'), ('\u{ab27}', '\u{ab27}'), ('\u{ab2f}',
-        '\u{ab2f}'), ('\u{ab66}', '\u{ab6f}'), ('\u{abee}', '\u{abef}'),
-        ('\u{abfa}', '\u{abff}'), ('\u{d7a4}', '\u{d7af}'), ('\u{d7c7}',
-        '\u{d7ca}'), ('\u{d7fc}', '\u{d7ff}'), ('\u{fa6e}', '\u{fa6f}'),
-        ('\u{fada}', '\u{faff}'), ('\u{fb07}', '\u{fb12}'), ('\u{fb18}',
-        '\u{fb1c}'), ('\u{fb37}', '\u{fb37}'), ('\u{fb3d}', '\u{fb3d}'),
-        ('\u{fb3f}', '\u{fb3f}'), ('\u{fb42}', '\u{fb42}'), ('\u{fb45}',
-        '\u{fb45}'), ('\u{fbc2}', '\u{fbd2}'), ('\u{fd40}', '\u{fd4f}'),
-        ('\u{fd90}', '\u{fd91}'), ('\u{fdc8}', '\u{fdef}'), ('\u{fdfe}',
-        '\u{fdff}'), ('\u{fe1a}', '\u{fe1f}'), ('\u{fe53}', '\u{fe53}'),
-        ('\u{fe67}', '\u{fe67}'), ('\u{fe6c}', '\u{fe6f}'), ('\u{fe75}',
-        '\u{fe75}'), ('\u{fefd}', '\u{fefe}'), ('\u{ff00}', '\u{ff00}'),
-        ('\u{ffbf}', '\u{ffc1}'), ('\u{ffc8}', '\u{ffc9}'), ('\u{ffd0}',
-        '\u{ffd1}'), ('\u{ffd8}', '\u{ffd9}'), ('\u{ffdd}', '\u{ffdf}'),
-        ('\u{ffe7}', '\u{ffe7}'), ('\u{ffef}', '\u{fff8}'), ('\u{fffe}',
-        '\u{ffff}'), ('\u{1000c}', '\u{1000c}'), ('\u{10027}', '\u{10027}'),
-        ('\u{1003b}', '\u{1003b}'), ('\u{1003e}', '\u{1003e}'), ('\u{1004e}',
-        '\u{1004f}'), ('\u{1005e}', '\u{1007f}'), ('\u{100fb}', '\u{100ff}'),
-        ('\u{10103}', '\u{10106}'), ('\u{10134}', '\u{10136}'), ('\u{1018d}',
-        '\u{1018f}'), ('\u{1019c}', '\u{1019f}'), ('\u{101a1}', '\u{101cf}'),
-        ('\u{101fe}', '\u{1027f}'), ('\u{1029d}', '\u{1029f}'), ('\u{102d1}',
-        '\u{102df}'), ('\u{102fc}', '\u{102ff}'), ('\u{10324}', '\u{1032f}'),
-        ('\u{1034b}', '\u{1034f}'), ('\u{1037b}', '\u{1037f}'), ('\u{1039e}',
-        '\u{1039e}'), ('\u{103c4}', '\u{103c7}'), ('\u{103d6}', '\u{103ff}'),
-        ('\u{1049e}', '\u{1049f}'), ('\u{104aa}', '\u{104ff}'), ('\u{10528}',
-        '\u{1052f}'), ('\u{10564}', '\u{1056e}'), ('\u{10570}', '\u{105ff}'),
-        ('\u{10737}', '\u{1073f}'), ('\u{10756}', '\u{1075f}'), ('\u{10768}',
-        '\u{107ff}'), ('\u{10806}', '\u{10807}'), ('\u{10809}', '\u{10809}'),
-        ('\u{10836}', '\u{10836}'), ('\u{10839}', '\u{1083b}'), ('\u{1083d}',
-        '\u{1083e}'), ('\u{10856}', '\u{10856}'), ('\u{1089f}', '\u{108a6}'),
-        ('\u{108b0}', '\u{108df}'), ('\u{108f3}', '\u{108f3}'), ('\u{108f6}',
-        '\u{108fa}'), ('\u{1091c}', '\u{1091e}'), ('\u{1093a}', '\u{1093e}'),
-        ('\u{10940}', '\u{1097f}'), ('\u{109b8}', '\u{109bb}'), ('\u{109d0}',
-        '\u{109d1}'), ('\u{10a04}', '\u{10a04}'), ('\u{10a07}', '\u{10a0b}'),
-        ('\u{10a14}', '\u{10a14}'), ('\u{10a18}', '\u{10a18}'), ('\u{10a34}',
-        '\u{10a37}'), ('\u{10a3b}', '\u{10a3e}'), ('\u{10a48}', '\u{10a4f}'),
-        ('\u{10a59}', '\u{10a5f}'), ('\u{10aa0}', '\u{10abf}'), ('\u{10ae7}',
-        '\u{10aea}'), ('\u{10af7}', '\u{10aff}'), ('\u{10b36}', '\u{10b38}'),
-        ('\u{10b56}', '\u{10b57}'), ('\u{10b73}', '\u{10b77}'), ('\u{10b92}',
-        '\u{10b98}'), ('\u{10b9d}', '\u{10ba8}'), ('\u{10bb0}', '\u{10bff}'),
-        ('\u{10c49}', '\u{10c7f}'), ('\u{10cb3}', '\u{10cbf}'), ('\u{10cf3}',
-        '\u{10cf9}'), ('\u{10d00}', '\u{10e5f}'), ('\u{10e7f}', '\u{10fff}'),
-        ('\u{1104e}', '\u{11051}'), ('\u{11070}', '\u{1107e}'), ('\u{110c2}',
-        '\u{110cf}'), ('\u{110e9}', '\u{110ef}'), ('\u{110fa}', '\u{110ff}'),
-        ('\u{11135}', '\u{11135}'), ('\u{11144}', '\u{1114f}'), ('\u{11177}',
-        '\u{1117f}'), ('\u{111ce}', '\u{111cf}'), ('\u{111e0}', '\u{111e0}'),
-        ('\u{111f5}', '\u{111ff}'), ('\u{11212}', '\u{11212}'), ('\u{1123e}',
-        '\u{1127f}'), ('\u{11287}', '\u{11287}'), ('\u{11289}', '\u{11289}'),
-        ('\u{1128e}', '\u{1128e}'), ('\u{1129e}', '\u{1129e}'), ('\u{112aa}',
-        '\u{112af}'), ('\u{112eb}', '\u{112ef}'), ('\u{112fa}', '\u{112ff}'),
-        ('\u{11304}', '\u{11304}'), ('\u{1130d}', '\u{1130e}'), ('\u{11311}',
-        '\u{11312}'), ('\u{11329}', '\u{11329}'), ('\u{11331}', '\u{11331}'),
-        ('\u{11334}', '\u{11334}'), ('\u{1133a}', '\u{1133b}'), ('\u{11345}',
-        '\u{11346}'), ('\u{11349}', '\u{1134a}'), ('\u{1134e}', '\u{1134f}'),
-        ('\u{11351}', '\u{11356}'), ('\u{11358}', '\u{1135c}'), ('\u{11364}',
-        '\u{11365}'), ('\u{1136d}', '\u{1136f}'), ('\u{11375}', '\u{1147f}'),
-        ('\u{114c8}', '\u{114cf}'), ('\u{114da}', '\u{1157f}'), ('\u{115b6}',
-        '\u{115b7}'), ('\u{115de}', '\u{115ff}'), ('\u{11645}', '\u{1164f}'),
-        ('\u{1165a}', '\u{1167f}'), ('\u{116b8}', '\u{116bf}'), ('\u{116ca}',
-        '\u{116ff}'), ('\u{1171a}', '\u{1171c}'), ('\u{1172c}', '\u{1172f}'),
-        ('\u{11740}', '\u{1189f}'), ('\u{118f3}', '\u{118fe}'), ('\u{11900}',
-        '\u{11abf}'), ('\u{11af9}', '\u{11fff}'), ('\u{1239a}', '\u{123ff}'),
-        ('\u{1246f}', '\u{1246f}'), ('\u{12475}', '\u{1247f}'), ('\u{12544}',
-        '\u{12fff}'), ('\u{1342f}', '\u{143ff}'), ('\u{14647}', '\u{167ff}'),
-        ('\u{16a39}', '\u{16a3f}'), ('\u{16a5f}', '\u{16a5f}'), ('\u{16a6a}',
-        '\u{16a6d}'), ('\u{16a70}', '\u{16acf}'), ('\u{16aee}', '\u{16aef}'),
-        ('\u{16af6}', '\u{16aff}'), ('\u{16b46}', '\u{16b4f}'), ('\u{16b5a}',
-        '\u{16b5a}'), ('\u{16b62}', '\u{16b62}'), ('\u{16b78}', '\u{16b7c}'),
-        ('\u{16b90}', '\u{16eff}'), ('\u{16f45}', '\u{16f4f}'), ('\u{16f7f}',
-        '\u{16f8e}'), ('\u{16fa0}', '\u{1afff}'), ('\u{1b002}', '\u{1bbff}'),
-        ('\u{1bc6b}', '\u{1bc6f}'), ('\u{1bc7d}', '\u{1bc7f}'), ('\u{1bc89}',
-        '\u{1bc8f}'), ('\u{1bc9a}', '\u{1bc9b}'), ('\u{1bca4}', '\u{1cfff}'),
-        ('\u{1d0f6}', '\u{1d0ff}'), ('\u{1d127}', '\u{1d128}'), ('\u{1d1e9}',
-        '\u{1d1ff}'), ('\u{1d246}', '\u{1d2ff}'), ('\u{1d357}', '\u{1d35f}'),
-        ('\u{1d372}', '\u{1d3ff}'), ('\u{1d455}', '\u{1d455}'), ('\u{1d49d}',
-        '\u{1d49d}'), ('\u{1d4a0}', '\u{1d4a1}'), ('\u{1d4a3}', '\u{1d4a4}'),
-        ('\u{1d4a7}', '\u{1d4a8}'), ('\u{1d4ad}', '\u{1d4ad}'), ('\u{1d4ba}',
-        '\u{1d4ba}'), ('\u{1d4bc}', '\u{1d4bc}'), ('\u{1d4c4}', '\u{1d4c4}'),
-        ('\u{1d506}', '\u{1d506}'), ('\u{1d50b}', '\u{1d50c}'), ('\u{1d515}',
-        '\u{1d515}'), ('\u{1d51d}', '\u{1d51d}'), ('\u{1d53a}', '\u{1d53a}'),
-        ('\u{1d53f}', '\u{1d53f}'), ('\u{1d545}', '\u{1d545}'), ('\u{1d547}',
-        '\u{1d549}'), ('\u{1d551}', '\u{1d551}'), ('\u{1d6a6}', '\u{1d6a7}'),
-        ('\u{1d7cc}', '\u{1d7cd}'), ('\u{1da8c}', '\u{1da9a}'), ('\u{1daa0}',
-        '\u{1daa0}'), ('\u{1dab0}', '\u{1e7ff}'), ('\u{1e8c5}', '\u{1e8c6}'),
-        ('\u{1e8d7}', '\u{1edff}'), ('\u{1ee04}', '\u{1ee04}'), ('\u{1ee20}',
-        '\u{1ee20}'), ('\u{1ee23}', '\u{1ee23}'), ('\u{1ee25}', '\u{1ee26}'),
-        ('\u{1ee28}', '\u{1ee28}'), ('\u{1ee33}', '\u{1ee33}'), ('\u{1ee38}',
-        '\u{1ee38}'), ('\u{1ee3a}', '\u{1ee3a}'), ('\u{1ee3c}', '\u{1ee41}'),
-        ('\u{1ee43}', '\u{1ee46}'), ('\u{1ee48}', '\u{1ee48}'), ('\u{1ee4a}',
-        '\u{1ee4a}'), ('\u{1ee4c}', '\u{1ee4c}'), ('\u{1ee50}', '\u{1ee50}'),
-        ('\u{1ee53}', '\u{1ee53}'), ('\u{1ee55}', '\u{1ee56}'), ('\u{1ee58}',
-        '\u{1ee58}'), ('\u{1ee5a}', '\u{1ee5a}'), ('\u{1ee5c}', '\u{1ee5c}'),
-        ('\u{1ee5e}', '\u{1ee5e}'), ('\u{1ee60}', '\u{1ee60}'), ('\u{1ee63}',
-        '\u{1ee63}'), ('\u{1ee65}', '\u{1ee66}'), ('\u{1ee6b}', '\u{1ee6b}'),
-        ('\u{1ee73}', '\u{1ee73}'), ('\u{1ee78}', '\u{1ee78}'), ('\u{1ee7d}',
-        '\u{1ee7d}'), ('\u{1ee7f}', '\u{1ee7f}'), ('\u{1ee8a}', '\u{1ee8a}'),
-        ('\u{1ee9c}', '\u{1eea0}'), ('\u{1eea4}', '\u{1eea4}'), ('\u{1eeaa}',
-        '\u{1eeaa}'), ('\u{1eebc}', '\u{1eeef}'), ('\u{1eef2}', '\u{1efff}'),
-        ('\u{1f02c}', '\u{1f02f}'), ('\u{1f094}', '\u{1f09f}'), ('\u{1f0af}',
-        '\u{1f0b0}'), ('\u{1f0c0}', '\u{1f0c0}'), ('\u{1f0d0}', '\u{1f0d0}'),
-        ('\u{1f0f6}', '\u{1f0ff}'), ('\u{1f10d}', '\u{1f10f}'), ('\u{1f12f}',
-        '\u{1f12f}'), ('\u{1f16c}', '\u{1f16f}'), ('\u{1f19b}', '\u{1f1e5}'),
-        ('\u{1f203}', '\u{1f20f}'), ('\u{1f23b}', '\u{1f23f}'), ('\u{1f249}',
-        '\u{1f24f}'), ('\u{1f252}', '\u{1f2ff}'), ('\u{1f57a}', '\u{1f57a}'),
-        ('\u{1f5a4}', '\u{1f5a4}'), ('\u{1f6d1}', '\u{1f6df}'), ('\u{1f6ed}',
-        '\u{1f6ef}'), ('\u{1f6f4}', '\u{1f6ff}'), ('\u{1f774}', '\u{1f77f}'),
-        ('\u{1f7d5}', '\u{1f7ff}'), ('\u{1f80c}', '\u{1f80f}'), ('\u{1f848}',
-        '\u{1f84f}'), ('\u{1f85a}', '\u{1f85f}'), ('\u{1f888}', '\u{1f88f}'),
-        ('\u{1f8ae}', '\u{1f90f}'), ('\u{1f919}', '\u{1f97f}'), ('\u{1f985}',
-        '\u{1f9bf}'), ('\u{1f9c1}', '\u{1ffff}'), ('\u{2a6d7}', '\u{2a6ff}'),
-        ('\u{2b735}', '\u{2b73f}'), ('\u{2b81e}', '\u{2b81f}'), ('\u{2cea2}',
-        '\u{2f7ff}'), ('\u{2fa1e}', '\u{e0000}'), ('\u{e0002}', '\u{e001f}'),
-        ('\u{e0080}', '\u{e00ff}'), ('\u{e01f0}', '\u{effff}'), ('\u{ffffe}',
-        '\u{fffff}'), ('\u{10fffe}', '\u{10ffff}')
-    ];
+/// An iterator over a codepoint's simple case equivalence class.
+#[derive(Debug)]
+pub struct SimpleFoldIter(::std::slice::Iter<'static, char>);
 
-    pub const Co_table: &'static [(char, char)] = &[
-        ('\u{e000}', '\u{f8ff}'), ('\u{f0000}', '\u{ffffd}'), ('\u{100000}',
-        '\u{10fffd}')
-    ];
+impl Iterator for SimpleFoldIter {
+    type Item = char;
+
+    fn next(&mut self) -> Option<char> {
+        self.0.next().map(|c| *c)
+    }
+}
 
-    pub const L_table: &'static [(char, char)] = &[
-        ('\u{41}', '\u{5a}'), ('\u{61}', '\u{7a}'), ('\u{aa}', '\u{aa}'),
-        ('\u{b5}', '\u{b5}'), ('\u{ba}', '\u{ba}'), ('\u{c0}', '\u{d6}'),
-        ('\u{d8}', '\u{f6}'), ('\u{f8}', '\u{2c1}'), ('\u{2c6}', '\u{2d1}'),
-        ('\u{2e0}', '\u{2e4}'), ('\u{2ec}', '\u{2ec}'), ('\u{2ee}', '\u{2ee}'),
-        ('\u{370}', '\u{374}'), ('\u{376}', '\u{377}'), ('\u{37a}', '\u{37d}'),
-        ('\u{37f}', '\u{37f}'), ('\u{386}', '\u{386}'), ('\u{388}', '\u{38a}'),
-        ('\u{38c}', '\u{38c}'), ('\u{38e}', '\u{3a1}'), ('\u{3a3}', '\u{3f5}'),
-        ('\u{3f7}', '\u{481}'), ('\u{48a}', '\u{52f}'), ('\u{531}', '\u{556}'),
-        ('\u{559}', '\u{559}'), ('\u{561}', '\u{587}'), ('\u{5d0}', '\u{5ea}'),
-        ('\u{5f0}', '\u{5f2}'), ('\u{620}', '\u{64a}'), ('\u{66e}', '\u{66f}'),
-        ('\u{671}', '\u{6d3}'), ('\u{6d5}', '\u{6d5}'), ('\u{6e5}', '\u{6e6}'),
-        ('\u{6ee}', '\u{6ef}'), ('\u{6fa}', '\u{6fc}'), ('\u{6ff}', '\u{6ff}'),
-        ('\u{710}', '\u{710}'), ('\u{712}', '\u{72f}'), ('\u{74d}', '\u{7a5}'),
-        ('\u{7b1}', '\u{7b1}'), ('\u{7ca}', '\u{7ea}'), ('\u{7f4}', '\u{7f5}'),
-        ('\u{7fa}', '\u{7fa}'), ('\u{800}', '\u{815}'), ('\u{81a}', '\u{81a}'),
-        ('\u{824}', '\u{824}'), ('\u{828}', '\u{828}'), ('\u{840}', '\u{858}'),
-        ('\u{8a0}', '\u{8b4}'), ('\u{904}', '\u{939}'), ('\u{93d}', '\u{93d}'),
-        ('\u{950}', '\u{950}'), ('\u{958}', '\u{961}'), ('\u{971}', '\u{980}'),
-        ('\u{985}', '\u{98c}'), ('\u{98f}', '\u{990}'), ('\u{993}', '\u{9a8}'),
-        ('\u{9aa}', '\u{9b0}'), ('\u{9b2}', '\u{9b2}'), ('\u{9b6}', '\u{9b9}'),
-        ('\u{9bd}', '\u{9bd}'), ('\u{9ce}', '\u{9ce}'), ('\u{9dc}', '\u{9dd}'),
-        ('\u{9df}', '\u{9e1}'), ('\u{9f0}', '\u{9f1}'), ('\u{a05}', '\u{a0a}'),
-        ('\u{a0f}', '\u{a10}'), ('\u{a13}', '\u{a28}'), ('\u{a2a}', '\u{a30}'),
-        ('\u{a32}', '\u{a33}'), ('\u{a35}', '\u{a36}'), ('\u{a38}', '\u{a39}'),
-        ('\u{a59}', '\u{a5c}'), ('\u{a5e}', '\u{a5e}'), ('\u{a72}', '\u{a74}'),
-        ('\u{a85}', '\u{a8d}'), ('\u{a8f}', '\u{a91}'), ('\u{a93}', '\u{aa8}'),
-        ('\u{aaa}', '\u{ab0}'), ('\u{ab2}', '\u{ab3}'), ('\u{ab5}', '\u{ab9}'),
-        ('\u{abd}', '\u{abd}'), ('\u{ad0}', '\u{ad0}'), ('\u{ae0}', '\u{ae1}'),
-        ('\u{af9}', '\u{af9}'), ('\u{b05}', '\u{b0c}'), ('\u{b0f}', '\u{b10}'),
-        ('\u{b13}', '\u{b28}'), ('\u{b2a}', '\u{b30}'), ('\u{b32}', '\u{b33}'),
-        ('\u{b35}', '\u{b39}'), ('\u{b3d}', '\u{b3d}'), ('\u{b5c}', '\u{b5d}'),
-        ('\u{b5f}', '\u{b61}'), ('\u{b71}', '\u{b71}'), ('\u{b83}', '\u{b83}'),
-        ('\u{b85}', '\u{b8a}'), ('\u{b8e}', '\u{b90}'), ('\u{b92}', '\u{b95}'),
-        ('\u{b99}', '\u{b9a}'), ('\u{b9c}', '\u{b9c}'), ('\u{b9e}', '\u{b9f}'),
-        ('\u{ba3}', '\u{ba4}'), ('\u{ba8}', '\u{baa}'), ('\u{bae}', '\u{bb9}'),
-        ('\u{bd0}', '\u{bd0}'), ('\u{c05}', '\u{c0c}'), ('\u{c0e}', '\u{c10}'),
-        ('\u{c12}', '\u{c28}'), ('\u{c2a}', '\u{c39}'), ('\u{c3d}', '\u{c3d}'),
-        ('\u{c58}', '\u{c5a}'), ('\u{c60}', '\u{c61}'), ('\u{c85}', '\u{c8c}'),
-        ('\u{c8e}', '\u{c90}'), ('\u{c92}', '\u{ca8}'), ('\u{caa}', '\u{cb3}'),
-        ('\u{cb5}', '\u{cb9}'), ('\u{cbd}', '\u{cbd}'), ('\u{cde}', '\u{cde}'),
-        ('\u{ce0}', '\u{ce1}'), ('\u{cf1}', '\u{cf2}'), ('\u{d05}', '\u{d0c}'),
-        ('\u{d0e}', '\u{d10}'), ('\u{d12}', '\u{d3a}'), ('\u{d3d}', '\u{d3d}'),
-        ('\u{d4e}', '\u{d4e}'), ('\u{d5f}', '\u{d61}'), ('\u{d7a}', '\u{d7f}'),
-        ('\u{d85}', '\u{d96}'), ('\u{d9a}', '\u{db1}'), ('\u{db3}', '\u{dbb}'),
-        ('\u{dbd}', '\u{dbd}'), ('\u{dc0}', '\u{dc6}'), ('\u{e01}', '\u{e30}'),
-        ('\u{e32}', '\u{e33}'), ('\u{e40}', '\u{e46}'), ('\u{e81}', '\u{e82}'),
-        ('\u{e84}', '\u{e84}'), ('\u{e87}', '\u{e88}'), ('\u{e8a}', '\u{e8a}'),
-        ('\u{e8d}', '\u{e8d}'), ('\u{e94}', '\u{e97}'), ('\u{e99}', '\u{e9f}'),
-        ('\u{ea1}', '\u{ea3}'), ('\u{ea5}', '\u{ea5}'), ('\u{ea7}', '\u{ea7}'),
-        ('\u{eaa}', '\u{eab}'), ('\u{ead}', '\u{eb0}'), ('\u{eb2}', '\u{eb3}'),
-        ('\u{ebd}', '\u{ebd}'), ('\u{ec0}', '\u{ec4}'), ('\u{ec6}', '\u{ec6}'),
-        ('\u{edc}', '\u{edf}'), ('\u{f00}', '\u{f00}'), ('\u{f40}', '\u{f47}'),
-        ('\u{f49}', '\u{f6c}'), ('\u{f88}', '\u{f8c}'), ('\u{1000}',
-        '\u{102a}'), ('\u{103f}', '\u{103f}'), ('\u{1050}', '\u{1055}'),
-        ('\u{105a}', '\u{105d}'), ('\u{1061}', '\u{1061}'), ('\u{1065}',
-        '\u{1066}'), ('\u{106e}', '\u{1070}'), ('\u{1075}', '\u{1081}'),
-        ('\u{108e}', '\u{108e}'), ('\u{10a0}', '\u{10c5}'), ('\u{10c7}',
-        '\u{10c7}'), ('\u{10cd}', '\u{10cd}'), ('\u{10d0}', '\u{10fa}'),
-        ('\u{10fc}', '\u{1248}'), ('\u{124a}', '\u{124d}'), ('\u{1250}',
-        '\u{1256}'), ('\u{1258}', '\u{1258}'), ('\u{125a}', '\u{125d}'),
-        ('\u{1260}', '\u{1288}'), ('\u{128a}', '\u{128d}'), ('\u{1290}',
-        '\u{12b0}'), ('\u{12b2}', '\u{12b5}'), ('\u{12b8}', '\u{12be}'),
-        ('\u{12c0}', '\u{12c0}'), ('\u{12c2}', '\u{12c5}'), ('\u{12c8}',
-        '\u{12d6}'), ('\u{12d8}', '\u{1310}'), ('\u{1312}', '\u{1315}'),
-        ('\u{1318}', '\u{135a}'), ('\u{1380}', '\u{138f}'), ('\u{13a0}',
-        '\u{13f5}'), ('\u{13f8}', '\u{13fd}'), ('\u{1401}', '\u{166c}'),
-        ('\u{166f}', '\u{167f}'), ('\u{1681}', '\u{169a}'), ('\u{16a0}',
-        '\u{16ea}'), ('\u{16f1}', '\u{16f8}'), ('\u{1700}', '\u{170c}'),
-        ('\u{170e}', '\u{1711}'), ('\u{1720}', '\u{1731}'), ('\u{1740}',
-        '\u{1751}'), ('\u{1760}', '\u{176c}'), ('\u{176e}', '\u{1770}'),
-        ('\u{1780}', '\u{17b3}'), ('\u{17d7}', '\u{17d7}'), ('\u{17dc}',
-        '\u{17dc}'), ('\u{1820}', '\u{1877}'), ('\u{1880}', '\u{18a8}'),
-        ('\u{18aa}', '\u{18aa}'), ('\u{18b0}', '\u{18f5}'), ('\u{1900}',
-        '\u{191e}'), ('\u{1950}', '\u{196d}'), ('\u{1970}', '\u{1974}'),
-        ('\u{1980}', '\u{19ab}'), ('\u{19b0}', '\u{19c9}'), ('\u{1a00}',
-        '\u{1a16}'), ('\u{1a20}', '\u{1a54}'), ('\u{1aa7}', '\u{1aa7}'),
-        ('\u{1b05}', '\u{1b33}'), ('\u{1b45}', '\u{1b4b}'), ('\u{1b83}',
-        '\u{1ba0}'), ('\u{1bae}', '\u{1baf}'), ('\u{1bba}', '\u{1be5}'),
-        ('\u{1c00}', '\u{1c23}'), ('\u{1c4d}', '\u{1c4f}'), ('\u{1c5a}',
-        '\u{1c7d}'), ('\u{1ce9}', '\u{1cec}'), ('\u{1cee}', '\u{1cf1}'),
-        ('\u{1cf5}', '\u{1cf6}'), ('\u{1d00}', '\u{1dbf}'), ('\u{1e00}',
-        '\u{1f15}'), ('\u{1f18}', '\u{1f1d}'), ('\u{1f20}', '\u{1f45}'),
-        ('\u{1f48}', '\u{1f4d}'), ('\u{1f50}', '\u{1f57}'), ('\u{1f59}',
-        '\u{1f59}'), ('\u{1f5b}', '\u{1f5b}'), ('\u{1f5d}', '\u{1f5d}'),
-        ('\u{1f5f}', '\u{1f7d}'), ('\u{1f80}', '\u{1fb4}'), ('\u{1fb6}',
-        '\u{1fbc}'), ('\u{1fbe}', '\u{1fbe}'), ('\u{1fc2}', '\u{1fc4}'),
-        ('\u{1fc6}', '\u{1fcc}'), ('\u{1fd0}', '\u{1fd3}'), ('\u{1fd6}',
-        '\u{1fdb}'), ('\u{1fe0}', '\u{1fec}'), ('\u{1ff2}', '\u{1ff4}'),
-        ('\u{1ff6}', '\u{1ffc}'), ('\u{2071}', '\u{2071}'), ('\u{207f}',
-        '\u{207f}'), ('\u{2090}', '\u{209c}'), ('\u{2102}', '\u{2102}'),
-        ('\u{2107}', '\u{2107}'), ('\u{210a}', '\u{2113}'), ('\u{2115}',
-        '\u{2115}'), ('\u{2119}', '\u{211d}'), ('\u{2124}', '\u{2124}'),
-        ('\u{2126}', '\u{2126}'), ('\u{2128}', '\u{2128}'), ('\u{212a}',
-        '\u{212d}'), ('\u{212f}', '\u{2139}'), ('\u{213c}', '\u{213f}'),
-        ('\u{2145}', '\u{2149}'), ('\u{214e}', '\u{214e}'), ('\u{2183}',
-        '\u{2184}'), ('\u{2c00}', '\u{2c2e}'), ('\u{2c30}', '\u{2c5e}'),
-        ('\u{2c60}', '\u{2ce4}'), ('\u{2ceb}', '\u{2cee}'), ('\u{2cf2}',
-        '\u{2cf3}'), ('\u{2d00}', '\u{2d25}'), ('\u{2d27}', '\u{2d27}'),
-        ('\u{2d2d}', '\u{2d2d}'), ('\u{2d30}', '\u{2d67}'), ('\u{2d6f}',
-        '\u{2d6f}'), ('\u{2d80}', '\u{2d96}'), ('\u{2da0}', '\u{2da6}'),
-        ('\u{2da8}', '\u{2dae}'), ('\u{2db0}', '\u{2db6}'), ('\u{2db8}',
-        '\u{2dbe}'), ('\u{2dc0}', '\u{2dc6}'), ('\u{2dc8}', '\u{2dce}'),
-        ('\u{2dd0}', '\u{2dd6}'), ('\u{2dd8}', '\u{2dde}'), ('\u{2e2f}',
-        '\u{2e2f}'), ('\u{3005}', '\u{3006}'), ('\u{3031}', '\u{3035}'),
-        ('\u{303b}', '\u{303c}'), ('\u{3041}', '\u{3096}'), ('\u{309d}',
-        '\u{309f}'), ('\u{30a1}', '\u{30fa}'), ('\u{30fc}', '\u{30ff}'),
-        ('\u{3105}', '\u{312d}'), ('\u{3131}', '\u{318e}'), ('\u{31a0}',
-        '\u{31ba}'), ('\u{31f0}', '\u{31ff}'), ('\u{3400}', '\u{4db5}'),
-        ('\u{4e00}', '\u{9fd5}'), ('\u{a000}', '\u{a48c}'), ('\u{a4d0}',
-        '\u{a4fd}'), ('\u{a500}', '\u{a60c}'), ('\u{a610}', '\u{a61f}'),
-        ('\u{a62a}', '\u{a62b}'), ('\u{a640}', '\u{a66e}'), ('\u{a67f}',
-        '\u{a69d}'), ('\u{a6a0}', '\u{a6e5}'), ('\u{a717}', '\u{a71f}'),
-        ('\u{a722}', '\u{a788}'), ('\u{a78b}', '\u{a7ad}'), ('\u{a7b0}',
-        '\u{a7b7}'), ('\u{a7f7}', '\u{a801}'), ('\u{a803}', '\u{a805}'),
-        ('\u{a807}', '\u{a80a}'), ('\u{a80c}', '\u{a822}'), ('\u{a840}',
-        '\u{a873}'), ('\u{a882}', '\u{a8b3}'), ('\u{a8f2}', '\u{a8f7}'),
-        ('\u{a8fb}', '\u{a8fb}'), ('\u{a8fd}', '\u{a8fd}'), ('\u{a90a}',
-        '\u{a925}'), ('\u{a930}', '\u{a946}'), ('\u{a960}', '\u{a97c}'),
-        ('\u{a984}', '\u{a9b2}'), ('\u{a9cf}', '\u{a9cf}'), ('\u{a9e0}',
-        '\u{a9e4}'), ('\u{a9e6}', '\u{a9ef}'), ('\u{a9fa}', '\u{a9fe}'),
-        ('\u{aa00}', '\u{aa28}'), ('\u{aa40}', '\u{aa42}'), ('\u{aa44}',
-        '\u{aa4b}'), ('\u{aa60}', '\u{aa76}'), ('\u{aa7a}', '\u{aa7a}'),
-        ('\u{aa7e}', '\u{aaaf}'), ('\u{aab1}', '\u{aab1}'), ('\u{aab5}',
-        '\u{aab6}'), ('\u{aab9}', '\u{aabd}'), ('\u{aac0}', '\u{aac0}'),
-        ('\u{aac2}', '\u{aac2}'), ('\u{aadb}', '\u{aadd}'), ('\u{aae0}',
-        '\u{aaea}'), ('\u{aaf2}', '\u{aaf4}'), ('\u{ab01}', '\u{ab06}'),
-        ('\u{ab09}', '\u{ab0e}'), ('\u{ab11}', '\u{ab16}'), ('\u{ab20}',
-        '\u{ab26}'), ('\u{ab28}', '\u{ab2e}'), ('\u{ab30}', '\u{ab5a}'),
-        ('\u{ab5c}', '\u{ab65}'), ('\u{ab70}', '\u{abe2}'), ('\u{ac00}',
-        '\u{d7a3}'), ('\u{d7b0}', '\u{d7c6}'), ('\u{d7cb}', '\u{d7fb}'),
-        ('\u{f900}', '\u{fa6d}'), ('\u{fa70}', '\u{fad9}'), ('\u{fb00}',
-        '\u{fb06}'), ('\u{fb13}', '\u{fb17}'), ('\u{fb1d}', '\u{fb1d}'),
-        ('\u{fb1f}', '\u{fb28}'), ('\u{fb2a}', '\u{fb36}'), ('\u{fb38}',
-        '\u{fb3c}'), ('\u{fb3e}', '\u{fb3e}'), ('\u{fb40}', '\u{fb41}'),
-        ('\u{fb43}', '\u{fb44}'), ('\u{fb46}', '\u{fbb1}'), ('\u{fbd3}',
-        '\u{fd3d}'), ('\u{fd50}', '\u{fd8f}'), ('\u{fd92}', '\u{fdc7}'),
-        ('\u{fdf0}', '\u{fdfb}'), ('\u{fe70}', '\u{fe74}'), ('\u{fe76}',
-        '\u{fefc}'), ('\u{ff21}', '\u{ff3a}'), ('\u{ff41}', '\u{ff5a}'),
-        ('\u{ff66}', '\u{ffbe}'), ('\u{ffc2}', '\u{ffc7}'), ('\u{ffca}',
-        '\u{ffcf}'), ('\u{ffd2}', '\u{ffd7}'), ('\u{ffda}', '\u{ffdc}'),
-        ('\u{10000}', '\u{1000b}'), ('\u{1000d}', '\u{10026}'), ('\u{10028}',
-        '\u{1003a}'), ('\u{1003c}', '\u{1003d}'), ('\u{1003f}', '\u{1004d}'),
-        ('\u{10050}', '\u{1005d}'), ('\u{10080}', '\u{100fa}'), ('\u{10280}',
-        '\u{1029c}'), ('\u{102a0}', '\u{102d0}'), ('\u{10300}', '\u{1031f}'),
-        ('\u{10330}', '\u{10340}'), ('\u{10342}', '\u{10349}'), ('\u{10350}',
-        '\u{10375}'), ('\u{10380}', '\u{1039d}'), ('\u{103a0}', '\u{103c3}'),
-        ('\u{103c8}', '\u{103cf}'), ('\u{10400}', '\u{1049d}'), ('\u{10500}',
-        '\u{10527}'), ('\u{10530}', '\u{10563}'), ('\u{10600}', '\u{10736}'),
-        ('\u{10740}', '\u{10755}'), ('\u{10760}', '\u{10767}'), ('\u{10800}',
-        '\u{10805}'), ('\u{10808}', '\u{10808}'), ('\u{1080a}', '\u{10835}'),
-        ('\u{10837}', '\u{10838}'), ('\u{1083c}', '\u{1083c}'), ('\u{1083f}',
-        '\u{10855}'), ('\u{10860}', '\u{10876}'), ('\u{10880}', '\u{1089e}'),
-        ('\u{108e0}', '\u{108f2}'), ('\u{108f4}', '\u{108f5}'), ('\u{10900}',
-        '\u{10915}'), ('\u{10920}', '\u{10939}'), ('\u{10980}', '\u{109b7}'),
-        ('\u{109be}', '\u{109bf}'), ('\u{10a00}', '\u{10a00}'), ('\u{10a10}',
-        '\u{10a13}'), ('\u{10a15}', '\u{10a17}'), ('\u{10a19}', '\u{10a33}'),
-        ('\u{10a60}', '\u{10a7c}'), ('\u{10a80}', '\u{10a9c}'), ('\u{10ac0}',
-        '\u{10ac7}'), ('\u{10ac9}', '\u{10ae4}'), ('\u{10b00}', '\u{10b35}'),
-        ('\u{10b40}', '\u{10b55}'), ('\u{10b60}', '\u{10b72}'), ('\u{10b80}',
-        '\u{10b91}'), ('\u{10c00}', '\u{10c48}'), ('\u{10c80}', '\u{10cb2}'),
-        ('\u{10cc0}', '\u{10cf2}'), ('\u{11003}', '\u{11037}'), ('\u{11083}',
-        '\u{110af}'), ('\u{110d0}', '\u{110e8}'), ('\u{11103}', '\u{11126}'),
-        ('\u{11150}', '\u{11172}'), ('\u{11176}', '\u{11176}'), ('\u{11183}',
-        '\u{111b2}'), ('\u{111c1}', '\u{111c4}'), ('\u{111da}', '\u{111da}'),
-        ('\u{111dc}', '\u{111dc}'), ('\u{11200}', '\u{11211}'), ('\u{11213}',
-        '\u{1122b}'), ('\u{11280}', '\u{11286}'), ('\u{11288}', '\u{11288}'),
-        ('\u{1128a}', '\u{1128d}'), ('\u{1128f}', '\u{1129d}'), ('\u{1129f}',
-        '\u{112a8}'), ('\u{112b0}', '\u{112de}'), ('\u{11305}', '\u{1130c}'),
-        ('\u{1130f}', '\u{11310}'), ('\u{11313}', '\u{11328}'), ('\u{1132a}',
-        '\u{11330}'), ('\u{11332}', '\u{11333}'), ('\u{11335}', '\u{11339}'),
-        ('\u{1133d}', '\u{1133d}'), ('\u{11350}', '\u{11350}'), ('\u{1135d}',
-        '\u{11361}'), ('\u{11480}', '\u{114af}'), ('\u{114c4}', '\u{114c5}'),
-        ('\u{114c7}', '\u{114c7}'), ('\u{11580}', '\u{115ae}'), ('\u{115d8}',
-        '\u{115db}'), ('\u{11600}', '\u{1162f}'), ('\u{11644}', '\u{11644}'),
-        ('\u{11680}', '\u{116aa}'), ('\u{11700}', '\u{11719}'), ('\u{118a0}',
-        '\u{118df}'), ('\u{118ff}', '\u{118ff}'), ('\u{11ac0}', '\u{11af8}'),
-        ('\u{12000}', '\u{12399}'), ('\u{12480}', '\u{12543}'), ('\u{13000}',
-        '\u{1342e}'), ('\u{14400}', '\u{14646}'), ('\u{16800}', '\u{16a38}'),
-        ('\u{16a40}', '\u{16a5e}'), ('\u{16ad0}', '\u{16aed}'), ('\u{16b00}',
-        '\u{16b2f}'), ('\u{16b40}', '\u{16b43}'), ('\u{16b63}', '\u{16b77}'),
-        ('\u{16b7d}', '\u{16b8f}'), ('\u{16f00}', '\u{16f44}'), ('\u{16f50}',
-        '\u{16f50}'), ('\u{16f93}', '\u{16f9f}'), ('\u{1b000}', '\u{1b001}'),
-        ('\u{1bc00}', '\u{1bc6a}'), ('\u{1bc70}', '\u{1bc7c}'), ('\u{1bc80}',
-        '\u{1bc88}'), ('\u{1bc90}', '\u{1bc99}'), ('\u{1d400}', '\u{1d454}'),
-        ('\u{1d456}', '\u{1d49c}'), ('\u{1d49e}', '\u{1d49f}'), ('\u{1d4a2}',
-        '\u{1d4a2}'), ('\u{1d4a5}', '\u{1d4a6}'), ('\u{1d4a9}', '\u{1d4ac}'),
-        ('\u{1d4ae}', '\u{1d4b9}'), ('\u{1d4bb}', '\u{1d4bb}'), ('\u{1d4bd}',
-        '\u{1d4c3}'), ('\u{1d4c5}', '\u{1d505}'), ('\u{1d507}', '\u{1d50a}'),
-        ('\u{1d50d}', '\u{1d514}'), ('\u{1d516}', '\u{1d51c}'), ('\u{1d51e}',
-        '\u{1d539}'), ('\u{1d53b}', '\u{1d53e}'), ('\u{1d540}', '\u{1d544}'),
-        ('\u{1d546}', '\u{1d546}'), ('\u{1d54a}', '\u{1d550}'), ('\u{1d552}',
-        '\u{1d6a5}'), ('\u{1d6a8}', '\u{1d6c0}'), ('\u{1d6c2}', '\u{1d6da}'),
-        ('\u{1d6dc}', '\u{1d6fa}'), ('\u{1d6fc}', '\u{1d714}'), ('\u{1d716}',
-        '\u{1d734}'), ('\u{1d736}', '\u{1d74e}'), ('\u{1d750}', '\u{1d76e}'),
-        ('\u{1d770}', '\u{1d788}'), ('\u{1d78a}', '\u{1d7a8}'), ('\u{1d7aa}',
-        '\u{1d7c2}'), ('\u{1d7c4}', '\u{1d7cb}'), ('\u{1e800}', '\u{1e8c4}'),
-        ('\u{1ee00}', '\u{1ee03}'), ('\u{1ee05}', '\u{1ee1f}'), ('\u{1ee21}',
-        '\u{1ee22}'), ('\u{1ee24}', '\u{1ee24}'), ('\u{1ee27}', '\u{1ee27}'),
-        ('\u{1ee29}', '\u{1ee32}'), ('\u{1ee34}', '\u{1ee37}'), ('\u{1ee39}',
-        '\u{1ee39}'), ('\u{1ee3b}', '\u{1ee3b}'), ('\u{1ee42}', '\u{1ee42}'),
-        ('\u{1ee47}', '\u{1ee47}'), ('\u{1ee49}', '\u{1ee49}'), ('\u{1ee4b}',
-        '\u{1ee4b}'), ('\u{1ee4d}', '\u{1ee4f}'), ('\u{1ee51}', '\u{1ee52}'),
-        ('\u{1ee54}', '\u{1ee54}'), ('\u{1ee57}', '\u{1ee57}'), ('\u{1ee59}',
-        '\u{1ee59}'), ('\u{1ee5b}', '\u{1ee5b}'), ('\u{1ee5d}', '\u{1ee5d}'),
-        ('\u{1ee5f}', '\u{1ee5f}'), ('\u{1ee61}', '\u{1ee62}'), ('\u{1ee64}',
-        '\u{1ee64}'), ('\u{1ee67}', '\u{1ee6a}'), ('\u{1ee6c}', '\u{1ee72}'),
-        ('\u{1ee74}', '\u{1ee77}'), ('\u{1ee79}', '\u{1ee7c}'), ('\u{1ee7e}',
-        '\u{1ee7e}'), ('\u{1ee80}', '\u{1ee89}'), ('\u{1ee8b}', '\u{1ee9b}'),
-        ('\u{1eea1}', '\u{1eea3}'), ('\u{1eea5}', '\u{1eea9}'), ('\u{1eeab}',
-        '\u{1eebb}'), ('\u{20000}', '\u{2a6d6}'), ('\u{2a700}', '\u{2b734}'),
-        ('\u{2b740}', '\u{2b81d}'), ('\u{2b820}', '\u{2cea1}'), ('\u{2f800}',
-        '\u{2fa1d}')
-    ];
+/// Return an iterator over the equivalence class of simple case mappings
+/// for the given codepoint. The equivalence class does not include the
+/// given codepoint itself.
+///
+/// If the equivalence class is empty, then this returns `Err` carrying the
+/// next scalar value (in ascending order) that has a non-empty equivalence
+/// class, if one exists; if no such scalar value exists, `Err(None)` is
+/// returned. The point of this behavior is to permit callers to avoid
+/// calling `simple_fold` more than they need to, since there is some cost
+/// to fetching an equivalence class.
+pub fn simple_fold(c: char) -> result::Result<SimpleFoldIter, Option<char>> {
+    CASE_FOLDING_SIMPLE
+        .binary_search_by_key(&c, |&(c1, _)| c1)
+        .map(|i| SimpleFoldIter(CASE_FOLDING_SIMPLE[i].1.iter()))
+        .map_err(|i| {
+            if i >= CASE_FOLDING_SIMPLE.len() {
+                None
+            } else {
+                Some(CASE_FOLDING_SIMPLE[i].0)
+            }
+        })
+}
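A usage sketch for `simple_fold` and `SimpleFoldIter` (editorial annotation, not part of the vendored file; it relies only on the signature shown above and on the standard Unicode simple case foldings for 'A' and '0'):

    // Illustration only: 'A' folds to 'a', and the returned class excludes
    // 'A' itself, so the iterator yields exactly ['a'].
    fn demo_simple_fold() {
        match simple_fold('A') {
            Ok(iter) => assert_eq!(iter.collect::<Vec<char>>(), vec!['a']),
            Err(_) => unreachable!("'A' has a simple case folding"),
        }

        // '0' has no case foldings, so we get back the next scalar value
        // that does (some codepoint greater than '0'), letting callers skip
        // ahead without probing every intermediate codepoint.
        match simple_fold('0') {
            Err(Some(next)) => assert!(next > '0'),
            Err(None) => unreachable!("foldable codepoints exist above '0'"),
            Ok(_) => unreachable!("'0' has no simple case folding"),
        }
    }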
 
-    pub const LC_table: &'static [(char, char)] = &[
-        ('\u{41}', '\u{5a}'), ('\u{61}', '\u{7a}'), ('\u{b5}', '\u{b5}'),
-        ('\u{c0}', '\u{d6}'), ('\u{d8}', '\u{f6}'), ('\u{f8}', '\u{1ba}'),
-        ('\u{1bc}', '\u{1bf}'), ('\u{1c4}', '\u{293}'), ('\u{295}', '\u{2af}'),
-        ('\u{370}', '\u{373}'), ('\u{376}', '\u{377}'), ('\u{37b}', '\u{37d}'),
-        ('\u{37f}', '\u{37f}'), ('\u{386}', '\u{386}'), ('\u{388}', '\u{38a}'),
-        ('\u{38c}', '\u{38c}'), ('\u{38e}', '\u{3a1}'), ('\u{3a3}', '\u{3f5}'),
-        ('\u{3f7}', '\u{481}'), ('\u{48a}', '\u{52f}'), ('\u{531}', '\u{556}'),
-        ('\u{561}', '\u{587}'), ('\u{10a0}', '\u{10c5}'), ('\u{10c7}',
-        '\u{10c7}'), ('\u{10cd}', '\u{10cd}'), ('\u{13a0}', '\u{13f5}'),
-        ('\u{13f8}', '\u{13fd}'), ('\u{1d00}', '\u{1d2b}'), ('\u{1d6b}',
-        '\u{1d77}'), ('\u{1d79}', '\u{1d9a}'), ('\u{1e00}', '\u{1f15}'),
-        ('\u{1f18}', '\u{1f1d}'), ('\u{1f20}', '\u{1f45}'), ('\u{1f48}',
-        '\u{1f4d}'), ('\u{1f50}', '\u{1f57}'), ('\u{1f59}', '\u{1f59}'),
-        ('\u{1f5b}', '\u{1f5b}'), ('\u{1f5d}', '\u{1f5d}'), ('\u{1f5f}',
-        '\u{1f7d}'), ('\u{1f80}', '\u{1fb4}'), ('\u{1fb6}', '\u{1fbc}'),
-        ('\u{1fbe}', '\u{1fbe}'), ('\u{1fc2}', '\u{1fc4}'), ('\u{1fc6}',
-        '\u{1fcc}'), ('\u{1fd0}', '\u{1fd3}'), ('\u{1fd6}', '\u{1fdb}'),
-        ('\u{1fe0}', '\u{1fec}'), ('\u{1ff2}', '\u{1ff4}'), ('\u{1ff6}',
-        '\u{1ffc}'), ('\u{2102}', '\u{2102}'), ('\u{2107}', '\u{2107}'),
-        ('\u{210a}', '\u{2113}'), ('\u{2115}', '\u{2115}'), ('\u{2119}',
-        '\u{211d}'), ('\u{2124}', '\u{2124}'), ('\u{2126}', '\u{2126}'),
-        ('\u{2128}', '\u{2128}'), ('\u{212a}', '\u{212d}'), ('\u{212f}',
-        '\u{2134}'), ('\u{2139}', '\u{2139}'), ('\u{213c}', '\u{213f}'),
-        ('\u{2145}', '\u{2149}'), ('\u{214e}', '\u{214e}'), ('\u{2183}',
-        '\u{2184}'), ('\u{2c00}', '\u{2c2e}'), ('\u{2c30}', '\u{2c5e}'),
-        ('\u{2c60}', '\u{2c7b}'), ('\u{2c7e}', '\u{2ce4}'), ('\u{2ceb}',
-        '\u{2cee}'), ('\u{2cf2}', '\u{2cf3}'), ('\u{2d00}', '\u{2d25}'),
-        ('\u{2d27}', '\u{2d27}'), ('\u{2d2d}', '\u{2d2d}'), ('\u{a640}',
-        '\u{a66d}'), ('\u{a680}', '\u{a69b}'), ('\u{a722}', '\u{a76f}'),
-        ('\u{a771}', '\u{a787}'), ('\u{a78b}', '\u{a78e}'), ('\u{a790}',
-        '\u{a7ad}'), ('\u{a7b0}', '\u{a7b7}'), ('\u{a7fa}', '\u{a7fa}'),
-        ('\u{ab30}', '\u{ab5a}'), ('\u{ab60}', '\u{ab65}'), ('\u{ab70}',
-        '\u{abbf}'), ('\u{fb00}', '\u{fb06}'), ('\u{fb13}', '\u{fb17}'),
-        ('\u{ff21}', '\u{ff3a}'), ('\u{ff41}', '\u{ff5a}'), ('\u{10400}',
-        '\u{1044f}'), ('\u{10c80}', '\u{10cb2}'), ('\u{10cc0}', '\u{10cf2}'),
-        ('\u{118a0}', '\u{118df}'), ('\u{1d400}', '\u{1d454}'), ('\u{1d456}',
-        '\u{1d49c}'), ('\u{1d49e}', '\u{1d49f}'), ('\u{1d4a2}', '\u{1d4a2}'),
-        ('\u{1d4a5}', '\u{1d4a6}'), ('\u{1d4a9}', '\u{1d4ac}'), ('\u{1d4ae}',
-        '\u{1d4b9}'), ('\u{1d4bb}', '\u{1d4bb}'), ('\u{1d4bd}', '\u{1d4c3}'),
-        ('\u{1d4c5}', '\u{1d505}'), ('\u{1d507}', '\u{1d50a}'), ('\u{1d50d}',
-        '\u{1d514}'), ('\u{1d516}', '\u{1d51c}'), ('\u{1d51e}', '\u{1d539}'),
-        ('\u{1d53b}', '\u{1d53e}'), ('\u{1d540}', '\u{1d544}'), ('\u{1d546}',
-        '\u{1d546}'), ('\u{1d54a}', '\u{1d550}'), ('\u{1d552}', '\u{1d6a5}'),
-        ('\u{1d6a8}', '\u{1d6c0}'), ('\u{1d6c2}', '\u{1d6da}'), ('\u{1d6dc}',
-        '\u{1d6fa}'), ('\u{1d6fc}', '\u{1d714}'), ('\u{1d716}', '\u{1d734}'),
-        ('\u{1d736}', '\u{1d74e}'), ('\u{1d750}', '\u{1d76e}'), ('\u{1d770}',
-        '\u{1d788}'), ('\u{1d78a}', '\u{1d7a8}'), ('\u{1d7aa}', '\u{1d7c2}'),
-        ('\u{1d7c4}', '\u{1d7cb}')
-    ];
-
-    pub const Ll_table: &'static [(char, char)] = &[
-        ('\u{61}', '\u{7a}'), ('\u{b5}', '\u{b5}'), ('\u{df}', '\u{f6}'),
-        ('\u{f8}', '\u{ff}'), ('\u{101}', '\u{101}'), ('\u{103}', '\u{103}'),
-        ('\u{105}', '\u{105}'), ('\u{107}', '\u{107}'), ('\u{109}', '\u{109}'),
-        ('\u{10b}', '\u{10b}'), ('\u{10d}', '\u{10d}'), ('\u{10f}', '\u{10f}'),
-        ('\u{111}', '\u{111}'), ('\u{113}', '\u{113}'), ('\u{115}', '\u{115}'),
-        ('\u{117}', '\u{117}'), ('\u{119}', '\u{119}'), ('\u{11b}', '\u{11b}'),
-        ('\u{11d}', '\u{11d}'), ('\u{11f}', '\u{11f}'), ('\u{121}', '\u{121}'),
-        ('\u{123}', '\u{123}'), ('\u{125}', '\u{125}'), ('\u{127}', '\u{127}'),
-        ('\u{129}', '\u{129}'), ('\u{12b}', '\u{12b}'), ('\u{12d}', '\u{12d}'),
-        ('\u{12f}', '\u{12f}'), ('\u{131}', '\u{131}'), ('\u{133}', '\u{133}'),
-        ('\u{135}', '\u{135}'), ('\u{137}', '\u{138}'), ('\u{13a}', '\u{13a}'),
-        ('\u{13c}', '\u{13c}'), ('\u{13e}', '\u{13e}'), ('\u{140}', '\u{140}'),
-        ('\u{142}', '\u{142}'), ('\u{144}', '\u{144}'), ('\u{146}', '\u{146}'),
-        ('\u{148}', '\u{149}'), ('\u{14b}', '\u{14b}'), ('\u{14d}', '\u{14d}'),
-        ('\u{14f}', '\u{14f}'), ('\u{151}', '\u{151}'), ('\u{153}', '\u{153}'),
-        ('\u{155}', '\u{155}'), ('\u{157}', '\u{157}'), ('\u{159}', '\u{159}'),
-        ('\u{15b}', '\u{15b}'), ('\u{15d}', '\u{15d}'), ('\u{15f}', '\u{15f}'),
-        ('\u{161}', '\u{161}'), ('\u{163}', '\u{163}'), ('\u{165}', '\u{165}'),
-        ('\u{167}', '\u{167}'), ('\u{169}', '\u{169}'), ('\u{16b}', '\u{16b}'),
-        ('\u{16d}', '\u{16d}'), ('\u{16f}', '\u{16f}'), ('\u{171}', '\u{171}'),
-        ('\u{173}', '\u{173}'), ('\u{175}', '\u{175}'), ('\u{177}', '\u{177}'),
-        ('\u{17a}', '\u{17a}'), ('\u{17c}', '\u{17c}'), ('\u{17e}', '\u{180}'),
-        ('\u{183}', '\u{183}'), ('\u{185}', '\u{185}'), ('\u{188}', '\u{188}'),
-        ('\u{18c}', '\u{18d}'), ('\u{192}', '\u{192}'), ('\u{195}', '\u{195}'),
-        ('\u{199}', '\u{19b}'), ('\u{19e}', '\u{19e}'), ('\u{1a1}', '\u{1a1}'),
-        ('\u{1a3}', '\u{1a3}'), ('\u{1a5}', '\u{1a5}'), ('\u{1a8}', '\u{1a8}'),
-        ('\u{1aa}', '\u{1ab}'), ('\u{1ad}', '\u{1ad}'), ('\u{1b0}', '\u{1b0}'),
-        ('\u{1b4}', '\u{1b4}'), ('\u{1b6}', '\u{1b6}'), ('\u{1b9}', '\u{1ba}'),
-        ('\u{1bd}', '\u{1bf}'), ('\u{1c6}', '\u{1c6}'), ('\u{1c9}', '\u{1c9}'),
-        ('\u{1cc}', '\u{1cc}'), ('\u{1ce}', '\u{1ce}'), ('\u{1d0}', '\u{1d0}'),
-        ('\u{1d2}', '\u{1d2}'), ('\u{1d4}', '\u{1d4}'), ('\u{1d6}', '\u{1d6}'),
-        ('\u{1d8}', '\u{1d8}'), ('\u{1da}', '\u{1da}'), ('\u{1dc}', '\u{1dd}'),
-        ('\u{1df}', '\u{1df}'), ('\u{1e1}', '\u{1e1}'), ('\u{1e3}', '\u{1e3}'),
-        ('\u{1e5}', '\u{1e5}'), ('\u{1e7}', '\u{1e7}'), ('\u{1e9}', '\u{1e9}'),
-        ('\u{1eb}', '\u{1eb}'), ('\u{1ed}', '\u{1ed}'), ('\u{1ef}', '\u{1f0}'),
-        ('\u{1f3}', '\u{1f3}'), ('\u{1f5}', '\u{1f5}'), ('\u{1f9}', '\u{1f9}'),
-        ('\u{1fb}', '\u{1fb}'), ('\u{1fd}', '\u{1fd}'), ('\u{1ff}', '\u{1ff}'),
-        ('\u{201}', '\u{201}'), ('\u{203}', '\u{203}'), ('\u{205}', '\u{205}'),
-        ('\u{207}', '\u{207}'), ('\u{209}', '\u{209}'), ('\u{20b}', '\u{20b}'),
-        ('\u{20d}', '\u{20d}'), ('\u{20f}', '\u{20f}'), ('\u{211}', '\u{211}'),
-        ('\u{213}', '\u{213}'), ('\u{215}', '\u{215}'), ('\u{217}', '\u{217}'),
-        ('\u{219}', '\u{219}'), ('\u{21b}', '\u{21b}'), ('\u{21d}', '\u{21d}'),
-        ('\u{21f}', '\u{21f}'), ('\u{221}', '\u{221}'), ('\u{223}', '\u{223}'),
-        ('\u{225}', '\u{225}'), ('\u{227}', '\u{227}'), ('\u{229}', '\u{229}'),
-        ('\u{22b}', '\u{22b}'), ('\u{22d}', '\u{22d}'), ('\u{22f}', '\u{22f}'),
-        ('\u{231}', '\u{231}'), ('\u{233}', '\u{239}'), ('\u{23c}', '\u{23c}'),
-        ('\u{23f}', '\u{240}'), ('\u{242}', '\u{242}'), ('\u{247}', '\u{247}'),
-        ('\u{249}', '\u{249}'), ('\u{24b}', '\u{24b}'), ('\u{24d}', '\u{24d}'),
-        ('\u{24f}', '\u{293}'), ('\u{295}', '\u{2af}'), ('\u{371}', '\u{371}'),
-        ('\u{373}', '\u{373}'), ('\u{377}', '\u{377}'), ('\u{37b}', '\u{37d}'),
-        ('\u{390}', '\u{390}'), ('\u{3ac}', '\u{3ce}'), ('\u{3d0}', '\u{3d1}'),
-        ('\u{3d5}', '\u{3d7}'), ('\u{3d9}', '\u{3d9}'), ('\u{3db}', '\u{3db}'),
-        ('\u{3dd}', '\u{3dd}'), ('\u{3df}', '\u{3df}'), ('\u{3e1}', '\u{3e1}'),
-        ('\u{3e3}', '\u{3e3}'), ('\u{3e5}', '\u{3e5}'), ('\u{3e7}', '\u{3e7}'),
-        ('\u{3e9}', '\u{3e9}'), ('\u{3eb}', '\u{3eb}'), ('\u{3ed}', '\u{3ed}'),
-        ('\u{3ef}', '\u{3f3}'), ('\u{3f5}', '\u{3f5}'), ('\u{3f8}', '\u{3f8}'),
-        ('\u{3fb}', '\u{3fc}'), ('\u{430}', '\u{45f}'), ('\u{461}', '\u{461}'),
-        ('\u{463}', '\u{463}'), ('\u{465}', '\u{465}'), ('\u{467}', '\u{467}'),
-        ('\u{469}', '\u{469}'), ('\u{46b}', '\u{46b}'), ('\u{46d}', '\u{46d}'),
-        ('\u{46f}', '\u{46f}'), ('\u{471}', '\u{471}'), ('\u{473}', '\u{473}'),
-        ('\u{475}', '\u{475}'), ('\u{477}', '\u{477}'), ('\u{479}', '\u{479}'),
-        ('\u{47b}', '\u{47b}'), ('\u{47d}', '\u{47d}'), ('\u{47f}', '\u{47f}'),
-        ('\u{481}', '\u{481}'), ('\u{48b}', '\u{48b}'), ('\u{48d}', '\u{48d}'),
-        ('\u{48f}', '\u{48f}'), ('\u{491}', '\u{491}'), ('\u{493}', '\u{493}'),
-        ('\u{495}', '\u{495}'), ('\u{497}', '\u{497}'), ('\u{499}', '\u{499}'),
-        ('\u{49b}', '\u{49b}'), ('\u{49d}', '\u{49d}'), ('\u{49f}', '\u{49f}'),
-        ('\u{4a1}', '\u{4a1}'), ('\u{4a3}', '\u{4a3}'), ('\u{4a5}', '\u{4a5}'),
-        ('\u{4a7}', '\u{4a7}'), ('\u{4a9}', '\u{4a9}'), ('\u{4ab}', '\u{4ab}'),
-        ('\u{4ad}', '\u{4ad}'), ('\u{4af}', '\u{4af}'), ('\u{4b1}', '\u{4b1}'),
-        ('\u{4b3}', '\u{4b3}'), ('\u{4b5}', '\u{4b5}'), ('\u{4b7}', '\u{4b7}'),
-        ('\u{4b9}', '\u{4b9}'), ('\u{4bb}', '\u{4bb}'), ('\u{4bd}', '\u{4bd}'),
-        ('\u{4bf}', '\u{4bf}'), ('\u{4c2}', '\u{4c2}'), ('\u{4c4}', '\u{4c4}'),
-        ('\u{4c6}', '\u{4c6}'), ('\u{4c8}', '\u{4c8}'), ('\u{4ca}', '\u{4ca}'),
-        ('\u{4cc}', '\u{4cc}'), ('\u{4ce}', '\u{4cf}'), ('\u{4d1}', '\u{4d1}'),
-        ('\u{4d3}', '\u{4d3}'), ('\u{4d5}', '\u{4d5}'), ('\u{4d7}', '\u{4d7}'),
-        ('\u{4d9}', '\u{4d9}'), ('\u{4db}', '\u{4db}'), ('\u{4dd}', '\u{4dd}'),
-        ('\u{4df}', '\u{4df}'), ('\u{4e1}', '\u{4e1}'), ('\u{4e3}', '\u{4e3}'),
-        ('\u{4e5}', '\u{4e5}'), ('\u{4e7}', '\u{4e7}'), ('\u{4e9}', '\u{4e9}'),
-        ('\u{4eb}', '\u{4eb}'), ('\u{4ed}', '\u{4ed}'), ('\u{4ef}', '\u{4ef}'),
-        ('\u{4f1}', '\u{4f1}'), ('\u{4f3}', '\u{4f3}'), ('\u{4f5}', '\u{4f5}'),
-        ('\u{4f7}', '\u{4f7}'), ('\u{4f9}', '\u{4f9}'), ('\u{4fb}', '\u{4fb}'),
-        ('\u{4fd}', '\u{4fd}'), ('\u{4ff}', '\u{4ff}'), ('\u{501}', '\u{501}'),
-        ('\u{503}', '\u{503}'), ('\u{505}', '\u{505}'), ('\u{507}', '\u{507}'),
-        ('\u{509}', '\u{509}'), ('\u{50b}', '\u{50b}'), ('\u{50d}', '\u{50d}'),
-        ('\u{50f}', '\u{50f}'), ('\u{511}', '\u{511}'), ('\u{513}', '\u{513}'),
-        ('\u{515}', '\u{515}'), ('\u{517}', '\u{517}'), ('\u{519}', '\u{519}'),
-        ('\u{51b}', '\u{51b}'), ('\u{51d}', '\u{51d}'), ('\u{51f}', '\u{51f}'),
-        ('\u{521}', '\u{521}'), ('\u{523}', '\u{523}'), ('\u{525}', '\u{525}'),
-        ('\u{527}', '\u{527}'), ('\u{529}', '\u{529}'), ('\u{52b}', '\u{52b}'),
-        ('\u{52d}', '\u{52d}'), ('\u{52f}', '\u{52f}'), ('\u{561}', '\u{587}'),
-        ('\u{13f8}', '\u{13fd}'), ('\u{1d00}', '\u{1d2b}'), ('\u{1d6b}',
-        '\u{1d77}'), ('\u{1d79}', '\u{1d9a}'), ('\u{1e01}', '\u{1e01}'),
-        ('\u{1e03}', '\u{1e03}'), ('\u{1e05}', '\u{1e05}'), ('\u{1e07}',
-        '\u{1e07}'), ('\u{1e09}', '\u{1e09}'), ('\u{1e0b}', '\u{1e0b}'),
-        ('\u{1e0d}', '\u{1e0d}'), ('\u{1e0f}', '\u{1e0f}'), ('\u{1e11}',
-        '\u{1e11}'), ('\u{1e13}', '\u{1e13}'), ('\u{1e15}', '\u{1e15}'),
-        ('\u{1e17}', '\u{1e17}'), ('\u{1e19}', '\u{1e19}'), ('\u{1e1b}',
-        '\u{1e1b}'), ('\u{1e1d}', '\u{1e1d}'), ('\u{1e1f}', '\u{1e1f}'),
-        ('\u{1e21}', '\u{1e21}'), ('\u{1e23}', '\u{1e23}'), ('\u{1e25}',
-        '\u{1e25}'), ('\u{1e27}', '\u{1e27}'), ('\u{1e29}', '\u{1e29}'),
-        ('\u{1e2b}', '\u{1e2b}'), ('\u{1e2d}', '\u{1e2d}'), ('\u{1e2f}',
-        '\u{1e2f}'), ('\u{1e31}', '\u{1e31}'), ('\u{1e33}', '\u{1e33}'),
-        ('\u{1e35}', '\u{1e35}'), ('\u{1e37}', '\u{1e37}'), ('\u{1e39}',
-        '\u{1e39}'), ('\u{1e3b}', '\u{1e3b}'), ('\u{1e3d}', '\u{1e3d}'),
-        ('\u{1e3f}', '\u{1e3f}'), ('\u{1e41}', '\u{1e41}'), ('\u{1e43}',
-        '\u{1e43}'), ('\u{1e45}', '\u{1e45}'), ('\u{1e47}', '\u{1e47}'),
-        ('\u{1e49}', '\u{1e49}'), ('\u{1e4b}', '\u{1e4b}'), ('\u{1e4d}',
-        '\u{1e4d}'), ('\u{1e4f}', '\u{1e4f}'), ('\u{1e51}', '\u{1e51}'),
-        ('\u{1e53}', '\u{1e53}'), ('\u{1e55}', '\u{1e55}'), ('\u{1e57}',
-        '\u{1e57}'), ('\u{1e59}', '\u{1e59}'), ('\u{1e5b}', '\u{1e5b}'),
-        ('\u{1e5d}', '\u{1e5d}'), ('\u{1e5f}', '\u{1e5f}'), ('\u{1e61}',
-        '\u{1e61}'), ('\u{1e63}', '\u{1e63}'), ('\u{1e65}', '\u{1e65}'),
-        ('\u{1e67}', '\u{1e67}'), ('\u{1e69}', '\u{1e69}'), ('\u{1e6b}',
-        '\u{1e6b}'), ('\u{1e6d}', '\u{1e6d}'), ('\u{1e6f}', '\u{1e6f}'),
-        ('\u{1e71}', '\u{1e71}'), ('\u{1e73}', '\u{1e73}'), ('\u{1e75}',
-        '\u{1e75}'), ('\u{1e77}', '\u{1e77}'), ('\u{1e79}', '\u{1e79}'),
-        ('\u{1e7b}', '\u{1e7b}'), ('\u{1e7d}', '\u{1e7d}'), ('\u{1e7f}',
-        '\u{1e7f}'), ('\u{1e81}', '\u{1e81}'), ('\u{1e83}', '\u{1e83}'),
-        ('\u{1e85}', '\u{1e85}'), ('\u{1e87}', '\u{1e87}'), ('\u{1e89}',
-        '\u{1e89}'), ('\u{1e8b}', '\u{1e8b}'), ('\u{1e8d}', '\u{1e8d}'),
-        ('\u{1e8f}', '\u{1e8f}'), ('\u{1e91}', '\u{1e91}'), ('\u{1e93}',
-        '\u{1e93}'), ('\u{1e95}', '\u{1e9d}'), ('\u{1e9f}', '\u{1e9f}'),
-        ('\u{1ea1}', '\u{1ea1}'), ('\u{1ea3}', '\u{1ea3}'), ('\u{1ea5}',
-        '\u{1ea5}'), ('\u{1ea7}', '\u{1ea7}'), ('\u{1ea9}', '\u{1ea9}'),
-        ('\u{1eab}', '\u{1eab}'), ('\u{1ead}', '\u{1ead}'), ('\u{1eaf}',
-        '\u{1eaf}'), ('\u{1eb1}', '\u{1eb1}'), ('\u{1eb3}', '\u{1eb3}'),
-        ('\u{1eb5}', '\u{1eb5}'), ('\u{1eb7}', '\u{1eb7}'), ('\u{1eb9}',
-        '\u{1eb9}'), ('\u{1ebb}', '\u{1ebb}'), ('\u{1ebd}', '\u{1ebd}'),
-        ('\u{1ebf}', '\u{1ebf}'), ('\u{1ec1}', '\u{1ec1}'), ('\u{1ec3}',
-        '\u{1ec3}'), ('\u{1ec5}', '\u{1ec5}'), ('\u{1ec7}', '\u{1ec7}'),
-        ('\u{1ec9}', '\u{1ec9}'), ('\u{1ecb}', '\u{1ecb}'), ('\u{1ecd}',
-        '\u{1ecd}'), ('\u{1ecf}', '\u{1ecf}'), ('\u{1ed1}', '\u{1ed1}'),
-        ('\u{1ed3}', '\u{1ed3}'), ('\u{1ed5}', '\u{1ed5}'), ('\u{1ed7}',
-        '\u{1ed7}'), ('\u{1ed9}', '\u{1ed9}'), ('\u{1edb}', '\u{1edb}'),
-        ('\u{1edd}', '\u{1edd}'), ('\u{1edf}', '\u{1edf}'), ('\u{1ee1}',
-        '\u{1ee1}'), ('\u{1ee3}', '\u{1ee3}'), ('\u{1ee5}', '\u{1ee5}'),
-        ('\u{1ee7}', '\u{1ee7}'), ('\u{1ee9}', '\u{1ee9}'), ('\u{1eeb}',
-        '\u{1eeb}'), ('\u{1eed}', '\u{1eed}'), ('\u{1eef}', '\u{1eef}'),
-        ('\u{1ef1}', '\u{1ef1}'), ('\u{1ef3}', '\u{1ef3}'), ('\u{1ef5}',
-        '\u{1ef5}'), ('\u{1ef7}', '\u{1ef7}'), ('\u{1ef9}', '\u{1ef9}'),
-        ('\u{1efb}', '\u{1efb}'), ('\u{1efd}', '\u{1efd}'), ('\u{1eff}',
-        '\u{1f07}'), ('\u{1f10}', '\u{1f15}'), ('\u{1f20}', '\u{1f27}'),
-        ('\u{1f30}', '\u{1f37}'), ('\u{1f40}', '\u{1f45}'), ('\u{1f50}',
-        '\u{1f57}'), ('\u{1f60}', '\u{1f67}'), ('\u{1f70}', '\u{1f7d}'),
-        ('\u{1f80}', '\u{1f87}'), ('\u{1f90}', '\u{1f97}'), ('\u{1fa0}',
-        '\u{1fa7}'), ('\u{1fb0}', '\u{1fb4}'), ('\u{1fb6}', '\u{1fb7}'),
-        ('\u{1fbe}', '\u{1fbe}'), ('\u{1fc2}', '\u{1fc4}'), ('\u{1fc6}',
-        '\u{1fc7}'), ('\u{1fd0}', '\u{1fd3}'), ('\u{1fd6}', '\u{1fd7}'),
-        ('\u{1fe0}', '\u{1fe7}'), ('\u{1ff2}', '\u{1ff4}'), ('\u{1ff6}',
-        '\u{1ff7}'), ('\u{210a}', '\u{210a}'), ('\u{210e}', '\u{210f}'),
-        ('\u{2113}', '\u{2113}'), ('\u{212f}', '\u{212f}'), ('\u{2134}',
-        '\u{2134}'), ('\u{2139}', '\u{2139}'), ('\u{213c}', '\u{213d}'),
-        ('\u{2146}', '\u{2149}'), ('\u{214e}', '\u{214e}'), ('\u{2184}',
-        '\u{2184}'), ('\u{2c30}', '\u{2c5e}'), ('\u{2c61}', '\u{2c61}'),
-        ('\u{2c65}', '\u{2c66}'), ('\u{2c68}', '\u{2c68}'), ('\u{2c6a}',
-        '\u{2c6a}'), ('\u{2c6c}', '\u{2c6c}'), ('\u{2c71}', '\u{2c71}'),
-        ('\u{2c73}', '\u{2c74}'), ('\u{2c76}', '\u{2c7b}'), ('\u{2c81}',
-        '\u{2c81}'), ('\u{2c83}', '\u{2c83}'), ('\u{2c85}', '\u{2c85}'),
-        ('\u{2c87}', '\u{2c87}'), ('\u{2c89}', '\u{2c89}'), ('\u{2c8b}',
-        '\u{2c8b}'), ('\u{2c8d}', '\u{2c8d}'), ('\u{2c8f}', '\u{2c8f}'),
-        ('\u{2c91}', '\u{2c91}'), ('\u{2c93}', '\u{2c93}'), ('\u{2c95}',
-        '\u{2c95}'), ('\u{2c97}', '\u{2c97}'), ('\u{2c99}', '\u{2c99}'),
-        ('\u{2c9b}', '\u{2c9b}'), ('\u{2c9d}', '\u{2c9d}'), ('\u{2c9f}',
-        '\u{2c9f}'), ('\u{2ca1}', '\u{2ca1}'), ('\u{2ca3}', '\u{2ca3}'),
-        ('\u{2ca5}', '\u{2ca5}'), ('\u{2ca7}', '\u{2ca7}'), ('\u{2ca9}',
-        '\u{2ca9}'), ('\u{2cab}', '\u{2cab}'), ('\u{2cad}', '\u{2cad}'),
-        ('\u{2caf}', '\u{2caf}'), ('\u{2cb1}', '\u{2cb1}'), ('\u{2cb3}',
-        '\u{2cb3}'), ('\u{2cb5}', '\u{2cb5}'), ('\u{2cb7}', '\u{2cb7}'),
-        ('\u{2cb9}', '\u{2cb9}'), ('\u{2cbb}', '\u{2cbb}'), ('\u{2cbd}',
-        '\u{2cbd}'), ('\u{2cbf}', '\u{2cbf}'), ('\u{2cc1}', '\u{2cc1}'),
-        ('\u{2cc3}', '\u{2cc3}'), ('\u{2cc5}', '\u{2cc5}'), ('\u{2cc7}',
-        '\u{2cc7}'), ('\u{2cc9}', '\u{2cc9}'), ('\u{2ccb}', '\u{2ccb}'),
-        ('\u{2ccd}', '\u{2ccd}'), ('\u{2ccf}', '\u{2ccf}'), ('\u{2cd1}',
-        '\u{2cd1}'), ('\u{2cd3}', '\u{2cd3}'), ('\u{2cd5}', '\u{2cd5}'),
-        ('\u{2cd7}', '\u{2cd7}'), ('\u{2cd9}', '\u{2cd9}'), ('\u{2cdb}',
-        '\u{2cdb}'), ('\u{2cdd}', '\u{2cdd}'), ('\u{2cdf}', '\u{2cdf}'),
-        ('\u{2ce1}', '\u{2ce1}'), ('\u{2ce3}', '\u{2ce4}'), ('\u{2cec}',
-        '\u{2cec}'), ('\u{2cee}', '\u{2cee}'), ('\u{2cf3}', '\u{2cf3}'),
-        ('\u{2d00}', '\u{2d25}'), ('\u{2d27}', '\u{2d27}'), ('\u{2d2d}',
-        '\u{2d2d}'), ('\u{a641}', '\u{a641}'), ('\u{a643}', '\u{a643}'),
-        ('\u{a645}', '\u{a645}'), ('\u{a647}', '\u{a647}'), ('\u{a649}',
-        '\u{a649}'), ('\u{a64b}', '\u{a64b}'), ('\u{a64d}', '\u{a64d}'),
-        ('\u{a64f}', '\u{a64f}'), ('\u{a651}', '\u{a651}'), ('\u{a653}',
-        '\u{a653}'), ('\u{a655}', '\u{a655}'), ('\u{a657}', '\u{a657}'),
-        ('\u{a659}', '\u{a659}'), ('\u{a65b}', '\u{a65b}'), ('\u{a65d}',
-        '\u{a65d}'), ('\u{a65f}', '\u{a65f}'), ('\u{a661}', '\u{a661}'),
-        ('\u{a663}', '\u{a663}'), ('\u{a665}', '\u{a665}'), ('\u{a667}',
-        '\u{a667}'), ('\u{a669}', '\u{a669}'), ('\u{a66b}', '\u{a66b}'),
-        ('\u{a66d}', '\u{a66d}'), ('\u{a681}', '\u{a681}'), ('\u{a683}',
-        '\u{a683}'), ('\u{a685}', '\u{a685}'), ('\u{a687}', '\u{a687}'),
-        ('\u{a689}', '\u{a689}'), ('\u{a68b}', '\u{a68b}'), ('\u{a68d}',
-        '\u{a68d}'), ('\u{a68f}', '\u{a68f}'), ('\u{a691}', '\u{a691}'),
-        ('\u{a693}', '\u{a693}'), ('\u{a695}', '\u{a695}'), ('\u{a697}',
-        '\u{a697}'), ('\u{a699}', '\u{a699}'), ('\u{a69b}', '\u{a69b}'),
-        ('\u{a723}', '\u{a723}'), ('\u{a725}', '\u{a725}'), ('\u{a727}',
-        '\u{a727}'), ('\u{a729}', '\u{a729}'), ('\u{a72b}', '\u{a72b}'),
-        ('\u{a72d}', '\u{a72d}'), ('\u{a72f}', '\u{a731}'), ('\u{a733}',
-        '\u{a733}'), ('\u{a735}', '\u{a735}'), ('\u{a737}', '\u{a737}'),
-        ('\u{a739}', '\u{a739}'), ('\u{a73b}', '\u{a73b}'), ('\u{a73d}',
-        '\u{a73d}'), ('\u{a73f}', '\u{a73f}'), ('\u{a741}', '\u{a741}'),
-        ('\u{a743}', '\u{a743}'), ('\u{a745}', '\u{a745}'), ('\u{a747}',
-        '\u{a747}'), ('\u{a749}', '\u{a749}'), ('\u{a74b}', '\u{a74b}'),
-        ('\u{a74d}', '\u{a74d}'), ('\u{a74f}', '\u{a74f}'), ('\u{a751}',
-        '\u{a751}'), ('\u{a753}', '\u{a753}'), ('\u{a755}', '\u{a755}'),
-        ('\u{a757}', '\u{a757}'), ('\u{a759}', '\u{a759}'), ('\u{a75b}',
-        '\u{a75b}'), ('\u{a75d}', '\u{a75d}'), ('\u{a75f}', '\u{a75f}'),
-        ('\u{a761}', '\u{a761}'), ('\u{a763}', '\u{a763}'), ('\u{a765}',
-        '\u{a765}'), ('\u{a767}', '\u{a767}'), ('\u{a769}', '\u{a769}'),
-        ('\u{a76b}', '\u{a76b}'), ('\u{a76d}', '\u{a76d}'), ('\u{a76f}',
-        '\u{a76f}'), ('\u{a771}', '\u{a778}'), ('\u{a77a}', '\u{a77a}'),
-        ('\u{a77c}', '\u{a77c}'), ('\u{a77f}', '\u{a77f}'), ('\u{a781}',
-        '\u{a781}'), ('\u{a783}', '\u{a783}'), ('\u{a785}', '\u{a785}'),
-        ('\u{a787}', '\u{a787}'), ('\u{a78c}', '\u{a78c}'), ('\u{a78e}',
-        '\u{a78e}'), ('\u{a791}', '\u{a791}'), ('\u{a793}', '\u{a795}'),
-        ('\u{a797}', '\u{a797}'), ('\u{a799}', '\u{a799}'), ('\u{a79b}',
-        '\u{a79b}'), ('\u{a79d}', '\u{a79d}'), ('\u{a79f}', '\u{a79f}'),
-        ('\u{a7a1}', '\u{a7a1}'), ('\u{a7a3}', '\u{a7a3}'), ('\u{a7a5}',
-        '\u{a7a5}'), ('\u{a7a7}', '\u{a7a7}'), ('\u{a7a9}', '\u{a7a9}'),
-        ('\u{a7b5}', '\u{a7b5}'), ('\u{a7b7}', '\u{a7b7}'), ('\u{a7fa}',
-        '\u{a7fa}'), ('\u{ab30}', '\u{ab5a}'), ('\u{ab60}', '\u{ab65}'),
-        ('\u{ab70}', '\u{abbf}'), ('\u{fb00}', '\u{fb06}'), ('\u{fb13}',
-        '\u{fb17}'), ('\u{ff41}', '\u{ff5a}'), ('\u{10428}', '\u{1044f}'),
-        ('\u{10cc0}', '\u{10cf2}'), ('\u{118c0}', '\u{118df}'), ('\u{1d41a}',
-        '\u{1d433}'), ('\u{1d44e}', '\u{1d454}'), ('\u{1d456}', '\u{1d467}'),
-        ('\u{1d482}', '\u{1d49b}'), ('\u{1d4b6}', '\u{1d4b9}'), ('\u{1d4bb}',
-        '\u{1d4bb}'), ('\u{1d4bd}', '\u{1d4c3}'), ('\u{1d4c5}', '\u{1d4cf}'),
-        ('\u{1d4ea}', '\u{1d503}'), ('\u{1d51e}', '\u{1d537}'), ('\u{1d552}',
-        '\u{1d56b}'), ('\u{1d586}', '\u{1d59f}'), ('\u{1d5ba}', '\u{1d5d3}'),
-        ('\u{1d5ee}', '\u{1d607}'), ('\u{1d622}', '\u{1d63b}'), ('\u{1d656}',
-        '\u{1d66f}'), ('\u{1d68a}', '\u{1d6a5}'), ('\u{1d6c2}', '\u{1d6da}'),
-        ('\u{1d6dc}', '\u{1d6e1}'), ('\u{1d6fc}', '\u{1d714}'), ('\u{1d716}',
-        '\u{1d71b}'), ('\u{1d736}', '\u{1d74e}'), ('\u{1d750}', '\u{1d755}'),
-        ('\u{1d770}', '\u{1d788}'), ('\u{1d78a}', '\u{1d78f}'), ('\u{1d7aa}',
-        '\u{1d7c2}'), ('\u{1d7c4}', '\u{1d7c9}'), ('\u{1d7cb}', '\u{1d7cb}')
-    ];
-
-    pub const Lm_table: &'static [(char, char)] = &[
-        ('\u{2b0}', '\u{2c1}'), ('\u{2c6}', '\u{2d1}'), ('\u{2e0}', '\u{2e4}'),
-        ('\u{2ec}', '\u{2ec}'), ('\u{2ee}', '\u{2ee}'), ('\u{374}', '\u{374}'),
-        ('\u{37a}', '\u{37a}'), ('\u{559}', '\u{559}'), ('\u{640}', '\u{640}'),
-        ('\u{6e5}', '\u{6e6}'), ('\u{7f4}', '\u{7f5}'), ('\u{7fa}', '\u{7fa}'),
-        ('\u{81a}', '\u{81a}'), ('\u{824}', '\u{824}'), ('\u{828}', '\u{828}'),
-        ('\u{971}', '\u{971}'), ('\u{e46}', '\u{e46}'), ('\u{ec6}', '\u{ec6}'),
-        ('\u{10fc}', '\u{10fc}'), ('\u{17d7}', '\u{17d7}'), ('\u{1843}',
-        '\u{1843}'), ('\u{1aa7}', '\u{1aa7}'), ('\u{1c78}', '\u{1c7d}'),
-        ('\u{1d2c}', '\u{1d6a}'), ('\u{1d78}', '\u{1d78}'), ('\u{1d9b}',
-        '\u{1dbf}'), ('\u{2071}', '\u{2071}'), ('\u{207f}', '\u{207f}'),
-        ('\u{2090}', '\u{209c}'), ('\u{2c7c}', '\u{2c7d}'), ('\u{2d6f}',
-        '\u{2d6f}'), ('\u{2e2f}', '\u{2e2f}'), ('\u{3005}', '\u{3005}'),
-        ('\u{3031}', '\u{3035}'), ('\u{303b}', '\u{303b}'), ('\u{309d}',
-        '\u{309e}'), ('\u{30fc}', '\u{30fe}'), ('\u{a015}', '\u{a015}'),
-        ('\u{a4f8}', '\u{a4fd}'), ('\u{a60c}', '\u{a60c}'), ('\u{a67f}',
-        '\u{a67f}'), ('\u{a69c}', '\u{a69d}'), ('\u{a717}', '\u{a71f}'),
-        ('\u{a770}', '\u{a770}'), ('\u{a788}', '\u{a788}'), ('\u{a7f8}',
-        '\u{a7f9}'), ('\u{a9cf}', '\u{a9cf}'), ('\u{a9e6}', '\u{a9e6}'),
-        ('\u{aa70}', '\u{aa70}'), ('\u{aadd}', '\u{aadd}'), ('\u{aaf3}',
-        '\u{aaf4}'), ('\u{ab5c}', '\u{ab5f}'), ('\u{ff70}', '\u{ff70}'),
-        ('\u{ff9e}', '\u{ff9f}'), ('\u{16b40}', '\u{16b43}'), ('\u{16f93}',
-        '\u{16f9f}')
-    ];
+/// Returns true if and only if the given (inclusive) range contains at least
+/// one Unicode scalar value that has a non-empty non-trivial simple case
+/// mapping.
+///
+/// This function panics if `end < start`.
+pub fn contains_simple_case_mapping(start: char, end: char) -> bool {
+    assert!(start <= end);
+    CASE_FOLDING_SIMPLE
+        .binary_search_by(|&(c, _)| {
+            if start <= c && c <= end {
+                Ordering::Equal
+            } else if c > end {
+                Ordering::Greater
+            } else {
+                Ordering::Less
+            }
+        }).is_ok()
+}
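For readers skimming the revendored code, here is a minimal, standalone sketch of the containment-as-`Equal` binary-search trick that `contains_simple_case_mapping` uses above. The `FOLDS` table and the `range_has_entry` helper are illustrative assumptions for this note only; they are not part of the vendored crate, whose real table is generated from Unicode data.

    use std::cmp::Ordering;

    // Hypothetical stand-in for a generated, sorted case-folding table:
    // (codepoint, simple case-folded codepoint) pairs in ascending order.
    const FOLDS: &[(char, char)] = &[('A', 'a'), ('B', 'b'), ('K', 'k')];

    // Same technique as contains_simple_case_mapping above: a binary search
    // whose comparator reports Equal as soon as a table entry falls inside
    // the inclusive range [start, end].
    fn range_has_entry(start: char, end: char) -> bool {
        assert!(start <= end);
        FOLDS
            .binary_search_by(|&(c, _)| {
                if start <= c && c <= end {
                    Ordering::Equal
                } else if c > end {
                    Ordering::Greater
                } else {
                    Ordering::Less
                }
            })
            .is_ok()
    }

    fn main() {
        assert!(range_has_entry('A', 'Z'));  // 'A', 'B' and 'K' lie in range
        assert!(!range_has_entry('0', '9')); // no table entry in this range
    }

Treating containment as `Ordering::Equal` lets the standard binary search answer an existence query over a sorted table without scanning it, which is why the function can stay O(log n) over the full case-folding data.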
 
-    pub const Lo_table: &'static [(char, char)] = &[
-        ('\u{aa}', '\u{aa}'), ('\u{ba}', '\u{ba}'), ('\u{1bb}', '\u{1bb}'),
-        ('\u{1c0}', '\u{1c3}'), ('\u{294}', '\u{294}'), ('\u{5d0}', '\u{5ea}'),
-        ('\u{5f0}', '\u{5f2}'), ('\u{620}', '\u{63f}'), ('\u{641}', '\u{64a}'),
-        ('\u{66e}', '\u{66f}'), ('\u{671}', '\u{6d3}'), ('\u{6d5}', '\u{6d5}'),
-        ('\u{6ee}', '\u{6ef}'), ('\u{6fa}', '\u{6fc}'), ('\u{6ff}', '\u{6ff}'),
-        ('\u{710}', '\u{710}'), ('\u{712}', '\u{72f}'), ('\u{74d}', '\u{7a5}'),
-        ('\u{7b1}', '\u{7b1}'), ('\u{7ca}', '\u{7ea}'), ('\u{800}', '\u{815}'),
-        ('\u{840}', '\u{858}'), ('\u{8a0}', '\u{8b4}'), ('\u{904}', '\u{939}'),
-        ('\u{93d}', '\u{93d}'), ('\u{950}', '\u{950}'), ('\u{958}', '\u{961}'),
-        ('\u{972}', '\u{980}'), ('\u{985}', '\u{98c}'), ('\u{98f}', '\u{990}'),
-        ('\u{993}', '\u{9a8}'), ('\u{9aa}', '\u{9b0}'), ('\u{9b2}', '\u{9b2}'),
-        ('\u{9b6}', '\u{9b9}'), ('\u{9bd}', '\u{9bd}'), ('\u{9ce}', '\u{9ce}'),
-        ('\u{9dc}', '\u{9dd}'), ('\u{9df}', '\u{9e1}'), ('\u{9f0}', '\u{9f1}'),
-        ('\u{a05}', '\u{a0a}'), ('\u{a0f}', '\u{a10}'), ('\u{a13}', '\u{a28}'),
-        ('\u{a2a}', '\u{a30}'), ('\u{a32}', '\u{a33}'), ('\u{a35}', '\u{a36}'),
-        ('\u{a38}', '\u{a39}'), ('\u{a59}', '\u{a5c}'), ('\u{a5e}', '\u{a5e}'),
-        ('\u{a72}', '\u{a74}'), ('\u{a85}', '\u{a8d}'), ('\u{a8f}', '\u{a91}'),
-        ('\u{a93}', '\u{aa8}'), ('\u{aaa}', '\u{ab0}'), ('\u{ab2}', '\u{ab3}'),
-        ('\u{ab5}', '\u{ab9}'), ('\u{abd}', '\u{abd}'), ('\u{ad0}', '\u{ad0}'),
-        ('\u{ae0}', '\u{ae1}'), ('\u{af9}', '\u{af9}'), ('\u{b05}', '\u{b0c}'),
-        ('\u{b0f}', '\u{b10}'), ('\u{b13}', '\u{b28}'), ('\u{b2a}', '\u{b30}'),
-        ('\u{b32}', '\u{b33}'), ('\u{b35}', '\u{b39}'), ('\u{b3d}', '\u{b3d}'),
-        ('\u{b5c}', '\u{b5d}'), ('\u{b5f}', '\u{b61}'), ('\u{b71}', '\u{b71}'),
-        ('\u{b83}', '\u{b83}'), ('\u{b85}', '\u{b8a}'), ('\u{b8e}', '\u{b90}'),
-        ('\u{b92}', '\u{b95}'), ('\u{b99}', '\u{b9a}'), ('\u{b9c}', '\u{b9c}'),
-        ('\u{b9e}', '\u{b9f}'), ('\u{ba3}', '\u{ba4}'), ('\u{ba8}', '\u{baa}'),
-        ('\u{bae}', '\u{bb9}'), ('\u{bd0}', '\u{bd0}'), ('\u{c05}', '\u{c0c}'),
-        ('\u{c0e}', '\u{c10}'), ('\u{c12}', '\u{c28}'), ('\u{c2a}', '\u{c39}'),
-        ('\u{c3d}', '\u{c3d}'), ('\u{c58}', '\u{c5a}'), ('\u{c60}', '\u{c61}'),
-        ('\u{c85}', '\u{c8c}'), ('\u{c8e}', '\u{c90}'), ('\u{c92}', '\u{ca8}'),
-        ('\u{caa}', '\u{cb3}'), ('\u{cb5}', '\u{cb9}'), ('\u{cbd}', '\u{cbd}'),
-        ('\u{cde}', '\u{cde}'), ('\u{ce0}', '\u{ce1}'), ('\u{cf1}', '\u{cf2}'),
-        ('\u{d05}', '\u{d0c}'), ('\u{d0e}', '\u{d10}'), ('\u{d12}', '\u{d3a}'),
-        ('\u{d3d}', '\u{d3d}'), ('\u{d4e}', '\u{d4e}'), ('\u{d5f}', '\u{d61}'),
-        ('\u{d7a}', '\u{d7f}'), ('\u{d85}', '\u{d96}'), ('\u{d9a}', '\u{db1}'),
-        ('\u{db3}', '\u{dbb}'), ('\u{dbd}', '\u{dbd}'), ('\u{dc0}', '\u{dc6}'),
-        ('\u{e01}', '\u{e30}'), ('\u{e32}', '\u{e33}'), ('\u{e40}', '\u{e45}'),
-        ('\u{e81}', '\u{e82}'), ('\u{e84}', '\u{e84}'), ('\u{e87}', '\u{e88}'),
-        ('\u{e8a}', '\u{e8a}'), ('\u{e8d}', '\u{e8d}'), ('\u{e94}', '\u{e97}'),
-        ('\u{e99}', '\u{e9f}'), ('\u{ea1}', '\u{ea3}'), ('\u{ea5}', '\u{ea5}'),
-        ('\u{ea7}', '\u{ea7}'), ('\u{eaa}', '\u{eab}'), ('\u{ead}', '\u{eb0}'),
-        ('\u{eb2}', '\u{eb3}'), ('\u{ebd}', '\u{ebd}'), ('\u{ec0}', '\u{ec4}'),
-        ('\u{edc}', '\u{edf}'), ('\u{f00}', '\u{f00}'), ('\u{f40}', '\u{f47}'),
-        ('\u{f49}', '\u{f6c}'), ('\u{f88}', '\u{f8c}'), ('\u{1000}',
-        '\u{102a}'), ('\u{103f}', '\u{103f}'), ('\u{1050}', '\u{1055}'),
-        ('\u{105a}', '\u{105d}'), ('\u{1061}', '\u{1061}'), ('\u{1065}',
-        '\u{1066}'), ('\u{106e}', '\u{1070}'), ('\u{1075}', '\u{1081}'),
-        ('\u{108e}', '\u{108e}'), ('\u{10d0}', '\u{10fa}'), ('\u{10fd}',
-        '\u{1248}'), ('\u{124a}', '\u{124d}'), ('\u{1250}', '\u{1256}'),
-        ('\u{1258}', '\u{1258}'), ('\u{125a}', '\u{125d}'), ('\u{1260}',
-        '\u{1288}'), ('\u{128a}', '\u{128d}'), ('\u{1290}', '\u{12b0}'),
-        ('\u{12b2}', '\u{12b5}'), ('\u{12b8}', '\u{12be}'), ('\u{12c0}',
-        '\u{12c0}'), ('\u{12c2}', '\u{12c5}'), ('\u{12c8}', '\u{12d6}'),
-        ('\u{12d8}', '\u{1310}'), ('\u{1312}', '\u{1315}'), ('\u{1318}',
-        '\u{135a}'), ('\u{1380}', '\u{138f}'), ('\u{1401}', '\u{166c}'),
-        ('\u{166f}', '\u{167f}'), ('\u{1681}', '\u{169a}'), ('\u{16a0}',
-        '\u{16ea}'), ('\u{16f1}', '\u{16f8}'), ('\u{1700}', '\u{170c}'),
-        ('\u{170e}', '\u{1711}'), ('\u{1720}', '\u{1731}'), ('\u{1740}',
-        '\u{1751}'), ('\u{1760}', '\u{176c}'), ('\u{176e}', '\u{1770}'),
-        ('\u{1780}', '\u{17b3}'), ('\u{17dc}', '\u{17dc}'), ('\u{1820}',
-        '\u{1842}'), ('\u{1844}', '\u{1877}'), ('\u{1880}', '\u{18a8}'),
-        ('\u{18aa}', '\u{18aa}'), ('\u{18b0}', '\u{18f5}'), ('\u{1900}',
-        '\u{191e}'), ('\u{1950}', '\u{196d}'), ('\u{1970}', '\u{1974}'),
-        ('\u{1980}', '\u{19ab}'), ('\u{19b0}', '\u{19c9}'), ('\u{1a00}',
-        '\u{1a16}'), ('\u{1a20}', '\u{1a54}'), ('\u{1b05}', '\u{1b33}'),
-        ('\u{1b45}', '\u{1b4b}'), ('\u{1b83}', '\u{1ba0}'), ('\u{1bae}',
-        '\u{1baf}'), ('\u{1bba}', '\u{1be5}'), ('\u{1c00}', '\u{1c23}'),
-        ('\u{1c4d}', '\u{1c4f}'), ('\u{1c5a}', '\u{1c77}'), ('\u{1ce9}',
-        '\u{1cec}'), ('\u{1cee}', '\u{1cf1}'), ('\u{1cf5}', '\u{1cf6}'),
-        ('\u{2135}', '\u{2138}'), ('\u{2d30}', '\u{2d67}'), ('\u{2d80}',
-        '\u{2d96}'), ('\u{2da0}', '\u{2da6}'), ('\u{2da8}', '\u{2dae}'),
-        ('\u{2db0}', '\u{2db6}'), ('\u{2db8}', '\u{2dbe}'), ('\u{2dc0}',
-        '\u{2dc6}'), ('\u{2dc8}', '\u{2dce}'), ('\u{2dd0}', '\u{2dd6}'),
-        ('\u{2dd8}', '\u{2dde}'), ('\u{3006}', '\u{3006}'), ('\u{303c}',
-        '\u{303c}'), ('\u{3041}', '\u{3096}'), ('\u{309f}', '\u{309f}'),
-        ('\u{30a1}', '\u{30fa}'), ('\u{30ff}', '\u{30ff}'), ('\u{3105}',
-        '\u{312d}'), ('\u{3131}', '\u{318e}'), ('\u{31a0}', '\u{31ba}'),
-        ('\u{31f0}', '\u{31ff}'), ('\u{3400}', '\u{4db5}'), ('\u{4e00}',
-        '\u{9fd5}'), ('\u{a000}', '\u{a014}'), ('\u{a016}', '\u{a48c}'),
-        ('\u{a4d0}', '\u{a4f7}'), ('\u{a500}', '\u{a60b}'), ('\u{a610}',
-        '\u{a61f}'), ('\u{a62a}', '\u{a62b}'), ('\u{a66e}', '\u{a66e}'),
-        ('\u{a6a0}', '\u{a6e5}'), ('\u{a78f}', '\u{a78f}'), ('\u{a7f7}',
-        '\u{a7f7}'), ('\u{a7fb}', '\u{a801}'), ('\u{a803}', '\u{a805}'),
-        ('\u{a807}', '\u{a80a}'), ('\u{a80c}', '\u{a822}'), ('\u{a840}',
-        '\u{a873}'), ('\u{a882}', '\u{a8b3}'), ('\u{a8f2}', '\u{a8f7}'),
-        ('\u{a8fb}', '\u{a8fb}'), ('\u{a8fd}', '\u{a8fd}'), ('\u{a90a}',
-        '\u{a925}'), ('\u{a930}', '\u{a946}'), ('\u{a960}', '\u{a97c}'),
-        ('\u{a984}', '\u{a9b2}'), ('\u{a9e0}', '\u{a9e4}'), ('\u{a9e7}',
-        '\u{a9ef}'), ('\u{a9fa}', '\u{a9fe}'), ('\u{aa00}', '\u{aa28}'),
-        ('\u{aa40}', '\u{aa42}'), ('\u{aa44}', '\u{aa4b}'), ('\u{aa60}',
-        '\u{aa6f}'), ('\u{aa71}', '\u{aa76}'), ('\u{aa7a}', '\u{aa7a}'),
-        ('\u{aa7e}', '\u{aaaf}'), ('\u{aab1}', '\u{aab1}'), ('\u{aab5}',
-        '\u{aab6}'), ('\u{aab9}', '\u{aabd}'), ('\u{aac0}', '\u{aac0}'),
-        ('\u{aac2}', '\u{aac2}'), ('\u{aadb}', '\u{aadc}'), ('\u{aae0}',
-        '\u{aaea}'), ('\u{aaf2}', '\u{aaf2}'), ('\u{ab01}', '\u{ab06}'),
-        ('\u{ab09}', '\u{ab0e}'), ('\u{ab11}', '\u{ab16}'), ('\u{ab20}',
-        '\u{ab26}'), ('\u{ab28}', '\u{ab2e}'), ('\u{abc0}', '\u{abe2}'),
-        ('\u{ac00}', '\u{d7a3}'), ('\u{d7b0}', '\u{d7c6}'), ('\u{d7cb}',
-        '\u{d7fb}'), ('\u{f900}', '\u{fa6d}'), ('\u{fa70}', '\u{fad9}'),
-        ('\u{fb1d}', '\u{fb1d}'), ('\u{fb1f}', '\u{fb28}'), ('\u{fb2a}',
-        '\u{fb36}'), ('\u{fb38}', '\u{fb3c}'), ('\u{fb3e}', '\u{fb3e}'),
-        ('\u{fb40}', '\u{fb41}'), ('\u{fb43}', '\u{fb44}'), ('\u{fb46}',
-        '\u{fbb1}'), ('\u{fbd3}', '\u{fd3d}'), ('\u{fd50}', '\u{fd8f}'),
-        ('\u{fd92}', '\u{fdc7}'), ('\u{fdf0}', '\u{fdfb}'), ('\u{fe70}',
-        '\u{fe74}'), ('\u{fe76}', '\u{fefc}'), ('\u{ff66}', '\u{ff6f}'),
-        ('\u{ff71}', '\u{ff9d}'), ('\u{ffa0}', '\u{ffbe}'), ('\u{ffc2}',
-        '\u{ffc7}'), ('\u{ffca}', '\u{ffcf}'), ('\u{ffd2}', '\u{ffd7}'),
-        ('\u{ffda}', '\u{ffdc}'), ('\u{10000}', '\u{1000b}'), ('\u{1000d}',
-        '\u{10026}'), ('\u{10028}', '\u{1003a}'), ('\u{1003c}', '\u{1003d}'),
-        ('\u{1003f}', '\u{1004d}'), ('\u{10050}', '\u{1005d}'), ('\u{10080}',
-        '\u{100fa}'), ('\u{10280}', '\u{1029c}'), ('\u{102a0}', '\u{102d0}'),
-        ('\u{10300}', '\u{1031f}'), ('\u{10330}', '\u{10340}'), ('\u{10342}',
-        '\u{10349}'), ('\u{10350}', '\u{10375}'), ('\u{10380}', '\u{1039d}'),
-        ('\u{103a0}', '\u{103c3}'), ('\u{103c8}', '\u{103cf}'), ('\u{10450}',
-        '\u{1049d}'), ('\u{10500}', '\u{10527}'), ('\u{10530}', '\u{10563}'),
-        ('\u{10600}', '\u{10736}'), ('\u{10740}', '\u{10755}'), ('\u{10760}',
-        '\u{10767}'), ('\u{10800}', '\u{10805}'), ('\u{10808}', '\u{10808}'),
-        ('\u{1080a}', '\u{10835}'), ('\u{10837}', '\u{10838}'), ('\u{1083c}',
-        '\u{1083c}'), ('\u{1083f}', '\u{10855}'), ('\u{10860}', '\u{10876}'),
-        ('\u{10880}', '\u{1089e}'), ('\u{108e0}', '\u{108f2}'), ('\u{108f4}',
-        '\u{108f5}'), ('\u{10900}', '\u{10915}'), ('\u{10920}', '\u{10939}'),
-        ('\u{10980}', '\u{109b7}'), ('\u{109be}', '\u{109bf}'), ('\u{10a00}',
-        '\u{10a00}'), ('\u{10a10}', '\u{10a13}'), ('\u{10a15}', '\u{10a17}'),
-        ('\u{10a19}', '\u{10a33}'), ('\u{10a60}', '\u{10a7c}'), ('\u{10a80}',
-        '\u{10a9c}'), ('\u{10ac0}', '\u{10ac7}'), ('\u{10ac9}', '\u{10ae4}'),
-        ('\u{10b00}', '\u{10b35}'), ('\u{10b40}', '\u{10b55}'), ('\u{10b60}',
-        '\u{10b72}'), ('\u{10b80}', '\u{10b91}'), ('\u{10c00}', '\u{10c48}'),
-        ('\u{11003}', '\u{11037}'), ('\u{11083}', '\u{110af}'), ('\u{110d0}',
-        '\u{110e8}'), ('\u{11103}', '\u{11126}'), ('\u{11150}', '\u{11172}'),
-        ('\u{11176}', '\u{11176}'), ('\u{11183}', '\u{111b2}'), ('\u{111c1}',
-        '\u{111c4}'), ('\u{111da}', '\u{111da}'), ('\u{111dc}', '\u{111dc}'),
-        ('\u{11200}', '\u{11211}'), ('\u{11213}', '\u{1122b}'), ('\u{11280}',
-        '\u{11286}'), ('\u{11288}', '\u{11288}'), ('\u{1128a}', '\u{1128d}'),
-        ('\u{1128f}', '\u{1129d}'), ('\u{1129f}', '\u{112a8}'), ('\u{112b0}',
-        '\u{112de}'), ('\u{11305}', '\u{1130c}'), ('\u{1130f}', '\u{11310}'),
-        ('\u{11313}', '\u{11328}'), ('\u{1132a}', '\u{11330}'), ('\u{11332}',
-        '\u{11333}'), ('\u{11335}', '\u{11339}'), ('\u{1133d}', '\u{1133d}'),
-        ('\u{11350}', '\u{11350}'), ('\u{1135d}', '\u{11361}'), ('\u{11480}',
-        '\u{114af}'), ('\u{114c4}', '\u{114c5}'), ('\u{114c7}', '\u{114c7}'),
-        ('\u{11580}', '\u{115ae}'), ('\u{115d8}', '\u{115db}'), ('\u{11600}',
-        '\u{1162f}'), ('\u{11644}', '\u{11644}'), ('\u{11680}', '\u{116aa}'),
-        ('\u{11700}', '\u{11719}'), ('\u{118ff}', '\u{118ff}'), ('\u{11ac0}',
-        '\u{11af8}'), ('\u{12000}', '\u{12399}'), ('\u{12480}', '\u{12543}'),
-        ('\u{13000}', '\u{1342e}'), ('\u{14400}', '\u{14646}'), ('\u{16800}',
-        '\u{16a38}'), ('\u{16a40}', '\u{16a5e}'), ('\u{16ad0}', '\u{16aed}'),
-        ('\u{16b00}', '\u{16b2f}'), ('\u{16b63}', '\u{16b77}'), ('\u{16b7d}',
-        '\u{16b8f}'), ('\u{16f00}', '\u{16f44}'), ('\u{16f50}', '\u{16f50}'),
-        ('\u{1b000}', '\u{1b001}'), ('\u{1bc00}', '\u{1bc6a}'), ('\u{1bc70}',
-        '\u{1bc7c}'), ('\u{1bc80}', '\u{1bc88}'), ('\u{1bc90}', '\u{1bc99}'),
-        ('\u{1e800}', '\u{1e8c4}'), ('\u{1ee00}', '\u{1ee03}'), ('\u{1ee05}',
-        '\u{1ee1f}'), ('\u{1ee21}', '\u{1ee22}'), ('\u{1ee24}', '\u{1ee24}'),
-        ('\u{1ee27}', '\u{1ee27}'), ('\u{1ee29}', '\u{1ee32}'), ('\u{1ee34}',
-        '\u{1ee37}'), ('\u{1ee39}', '\u{1ee39}'), ('\u{1ee3b}', '\u{1ee3b}'),
-        ('\u{1ee42}', '\u{1ee42}'), ('\u{1ee47}', '\u{1ee47}'), ('\u{1ee49}',
-        '\u{1ee49}'), ('\u{1ee4b}', '\u{1ee4b}'), ('\u{1ee4d}', '\u{1ee4f}'),
-        ('\u{1ee51}', '\u{1ee52}'), ('\u{1ee54}', '\u{1ee54}'), ('\u{1ee57}',
-        '\u{1ee57}'), ('\u{1ee59}', '\u{1ee59}'), ('\u{1ee5b}', '\u{1ee5b}'),
-        ('\u{1ee5d}', '\u{1ee5d}'), ('\u{1ee5f}', '\u{1ee5f}'), ('\u{1ee61}',
-        '\u{1ee62}'), ('\u{1ee64}', '\u{1ee64}'), ('\u{1ee67}', '\u{1ee6a}'),
-        ('\u{1ee6c}', '\u{1ee72}'), ('\u{1ee74}', '\u{1ee77}'), ('\u{1ee79}',
-        '\u{1ee7c}'), ('\u{1ee7e}', '\u{1ee7e}'), ('\u{1ee80}', '\u{1ee89}'),
-        ('\u{1ee8b}', '\u{1ee9b}'), ('\u{1eea1}', '\u{1eea3}'), ('\u{1eea5}',
-        '\u{1eea9}'), ('\u{1eeab}', '\u{1eebb}'), ('\u{20000}', '\u{2a6d6}'),
-        ('\u{2a700}', '\u{2b734}'), ('\u{2b740}', '\u{2b81d}'), ('\u{2b820}',
-        '\u{2cea1}'), ('\u{2f800}', '\u{2fa1d}')
-    ];
-
-    pub const Lt_table: &'static [(char, char)] = &[
-        ('\u{1c5}', '\u{1c5}'), ('\u{1c8}', '\u{1c8}'), ('\u{1cb}', '\u{1cb}'),
-        ('\u{1f2}', '\u{1f2}'), ('\u{1f88}', '\u{1f8f}'), ('\u{1f98}',
-        '\u{1f9f}'), ('\u{1fa8}', '\u{1faf}'), ('\u{1fbc}', '\u{1fbc}'),
-        ('\u{1fcc}', '\u{1fcc}'), ('\u{1ffc}', '\u{1ffc}')
-    ];
+/// A query for finding a character class defined by Unicode. This supports
+/// either use of a property name directly, or lookup by property value. The
+/// former generally refers to Binary properties (see UTS#44, Table 8), but
+/// as a special exception (see UTS#18, Section 1.2), both general categories
+/// (an enumeration) and scripts (a catalog) are supported as if each of their
+/// possible values were a binary property.
+///
+/// In all circumstances, property names and values are normalized and
+/// canonicalized. That is, `GC == gc == GeneralCategory == general_category`.
+///
+/// The lifetime `'a` refers to the shorter of the lifetimes of property name
+/// and property value.
+#[derive(Debug)]
+pub enum ClassQuery<'a> {
+    /// Return a class corresponding to a Unicode binary property, named by
+    /// a single letter.
+    OneLetter(char),
+    /// Return a class corresponding to a Unicode binary property.
+    ///
+    /// Note that, by special exception (see UTS#18, Section 1.2), both
+    /// general category values and script values are permitted here as if
+    /// they were a binary property.
+    Binary(&'a str),
+    /// Return a class corresponding to all codepoints whose property
+    /// (identified by `property_name`) corresponds to the given value
+    /// (identified by `property_value`).
+    ByValue {
+        /// A property name.
+        property_name: &'a str,
+        /// A property value.
+        property_value: &'a str,
+    },
+}
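A brief sketch of how the three query forms might be constructed, assuming the `ClassQuery` definition above is in scope. The helper name `example_queries` and the specific property names and values are illustrative only; they are not the crate's own test code.

    // Building one query of each form described above.
    fn example_queries() -> Vec<ClassQuery<'static>> {
        vec![
            // \pL style: a one-letter general category name.
            ClassQuery::OneLetter('L'),
            // A binary property, or (by the UTS#18 exception) a general
            // category or script name used as if it were a binary property.
            ClassQuery::Binary("Greek"),
            // An explicit name/value pair; both sides are normalized before
            // lookup, so "general_category"/"lowercase_letter" would resolve
            // to the same class.
            ClassQuery::ByValue {
                property_name: "General_Category",
                property_value: "Lowercase_Letter",
            },
        ]
    }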
 
-    pub const Lu_table: &'static [(char, char)] = &[
-        ('\u{41}', '\u{5a}'), ('\u{c0}', '\u{d6}'), ('\u{d8}', '\u{de}'),
-        ('\u{100}', '\u{100}'), ('\u{102}', '\u{102}'), ('\u{104}', '\u{104}'),
-        ('\u{106}', '\u{106}'), ('\u{108}', '\u{108}'), ('\u{10a}', '\u{10a}'),
-        ('\u{10c}', '\u{10c}'), ('\u{10e}', '\u{10e}'), ('\u{110}', '\u{110}'),
-        ('\u{112}', '\u{112}'), ('\u{114}', '\u{114}'), ('\u{116}', '\u{116}'),
-        ('\u{118}', '\u{118}'), ('\u{11a}', '\u{11a}'), ('\u{11c}', '\u{11c}'),
-        ('\u{11e}', '\u{11e}'), ('\u{120}', '\u{120}'), ('\u{122}', '\u{122}'),
-        ('\u{124}', '\u{124}'), ('\u{126}', '\u{126}'), ('\u{128}', '\u{128}'),
-        ('\u{12a}', '\u{12a}'), ('\u{12c}', '\u{12c}'), ('\u{12e}', '\u{12e}'),
-        ('\u{130}', '\u{130}'), ('\u{132}', '\u{132}'), ('\u{134}', '\u{134}'),
-        ('\u{136}', '\u{136}'), ('\u{139}', '\u{139}'), ('\u{13b}', '\u{13b}'),
-        ('\u{13d}', '\u{13d}'), ('\u{13f}', '\u{13f}'), ('\u{141}', '\u{141}'),
-        ('\u{143}', '\u{143}'), ('\u{145}', '\u{145}'), ('\u{147}', '\u{147}'),
-        ('\u{14a}', '\u{14a}'), ('\u{14c}', '\u{14c}'), ('\u{14e}', '\u{14e}'),
-        ('\u{150}', '\u{150}'), ('\u{152}', '\u{152}'), ('\u{154}', '\u{154}'),
-        ('\u{156}', '\u{156}'), ('\u{158}', '\u{158}'), ('\u{15a}', '\u{15a}'),
-        ('\u{15c}', '\u{15c}'), ('\u{15e}', '\u{15e}'), ('\u{160}', '\u{160}'),
-        ('\u{162}', '\u{162}'), ('\u{164}', '\u{164}'), ('\u{166}', '\u{166}'),
-        ('\u{168}', '\u{168}'), ('\u{16a}', '\u{16a}'), ('\u{16c}', '\u{16c}'),
-        ('\u{16e}', '\u{16e}'), ('\u{170}', '\u{170}'), ('\u{172}', '\u{172}'),
-        ('\u{174}', '\u{174}'), ('\u{176}', '\u{176}'), ('\u{178}', '\u{179}'),
-        ('\u{17b}', '\u{17b}'), ('\u{17d}', '\u{17d}'), ('\u{181}', '\u{182}'),
-        ('\u{184}', '\u{184}'), ('\u{186}', '\u{187}'), ('\u{189}', '\u{18b}'),
-        ('\u{18e}', '\u{191}'), ('\u{193}', '\u{194}'), ('\u{196}', '\u{198}'),
-        ('\u{19c}', '\u{19d}'), ('\u{19f}', '\u{1a0}'), ('\u{1a2}', '\u{1a2}'),
-        ('\u{1a4}', '\u{1a4}'), ('\u{1a6}', '\u{1a7}'), ('\u{1a9}', '\u{1a9}'),
-        ('\u{1ac}', '\u{1ac}'), ('\u{1ae}', '\u{1af}'), ('\u{1b1}', '\u{1b3}'),
-        ('\u{1b5}', '\u{1b5}'), ('\u{1b7}', '\u{1b8}'), ('\u{1bc}', '\u{1bc}'),
-        ('\u{1c4}', '\u{1c4}'), ('\u{1c7}', '\u{1c7}'), ('\u{1ca}', '\u{1ca}'),
-        ('\u{1cd}', '\u{1cd}'), ('\u{1cf}', '\u{1cf}'), ('\u{1d1}', '\u{1d1}'),
-        ('\u{1d3}', '\u{1d3}'), ('\u{1d5}', '\u{1d5}'), ('\u{1d7}', '\u{1d7}'),
-        ('\u{1d9}', '\u{1d9}'), ('\u{1db}', '\u{1db}'), ('\u{1de}', '\u{1de}'),
-        ('\u{1e0}', '\u{1e0}'), ('\u{1e2}', '\u{1e2}'), ('\u{1e4}', '\u{1e4}'),
-        ('\u{1e6}', '\u{1e6}'), ('\u{1e8}', '\u{1e8}'), ('\u{1ea}', '\u{1ea}'),
-        ('\u{1ec}', '\u{1ec}'), ('\u{1ee}', '\u{1ee}'), ('\u{1f1}', '\u{1f1}'),
-        ('\u{1f4}', '\u{1f4}'), ('\u{1f6}', '\u{1f8}'), ('\u{1fa}', '\u{1fa}'),
-        ('\u{1fc}', '\u{1fc}'), ('\u{1fe}', '\u{1fe}'), ('\u{200}', '\u{200}'),
-        ('\u{202}', '\u{202}'), ('\u{204}', '\u{204}'), ('\u{206}', '\u{206}'),
-        ('\u{208}', '\u{208}'), ('\u{20a}', '\u{20a}'), ('\u{20c}', '\u{20c}'),
-        ('\u{20e}', '\u{20e}'), ('\u{210}', '\u{210}'), ('\u{212}', '\u{212}'),
-        ('\u{214}', '\u{214}'), ('\u{216}', '\u{216}'), ('\u{218}', '\u{218}'),
-        ('\u{21a}', '\u{21a}'), ('\u{21c}', '\u{21c}'), ('\u{21e}', '\u{21e}'),
-        ('\u{220}', '\u{220}'), ('\u{222}', '\u{222}'), ('\u{224}', '\u{224}'),
-        ('\u{226}', '\u{226}'), ('\u{228}', '\u{228}'), ('\u{22a}', '\u{22a}'),
-        ('\u{22c}', '\u{22c}'), ('\u{22e}', '\u{22e}'), ('\u{230}', '\u{230}'),
-        ('\u{232}', '\u{232}'), ('\u{23a}', '\u{23b}'), ('\u{23d}', '\u{23e}'),
-        ('\u{241}', '\u{241}'), ('\u{243}', '\u{246}'), ('\u{248}', '\u{248}'),
-        ('\u{24a}', '\u{24a}'), ('\u{24c}', '\u{24c}'), ('\u{24e}', '\u{24e}'),
-        ('\u{370}', '\u{370}'), ('\u{372}', '\u{372}'), ('\u{376}', '\u{376}'),
-        ('\u{37f}', '\u{37f}'), ('\u{386}', '\u{386}'), ('\u{388}', '\u{38a}'),
-        ('\u{38c}', '\u{38c}'), ('\u{38e}', '\u{38f}'), ('\u{391}', '\u{3a1}'),
-        ('\u{3a3}', '\u{3ab}'), ('\u{3cf}', '\u{3cf}'), ('\u{3d2}', '\u{3d4}'),
-        ('\u{3d8}', '\u{3d8}'), ('\u{3da}', '\u{3da}'), ('\u{3dc}', '\u{3dc}'),
-        ('\u{3de}', '\u{3de}'), ('\u{3e0}', '\u{3e0}'), ('\u{3e2}', '\u{3e2}'),
-        ('\u{3e4}', '\u{3e4}'), ('\u{3e6}', '\u{3e6}'), ('\u{3e8}', '\u{3e8}'),
-        ('\u{3ea}', '\u{3ea}'), ('\u{3ec}', '\u{3ec}'), ('\u{3ee}', '\u{3ee}'),
-        ('\u{3f4}', '\u{3f4}'), ('\u{3f7}', '\u{3f7}'), ('\u{3f9}', '\u{3fa}'),
-        ('\u{3fd}', '\u{42f}'), ('\u{460}', '\u{460}'), ('\u{462}', '\u{462}'),
-        ('\u{464}', '\u{464}'), ('\u{466}', '\u{466}'), ('\u{468}', '\u{468}'),
-        ('\u{46a}', '\u{46a}'), ('\u{46c}', '\u{46c}'), ('\u{46e}', '\u{46e}'),
-        ('\u{470}', '\u{470}'), ('\u{472}', '\u{472}'), ('\u{474}', '\u{474}'),
-        ('\u{476}', '\u{476}'), ('\u{478}', '\u{478}'), ('\u{47a}', '\u{47a}'),
-        ('\u{47c}', '\u{47c}'), ('\u{47e}', '\u{47e}'), ('\u{480}', '\u{480}'),
-        ('\u{48a}', '\u{48a}'), ('\u{48c}', '\u{48c}'), ('\u{48e}', '\u{48e}'),
-        ('\u{490}', '\u{490}'), ('\u{492}', '\u{492}'), ('\u{494}', '\u{494}'),
-        ('\u{496}', '\u{496}'), ('\u{498}', '\u{498}'), ('\u{49a}', '\u{49a}'),
-        ('\u{49c}', '\u{49c}'), ('\u{49e}', '\u{49e}'), ('\u{4a0}', '\u{4a0}'),
-        ('\u{4a2}', '\u{4a2}'), ('\u{4a4}', '\u{4a4}'), ('\u{4a6}', '\u{4a6}'),
-        ('\u{4a8}', '\u{4a8}'), ('\u{4aa}', '\u{4aa}'), ('\u{4ac}', '\u{4ac}'),
-        ('\u{4ae}', '\u{4ae}'), ('\u{4b0}', '\u{4b0}'), ('\u{4b2}', '\u{4b2}'),
-        ('\u{4b4}', '\u{4b4}'), ('\u{4b6}', '\u{4b6}'), ('\u{4b8}', '\u{4b8}'),
-        ('\u{4ba}', '\u{4ba}'), ('\u{4bc}', '\u{4bc}'), ('\u{4be}', '\u{4be}'),
-        ('\u{4c0}', '\u{4c1}'), ('\u{4c3}', '\u{4c3}'), ('\u{4c5}', '\u{4c5}'),
-        ('\u{4c7}', '\u{4c7}'), ('\u{4c9}', '\u{4c9}'), ('\u{4cb}', '\u{4cb}'),
-        ('\u{4cd}', '\u{4cd}'), ('\u{4d0}', '\u{4d0}'), ('\u{4d2}', '\u{4d2}'),
-        ('\u{4d4}', '\u{4d4}'), ('\u{4d6}', '\u{4d6}'), ('\u{4d8}', '\u{4d8}'),
-        ('\u{4da}', '\u{4da}'), ('\u{4dc}', '\u{4dc}'), ('\u{4de}', '\u{4de}'),
-        ('\u{4e0}', '\u{4e0}'), ('\u{4e2}', '\u{4e2}'), ('\u{4e4}', '\u{4e4}'),
-        ('\u{4e6}', '\u{4e6}'), ('\u{4e8}', '\u{4e8}'), ('\u{4ea}', '\u{4ea}'),
-        ('\u{4ec}', '\u{4ec}'), ('\u{4ee}', '\u{4ee}'), ('\u{4f0}', '\u{4f0}'),
-        ('\u{4f2}', '\u{4f2}'), ('\u{4f4}', '\u{4f4}'), ('\u{4f6}', '\u{4f6}'),
-        ('\u{4f8}', '\u{4f8}'), ('\u{4fa}', '\u{4fa}'), ('\u{4fc}', '\u{4fc}'),
-        ('\u{4fe}', '\u{4fe}'), ('\u{500}', '\u{500}'), ('\u{502}', '\u{502}'),
-        ('\u{504}', '\u{504}'), ('\u{506}', '\u{506}'), ('\u{508}', '\u{508}'),
-        ('\u{50a}', '\u{50a}'), ('\u{50c}', '\u{50c}'), ('\u{50e}', '\u{50e}'),
-        ('\u{510}', '\u{510}'), ('\u{512}', '\u{512}'), ('\u{514}', '\u{514}'),
-        ('\u{516}', '\u{516}'), ('\u{518}', '\u{518}'), ('\u{51a}', '\u{51a}'),
-        ('\u{51c}', '\u{51c}'), ('\u{51e}', '\u{51e}'), ('\u{520}', '\u{520}'),
-        ('\u{522}', '\u{522}'), ('\u{524}', '\u{524}'), ('\u{526}', '\u{526}'),
-        ('\u{528}', '\u{528}'), ('\u{52a}', '\u{52a}'), ('\u{52c}', '\u{52c}'),
-        ('\u{52e}', '\u{52e}'), ('\u{531}', '\u{556}'), ('\u{10a0}',
-        '\u{10c5}'), ('\u{10c7}', '\u{10c7}'), ('\u{10cd}', '\u{10cd}'),
-        ('\u{13a0}', '\u{13f5}'), ('\u{1e00}', '\u{1e00}'), ('\u{1e02}',
-        '\u{1e02}'), ('\u{1e04}', '\u{1e04}'), ('\u{1e06}', '\u{1e06}'),
-        ('\u{1e08}', '\u{1e08}'), ('\u{1e0a}', '\u{1e0a}'), ('\u{1e0c}',
-        '\u{1e0c}'), ('\u{1e0e}', '\u{1e0e}'), ('\u{1e10}', '\u{1e10}'),
-        ('\u{1e12}', '\u{1e12}'), ('\u{1e14}', '\u{1e14}'), ('\u{1e16}',
-        '\u{1e16}'), ('\u{1e18}', '\u{1e18}'), ('\u{1e1a}', '\u{1e1a}'),
-        ('\u{1e1c}', '\u{1e1c}'), ('\u{1e1e}', '\u{1e1e}'), ('\u{1e20}',
-        '\u{1e20}'), ('\u{1e22}', '\u{1e22}'), ('\u{1e24}', '\u{1e24}'),
-        ('\u{1e26}', '\u{1e26}'), ('\u{1e28}', '\u{1e28}'), ('\u{1e2a}',
-        '\u{1e2a}'), ('\u{1e2c}', '\u{1e2c}'), ('\u{1e2e}', '\u{1e2e}'),
-        ('\u{1e30}', '\u{1e30}'), ('\u{1e32}', '\u{1e32}'), ('\u{1e34}',
-        '\u{1e34}'), ('\u{1e36}', '\u{1e36}'), ('\u{1e38}', '\u{1e38}'),
-        ('\u{1e3a}', '\u{1e3a}'), ('\u{1e3c}', '\u{1e3c}'), ('\u{1e3e}',
-        '\u{1e3e}'), ('\u{1e40}', '\u{1e40}'), ('\u{1e42}', '\u{1e42}'),
-        ('\u{1e44}', '\u{1e44}'), ('\u{1e46}', '\u{1e46}'), ('\u{1e48}',
-        '\u{1e48}'), ('\u{1e4a}', '\u{1e4a}'), ('\u{1e4c}', '\u{1e4c}'),
-        ('\u{1e4e}', '\u{1e4e}'), ('\u{1e50}', '\u{1e50}'), ('\u{1e52}',
-        '\u{1e52}'), ('\u{1e54}', '\u{1e54}'), ('\u{1e56}', '\u{1e56}'),
-        ('\u{1e58}', '\u{1e58}'), ('\u{1e5a}', '\u{1e5a}'), ('\u{1e5c}',
-        '\u{1e5c}'), ('\u{1e5e}', '\u{1e5e}'), ('\u{1e60}', '\u{1e60}'),
-        ('\u{1e62}', '\u{1e62}'), ('\u{1e64}', '\u{1e64}'), ('\u{1e66}',
-        '\u{1e66}'), ('\u{1e68}', '\u{1e68}'), ('\u{1e6a}', '\u{1e6a}'),
-        ('\u{1e6c}', '\u{1e6c}'), ('\u{1e6e}', '\u{1e6e}'), ('\u{1e70}',
-        '\u{1e70}'), ('\u{1e72}', '\u{1e72}'), ('\u{1e74}', '\u{1e74}'),
-        ('\u{1e76}', '\u{1e76}'), ('\u{1e78}', '\u{1e78}'), ('\u{1e7a}',
-        '\u{1e7a}'), ('\u{1e7c}', '\u{1e7c}'), ('\u{1e7e}', '\u{1e7e}'),
-        ('\u{1e80}', '\u{1e80}'), ('\u{1e82}', '\u{1e82}'), ('\u{1e84}',
-        '\u{1e84}'), ('\u{1e86}', '\u{1e86}'), ('\u{1e88}', '\u{1e88}'),
-        ('\u{1e8a}', '\u{1e8a}'), ('\u{1e8c}', '\u{1e8c}'), ('\u{1e8e}',
-        '\u{1e8e}'), ('\u{1e90}', '\u{1e90}'), ('\u{1e92}', '\u{1e92}'),
-        ('\u{1e94}', '\u{1e94}'), ('\u{1e9e}', '\u{1e9e}'), ('\u{1ea0}',
-        '\u{1ea0}'), ('\u{1ea2}', '\u{1ea2}'), ('\u{1ea4}', '\u{1ea4}'),
-        ('\u{1ea6}', '\u{1ea6}'), ('\u{1ea8}', '\u{1ea8}'), ('\u{1eaa}',
-        '\u{1eaa}'), ('\u{1eac}', '\u{1eac}'), ('\u{1eae}', '\u{1eae}'),
-        ('\u{1eb0}', '\u{1eb0}'), ('\u{1eb2}', '\u{1eb2}'), ('\u{1eb4}',
-        '\u{1eb4}'), ('\u{1eb6}', '\u{1eb6}'), ('\u{1eb8}', '\u{1eb8}'),
-        ('\u{1eba}', '\u{1eba}'), ('\u{1ebc}', '\u{1ebc}'), ('\u{1ebe}',
-        '\u{1ebe}'), ('\u{1ec0}', '\u{1ec0}'), ('\u{1ec2}', '\u{1ec2}'),
-        ('\u{1ec4}', '\u{1ec4}'), ('\u{1ec6}', '\u{1ec6}'), ('\u{1ec8}',
-        '\u{1ec8}'), ('\u{1eca}', '\u{1eca}'), ('\u{1ecc}', '\u{1ecc}'),
-        ('\u{1ece}', '\u{1ece}'), ('\u{1ed0}', '\u{1ed0}'), ('\u{1ed2}',
-        '\u{1ed2}'), ('\u{1ed4}', '\u{1ed4}'), ('\u{1ed6}', '\u{1ed6}'),
-        ('\u{1ed8}', '\u{1ed8}'), ('\u{1eda}', '\u{1eda}'), ('\u{1edc}',
-        '\u{1edc}'), ('\u{1ede}', '\u{1ede}'), ('\u{1ee0}', '\u{1ee0}'),
-        ('\u{1ee2}', '\u{1ee2}'), ('\u{1ee4}', '\u{1ee4}'), ('\u{1ee6}',
-        '\u{1ee6}'), ('\u{1ee8}', '\u{1ee8}'), ('\u{1eea}', '\u{1eea}'),
-        ('\u{1eec}', '\u{1eec}'), ('\u{1eee}', '\u{1eee}'), ('\u{1ef0}',
-        '\u{1ef0}'), ('\u{1ef2}', '\u{1ef2}'), ('\u{1ef4}', '\u{1ef4}'),
-        ('\u{1ef6}', '\u{1ef6}'), ('\u{1ef8}', '\u{1ef8}'), ('\u{1efa}',
-        '\u{1efa}'), ('\u{1efc}', '\u{1efc}'), ('\u{1efe}', '\u{1efe}'),
-        ('\u{1f08}', '\u{1f0f}'), ('\u{1f18}', '\u{1f1d}'), ('\u{1f28}',
-        '\u{1f2f}'), ('\u{1f38}', '\u{1f3f}'), ('\u{1f48}', '\u{1f4d}'),
-        ('\u{1f59}', '\u{1f59}'), ('\u{1f5b}', '\u{1f5b}'), ('\u{1f5d}',
-        '\u{1f5d}'), ('\u{1f5f}', '\u{1f5f}'), ('\u{1f68}', '\u{1f6f}'),
-        ('\u{1fb8}', '\u{1fbb}'), ('\u{1fc8}', '\u{1fcb}'), ('\u{1fd8}',
-        '\u{1fdb}'), ('\u{1fe8}', '\u{1fec}'), ('\u{1ff8}', '\u{1ffb}'),
-        ('\u{2102}', '\u{2102}'), ('\u{2107}', '\u{2107}'), ('\u{210b}',
-        '\u{210d}'), ('\u{2110}', '\u{2112}'), ('\u{2115}', '\u{2115}'),
-        ('\u{2119}', '\u{211d}'), ('\u{2124}', '\u{2124}'), ('\u{2126}',
-        '\u{2126}'), ('\u{2128}', '\u{2128}'), ('\u{212a}', '\u{212d}'),
-        ('\u{2130}', '\u{2133}'), ('\u{213e}', '\u{213f}'), ('\u{2145}',
-        '\u{2145}'), ('\u{2183}', '\u{2183}'), ('\u{2c00}', '\u{2c2e}'),
-        ('\u{2c60}', '\u{2c60}'), ('\u{2c62}', '\u{2c64}'), ('\u{2c67}',
-        '\u{2c67}'), ('\u{2c69}', '\u{2c69}'), ('\u{2c6b}', '\u{2c6b}'),
-        ('\u{2c6d}', '\u{2c70}'), ('\u{2c72}', '\u{2c72}'), ('\u{2c75}',
-        '\u{2c75}'), ('\u{2c7e}', '\u{2c80}'), ('\u{2c82}', '\u{2c82}'),
-        ('\u{2c84}', '\u{2c84}'), ('\u{2c86}', '\u{2c86}'), ('\u{2c88}',
-        '\u{2c88}'), ('\u{2c8a}', '\u{2c8a}'), ('\u{2c8c}', '\u{2c8c}'),
-        ('\u{2c8e}', '\u{2c8e}'), ('\u{2c90}', '\u{2c90}'), ('\u{2c92}',
-        '\u{2c92}'), ('\u{2c94}', '\u{2c94}'), ('\u{2c96}', '\u{2c96}'),
-        ('\u{2c98}', '\u{2c98}'), ('\u{2c9a}', '\u{2c9a}'), ('\u{2c9c}',
-        '\u{2c9c}'), ('\u{2c9e}', '\u{2c9e}'), ('\u{2ca0}', '\u{2ca0}'),
-        ('\u{2ca2}', '\u{2ca2}'), ('\u{2ca4}', '\u{2ca4}'), ('\u{2ca6}',
-        '\u{2ca6}'), ('\u{2ca8}', '\u{2ca8}'), ('\u{2caa}', '\u{2caa}'),
-        ('\u{2cac}', '\u{2cac}'), ('\u{2cae}', '\u{2cae}'), ('\u{2cb0}',
-        '\u{2cb0}'), ('\u{2cb2}', '\u{2cb2}'), ('\u{2cb4}', '\u{2cb4}'),
-        ('\u{2cb6}', '\u{2cb6}'), ('\u{2cb8}', '\u{2cb8}'), ('\u{2cba}',
-        '\u{2cba}'), ('\u{2cbc}', '\u{2cbc}'), ('\u{2cbe}', '\u{2cbe}'),
-        ('\u{2cc0}', '\u{2cc0}'), ('\u{2cc2}', '\u{2cc2}'), ('\u{2cc4}',
-        '\u{2cc4}'), ('\u{2cc6}', '\u{2cc6}'), ('\u{2cc8}', '\u{2cc8}'),
-        ('\u{2cca}', '\u{2cca}'), ('\u{2ccc}', '\u{2ccc}'), ('\u{2cce}',
-        '\u{2cce}'), ('\u{2cd0}', '\u{2cd0}'), ('\u{2cd2}', '\u{2cd2}'),
-        ('\u{2cd4}', '\u{2cd4}'), ('\u{2cd6}', '\u{2cd6}'), ('\u{2cd8}',
-        '\u{2cd8}'), ('\u{2cda}', '\u{2cda}'), ('\u{2cdc}', '\u{2cdc}'),
-        ('\u{2cde}', '\u{2cde}'), ('\u{2ce0}', '\u{2ce0}'), ('\u{2ce2}',
-        '\u{2ce2}'), ('\u{2ceb}', '\u{2ceb}'), ('\u{2ced}', '\u{2ced}'),
-        ('\u{2cf2}', '\u{2cf2}'), ('\u{a640}', '\u{a640}'), ('\u{a642}',
-        '\u{a642}'), ('\u{a644}', '\u{a644}'), ('\u{a646}', '\u{a646}'),
-        ('\u{a648}', '\u{a648}'), ('\u{a64a}', '\u{a64a}'), ('\u{a64c}',
-        '\u{a64c}'), ('\u{a64e}', '\u{a64e}'), ('\u{a650}', '\u{a650}'),
-        ('\u{a652}', '\u{a652}'), ('\u{a654}', '\u{a654}'), ('\u{a656}',
-        '\u{a656}'), ('\u{a658}', '\u{a658}'), ('\u{a65a}', '\u{a65a}'),
-        ('\u{a65c}', '\u{a65c}'), ('\u{a65e}', '\u{a65e}'), ('\u{a660}',
-        '\u{a660}'), ('\u{a662}', '\u{a662}'), ('\u{a664}', '\u{a664}'),
-        ('\u{a666}', '\u{a666}'), ('\u{a668}', '\u{a668}'), ('\u{a66a}',
-        '\u{a66a}'), ('\u{a66c}', '\u{a66c}'), ('\u{a680}', '\u{a680}'),
-        ('\u{a682}', '\u{a682}'), ('\u{a684}', '\u{a684}'), ('\u{a686}',
-        '\u{a686}'), ('\u{a688}', '\u{a688}'), ('\u{a68a}', '\u{a68a}'),
-        ('\u{a68c}', '\u{a68c}'), ('\u{a68e}', '\u{a68e}'), ('\u{a690}',
-        '\u{a690}'), ('\u{a692}', '\u{a692}'), ('\u{a694}', '\u{a694}'),
-        ('\u{a696}', '\u{a696}'), ('\u{a698}', '\u{a698}'), ('\u{a69a}',
-        '\u{a69a}'), ('\u{a722}', '\u{a722}'), ('\u{a724}', '\u{a724}'),
-        ('\u{a726}', '\u{a726}'), ('\u{a728}', '\u{a728}'), ('\u{a72a}',
-        '\u{a72a}'), ('\u{a72c}', '\u{a72c}'), ('\u{a72e}', '\u{a72e}'),
-        ('\u{a732}', '\u{a732}'), ('\u{a734}', '\u{a734}'), ('\u{a736}',
-        '\u{a736}'), ('\u{a738}', '\u{a738}'), ('\u{a73a}', '\u{a73a}'),
-        ('\u{a73c}', '\u{a73c}'), ('\u{a73e}', '\u{a73e}'), ('\u{a740}',
-        '\u{a740}'), ('\u{a742}', '\u{a742}'), ('\u{a744}', '\u{a744}'),
-        ('\u{a746}', '\u{a746}'), ('\u{a748}', '\u{a748}'), ('\u{a74a}',
-        '\u{a74a}'), ('\u{a74c}', '\u{a74c}'), ('\u{a74e}', '\u{a74e}'),
-        ('\u{a750}', '\u{a750}'), ('\u{a752}', '\u{a752}'), ('\u{a754}',
-        '\u{a754}'), ('\u{a756}', '\u{a756}'), ('\u{a758}', '\u{a758}'),
-        ('\u{a75a}', '\u{a75a}'), ('\u{a75c}', '\u{a75c}'), ('\u{a75e}',
-        '\u{a75e}'), ('\u{a760}', '\u{a760}'), ('\u{a762}', '\u{a762}'),
-        ('\u{a764}', '\u{a764}'), ('\u{a766}', '\u{a766}'), ('\u{a768}',
-        '\u{a768}'), ('\u{a76a}', '\u{a76a}'), ('\u{a76c}', '\u{a76c}'),
-        ('\u{a76e}', '\u{a76e}'), ('\u{a779}', '\u{a779}'), ('\u{a77b}',
-        '\u{a77b}'), ('\u{a77d}', '\u{a77e}'), ('\u{a780}', '\u{a780}'),
-        ('\u{a782}', '\u{a782}'), ('\u{a784}', '\u{a784}'), ('\u{a786}',
-        '\u{a786}'), ('\u{a78b}', '\u{a78b}'), ('\u{a78d}', '\u{a78d}'),
-        ('\u{a790}', '\u{a790}'), ('\u{a792}', '\u{a792}'), ('\u{a796}',
-        '\u{a796}'), ('\u{a798}', '\u{a798}'), ('\u{a79a}', '\u{a79a}'),
-        ('\u{a79c}', '\u{a79c}'), ('\u{a79e}', '\u{a79e}'), ('\u{a7a0}',
-        '\u{a7a0}'), ('\u{a7a2}', '\u{a7a2}'), ('\u{a7a4}', '\u{a7a4}'),
-        ('\u{a7a6}', '\u{a7a6}'), ('\u{a7a8}', '\u{a7a8}'), ('\u{a7aa}',
-        '\u{a7ad}'), ('\u{a7b0}', '\u{a7b4}'), ('\u{a7b6}', '\u{a7b6}'),
-        ('\u{ff21}', '\u{ff3a}'), ('\u{10400}', '\u{10427}'), ('\u{10c80}',
-        '\u{10cb2}'), ('\u{118a0}', '\u{118bf}'), ('\u{1d400}', '\u{1d419}'),
-        ('\u{1d434}', '\u{1d44d}'), ('\u{1d468}', '\u{1d481}'), ('\u{1d49c}',
-        '\u{1d49c}'), ('\u{1d49e}', '\u{1d49f}'), ('\u{1d4a2}', '\u{1d4a2}'),
-        ('\u{1d4a5}', '\u{1d4a6}'), ('\u{1d4a9}', '\u{1d4ac}'), ('\u{1d4ae}',
-        '\u{1d4b5}'), ('\u{1d4d0}', '\u{1d4e9}'), ('\u{1d504}', '\u{1d505}'),
-        ('\u{1d507}', '\u{1d50a}'), ('\u{1d50d}', '\u{1d514}'), ('\u{1d516}',
-        '\u{1d51c}'), ('\u{1d538}', '\u{1d539}'), ('\u{1d53b}', '\u{1d53e}'),
-        ('\u{1d540}', '\u{1d544}'), ('\u{1d546}', '\u{1d546}'), ('\u{1d54a}',
-        '\u{1d550}'), ('\u{1d56c}', '\u{1d585}'), ('\u{1d5a0}', '\u{1d5b9}'),
-        ('\u{1d5d4}', '\u{1d5ed}'), ('\u{1d608}', '\u{1d621}'), ('\u{1d63c}',
-        '\u{1d655}'), ('\u{1d670}', '\u{1d689}'), ('\u{1d6a8}', '\u{1d6c0}'),
-        ('\u{1d6e2}', '\u{1d6fa}'), ('\u{1d71c}', '\u{1d734}'), ('\u{1d756}',
-        '\u{1d76e}'), ('\u{1d790}', '\u{1d7a8}'), ('\u{1d7ca}', '\u{1d7ca}')
-    ];
-
-    pub const M_table: &'static [(char, char)] = &[
-        ('\u{300}', '\u{36f}'), ('\u{483}', '\u{489}'), ('\u{591}', '\u{5bd}'),
-        ('\u{5bf}', '\u{5bf}'), ('\u{5c1}', '\u{5c2}'), ('\u{5c4}', '\u{5c5}'),
-        ('\u{5c7}', '\u{5c7}'), ('\u{610}', '\u{61a}'), ('\u{64b}', '\u{65f}'),
-        ('\u{670}', '\u{670}'), ('\u{6d6}', '\u{6dc}'), ('\u{6df}', '\u{6e4}'),
-        ('\u{6e7}', '\u{6e8}'), ('\u{6ea}', '\u{6ed}'), ('\u{711}', '\u{711}'),
-        ('\u{730}', '\u{74a}'), ('\u{7a6}', '\u{7b0}'), ('\u{7eb}', '\u{7f3}'),
-        ('\u{816}', '\u{819}'), ('\u{81b}', '\u{823}'), ('\u{825}', '\u{827}'),
-        ('\u{829}', '\u{82d}'), ('\u{859}', '\u{85b}'), ('\u{8e3}', '\u{903}'),
-        ('\u{93a}', '\u{93c}'), ('\u{93e}', '\u{94f}'), ('\u{951}', '\u{957}'),
-        ('\u{962}', '\u{963}'), ('\u{981}', '\u{983}'), ('\u{9bc}', '\u{9bc}'),
-        ('\u{9be}', '\u{9c4}'), ('\u{9c7}', '\u{9c8}'), ('\u{9cb}', '\u{9cd}'),
-        ('\u{9d7}', '\u{9d7}'), ('\u{9e2}', '\u{9e3}'), ('\u{a01}', '\u{a03}'),
-        ('\u{a3c}', '\u{a3c}'), ('\u{a3e}', '\u{a42}'), ('\u{a47}', '\u{a48}'),
-        ('\u{a4b}', '\u{a4d}'), ('\u{a51}', '\u{a51}'), ('\u{a70}', '\u{a71}'),
-        ('\u{a75}', '\u{a75}'), ('\u{a81}', '\u{a83}'), ('\u{abc}', '\u{abc}'),
-        ('\u{abe}', '\u{ac5}'), ('\u{ac7}', '\u{ac9}'), ('\u{acb}', '\u{acd}'),
-        ('\u{ae2}', '\u{ae3}'), ('\u{b01}', '\u{b03}'), ('\u{b3c}', '\u{b3c}'),
-        ('\u{b3e}', '\u{b44}'), ('\u{b47}', '\u{b48}'), ('\u{b4b}', '\u{b4d}'),
-        ('\u{b56}', '\u{b57}'), ('\u{b62}', '\u{b63}'), ('\u{b82}', '\u{b82}'),
-        ('\u{bbe}', '\u{bc2}'), ('\u{bc6}', '\u{bc8}'), ('\u{bca}', '\u{bcd}'),
-        ('\u{bd7}', '\u{bd7}'), ('\u{c00}', '\u{c03}'), ('\u{c3e}', '\u{c44}'),
-        ('\u{c46}', '\u{c48}'), ('\u{c4a}', '\u{c4d}'), ('\u{c55}', '\u{c56}'),
-        ('\u{c62}', '\u{c63}'), ('\u{c81}', '\u{c83}'), ('\u{cbc}', '\u{cbc}'),
-        ('\u{cbe}', '\u{cc4}'), ('\u{cc6}', '\u{cc8}'), ('\u{cca}', '\u{ccd}'),
-        ('\u{cd5}', '\u{cd6}'), ('\u{ce2}', '\u{ce3}'), ('\u{d01}', '\u{d03}'),
-        ('\u{d3e}', '\u{d44}'), ('\u{d46}', '\u{d48}'), ('\u{d4a}', '\u{d4d}'),
-        ('\u{d57}', '\u{d57}'), ('\u{d62}', '\u{d63}'), ('\u{d82}', '\u{d83}'),
-        ('\u{dca}', '\u{dca}'), ('\u{dcf}', '\u{dd4}'), ('\u{dd6}', '\u{dd6}'),
-        ('\u{dd8}', '\u{ddf}'), ('\u{df2}', '\u{df3}'), ('\u{e31}', '\u{e31}'),
-        ('\u{e34}', '\u{e3a}'), ('\u{e47}', '\u{e4e}'), ('\u{eb1}', '\u{eb1}'),
-        ('\u{eb4}', '\u{eb9}'), ('\u{ebb}', '\u{ebc}'), ('\u{ec8}', '\u{ecd}'),
-        ('\u{f18}', '\u{f19}'), ('\u{f35}', '\u{f35}'), ('\u{f37}', '\u{f37}'),
-        ('\u{f39}', '\u{f39}'), ('\u{f3e}', '\u{f3f}'), ('\u{f71}', '\u{f84}'),
-        ('\u{f86}', '\u{f87}'), ('\u{f8d}', '\u{f97}'), ('\u{f99}', '\u{fbc}'),
-        ('\u{fc6}', '\u{fc6}'), ('\u{102b}', '\u{103e}'), ('\u{1056}',
-        '\u{1059}'), ('\u{105e}', '\u{1060}'), ('\u{1062}', '\u{1064}'),
-        ('\u{1067}', '\u{106d}'), ('\u{1071}', '\u{1074}'), ('\u{1082}',
-        '\u{108d}'), ('\u{108f}', '\u{108f}'), ('\u{109a}', '\u{109d}'),
-        ('\u{135d}', '\u{135f}'), ('\u{1712}', '\u{1714}'), ('\u{1732}',
-        '\u{1734}'), ('\u{1752}', '\u{1753}'), ('\u{1772}', '\u{1773}'),
-        ('\u{17b4}', '\u{17d3}'), ('\u{17dd}', '\u{17dd}'), ('\u{180b}',
-        '\u{180d}'), ('\u{18a9}', '\u{18a9}'), ('\u{1920}', '\u{192b}'),
-        ('\u{1930}', '\u{193b}'), ('\u{1a17}', '\u{1a1b}'), ('\u{1a55}',
-        '\u{1a5e}'), ('\u{1a60}', '\u{1a7c}'), ('\u{1a7f}', '\u{1a7f}'),
-        ('\u{1ab0}', '\u{1abe}'), ('\u{1b00}', '\u{1b04}'), ('\u{1b34}',
-        '\u{1b44}'), ('\u{1b6b}', '\u{1b73}'), ('\u{1b80}', '\u{1b82}'),
-        ('\u{1ba1}', '\u{1bad}'), ('\u{1be6}', '\u{1bf3}'), ('\u{1c24}',
-        '\u{1c37}'), ('\u{1cd0}', '\u{1cd2}'), ('\u{1cd4}', '\u{1ce8}'),
-        ('\u{1ced}', '\u{1ced}'), ('\u{1cf2}', '\u{1cf4}'), ('\u{1cf8}',
-        '\u{1cf9}'), ('\u{1dc0}', '\u{1df5}'), ('\u{1dfc}', '\u{1dff}'),
-        ('\u{20d0}', '\u{20f0}'), ('\u{2cef}', '\u{2cf1}'), ('\u{2d7f}',
-        '\u{2d7f}'), ('\u{2de0}', '\u{2dff}'), ('\u{302a}', '\u{302f}'),
-        ('\u{3099}', '\u{309a}'), ('\u{a66f}', '\u{a672}'), ('\u{a674}',
-        '\u{a67d}'), ('\u{a69e}', '\u{a69f}'), ('\u{a6f0}', '\u{a6f1}'),
-        ('\u{a802}', '\u{a802}'), ('\u{a806}', '\u{a806}'), ('\u{a80b}',
-        '\u{a80b}'), ('\u{a823}', '\u{a827}'), ('\u{a880}', '\u{a881}'),
-        ('\u{a8b4}', '\u{a8c4}'), ('\u{a8e0}', '\u{a8f1}'), ('\u{a926}',
-        '\u{a92d}'), ('\u{a947}', '\u{a953}'), ('\u{a980}', '\u{a983}'),
-        ('\u{a9b3}', '\u{a9c0}'), ('\u{a9e5}', '\u{a9e5}'), ('\u{aa29}',
-        '\u{aa36}'), ('\u{aa43}', '\u{aa43}'), ('\u{aa4c}', '\u{aa4d}'),
-        ('\u{aa7b}', '\u{aa7d}'), ('\u{aab0}', '\u{aab0}'), ('\u{aab2}',
-        '\u{aab4}'), ('\u{aab7}', '\u{aab8}'), ('\u{aabe}', '\u{aabf}'),
-        ('\u{aac1}', '\u{aac1}'), ('\u{aaeb}', '\u{aaef}'), ('\u{aaf5}',
-        '\u{aaf6}'), ('\u{abe3}', '\u{abea}'), ('\u{abec}', '\u{abed}'),
-        ('\u{fb1e}', '\u{fb1e}'), ('\u{fe00}', '\u{fe0f}'), ('\u{fe20}',
-        '\u{fe2f}'), ('\u{101fd}', '\u{101fd}'), ('\u{102e0}', '\u{102e0}'),
-        ('\u{10376}', '\u{1037a}'), ('\u{10a01}', '\u{10a03}'), ('\u{10a05}',
-        '\u{10a06}'), ('\u{10a0c}', '\u{10a0f}'), ('\u{10a38}', '\u{10a3a}'),
-        ('\u{10a3f}', '\u{10a3f}'), ('\u{10ae5}', '\u{10ae6}'), ('\u{11000}',
-        '\u{11002}'), ('\u{11038}', '\u{11046}'), ('\u{1107f}', '\u{11082}'),
-        ('\u{110b0}', '\u{110ba}'), ('\u{11100}', '\u{11102}'), ('\u{11127}',
-        '\u{11134}'), ('\u{11173}', '\u{11173}'), ('\u{11180}', '\u{11182}'),
-        ('\u{111b3}', '\u{111c0}'), ('\u{111ca}', '\u{111cc}'), ('\u{1122c}',
-        '\u{11237}'), ('\u{112df}', '\u{112ea}'), ('\u{11300}', '\u{11303}'),
-        ('\u{1133c}', '\u{1133c}'), ('\u{1133e}', '\u{11344}'), ('\u{11347}',
-        '\u{11348}'), ('\u{1134b}', '\u{1134d}'), ('\u{11357}', '\u{11357}'),
-        ('\u{11362}', '\u{11363}'), ('\u{11366}', '\u{1136c}'), ('\u{11370}',
-        '\u{11374}'), ('\u{114b0}', '\u{114c3}'), ('\u{115af}', '\u{115b5}'),
-        ('\u{115b8}', '\u{115c0}'), ('\u{115dc}', '\u{115dd}'), ('\u{11630}',
-        '\u{11640}'), ('\u{116ab}', '\u{116b7}'), ('\u{1171d}', '\u{1172b}'),
-        ('\u{16af0}', '\u{16af4}'), ('\u{16b30}', '\u{16b36}'), ('\u{16f51}',
-        '\u{16f7e}'), ('\u{16f8f}', '\u{16f92}'), ('\u{1bc9d}', '\u{1bc9e}'),
-        ('\u{1d165}', '\u{1d169}'), ('\u{1d16d}', '\u{1d172}'), ('\u{1d17b}',
-        '\u{1d182}'), ('\u{1d185}', '\u{1d18b}'), ('\u{1d1aa}', '\u{1d1ad}'),
-        ('\u{1d242}', '\u{1d244}'), ('\u{1da00}', '\u{1da36}'), ('\u{1da3b}',
-        '\u{1da6c}'), ('\u{1da75}', '\u{1da75}'), ('\u{1da84}', '\u{1da84}'),
-        ('\u{1da9b}', '\u{1da9f}'), ('\u{1daa1}', '\u{1daaf}'), ('\u{1e8d0}',
-        '\u{1e8d6}'), ('\u{e0100}', '\u{e01ef}')
-    ];
-
-    pub const Mc_table: &'static [(char, char)] = &[
-        ('\u{903}', '\u{903}'), ('\u{93b}', '\u{93b}'), ('\u{93e}', '\u{940}'),
-        ('\u{949}', '\u{94c}'), ('\u{94e}', '\u{94f}'), ('\u{982}', '\u{983}'),
-        ('\u{9be}', '\u{9c0}'), ('\u{9c7}', '\u{9c8}'), ('\u{9cb}', '\u{9cc}'),
-        ('\u{9d7}', '\u{9d7}'), ('\u{a03}', '\u{a03}'), ('\u{a3e}', '\u{a40}'),
-        ('\u{a83}', '\u{a83}'), ('\u{abe}', '\u{ac0}'), ('\u{ac9}', '\u{ac9}'),
-        ('\u{acb}', '\u{acc}'), ('\u{b02}', '\u{b03}'), ('\u{b3e}', '\u{b3e}'),
-        ('\u{b40}', '\u{b40}'), ('\u{b47}', '\u{b48}'), ('\u{b4b}', '\u{b4c}'),
-        ('\u{b57}', '\u{b57}'), ('\u{bbe}', '\u{bbf}'), ('\u{bc1}', '\u{bc2}'),
-        ('\u{bc6}', '\u{bc8}'), ('\u{bca}', '\u{bcc}'), ('\u{bd7}', '\u{bd7}'),
-        ('\u{c01}', '\u{c03}'), ('\u{c41}', '\u{c44}'), ('\u{c82}', '\u{c83}'),
-        ('\u{cbe}', '\u{cbe}'), ('\u{cc0}', '\u{cc4}'), ('\u{cc7}', '\u{cc8}'),
-        ('\u{cca}', '\u{ccb}'), ('\u{cd5}', '\u{cd6}'), ('\u{d02}', '\u{d03}'),
-        ('\u{d3e}', '\u{d40}'), ('\u{d46}', '\u{d48}'), ('\u{d4a}', '\u{d4c}'),
-        ('\u{d57}', '\u{d57}'), ('\u{d82}', '\u{d83}'), ('\u{dcf}', '\u{dd1}'),
-        ('\u{dd8}', '\u{ddf}'), ('\u{df2}', '\u{df3}'), ('\u{f3e}', '\u{f3f}'),
-        ('\u{f7f}', '\u{f7f}'), ('\u{102b}', '\u{102c}'), ('\u{1031}',
-        '\u{1031}'), ('\u{1038}', '\u{1038}'), ('\u{103b}', '\u{103c}'),
-        ('\u{1056}', '\u{1057}'), ('\u{1062}', '\u{1064}'), ('\u{1067}',
-        '\u{106d}'), ('\u{1083}', '\u{1084}'), ('\u{1087}', '\u{108c}'),
-        ('\u{108f}', '\u{108f}'), ('\u{109a}', '\u{109c}'), ('\u{17b6}',
-        '\u{17b6}'), ('\u{17be}', '\u{17c5}'), ('\u{17c7}', '\u{17c8}'),
-        ('\u{1923}', '\u{1926}'), ('\u{1929}', '\u{192b}'), ('\u{1930}',
-        '\u{1931}'), ('\u{1933}', '\u{1938}'), ('\u{1a19}', '\u{1a1a}'),
-        ('\u{1a55}', '\u{1a55}'), ('\u{1a57}', '\u{1a57}'), ('\u{1a61}',
-        '\u{1a61}'), ('\u{1a63}', '\u{1a64}'), ('\u{1a6d}', '\u{1a72}'),
-        ('\u{1b04}', '\u{1b04}'), ('\u{1b35}', '\u{1b35}'), ('\u{1b3b}',
-        '\u{1b3b}'), ('\u{1b3d}', '\u{1b41}'), ('\u{1b43}', '\u{1b44}'),
-        ('\u{1b82}', '\u{1b82}'), ('\u{1ba1}', '\u{1ba1}'), ('\u{1ba6}',
-        '\u{1ba7}'), ('\u{1baa}', '\u{1baa}'), ('\u{1be7}', '\u{1be7}'),
-        ('\u{1bea}', '\u{1bec}'), ('\u{1bee}', '\u{1bee}'), ('\u{1bf2}',
-        '\u{1bf3}'), ('\u{1c24}', '\u{1c2b}'), ('\u{1c34}', '\u{1c35}'),
-        ('\u{1ce1}', '\u{1ce1}'), ('\u{1cf2}', '\u{1cf3}'), ('\u{302e}',
-        '\u{302f}'), ('\u{a823}', '\u{a824}'), ('\u{a827}', '\u{a827}'),
-        ('\u{a880}', '\u{a881}'), ('\u{a8b4}', '\u{a8c3}'), ('\u{a952}',
-        '\u{a953}'), ('\u{a983}', '\u{a983}'), ('\u{a9b4}', '\u{a9b5}'),
-        ('\u{a9ba}', '\u{a9bb}'), ('\u{a9bd}', '\u{a9c0}'), ('\u{aa2f}',
-        '\u{aa30}'), ('\u{aa33}', '\u{aa34}'), ('\u{aa4d}', '\u{aa4d}'),
-        ('\u{aa7b}', '\u{aa7b}'), ('\u{aa7d}', '\u{aa7d}'), ('\u{aaeb}',
-        '\u{aaeb}'), ('\u{aaee}', '\u{aaef}'), ('\u{aaf5}', '\u{aaf5}'),
-        ('\u{abe3}', '\u{abe4}'), ('\u{abe6}', '\u{abe7}'), ('\u{abe9}',
-        '\u{abea}'), ('\u{abec}', '\u{abec}'), ('\u{11000}', '\u{11000}'),
-        ('\u{11002}', '\u{11002}'), ('\u{11082}', '\u{11082}'), ('\u{110b0}',
-        '\u{110b2}'), ('\u{110b7}', '\u{110b8}'), ('\u{1112c}', '\u{1112c}'),
-        ('\u{11182}', '\u{11182}'), ('\u{111b3}', '\u{111b5}'), ('\u{111bf}',
-        '\u{111c0}'), ('\u{1122c}', '\u{1122e}'), ('\u{11232}', '\u{11233}'),
-        ('\u{11235}', '\u{11235}'), ('\u{112e0}', '\u{112e2}'), ('\u{11302}',
-        '\u{11303}'), ('\u{1133e}', '\u{1133f}'), ('\u{11341}', '\u{11344}'),
-        ('\u{11347}', '\u{11348}'), ('\u{1134b}', '\u{1134d}'), ('\u{11357}',
-        '\u{11357}'), ('\u{11362}', '\u{11363}'), ('\u{114b0}', '\u{114b2}'),
-        ('\u{114b9}', '\u{114b9}'), ('\u{114bb}', '\u{114be}'), ('\u{114c1}',
-        '\u{114c1}'), ('\u{115af}', '\u{115b1}'), ('\u{115b8}', '\u{115bb}'),
-        ('\u{115be}', '\u{115be}'), ('\u{11630}', '\u{11632}'), ('\u{1163b}',
-        '\u{1163c}'), ('\u{1163e}', '\u{1163e}'), ('\u{116ac}', '\u{116ac}'),
-        ('\u{116ae}', '\u{116af}'), ('\u{116b6}', '\u{116b6}'), ('\u{11720}',
-        '\u{11721}'), ('\u{11726}', '\u{11726}'), ('\u{16f51}', '\u{16f7e}'),
-        ('\u{1d165}', '\u{1d166}'), ('\u{1d16d}', '\u{1d172}')
-    ];
-
-    pub const Me_table: &'static [(char, char)] = &[
-        ('\u{488}', '\u{489}'), ('\u{1abe}', '\u{1abe}'), ('\u{20dd}',
-        '\u{20e0}'), ('\u{20e2}', '\u{20e4}'), ('\u{a670}', '\u{a672}')
-    ];
+impl<'a> ClassQuery<'a> {
+    fn canonicalize(&self) -> Result<CanonicalClassQuery> {
+        match *self {
+            ClassQuery::OneLetter(c) => self.canonical_binary(&c.to_string()),
+            ClassQuery::Binary(name) => self.canonical_binary(name),
+            ClassQuery::ByValue { property_name, property_value } => {
+                let property_name = normalize(property_name);
+                let property_value = normalize(property_value);
 
-    pub const Mn_table: &'static [(char, char)] = &[
-        ('\u{300}', '\u{36f}'), ('\u{483}', '\u{487}'), ('\u{591}', '\u{5bd}'),
-        ('\u{5bf}', '\u{5bf}'), ('\u{5c1}', '\u{5c2}'), ('\u{5c4}', '\u{5c5}'),
-        ('\u{5c7}', '\u{5c7}'), ('\u{610}', '\u{61a}'), ('\u{64b}', '\u{65f}'),
-        ('\u{670}', '\u{670}'), ('\u{6d6}', '\u{6dc}'), ('\u{6df}', '\u{6e4}'),
-        ('\u{6e7}', '\u{6e8}'), ('\u{6ea}', '\u{6ed}'), ('\u{711}', '\u{711}'),
-        ('\u{730}', '\u{74a}'), ('\u{7a6}', '\u{7b0}'), ('\u{7eb}', '\u{7f3}'),
-        ('\u{816}', '\u{819}'), ('\u{81b}', '\u{823}'), ('\u{825}', '\u{827}'),
-        ('\u{829}', '\u{82d}'), ('\u{859}', '\u{85b}'), ('\u{8e3}', '\u{902}'),
-        ('\u{93a}', '\u{93a}'), ('\u{93c}', '\u{93c}'), ('\u{941}', '\u{948}'),
-        ('\u{94d}', '\u{94d}'), ('\u{951}', '\u{957}'), ('\u{962}', '\u{963}'),
-        ('\u{981}', '\u{981}'), ('\u{9bc}', '\u{9bc}'), ('\u{9c1}', '\u{9c4}'),
-        ('\u{9cd}', '\u{9cd}'), ('\u{9e2}', '\u{9e3}'), ('\u{a01}', '\u{a02}'),
-        ('\u{a3c}', '\u{a3c}'), ('\u{a41}', '\u{a42}'), ('\u{a47}', '\u{a48}'),
-        ('\u{a4b}', '\u{a4d}'), ('\u{a51}', '\u{a51}'), ('\u{a70}', '\u{a71}'),
-        ('\u{a75}', '\u{a75}'), ('\u{a81}', '\u{a82}'), ('\u{abc}', '\u{abc}'),
-        ('\u{ac1}', '\u{ac5}'), ('\u{ac7}', '\u{ac8}'), ('\u{acd}', '\u{acd}'),
-        ('\u{ae2}', '\u{ae3}'), ('\u{b01}', '\u{b01}'), ('\u{b3c}', '\u{b3c}'),
-        ('\u{b3f}', '\u{b3f}'), ('\u{b41}', '\u{b44}'), ('\u{b4d}', '\u{b4d}'),
-        ('\u{b56}', '\u{b56}'), ('\u{b62}', '\u{b63}'), ('\u{b82}', '\u{b82}'),
-        ('\u{bc0}', '\u{bc0}'), ('\u{bcd}', '\u{bcd}'), ('\u{c00}', '\u{c00}'),
-        ('\u{c3e}', '\u{c40}'), ('\u{c46}', '\u{c48}'), ('\u{c4a}', '\u{c4d}'),
-        ('\u{c55}', '\u{c56}'), ('\u{c62}', '\u{c63}'), ('\u{c81}', '\u{c81}'),
-        ('\u{cbc}', '\u{cbc}'), ('\u{cbf}', '\u{cbf}'), ('\u{cc6}', '\u{cc6}'),
-        ('\u{ccc}', '\u{ccd}'), ('\u{ce2}', '\u{ce3}'), ('\u{d01}', '\u{d01}'),
-        ('\u{d41}', '\u{d44}'), ('\u{d4d}', '\u{d4d}'), ('\u{d62}', '\u{d63}'),
-        ('\u{dca}', '\u{dca}'), ('\u{dd2}', '\u{dd4}'), ('\u{dd6}', '\u{dd6}'),
-        ('\u{e31}', '\u{e31}'), ('\u{e34}', '\u{e3a}'), ('\u{e47}', '\u{e4e}'),
-        ('\u{eb1}', '\u{eb1}'), ('\u{eb4}', '\u{eb9}'), ('\u{ebb}', '\u{ebc}'),
-        ('\u{ec8}', '\u{ecd}'), ('\u{f18}', '\u{f19}'), ('\u{f35}', '\u{f35}'),
-        ('\u{f37}', '\u{f37}'), ('\u{f39}', '\u{f39}'), ('\u{f71}', '\u{f7e}'),
-        ('\u{f80}', '\u{f84}'), ('\u{f86}', '\u{f87}'), ('\u{f8d}', '\u{f97}'),
-        ('\u{f99}', '\u{fbc}'), ('\u{fc6}', '\u{fc6}'), ('\u{102d}',
-        '\u{1030}'), ('\u{1032}', '\u{1037}'), ('\u{1039}', '\u{103a}'),
-        ('\u{103d}', '\u{103e}'), ('\u{1058}', '\u{1059}'), ('\u{105e}',
-        '\u{1060}'), ('\u{1071}', '\u{1074}'), ('\u{1082}', '\u{1082}'),
-        ('\u{1085}', '\u{1086}'), ('\u{108d}', '\u{108d}'), ('\u{109d}',
-        '\u{109d}'), ('\u{135d}', '\u{135f}'), ('\u{1712}', '\u{1714}'),
-        ('\u{1732}', '\u{1734}'), ('\u{1752}', '\u{1753}'), ('\u{1772}',
-        '\u{1773}'), ('\u{17b4}', '\u{17b5}'), ('\u{17b7}', '\u{17bd}'),
-        ('\u{17c6}', '\u{17c6}'), ('\u{17c9}', '\u{17d3}'), ('\u{17dd}',
-        '\u{17dd}'), ('\u{180b}', '\u{180d}'), ('\u{18a9}', '\u{18a9}'),
-        ('\u{1920}', '\u{1922}'), ('\u{1927}', '\u{1928}'), ('\u{1932}',
-        '\u{1932}'), ('\u{1939}', '\u{193b}'), ('\u{1a17}', '\u{1a18}'),
-        ('\u{1a1b}', '\u{1a1b}'), ('\u{1a56}', '\u{1a56}'), ('\u{1a58}',
-        '\u{1a5e}'), ('\u{1a60}', '\u{1a60}'), ('\u{1a62}', '\u{1a62}'),
-        ('\u{1a65}', '\u{1a6c}'), ('\u{1a73}', '\u{1a7c}'), ('\u{1a7f}',
-        '\u{1a7f}'), ('\u{1ab0}', '\u{1abd}'), ('\u{1b00}', '\u{1b03}'),
-        ('\u{1b34}', '\u{1b34}'), ('\u{1b36}', '\u{1b3a}'), ('\u{1b3c}',
-        '\u{1b3c}'), ('\u{1b42}', '\u{1b42}'), ('\u{1b6b}', '\u{1b73}'),
-        ('\u{1b80}', '\u{1b81}'), ('\u{1ba2}', '\u{1ba5}'), ('\u{1ba8}',
-        '\u{1ba9}'), ('\u{1bab}', '\u{1bad}'), ('\u{1be6}', '\u{1be6}'),
-        ('\u{1be8}', '\u{1be9}'), ('\u{1bed}', '\u{1bed}'), ('\u{1bef}',
-        '\u{1bf1}'), ('\u{1c2c}', '\u{1c33}'), ('\u{1c36}', '\u{1c37}'),
-        ('\u{1cd0}', '\u{1cd2}'), ('\u{1cd4}', '\u{1ce0}'), ('\u{1ce2}',
-        '\u{1ce8}'), ('\u{1ced}', '\u{1ced}'), ('\u{1cf4}', '\u{1cf4}'),
-        ('\u{1cf8}', '\u{1cf9}'), ('\u{1dc0}', '\u{1df5}'), ('\u{1dfc}',
-        '\u{1dff}'), ('\u{20d0}', '\u{20dc}'), ('\u{20e1}', '\u{20e1}'),
-        ('\u{20e5}', '\u{20f0}'), ('\u{2cef}', '\u{2cf1}'), ('\u{2d7f}',
-        '\u{2d7f}'), ('\u{2de0}', '\u{2dff}'), ('\u{302a}', '\u{302d}'),
-        ('\u{3099}', '\u{309a}'), ('\u{a66f}', '\u{a66f}'), ('\u{a674}',
-        '\u{a67d}'), ('\u{a69e}', '\u{a69f}'), ('\u{a6f0}', '\u{a6f1}'),
-        ('\u{a802}', '\u{a802}'), ('\u{a806}', '\u{a806}'), ('\u{a80b}',
-        '\u{a80b}'), ('\u{a825}', '\u{a826}'), ('\u{a8c4}', '\u{a8c4}'),
-        ('\u{a8e0}', '\u{a8f1}'), ('\u{a926}', '\u{a92d}'), ('\u{a947}',
-        '\u{a951}'), ('\u{a980}', '\u{a982}'), ('\u{a9b3}', '\u{a9b3}'),
-        ('\u{a9b6}', '\u{a9b9}'), ('\u{a9bc}', '\u{a9bc}'), ('\u{a9e5}',
-        '\u{a9e5}'), ('\u{aa29}', '\u{aa2e}'), ('\u{aa31}', '\u{aa32}'),
-        ('\u{aa35}', '\u{aa36}'), ('\u{aa43}', '\u{aa43}'), ('\u{aa4c}',
-        '\u{aa4c}'), ('\u{aa7c}', '\u{aa7c}'), ('\u{aab0}', '\u{aab0}'),
-        ('\u{aab2}', '\u{aab4}'), ('\u{aab7}', '\u{aab8}'), ('\u{aabe}',
-        '\u{aabf}'), ('\u{aac1}', '\u{aac1}'), ('\u{aaec}', '\u{aaed}'),
-        ('\u{aaf6}', '\u{aaf6}'), ('\u{abe5}', '\u{abe5}'), ('\u{abe8}',
-        '\u{abe8}'), ('\u{abed}', '\u{abed}'), ('\u{fb1e}', '\u{fb1e}'),
-        ('\u{fe00}', '\u{fe0f}'), ('\u{fe20}', '\u{fe2f}'), ('\u{101fd}',
-        '\u{101fd}'), ('\u{102e0}', '\u{102e0}'), ('\u{10376}', '\u{1037a}'),
-        ('\u{10a01}', '\u{10a03}'), ('\u{10a05}', '\u{10a06}'), ('\u{10a0c}',
-        '\u{10a0f}'), ('\u{10a38}', '\u{10a3a}'), ('\u{10a3f}', '\u{10a3f}'),
-        ('\u{10ae5}', '\u{10ae6}'), ('\u{11001}', '\u{11001}'), ('\u{11038}',
-        '\u{11046}'), ('\u{1107f}', '\u{11081}'), ('\u{110b3}', '\u{110b6}'),
-        ('\u{110b9}', '\u{110ba}'), ('\u{11100}', '\u{11102}'), ('\u{11127}',
-        '\u{1112b}'), ('\u{1112d}', '\u{11134}'), ('\u{11173}', '\u{11173}'),
-        ('\u{11180}', '\u{11181}'), ('\u{111b6}', '\u{111be}'), ('\u{111ca}',
-        '\u{111cc}'), ('\u{1122f}', '\u{11231}'), ('\u{11234}', '\u{11234}'),
-        ('\u{11236}', '\u{11237}'), ('\u{112df}', '\u{112df}'), ('\u{112e3}',
-        '\u{112ea}'), ('\u{11300}', '\u{11301}'), ('\u{1133c}', '\u{1133c}'),
-        ('\u{11340}', '\u{11340}'), ('\u{11366}', '\u{1136c}'), ('\u{11370}',
-        '\u{11374}'), ('\u{114b3}', '\u{114b8}'), ('\u{114ba}', '\u{114ba}'),
-        ('\u{114bf}', '\u{114c0}'), ('\u{114c2}', '\u{114c3}'), ('\u{115b2}',
-        '\u{115b5}'), ('\u{115bc}', '\u{115bd}'), ('\u{115bf}', '\u{115c0}'),
-        ('\u{115dc}', '\u{115dd}'), ('\u{11633}', '\u{1163a}'), ('\u{1163d}',
-        '\u{1163d}'), ('\u{1163f}', '\u{11640}'), ('\u{116ab}', '\u{116ab}'),
-        ('\u{116ad}', '\u{116ad}'), ('\u{116b0}', '\u{116b5}'), ('\u{116b7}',
-        '\u{116b7}'), ('\u{1171d}', '\u{1171f}'), ('\u{11722}', '\u{11725}'),
-        ('\u{11727}', '\u{1172b}'), ('\u{16af0}', '\u{16af4}'), ('\u{16b30}',
-        '\u{16b36}'), ('\u{16f8f}', '\u{16f92}'), ('\u{1bc9d}', '\u{1bc9e}'),
-        ('\u{1d167}', '\u{1d169}'), ('\u{1d17b}', '\u{1d182}'), ('\u{1d185}',
-        '\u{1d18b}'), ('\u{1d1aa}', '\u{1d1ad}'), ('\u{1d242}', '\u{1d244}'),
-        ('\u{1da00}', '\u{1da36}'), ('\u{1da3b}', '\u{1da6c}'), ('\u{1da75}',
-        '\u{1da75}'), ('\u{1da84}', '\u{1da84}'), ('\u{1da9b}', '\u{1da9f}'),
-        ('\u{1daa1}', '\u{1daaf}'), ('\u{1e8d0}', '\u{1e8d6}'), ('\u{e0100}',
-        '\u{e01ef}')
-    ];
-
-    pub const N_table: &'static [(char, char)] = &[
-        ('\u{30}', '\u{39}'), ('\u{660}', '\u{669}'), ('\u{6f0}', '\u{6f9}'),
-        ('\u{7c0}', '\u{7c9}'), ('\u{966}', '\u{96f}'), ('\u{9e6}', '\u{9ef}'),
-        ('\u{a66}', '\u{a6f}'), ('\u{ae6}', '\u{aef}'), ('\u{b66}', '\u{b6f}'),
-        ('\u{be6}', '\u{bef}'), ('\u{c66}', '\u{c6f}'), ('\u{ce6}', '\u{cef}'),
-        ('\u{d66}', '\u{d6f}'), ('\u{de6}', '\u{def}'), ('\u{e50}', '\u{e59}'),
-        ('\u{ed0}', '\u{ed9}'), ('\u{f20}', '\u{f29}'), ('\u{1040}',
-        '\u{1049}'), ('\u{1090}', '\u{1099}'), ('\u{16ee}', '\u{16f0}'),
-        ('\u{17e0}', '\u{17e9}'), ('\u{1810}', '\u{1819}'), ('\u{1946}',
-        '\u{194f}'), ('\u{19d0}', '\u{19d9}'), ('\u{1a80}', '\u{1a89}'),
-        ('\u{1a90}', '\u{1a99}'), ('\u{1b50}', '\u{1b59}'), ('\u{1bb0}',
-        '\u{1bb9}'), ('\u{1c40}', '\u{1c49}'), ('\u{1c50}', '\u{1c59}'),
-        ('\u{2160}', '\u{2182}'), ('\u{2185}', '\u{2188}'), ('\u{3007}',
-        '\u{3007}'), ('\u{3021}', '\u{3029}'), ('\u{3038}', '\u{303a}'),
-        ('\u{a620}', '\u{a629}'), ('\u{a6e6}', '\u{a6ef}'), ('\u{a8d0}',
-        '\u{a8d9}'), ('\u{a900}', '\u{a909}'), ('\u{a9d0}', '\u{a9d9}'),
-        ('\u{a9f0}', '\u{a9f9}'), ('\u{aa50}', '\u{aa59}'), ('\u{abf0}',
-        '\u{abf9}'), ('\u{ff10}', '\u{ff19}'), ('\u{10140}', '\u{10174}'),
-        ('\u{10341}', '\u{10341}'), ('\u{1034a}', '\u{1034a}'), ('\u{103d1}',
-        '\u{103d5}'), ('\u{104a0}', '\u{104a9}'), ('\u{11066}', '\u{1106f}'),
-        ('\u{110f0}', '\u{110f9}'), ('\u{11136}', '\u{1113f}'), ('\u{111d0}',
-        '\u{111d9}'), ('\u{112f0}', '\u{112f9}'), ('\u{114d0}', '\u{114d9}'),
-        ('\u{11650}', '\u{11659}'), ('\u{116c0}', '\u{116c9}'), ('\u{11730}',
-        '\u{11739}'), ('\u{118e0}', '\u{118e9}'), ('\u{12400}', '\u{1246e}'),
-        ('\u{16a60}', '\u{16a69}'), ('\u{16b50}', '\u{16b59}'), ('\u{1d7ce}',
-        '\u{1d7ff}')
-    ];
-
-    pub const Nd_table: &'static [(char, char)] = &[
-        ('\u{30}', '\u{39}'), ('\u{660}', '\u{669}'), ('\u{6f0}', '\u{6f9}'),
-        ('\u{7c0}', '\u{7c9}'), ('\u{966}', '\u{96f}'), ('\u{9e6}', '\u{9ef}'),
-        ('\u{a66}', '\u{a6f}'), ('\u{ae6}', '\u{aef}'), ('\u{b66}', '\u{b6f}'),
-        ('\u{be6}', '\u{bef}'), ('\u{c66}', '\u{c6f}'), ('\u{ce6}', '\u{cef}'),
-        ('\u{d66}', '\u{d6f}'), ('\u{de6}', '\u{def}'), ('\u{e50}', '\u{e59}'),
-        ('\u{ed0}', '\u{ed9}'), ('\u{f20}', '\u{f29}'), ('\u{1040}',
-        '\u{1049}'), ('\u{1090}', '\u{1099}'), ('\u{17e0}', '\u{17e9}'),
-        ('\u{1810}', '\u{1819}'), ('\u{1946}', '\u{194f}'), ('\u{19d0}',
-        '\u{19d9}'), ('\u{1a80}', '\u{1a89}'), ('\u{1a90}', '\u{1a99}'),
-        ('\u{1b50}', '\u{1b59}'), ('\u{1bb0}', '\u{1bb9}'), ('\u{1c40}',
-        '\u{1c49}'), ('\u{1c50}', '\u{1c59}'), ('\u{a620}', '\u{a629}'),
-        ('\u{a8d0}', '\u{a8d9}'), ('\u{a900}', '\u{a909}'), ('\u{a9d0}',
-        '\u{a9d9}'), ('\u{a9f0}', '\u{a9f9}'), ('\u{aa50}', '\u{aa59}'),
-        ('\u{abf0}', '\u{abf9}'), ('\u{ff10}', '\u{ff19}'), ('\u{104a0}',
-        '\u{104a9}'), ('\u{11066}', '\u{1106f}'), ('\u{110f0}', '\u{110f9}'),
-        ('\u{11136}', '\u{1113f}'), ('\u{111d0}', '\u{111d9}'), ('\u{112f0}',
-        '\u{112f9}'), ('\u{114d0}', '\u{114d9}'), ('\u{11650}', '\u{11659}'),
-        ('\u{116c0}', '\u{116c9}'), ('\u{11730}', '\u{11739}'), ('\u{118e0}',
-        '\u{118e9}'), ('\u{16a60}', '\u{16a69}'), ('\u{16b50}', '\u{16b59}'),
-        ('\u{1d7ce}', '\u{1d7ff}')
-    ];
-
-    pub const Nl_table: &'static [(char, char)] = &[
-        ('\u{16ee}', '\u{16f0}'), ('\u{2160}', '\u{2182}'), ('\u{2185}',
-        '\u{2188}'), ('\u{3007}', '\u{3007}'), ('\u{3021}', '\u{3029}'),
-        ('\u{3038}', '\u{303a}'), ('\u{a6e6}', '\u{a6ef}'), ('\u{10140}',
-        '\u{10174}'), ('\u{10341}', '\u{10341}'), ('\u{1034a}', '\u{1034a}'),
-        ('\u{103d1}', '\u{103d5}'), ('\u{12400}', '\u{1246e}')
-    ];
-
-    pub const No_table: &'static [(char, char)] = &[
-        ('\u{b2}', '\u{b3}'), ('\u{b9}', '\u{b9}'), ('\u{bc}', '\u{be}'),
-        ('\u{9f4}', '\u{9f9}'), ('\u{b72}', '\u{b77}'), ('\u{bf0}', '\u{bf2}'),
-        ('\u{c78}', '\u{c7e}'), ('\u{d70}', '\u{d75}'), ('\u{f2a}', '\u{f33}'),
-        ('\u{1369}', '\u{137c}'), ('\u{17f0}', '\u{17f9}'), ('\u{19da}',
-        '\u{19da}'), ('\u{2070}', '\u{2070}'), ('\u{2074}', '\u{2079}'),
-        ('\u{2080}', '\u{2089}'), ('\u{2150}', '\u{215f}'), ('\u{2189}',
-        '\u{2189}'), ('\u{2460}', '\u{249b}'), ('\u{24ea}', '\u{24ff}'),
-        ('\u{2776}', '\u{2793}'), ('\u{2cfd}', '\u{2cfd}'), ('\u{3192}',
-        '\u{3195}'), ('\u{3220}', '\u{3229}'), ('\u{3248}', '\u{324f}'),
-        ('\u{3251}', '\u{325f}'), ('\u{3280}', '\u{3289}'), ('\u{32b1}',
-        '\u{32bf}'), ('\u{a830}', '\u{a835}'), ('\u{10107}', '\u{10133}'),
-        ('\u{10175}', '\u{10178}'), ('\u{1018a}', '\u{1018b}'), ('\u{102e1}',
-        '\u{102fb}'), ('\u{10320}', '\u{10323}'), ('\u{10858}', '\u{1085f}'),
-        ('\u{10879}', '\u{1087f}'), ('\u{108a7}', '\u{108af}'), ('\u{108fb}',
-        '\u{108ff}'), ('\u{10916}', '\u{1091b}'), ('\u{109bc}', '\u{109bd}'),
-        ('\u{109c0}', '\u{109cf}'), ('\u{109d2}', '\u{109ff}'), ('\u{10a40}',
-        '\u{10a47}'), ('\u{10a7d}', '\u{10a7e}'), ('\u{10a9d}', '\u{10a9f}'),
-        ('\u{10aeb}', '\u{10aef}'), ('\u{10b58}', '\u{10b5f}'), ('\u{10b78}',
-        '\u{10b7f}'), ('\u{10ba9}', '\u{10baf}'), ('\u{10cfa}', '\u{10cff}'),
-        ('\u{10e60}', '\u{10e7e}'), ('\u{11052}', '\u{11065}'), ('\u{111e1}',
-        '\u{111f4}'), ('\u{1173a}', '\u{1173b}'), ('\u{118ea}', '\u{118f2}'),
-        ('\u{16b5b}', '\u{16b61}'), ('\u{1d360}', '\u{1d371}'), ('\u{1e8c7}',
-        '\u{1e8cf}'), ('\u{1f100}', '\u{1f10c}')
-    ];
-
-    pub const P_table: &'static [(char, char)] = &[
-        ('\u{21}', '\u{23}'), ('\u{25}', '\u{2a}'), ('\u{2c}', '\u{2f}'),
-        ('\u{3a}', '\u{3b}'), ('\u{3f}', '\u{40}'), ('\u{5b}', '\u{5d}'),
-        ('\u{5f}', '\u{5f}'), ('\u{7b}', '\u{7b}'), ('\u{7d}', '\u{7d}'),
-        ('\u{a1}', '\u{a1}'), ('\u{a7}', '\u{a7}'), ('\u{ab}', '\u{ab}'),
-        ('\u{b6}', '\u{b7}'), ('\u{bb}', '\u{bb}'), ('\u{bf}', '\u{bf}'),
-        ('\u{37e}', '\u{37e}'), ('\u{387}', '\u{387}'), ('\u{55a}', '\u{55f}'),
-        ('\u{589}', '\u{58a}'), ('\u{5be}', '\u{5be}'), ('\u{5c0}', '\u{5c0}'),
-        ('\u{5c3}', '\u{5c3}'), ('\u{5c6}', '\u{5c6}'), ('\u{5f3}', '\u{5f4}'),
-        ('\u{609}', '\u{60a}'), ('\u{60c}', '\u{60d}'), ('\u{61b}', '\u{61b}'),
-        ('\u{61e}', '\u{61f}'), ('\u{66a}', '\u{66d}'), ('\u{6d4}', '\u{6d4}'),
-        ('\u{700}', '\u{70d}'), ('\u{7f7}', '\u{7f9}'), ('\u{830}', '\u{83e}'),
-        ('\u{85e}', '\u{85e}'), ('\u{964}', '\u{965}'), ('\u{970}', '\u{970}'),
-        ('\u{af0}', '\u{af0}'), ('\u{df4}', '\u{df4}'), ('\u{e4f}', '\u{e4f}'),
-        ('\u{e5a}', '\u{e5b}'), ('\u{f04}', '\u{f12}'), ('\u{f14}', '\u{f14}'),
-        ('\u{f3a}', '\u{f3d}'), ('\u{f85}', '\u{f85}'), ('\u{fd0}', '\u{fd4}'),
-        ('\u{fd9}', '\u{fda}'), ('\u{104a}', '\u{104f}'), ('\u{10fb}',
-        '\u{10fb}'), ('\u{1360}', '\u{1368}'), ('\u{1400}', '\u{1400}'),
-        ('\u{166d}', '\u{166e}'), ('\u{169b}', '\u{169c}'), ('\u{16eb}',
-        '\u{16ed}'), ('\u{1735}', '\u{1736}'), ('\u{17d4}', '\u{17d6}'),
-        ('\u{17d8}', '\u{17da}'), ('\u{1800}', '\u{180a}'), ('\u{1944}',
-        '\u{1945}'), ('\u{1a1e}', '\u{1a1f}'), ('\u{1aa0}', '\u{1aa6}'),
-        ('\u{1aa8}', '\u{1aad}'), ('\u{1b5a}', '\u{1b60}'), ('\u{1bfc}',
-        '\u{1bff}'), ('\u{1c3b}', '\u{1c3f}'), ('\u{1c7e}', '\u{1c7f}'),
-        ('\u{1cc0}', '\u{1cc7}'), ('\u{1cd3}', '\u{1cd3}'), ('\u{2010}',
-        '\u{2027}'), ('\u{2030}', '\u{2043}'), ('\u{2045}', '\u{2051}'),
-        ('\u{2053}', '\u{205e}'), ('\u{207d}', '\u{207e}'), ('\u{208d}',
-        '\u{208e}'), ('\u{2308}', '\u{230b}'), ('\u{2329}', '\u{232a}'),
-        ('\u{2768}', '\u{2775}'), ('\u{27c5}', '\u{27c6}'), ('\u{27e6}',
-        '\u{27ef}'), ('\u{2983}', '\u{2998}'), ('\u{29d8}', '\u{29db}'),
-        ('\u{29fc}', '\u{29fd}'), ('\u{2cf9}', '\u{2cfc}'), ('\u{2cfe}',
-        '\u{2cff}'), ('\u{2d70}', '\u{2d70}'), ('\u{2e00}', '\u{2e2e}'),
-        ('\u{2e30}', '\u{2e42}'), ('\u{3001}', '\u{3003}'), ('\u{3008}',
-        '\u{3011}'), ('\u{3014}', '\u{301f}'), ('\u{3030}', '\u{3030}'),
-        ('\u{303d}', '\u{303d}'), ('\u{30a0}', '\u{30a0}'), ('\u{30fb}',
-        '\u{30fb}'), ('\u{a4fe}', '\u{a4ff}'), ('\u{a60d}', '\u{a60f}'),
-        ('\u{a673}', '\u{a673}'), ('\u{a67e}', '\u{a67e}'), ('\u{a6f2}',
-        '\u{a6f7}'), ('\u{a874}', '\u{a877}'), ('\u{a8ce}', '\u{a8cf}'),
-        ('\u{a8f8}', '\u{a8fa}'), ('\u{a8fc}', '\u{a8fc}'), ('\u{a92e}',
-        '\u{a92f}'), ('\u{a95f}', '\u{a95f}'), ('\u{a9c1}', '\u{a9cd}'),
-        ('\u{a9de}', '\u{a9df}'), ('\u{aa5c}', '\u{aa5f}'), ('\u{aade}',
-        '\u{aadf}'), ('\u{aaf0}', '\u{aaf1}'), ('\u{abeb}', '\u{abeb}'),
-        ('\u{fd3e}', '\u{fd3f}'), ('\u{fe10}', '\u{fe19}'), ('\u{fe30}',
-        '\u{fe52}'), ('\u{fe54}', '\u{fe61}'), ('\u{fe63}', '\u{fe63}'),
-        ('\u{fe68}', '\u{fe68}'), ('\u{fe6a}', '\u{fe6b}'), ('\u{ff01}',
-        '\u{ff03}'), ('\u{ff05}', '\u{ff0a}'), ('\u{ff0c}', '\u{ff0f}'),
-        ('\u{ff1a}', '\u{ff1b}'), ('\u{ff1f}', '\u{ff20}'), ('\u{ff3b}',
-        '\u{ff3d}'), ('\u{ff3f}', '\u{ff3f}'), ('\u{ff5b}', '\u{ff5b}'),
-        ('\u{ff5d}', '\u{ff5d}'), ('\u{ff5f}', '\u{ff65}'), ('\u{10100}',
-        '\u{10102}'), ('\u{1039f}', '\u{1039f}'), ('\u{103d0}', '\u{103d0}'),
-        ('\u{1056f}', '\u{1056f}'), ('\u{10857}', '\u{10857}'), ('\u{1091f}',
-        '\u{1091f}'), ('\u{1093f}', '\u{1093f}'), ('\u{10a50}', '\u{10a58}'),
-        ('\u{10a7f}', '\u{10a7f}'), ('\u{10af0}', '\u{10af6}'), ('\u{10b39}',
-        '\u{10b3f}'), ('\u{10b99}', '\u{10b9c}'), ('\u{11047}', '\u{1104d}'),
-        ('\u{110bb}', '\u{110bc}'), ('\u{110be}', '\u{110c1}'), ('\u{11140}',
-        '\u{11143}'), ('\u{11174}', '\u{11175}'), ('\u{111c5}', '\u{111c9}'),
-        ('\u{111cd}', '\u{111cd}'), ('\u{111db}', '\u{111db}'), ('\u{111dd}',
-        '\u{111df}'), ('\u{11238}', '\u{1123d}'), ('\u{112a9}', '\u{112a9}'),
-        ('\u{114c6}', '\u{114c6}'), ('\u{115c1}', '\u{115d7}'), ('\u{11641}',
-        '\u{11643}'), ('\u{1173c}', '\u{1173e}'), ('\u{12470}', '\u{12474}'),
-        ('\u{16a6e}', '\u{16a6f}'), ('\u{16af5}', '\u{16af5}'), ('\u{16b37}',
-        '\u{16b3b}'), ('\u{16b44}', '\u{16b44}'), ('\u{1bc9f}', '\u{1bc9f}'),
-        ('\u{1da87}', '\u{1da8b}')
-    ];
-
-    pub const Pc_table: &'static [(char, char)] = &[
-        ('\u{5f}', '\u{5f}'), ('\u{203f}', '\u{2040}'), ('\u{2054}',
-        '\u{2054}'), ('\u{fe33}', '\u{fe34}'), ('\u{fe4d}', '\u{fe4f}'),
-        ('\u{ff3f}', '\u{ff3f}')
-    ];
-
-    pub const Pd_table: &'static [(char, char)] = &[
-        ('\u{2d}', '\u{2d}'), ('\u{58a}', '\u{58a}'), ('\u{5be}', '\u{5be}'),
-        ('\u{1400}', '\u{1400}'), ('\u{1806}', '\u{1806}'), ('\u{2010}',
-        '\u{2015}'), ('\u{2e17}', '\u{2e17}'), ('\u{2e1a}', '\u{2e1a}'),
-        ('\u{2e3a}', '\u{2e3b}'), ('\u{2e40}', '\u{2e40}'), ('\u{301c}',
-        '\u{301c}'), ('\u{3030}', '\u{3030}'), ('\u{30a0}', '\u{30a0}'),
-        ('\u{fe31}', '\u{fe32}'), ('\u{fe58}', '\u{fe58}'), ('\u{fe63}',
-        '\u{fe63}'), ('\u{ff0d}', '\u{ff0d}')
-    ];
-
-    pub const Pe_table: &'static [(char, char)] = &[
-        ('\u{29}', '\u{29}'), ('\u{5d}', '\u{5d}'), ('\u{7d}', '\u{7d}'),
-        ('\u{f3b}', '\u{f3b}'), ('\u{f3d}', '\u{f3d}'), ('\u{169c}',
-        '\u{169c}'), ('\u{2046}', '\u{2046}'), ('\u{207e}', '\u{207e}'),
-        ('\u{208e}', '\u{208e}'), ('\u{2309}', '\u{2309}'), ('\u{230b}',
-        '\u{230b}'), ('\u{232a}', '\u{232a}'), ('\u{2769}', '\u{2769}'),
-        ('\u{276b}', '\u{276b}'), ('\u{276d}', '\u{276d}'), ('\u{276f}',
-        '\u{276f}'), ('\u{2771}', '\u{2771}'), ('\u{2773}', '\u{2773}'),
-        ('\u{2775}', '\u{2775}'), ('\u{27c6}', '\u{27c6}'), ('\u{27e7}',
-        '\u{27e7}'), ('\u{27e9}', '\u{27e9}'), ('\u{27eb}', '\u{27eb}'),
-        ('\u{27ed}', '\u{27ed}'), ('\u{27ef}', '\u{27ef}'), ('\u{2984}',
-        '\u{2984}'), ('\u{2986}', '\u{2986}'), ('\u{2988}', '\u{2988}'),
-        ('\u{298a}', '\u{298a}'), ('\u{298c}', '\u{298c}'), ('\u{298e}',
-        '\u{298e}'), ('\u{2990}', '\u{2990}'), ('\u{2992}', '\u{2992}'),
-        ('\u{2994}', '\u{2994}'), ('\u{2996}', '\u{2996}'), ('\u{2998}',
-        '\u{2998}'), ('\u{29d9}', '\u{29d9}'), ('\u{29db}', '\u{29db}'),
-        ('\u{29fd}', '\u{29fd}'), ('\u{2e23}', '\u{2e23}'), ('\u{2e25}',
-        '\u{2e25}'), ('\u{2e27}', '\u{2e27}'), ('\u{2e29}', '\u{2e29}'),
-        ('\u{3009}', '\u{3009}'), ('\u{300b}', '\u{300b}'), ('\u{300d}',
-        '\u{300d}'), ('\u{300f}', '\u{300f}'), ('\u{3011}', '\u{3011}'),
-        ('\u{3015}', '\u{3015}'), ('\u{3017}', '\u{3017}'), ('\u{3019}',
-        '\u{3019}'), ('\u{301b}', '\u{301b}'), ('\u{301e}', '\u{301f}'),
-        ('\u{fd3e}', '\u{fd3e}'), ('\u{fe18}', '\u{fe18}'), ('\u{fe36}',
-        '\u{fe36}'), ('\u{fe38}', '\u{fe38}'), ('\u{fe3a}', '\u{fe3a}'),
-        ('\u{fe3c}', '\u{fe3c}'), ('\u{fe3e}', '\u{fe3e}'), ('\u{fe40}',
-        '\u{fe40}'), ('\u{fe42}', '\u{fe42}'), ('\u{fe44}', '\u{fe44}'),
-        ('\u{fe48}', '\u{fe48}'), ('\u{fe5a}', '\u{fe5a}'), ('\u{fe5c}',
-        '\u{fe5c}'), ('\u{fe5e}', '\u{fe5e}'), ('\u{ff09}', '\u{ff09}'),
-        ('\u{ff3d}', '\u{ff3d}'), ('\u{ff5d}', '\u{ff5d}'), ('\u{ff60}',
-        '\u{ff60}'), ('\u{ff63}', '\u{ff63}')
-    ];
-
-    pub const Pf_table: &'static [(char, char)] = &[
-        ('\u{bb}', '\u{bb}'), ('\u{2019}', '\u{2019}'), ('\u{201d}',
-        '\u{201d}'), ('\u{203a}', '\u{203a}'), ('\u{2e03}', '\u{2e03}'),
-        ('\u{2e05}', '\u{2e05}'), ('\u{2e0a}', '\u{2e0a}'), ('\u{2e0d}',
-        '\u{2e0d}'), ('\u{2e1d}', '\u{2e1d}'), ('\u{2e21}', '\u{2e21}')
-    ];
-
-    pub const Pi_table: &'static [(char, char)] = &[
-        ('\u{ab}', '\u{ab}'), ('\u{2018}', '\u{2018}'), ('\u{201b}',
-        '\u{201c}'), ('\u{201f}', '\u{201f}'), ('\u{2039}', '\u{2039}'),
-        ('\u{2e02}', '\u{2e02}'), ('\u{2e04}', '\u{2e04}'), ('\u{2e09}',
-        '\u{2e09}'), ('\u{2e0c}', '\u{2e0c}'), ('\u{2e1c}', '\u{2e1c}'),
-        ('\u{2e20}', '\u{2e20}')
-    ];
+                let canon_name = match canonical_prop(&property_name) {
+                    None => return Err(Error::PropertyNotFound),
+                    Some(canon_name) => canon_name,
+                };
+                Ok(match canon_name {
+                    "General_Category" => {
+                        let canon = match canonical_gencat(&property_value) {
+                            None => return Err(Error::PropertyValueNotFound),
+                            Some(canon) => canon,
+                        };
+                        CanonicalClassQuery::GeneralCategory(canon)
+                    }
+                    "Script" => {
+                        let canon = match canonical_script(&property_value) {
+                            None => return Err(Error::PropertyValueNotFound),
+                            Some(canon) => canon,
+                        };
+                        CanonicalClassQuery::Script(canon)
+                    }
+                    _ => {
+                        let vals = match property_values(canon_name) {
+                            None => return Err(Error::PropertyValueNotFound),
+                            Some(vals) => vals,
+                        };
+                        let canon_val = match canonical_value(
+                            vals,
+                            &property_value,
+                        ) {
+                            None => return Err(Error::PropertyValueNotFound),
+                            Some(canon_val) => canon_val,
+                        };
+                        CanonicalClassQuery::ByValue {
+                            property_name: canon_name,
+                            property_value: canon_val,
+                        }
+                    }
+                })
+            }
+        }
+    }
 
-    pub const Po_table: &'static [(char, char)] = &[
-        ('\u{21}', '\u{23}'), ('\u{25}', '\u{27}'), ('\u{2a}', '\u{2a}'),
-        ('\u{2c}', '\u{2c}'), ('\u{2e}', '\u{2f}'), ('\u{3a}', '\u{3b}'),
-        ('\u{3f}', '\u{40}'), ('\u{5c}', '\u{5c}'), ('\u{a1}', '\u{a1}'),
-        ('\u{a7}', '\u{a7}'), ('\u{b6}', '\u{b7}'), ('\u{bf}', '\u{bf}'),
-        ('\u{37e}', '\u{37e}'), ('\u{387}', '\u{387}'), ('\u{55a}', '\u{55f}'),
-        ('\u{589}', '\u{589}'), ('\u{5c0}', '\u{5c0}'), ('\u{5c3}', '\u{5c3}'),
-        ('\u{5c6}', '\u{5c6}'), ('\u{5f3}', '\u{5f4}'), ('\u{609}', '\u{60a}'),
-        ('\u{60c}', '\u{60d}'), ('\u{61b}', '\u{61b}'), ('\u{61e}', '\u{61f}'),
-        ('\u{66a}', '\u{66d}'), ('\u{6d4}', '\u{6d4}'), ('\u{700}', '\u{70d}'),
-        ('\u{7f7}', '\u{7f9}'), ('\u{830}', '\u{83e}'), ('\u{85e}', '\u{85e}'),
-        ('\u{964}', '\u{965}'), ('\u{970}', '\u{970}'), ('\u{af0}', '\u{af0}'),
-        ('\u{df4}', '\u{df4}'), ('\u{e4f}', '\u{e4f}'), ('\u{e5a}', '\u{e5b}'),
-        ('\u{f04}', '\u{f12}'), ('\u{f14}', '\u{f14}'), ('\u{f85}', '\u{f85}'),
-        ('\u{fd0}', '\u{fd4}'), ('\u{fd9}', '\u{fda}'), ('\u{104a}',
-        '\u{104f}'), ('\u{10fb}', '\u{10fb}'), ('\u{1360}', '\u{1368}'),
-        ('\u{166d}', '\u{166e}'), ('\u{16eb}', '\u{16ed}'), ('\u{1735}',
-        '\u{1736}'), ('\u{17d4}', '\u{17d6}'), ('\u{17d8}', '\u{17da}'),
-        ('\u{1800}', '\u{1805}'), ('\u{1807}', '\u{180a}'), ('\u{1944}',
-        '\u{1945}'), ('\u{1a1e}', '\u{1a1f}'), ('\u{1aa0}', '\u{1aa6}'),
-        ('\u{1aa8}', '\u{1aad}'), ('\u{1b5a}', '\u{1b60}'), ('\u{1bfc}',
-        '\u{1bff}'), ('\u{1c3b}', '\u{1c3f}'), ('\u{1c7e}', '\u{1c7f}'),
-        ('\u{1cc0}', '\u{1cc7}'), ('\u{1cd3}', '\u{1cd3}'), ('\u{2016}',
-        '\u{2017}'), ('\u{2020}', '\u{2027}'), ('\u{2030}', '\u{2038}'),
-        ('\u{203b}', '\u{203e}'), ('\u{2041}', '\u{2043}'), ('\u{2047}',
-        '\u{2051}'), ('\u{2053}', '\u{2053}'), ('\u{2055}', '\u{205e}'),
-        ('\u{2cf9}', '\u{2cfc}'), ('\u{2cfe}', '\u{2cff}'), ('\u{2d70}',
-        '\u{2d70}'), ('\u{2e00}', '\u{2e01}'), ('\u{2e06}', '\u{2e08}'),
-        ('\u{2e0b}', '\u{2e0b}'), ('\u{2e0e}', '\u{2e16}'), ('\u{2e18}',
-        '\u{2e19}'), ('\u{2e1b}', '\u{2e1b}'), ('\u{2e1e}', '\u{2e1f}'),
-        ('\u{2e2a}', '\u{2e2e}'), ('\u{2e30}', '\u{2e39}'), ('\u{2e3c}',
-        '\u{2e3f}'), ('\u{2e41}', '\u{2e41}'), ('\u{3001}', '\u{3003}'),
-        ('\u{303d}', '\u{303d}'), ('\u{30fb}', '\u{30fb}'), ('\u{a4fe}',
-        '\u{a4ff}'), ('\u{a60d}', '\u{a60f}'), ('\u{a673}', '\u{a673}'),
-        ('\u{a67e}', '\u{a67e}'), ('\u{a6f2}', '\u{a6f7}'), ('\u{a874}',
-        '\u{a877}'), ('\u{a8ce}', '\u{a8cf}'), ('\u{a8f8}', '\u{a8fa}'),
-        ('\u{a8fc}', '\u{a8fc}'), ('\u{a92e}', '\u{a92f}'), ('\u{a95f}',
-        '\u{a95f}'), ('\u{a9c1}', '\u{a9cd}'), ('\u{a9de}', '\u{a9df}'),
-        ('\u{aa5c}', '\u{aa5f}'), ('\u{aade}', '\u{aadf}'), ('\u{aaf0}',
-        '\u{aaf1}'), ('\u{abeb}', '\u{abeb}'), ('\u{fe10}', '\u{fe16}'),
-        ('\u{fe19}', '\u{fe19}'), ('\u{fe30}', '\u{fe30}'), ('\u{fe45}',
-        '\u{fe46}'), ('\u{fe49}', '\u{fe4c}'), ('\u{fe50}', '\u{fe52}'),
-        ('\u{fe54}', '\u{fe57}'), ('\u{fe5f}', '\u{fe61}'), ('\u{fe68}',
-        '\u{fe68}'), ('\u{fe6a}', '\u{fe6b}'), ('\u{ff01}', '\u{ff03}'),
-        ('\u{ff05}', '\u{ff07}'), ('\u{ff0a}', '\u{ff0a}'), ('\u{ff0c}',
-        '\u{ff0c}'), ('\u{ff0e}', '\u{ff0f}'), ('\u{ff1a}', '\u{ff1b}'),
-        ('\u{ff1f}', '\u{ff20}'), ('\u{ff3c}', '\u{ff3c}'), ('\u{ff61}',
-        '\u{ff61}'), ('\u{ff64}', '\u{ff65}'), ('\u{10100}', '\u{10102}'),
-        ('\u{1039f}', '\u{1039f}'), ('\u{103d0}', '\u{103d0}'), ('\u{1056f}',
-        '\u{1056f}'), ('\u{10857}', '\u{10857}'), ('\u{1091f}', '\u{1091f}'),
-        ('\u{1093f}', '\u{1093f}'), ('\u{10a50}', '\u{10a58}'), ('\u{10a7f}',
-        '\u{10a7f}'), ('\u{10af0}', '\u{10af6}'), ('\u{10b39}', '\u{10b3f}'),
-        ('\u{10b99}', '\u{10b9c}'), ('\u{11047}', '\u{1104d}'), ('\u{110bb}',
-        '\u{110bc}'), ('\u{110be}', '\u{110c1}'), ('\u{11140}', '\u{11143}'),
-        ('\u{11174}', '\u{11175}'), ('\u{111c5}', '\u{111c9}'), ('\u{111cd}',
-        '\u{111cd}'), ('\u{111db}', '\u{111db}'), ('\u{111dd}', '\u{111df}'),
-        ('\u{11238}', '\u{1123d}'), ('\u{112a9}', '\u{112a9}'), ('\u{114c6}',
-        '\u{114c6}'), ('\u{115c1}', '\u{115d7}'), ('\u{11641}', '\u{11643}'),
-        ('\u{1173c}', '\u{1173e}'), ('\u{12470}', '\u{12474}'), ('\u{16a6e}',
-        '\u{16a6f}'), ('\u{16af5}', '\u{16af5}'), ('\u{16b37}', '\u{16b3b}'),
-        ('\u{16b44}', '\u{16b44}'), ('\u{1bc9f}', '\u{1bc9f}'), ('\u{1da87}',
-        '\u{1da8b}')
-    ];
+    fn canonical_binary(&self, name: &str) -> Result<CanonicalClassQuery> {
+        let norm = normalize(name);
 
-    pub const Ps_table: &'static [(char, char)] = &[
-        ('\u{28}', '\u{28}'), ('\u{5b}', '\u{5b}'), ('\u{7b}', '\u{7b}'),
-        ('\u{f3a}', '\u{f3a}'), ('\u{f3c}', '\u{f3c}'), ('\u{169b}',
-        '\u{169b}'), ('\u{201a}', '\u{201a}'), ('\u{201e}', '\u{201e}'),
-        ('\u{2045}', '\u{2045}'), ('\u{207d}', '\u{207d}'), ('\u{208d}',
-        '\u{208d}'), ('\u{2308}', '\u{2308}'), ('\u{230a}', '\u{230a}'),
-        ('\u{2329}', '\u{2329}'), ('\u{2768}', '\u{2768}'), ('\u{276a}',
-        '\u{276a}'), ('\u{276c}', '\u{276c}'), ('\u{276e}', '\u{276e}'),
-        ('\u{2770}', '\u{2770}'), ('\u{2772}', '\u{2772}'), ('\u{2774}',
-        '\u{2774}'), ('\u{27c5}', '\u{27c5}'), ('\u{27e6}', '\u{27e6}'),
-        ('\u{27e8}', '\u{27e8}'), ('\u{27ea}', '\u{27ea}'), ('\u{27ec}',
-        '\u{27ec}'), ('\u{27ee}', '\u{27ee}'), ('\u{2983}', '\u{2983}'),
-        ('\u{2985}', '\u{2985}'), ('\u{2987}', '\u{2987}'), ('\u{2989}',
-        '\u{2989}'), ('\u{298b}', '\u{298b}'), ('\u{298d}', '\u{298d}'),
-        ('\u{298f}', '\u{298f}'), ('\u{2991}', '\u{2991}'), ('\u{2993}',
-        '\u{2993}'), ('\u{2995}', '\u{2995}'), ('\u{2997}', '\u{2997}'),
-        ('\u{29d8}', '\u{29d8}'), ('\u{29da}', '\u{29da}'), ('\u{29fc}',
-        '\u{29fc}'), ('\u{2e22}', '\u{2e22}'), ('\u{2e24}', '\u{2e24}'),
-        ('\u{2e26}', '\u{2e26}'), ('\u{2e28}', '\u{2e28}'), ('\u{2e42}',
-        '\u{2e42}'), ('\u{3008}', '\u{3008}'), ('\u{300a}', '\u{300a}'),
-        ('\u{300c}', '\u{300c}'), ('\u{300e}', '\u{300e}'), ('\u{3010}',
-        '\u{3010}'), ('\u{3014}', '\u{3014}'), ('\u{3016}', '\u{3016}'),
-        ('\u{3018}', '\u{3018}'), ('\u{301a}', '\u{301a}'), ('\u{301d}',
-        '\u{301d}'), ('\u{fd3f}', '\u{fd3f}'), ('\u{fe17}', '\u{fe17}'),
-        ('\u{fe35}', '\u{fe35}'), ('\u{fe37}', '\u{fe37}'), ('\u{fe39}',
-        '\u{fe39}'), ('\u{fe3b}', '\u{fe3b}'), ('\u{fe3d}', '\u{fe3d}'),
-        ('\u{fe3f}', '\u{fe3f}'), ('\u{fe41}', '\u{fe41}'), ('\u{fe43}',
-        '\u{fe43}'), ('\u{fe47}', '\u{fe47}'), ('\u{fe59}', '\u{fe59}'),
-        ('\u{fe5b}', '\u{fe5b}'), ('\u{fe5d}', '\u{fe5d}'), ('\u{ff08}',
-        '\u{ff08}'), ('\u{ff3b}', '\u{ff3b}'), ('\u{ff5b}', '\u{ff5b}'),
-        ('\u{ff5f}', '\u{ff5f}'), ('\u{ff62}', '\u{ff62}')
-    ];
+        if let Some(canon) = canonical_prop(&norm) {
+            return Ok(CanonicalClassQuery::Binary(canon));
+        }
+        if let Some(canon) = canonical_gencat(&norm) {
+            return Ok(CanonicalClassQuery::GeneralCategory(canon));
+        }
+        if let Some(canon) = canonical_script(&norm) {
+            return Ok(CanonicalClassQuery::Script(canon));
+        }
+        Err(Error::PropertyNotFound)
+    }
+}
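
Editor's note: the canonicalization added above is only exercised indirectly by the parser, so here is a minimal, test-style sketch of the behaviour it is expected to have. It is not part of the vendored source. It assumes it would sit alongside this module (hence `use super::*`), that `ClassQuery::Binary` and the `ByValue` fields hold string slices as the destructuring above suggests, and that "gc" is one of the General_Category property aliases recorded in the generated tables.

#[cfg(test)]
mod canonicalize_sketch {
    use super::*;

    #[test]
    fn by_value_general_category() {
        // Assumption: "gc" is an alias for General_Category in the generated
        // property tables, so canonicalize() should take the
        // GeneralCategory arm rather than the generic ByValue arm.
        let q = ClassQuery::ByValue {
            property_name: "gc",
            property_value: "Nd",
        };
        match q.canonicalize() {
            Ok(CanonicalClassQuery::GeneralCategory(name)) => {
                // The exact canonical spelling comes from the tables, so we
                // only check that some canonical name was produced.
                assert!(!name.is_empty());
            }
            _ => panic!("expected a GeneralCategory canonicalization"),
        }
    }

    #[test]
    fn unknown_property_is_rejected() {
        // A name matching no binary property, general category or script
        // falls through canonical_binary() to PropertyNotFound.
        let q = ClassQuery::Binary("not_a_real_property");
        match q.canonicalize() {
            Err(Error::PropertyNotFound) => {}
            _ => panic!("expected Error::PropertyNotFound"),
        }
    }
}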
 
-    pub const S_table: &'static [(char, char)] = &[
-        ('\u{24}', '\u{24}'), ('\u{2b}', '\u{2b}'), ('\u{3c}', '\u{3e}'),
-        ('\u{5e}', '\u{5e}'), ('\u{60}', '\u{60}'), ('\u{7c}', '\u{7c}'),
-        ('\u{7e}', '\u{7e}'), ('\u{a2}', '\u{a6}'), ('\u{a8}', '\u{a9}'),
-        ('\u{ac}', '\u{ac}'), ('\u{ae}', '\u{b1}'), ('\u{b4}', '\u{b4}'),
-        ('\u{b8}', '\u{b8}'), ('\u{d7}', '\u{d7}'), ('\u{f7}', '\u{f7}'),
-        ('\u{2c2}', '\u{2c5}'), ('\u{2d2}', '\u{2df}'), ('\u{2e5}', '\u{2eb}'),
-        ('\u{2ed}', '\u{2ed}'), ('\u{2ef}', '\u{2ff}'), ('\u{375}', '\u{375}'),
-        ('\u{384}', '\u{385}'), ('\u{3f6}', '\u{3f6}'), ('\u{482}', '\u{482}'),
-        ('\u{58d}', '\u{58f}'), ('\u{606}', '\u{608}'), ('\u{60b}', '\u{60b}'),
-        ('\u{60e}', '\u{60f}'), ('\u{6de}', '\u{6de}'), ('\u{6e9}', '\u{6e9}'),
-        ('\u{6fd}', '\u{6fe}'), ('\u{7f6}', '\u{7f6}'), ('\u{9f2}', '\u{9f3}'),
-        ('\u{9fa}', '\u{9fb}'), ('\u{af1}', '\u{af1}'), ('\u{b70}', '\u{b70}'),
-        ('\u{bf3}', '\u{bfa}'), ('\u{c7f}', '\u{c7f}'), ('\u{d79}', '\u{d79}'),
-        ('\u{e3f}', '\u{e3f}'), ('\u{f01}', '\u{f03}'), ('\u{f13}', '\u{f13}'),
-        ('\u{f15}', '\u{f17}'), ('\u{f1a}', '\u{f1f}'), ('\u{f34}', '\u{f34}'),
-        ('\u{f36}', '\u{f36}'), ('\u{f38}', '\u{f38}'), ('\u{fbe}', '\u{fc5}'),
-        ('\u{fc7}', '\u{fcc}'), ('\u{fce}', '\u{fcf}'), ('\u{fd5}', '\u{fd8}'),
-        ('\u{109e}', '\u{109f}'), ('\u{1390}', '\u{1399}'), ('\u{17db}',
-        '\u{17db}'), ('\u{1940}', '\u{1940}'), ('\u{19de}', '\u{19ff}'),
-        ('\u{1b61}', '\u{1b6a}'), ('\u{1b74}', '\u{1b7c}'), ('\u{1fbd}',
-        '\u{1fbd}'), ('\u{1fbf}', '\u{1fc1}'), ('\u{1fcd}', '\u{1fcf}'),
-        ('\u{1fdd}', '\u{1fdf}'), ('\u{1fed}', '\u{1fef}'), ('\u{1ffd}',
-        '\u{1ffe}'), ('\u{2044}', '\u{2044}'), ('\u{2052}', '\u{2052}'),
-        ('\u{207a}', '\u{207c}'), ('\u{208a}', '\u{208c}'), ('\u{20a0}',
-        '\u{20be}'), ('\u{2100}', '\u{2101}'), ('\u{2103}', '\u{2106}'),
-        ('\u{2108}', '\u{2109}'), ('\u{2114}', '\u{2114}'), ('\u{2116}',
-        '\u{2118}'), ('\u{211e}', '\u{2123}'), ('\u{2125}', '\u{2125}'),
-        ('\u{2127}', '\u{2127}'), ('\u{2129}', '\u{2129}'), ('\u{212e}',
-        '\u{212e}'), ('\u{213a}', '\u{213b}'), ('\u{2140}', '\u{2144}'),
-        ('\u{214a}', '\u{214d}'), ('\u{214f}', '\u{214f}'), ('\u{218a}',
-        '\u{218b}'), ('\u{2190}', '\u{2307}'), ('\u{230c}', '\u{2328}'),
-        ('\u{232b}', '\u{23fa}'), ('\u{2400}', '\u{2426}'), ('\u{2440}',
-        '\u{244a}'), ('\u{249c}', '\u{24e9}'), ('\u{2500}', '\u{2767}'),
-        ('\u{2794}', '\u{27c4}'), ('\u{27c7}', '\u{27e5}'), ('\u{27f0}',
-        '\u{2982}'), ('\u{2999}', '\u{29d7}'), ('\u{29dc}', '\u{29fb}'),
-        ('\u{29fe}', '\u{2b73}'), ('\u{2b76}', '\u{2b95}'), ('\u{2b98}',
-        '\u{2bb9}'), ('\u{2bbd}', '\u{2bc8}'), ('\u{2bca}', '\u{2bd1}'),
-        ('\u{2bec}', '\u{2bef}'), ('\u{2ce5}', '\u{2cea}'), ('\u{2e80}',
-        '\u{2e99}'), ('\u{2e9b}', '\u{2ef3}'), ('\u{2f00}', '\u{2fd5}'),
-        ('\u{2ff0}', '\u{2ffb}'), ('\u{3004}', '\u{3004}'), ('\u{3012}',
-        '\u{3013}'), ('\u{3020}', '\u{3020}'), ('\u{3036}', '\u{3037}'),
-        ('\u{303e}', '\u{303f}'), ('\u{309b}', '\u{309c}'), ('\u{3190}',
-        '\u{3191}'), ('\u{3196}', '\u{319f}'), ('\u{31c0}', '\u{31e3}'),
-        ('\u{3200}', '\u{321e}'), ('\u{322a}', '\u{3247}'), ('\u{3250}',
-        '\u{3250}'), ('\u{3260}', '\u{327f}'), ('\u{328a}', '\u{32b0}'),
-        ('\u{32c0}', '\u{32fe}'), ('\u{3300}', '\u{33ff}'), ('\u{4dc0}',
-        '\u{4dff}'), ('\u{a490}', '\u{a4c6}'), ('\u{a700}', '\u{a716}'),
-        ('\u{a720}', '\u{a721}'), ('\u{a789}', '\u{a78a}'), ('\u{a828}',
-        '\u{a82b}'), ('\u{a836}', '\u{a839}'), ('\u{aa77}', '\u{aa79}'),
-        ('\u{ab5b}', '\u{ab5b}'), ('\u{fb29}', '\u{fb29}'), ('\u{fbb2}',
-        '\u{fbc1}'), ('\u{fdfc}', '\u{fdfd}'), ('\u{fe62}', '\u{fe62}'),
-        ('\u{fe64}', '\u{fe66}'), ('\u{fe69}', '\u{fe69}'), ('\u{ff04}',
-        '\u{ff04}'), ('\u{ff0b}', '\u{ff0b}'), ('\u{ff1c}', '\u{ff1e}'),
-        ('\u{ff3e}', '\u{ff3e}'), ('\u{ff40}', '\u{ff40}'), ('\u{ff5c}',
-        '\u{ff5c}'), ('\u{ff5e}', '\u{ff5e}'), ('\u{ffe0}', '\u{ffe6}'),
-        ('\u{ffe8}', '\u{ffee}'), ('\u{fffc}', '\u{fffd}'), ('\u{10137}',
-        '\u{1013f}'), ('\u{10179}', '\u{10189}'), ('\u{1018c}', '\u{1018c}'),
-        ('\u{10190}', '\u{1019b}'), ('\u{101a0}', '\u{101a0}'), ('\u{101d0}',
-        '\u{101fc}'), ('\u{10877}', '\u{10878}'), ('\u{10ac8}', '\u{10ac8}'),
-        ('\u{1173f}', '\u{1173f}'), ('\u{16b3c}', '\u{16b3f}'), ('\u{16b45}',
-        '\u{16b45}'), ('\u{1bc9c}', '\u{1bc9c}'), ('\u{1d000}', '\u{1d0f5}'),
-        ('\u{1d100}', '\u{1d126}'), ('\u{1d129}', '\u{1d164}'), ('\u{1d16a}',
-        '\u{1d16c}'), ('\u{1d183}', '\u{1d184}'), ('\u{1d18c}', '\u{1d1a9}'),
-        ('\u{1d1ae}', '\u{1d1e8}'), ('\u{1d200}', '\u{1d241}'), ('\u{1d245}',
-        '\u{1d245}'), ('\u{1d300}', '\u{1d356}'), ('\u{1d6c1}', '\u{1d6c1}'),
-        ('\u{1d6db}', '\u{1d6db}'), ('\u{1d6fb}', '\u{1d6fb}'), ('\u{1d715}',
-        '\u{1d715}'), ('\u{1d735}', '\u{1d735}'), ('\u{1d74f}', '\u{1d74f}'),
-        ('\u{1d76f}', '\u{1d76f}'), ('\u{1d789}', '\u{1d789}'), ('\u{1d7a9}',
-        '\u{1d7a9}'), ('\u{1d7c3}', '\u{1d7c3}'), ('\u{1d800}', '\u{1d9ff}'),
-        ('\u{1da37}', '\u{1da3a}'), ('\u{1da6d}', '\u{1da74}'), ('\u{1da76}',
-        '\u{1da83}'), ('\u{1da85}', '\u{1da86}'), ('\u{1eef0}', '\u{1eef1}'),
-        ('\u{1f000}', '\u{1f02b}'), ('\u{1f030}', '\u{1f093}'), ('\u{1f0a0}',
-        '\u{1f0ae}'), ('\u{1f0b1}', '\u{1f0bf}'), ('\u{1f0c1}', '\u{1f0cf}'),
-        ('\u{1f0d1}', '\u{1f0f5}'), ('\u{1f110}', '\u{1f12e}'), ('\u{1f130}',
-        '\u{1f16b}'), ('\u{1f170}', '\u{1f19a}'), ('\u{1f1e6}', '\u{1f202}'),
-        ('\u{1f210}', '\u{1f23a}'), ('\u{1f240}', '\u{1f248}'), ('\u{1f250}',
-        '\u{1f251}'), ('\u{1f300}', '\u{1f579}'), ('\u{1f57b}', '\u{1f5a3}'),
-        ('\u{1f5a5}', '\u{1f6d0}'), ('\u{1f6e0}', '\u{1f6ec}'), ('\u{1f6f0}',
-        '\u{1f6f3}'), ('\u{1f700}', '\u{1f773}'), ('\u{1f780}', '\u{1f7d4}'),
-        ('\u{1f800}', '\u{1f80b}'), ('\u{1f810}', '\u{1f847}'), ('\u{1f850}',
-        '\u{1f859}'), ('\u{1f860}', '\u{1f887}'), ('\u{1f890}', '\u{1f8ad}'),
-        ('\u{1f910}', '\u{1f918}'), ('\u{1f980}', '\u{1f984}'), ('\u{1f9c0}',
-        '\u{1f9c0}')
-    ];
-
-    pub const Sc_table: &'static [(char, char)] = &[
-        ('\u{24}', '\u{24}'), ('\u{a2}', '\u{a5}'), ('\u{58f}', '\u{58f}'),
-        ('\u{60b}', '\u{60b}'), ('\u{9f2}', '\u{9f3}'), ('\u{9fb}', '\u{9fb}'),
-        ('\u{af1}', '\u{af1}'), ('\u{bf9}', '\u{bf9}'), ('\u{e3f}', '\u{e3f}'),
-        ('\u{17db}', '\u{17db}'), ('\u{20a0}', '\u{20be}'), ('\u{a838}',
-        '\u{a838}'), ('\u{fdfc}', '\u{fdfc}'), ('\u{fe69}', '\u{fe69}'),
-        ('\u{ff04}', '\u{ff04}'), ('\u{ffe0}', '\u{ffe1}'), ('\u{ffe5}',
-        '\u{ffe6}')
-    ];
-
-    pub const Sk_table: &'static [(char, char)] = &[
-        ('\u{5e}', '\u{5e}'), ('\u{60}', '\u{60}'), ('\u{a8}', '\u{a8}'),
-        ('\u{af}', '\u{af}'), ('\u{b4}', '\u{b4}'), ('\u{b8}', '\u{b8}'),
-        ('\u{2c2}', '\u{2c5}'), ('\u{2d2}', '\u{2df}'), ('\u{2e5}', '\u{2eb}'),
-        ('\u{2ed}', '\u{2ed}'), ('\u{2ef}', '\u{2ff}'), ('\u{375}', '\u{375}'),
-        ('\u{384}', '\u{385}'), ('\u{1fbd}', '\u{1fbd}'), ('\u{1fbf}',
-        '\u{1fc1}'), ('\u{1fcd}', '\u{1fcf}'), ('\u{1fdd}', '\u{1fdf}'),
-        ('\u{1fed}', '\u{1fef}'), ('\u{1ffd}', '\u{1ffe}'), ('\u{309b}',
-        '\u{309c}'), ('\u{a700}', '\u{a716}'), ('\u{a720}', '\u{a721}'),
-        ('\u{a789}', '\u{a78a}'), ('\u{ab5b}', '\u{ab5b}'), ('\u{fbb2}',
-        '\u{fbc1}'), ('\u{ff3e}', '\u{ff3e}'), ('\u{ff40}', '\u{ff40}'),
-        ('\u{ffe3}', '\u{ffe3}'), ('\u{1f3fb}', '\u{1f3ff}')
-    ];
-
-    pub const Sm_table: &'static [(char, char)] = &[
-        ('\u{2b}', '\u{2b}'), ('\u{3c}', '\u{3e}'), ('\u{7c}', '\u{7c}'),
-        ('\u{7e}', '\u{7e}'), ('\u{ac}', '\u{ac}'), ('\u{b1}', '\u{b1}'),
-        ('\u{d7}', '\u{d7}'), ('\u{f7}', '\u{f7}'), ('\u{3f6}', '\u{3f6}'),
-        ('\u{606}', '\u{608}'), ('\u{2044}', '\u{2044}'), ('\u{2052}',
-        '\u{2052}'), ('\u{207a}', '\u{207c}'), ('\u{208a}', '\u{208c}'),
-        ('\u{2118}', '\u{2118}'), ('\u{2140}', '\u{2144}'), ('\u{214b}',
-        '\u{214b}'), ('\u{2190}', '\u{2194}'), ('\u{219a}', '\u{219b}'),
-        ('\u{21a0}', '\u{21a0}'), ('\u{21a3}', '\u{21a3}'), ('\u{21a6}',
-        '\u{21a6}'), ('\u{21ae}', '\u{21ae}'), ('\u{21ce}', '\u{21cf}'),
-        ('\u{21d2}', '\u{21d2}'), ('\u{21d4}', '\u{21d4}'), ('\u{21f4}',
-        '\u{22ff}'), ('\u{2320}', '\u{2321}'), ('\u{237c}', '\u{237c}'),
-        ('\u{239b}', '\u{23b3}'), ('\u{23dc}', '\u{23e1}'), ('\u{25b7}',
-        '\u{25b7}'), ('\u{25c1}', '\u{25c1}'), ('\u{25f8}', '\u{25ff}'),
-        ('\u{266f}', '\u{266f}'), ('\u{27c0}', '\u{27c4}'), ('\u{27c7}',
-        '\u{27e5}'), ('\u{27f0}', '\u{27ff}'), ('\u{2900}', '\u{2982}'),
-        ('\u{2999}', '\u{29d7}'), ('\u{29dc}', '\u{29fb}'), ('\u{29fe}',
-        '\u{2aff}'), ('\u{2b30}', '\u{2b44}'), ('\u{2b47}', '\u{2b4c}'),
-        ('\u{fb29}', '\u{fb29}'), ('\u{fe62}', '\u{fe62}'), ('\u{fe64}',
-        '\u{fe66}'), ('\u{ff0b}', '\u{ff0b}'), ('\u{ff1c}', '\u{ff1e}'),
-        ('\u{ff5c}', '\u{ff5c}'), ('\u{ff5e}', '\u{ff5e}'), ('\u{ffe2}',
-        '\u{ffe2}'), ('\u{ffe9}', '\u{ffec}'), ('\u{1d6c1}', '\u{1d6c1}'),
-        ('\u{1d6db}', '\u{1d6db}'), ('\u{1d6fb}', '\u{1d6fb}'), ('\u{1d715}',
-        '\u{1d715}'), ('\u{1d735}', '\u{1d735}'), ('\u{1d74f}', '\u{1d74f}'),
-        ('\u{1d76f}', '\u{1d76f}'), ('\u{1d789}', '\u{1d789}'), ('\u{1d7a9}',
-        '\u{1d7a9}'), ('\u{1d7c3}', '\u{1d7c3}'), ('\u{1eef0}', '\u{1eef1}')
-    ];
-
-    pub const So_table: &'static [(char, char)] = &[
-        ('\u{a6}', '\u{a6}'), ('\u{a9}', '\u{a9}'), ('\u{ae}', '\u{ae}'),
-        ('\u{b0}', '\u{b0}'), ('\u{482}', '\u{482}'), ('\u{58d}', '\u{58e}'),
-        ('\u{60e}', '\u{60f}'), ('\u{6de}', '\u{6de}'), ('\u{6e9}', '\u{6e9}'),
-        ('\u{6fd}', '\u{6fe}'), ('\u{7f6}', '\u{7f6}'), ('\u{9fa}', '\u{9fa}'),
-        ('\u{b70}', '\u{b70}'), ('\u{bf3}', '\u{bf8}'), ('\u{bfa}', '\u{bfa}'),
-        ('\u{c7f}', '\u{c7f}'), ('\u{d79}', '\u{d79}'), ('\u{f01}', '\u{f03}'),
-        ('\u{f13}', '\u{f13}'), ('\u{f15}', '\u{f17}'), ('\u{f1a}', '\u{f1f}'),
-        ('\u{f34}', '\u{f34}'), ('\u{f36}', '\u{f36}'), ('\u{f38}', '\u{f38}'),
-        ('\u{fbe}', '\u{fc5}'), ('\u{fc7}', '\u{fcc}'), ('\u{fce}', '\u{fcf}'),
-        ('\u{fd5}', '\u{fd8}'), ('\u{109e}', '\u{109f}'), ('\u{1390}',
-        '\u{1399}'), ('\u{1940}', '\u{1940}'), ('\u{19de}', '\u{19ff}'),
-        ('\u{1b61}', '\u{1b6a}'), ('\u{1b74}', '\u{1b7c}'), ('\u{2100}',
-        '\u{2101}'), ('\u{2103}', '\u{2106}'), ('\u{2108}', '\u{2109}'),
-        ('\u{2114}', '\u{2114}'), ('\u{2116}', '\u{2117}'), ('\u{211e}',
-        '\u{2123}'), ('\u{2125}', '\u{2125}'), ('\u{2127}', '\u{2127}'),
-        ('\u{2129}', '\u{2129}'), ('\u{212e}', '\u{212e}'), ('\u{213a}',
-        '\u{213b}'), ('\u{214a}', '\u{214a}'), ('\u{214c}', '\u{214d}'),
-        ('\u{214f}', '\u{214f}'), ('\u{218a}', '\u{218b}'), ('\u{2195}',
-        '\u{2199}'), ('\u{219c}', '\u{219f}'), ('\u{21a1}', '\u{21a2}'),
-        ('\u{21a4}', '\u{21a5}'), ('\u{21a7}', '\u{21ad}'), ('\u{21af}',
-        '\u{21cd}'), ('\u{21d0}', '\u{21d1}'), ('\u{21d3}', '\u{21d3}'),
-        ('\u{21d5}', '\u{21f3}'), ('\u{2300}', '\u{2307}'), ('\u{230c}',
-        '\u{231f}'), ('\u{2322}', '\u{2328}'), ('\u{232b}', '\u{237b}'),
-        ('\u{237d}', '\u{239a}'), ('\u{23b4}', '\u{23db}'), ('\u{23e2}',
-        '\u{23fa}'), ('\u{2400}', '\u{2426}'), ('\u{2440}', '\u{244a}'),
-        ('\u{249c}', '\u{24e9}'), ('\u{2500}', '\u{25b6}'), ('\u{25b8}',
-        '\u{25c0}'), ('\u{25c2}', '\u{25f7}'), ('\u{2600}', '\u{266e}'),
-        ('\u{2670}', '\u{2767}'), ('\u{2794}', '\u{27bf}'), ('\u{2800}',
-        '\u{28ff}'), ('\u{2b00}', '\u{2b2f}'), ('\u{2b45}', '\u{2b46}'),
-        ('\u{2b4d}', '\u{2b73}'), ('\u{2b76}', '\u{2b95}'), ('\u{2b98}',
-        '\u{2bb9}'), ('\u{2bbd}', '\u{2bc8}'), ('\u{2bca}', '\u{2bd1}'),
-        ('\u{2bec}', '\u{2bef}'), ('\u{2ce5}', '\u{2cea}'), ('\u{2e80}',
-        '\u{2e99}'), ('\u{2e9b}', '\u{2ef3}'), ('\u{2f00}', '\u{2fd5}'),
-        ('\u{2ff0}', '\u{2ffb}'), ('\u{3004}', '\u{3004}'), ('\u{3012}',
-        '\u{3013}'), ('\u{3020}', '\u{3020}'), ('\u{3036}', '\u{3037}'),
-        ('\u{303e}', '\u{303f}'), ('\u{3190}', '\u{3191}'), ('\u{3196}',
-        '\u{319f}'), ('\u{31c0}', '\u{31e3}'), ('\u{3200}', '\u{321e}'),
-        ('\u{322a}', '\u{3247}'), ('\u{3250}', '\u{3250}'), ('\u{3260}',
-        '\u{327f}'), ('\u{328a}', '\u{32b0}'), ('\u{32c0}', '\u{32fe}'),
-        ('\u{3300}', '\u{33ff}'), ('\u{4dc0}', '\u{4dff}'), ('\u{a490}',
-        '\u{a4c6}'), ('\u{a828}', '\u{a82b}'), ('\u{a836}', '\u{a837}'),
-        ('\u{a839}', '\u{a839}'), ('\u{aa77}', '\u{aa79}'), ('\u{fdfd}',
-        '\u{fdfd}'), ('\u{ffe4}', '\u{ffe4}'), ('\u{ffe8}', '\u{ffe8}'),
-        ('\u{ffed}', '\u{ffee}'), ('\u{fffc}', '\u{fffd}'), ('\u{10137}',
-        '\u{1013f}'), ('\u{10179}', '\u{10189}'), ('\u{1018c}', '\u{1018c}'),
-        ('\u{10190}', '\u{1019b}'), ('\u{101a0}', '\u{101a0}'), ('\u{101d0}',
-        '\u{101fc}'), ('\u{10877}', '\u{10878}'), ('\u{10ac8}', '\u{10ac8}'),
-        ('\u{1173f}', '\u{1173f}'), ('\u{16b3c}', '\u{16b3f}'), ('\u{16b45}',
-        '\u{16b45}'), ('\u{1bc9c}', '\u{1bc9c}'), ('\u{1d000}', '\u{1d0f5}'),
-        ('\u{1d100}', '\u{1d126}'), ('\u{1d129}', '\u{1d164}'), ('\u{1d16a}',
-        '\u{1d16c}'), ('\u{1d183}', '\u{1d184}'), ('\u{1d18c}', '\u{1d1a9}'),
-        ('\u{1d1ae}', '\u{1d1e8}'), ('\u{1d200}', '\u{1d241}'), ('\u{1d245}',
-        '\u{1d245}'), ('\u{1d300}', '\u{1d356}'), ('\u{1d800}', '\u{1d9ff}'),
-        ('\u{1da37}', '\u{1da3a}'), ('\u{1da6d}', '\u{1da74}'), ('\u{1da76}',
-        '\u{1da83}'), ('\u{1da85}', '\u{1da86}'), ('\u{1f000}', '\u{1f02b}'),
-        ('\u{1f030}', '\u{1f093}'), ('\u{1f0a0}', '\u{1f0ae}'), ('\u{1f0b1}',
-        '\u{1f0bf}'), ('\u{1f0c1}', '\u{1f0cf}'), ('\u{1f0d1}', '\u{1f0f5}'),
-        ('\u{1f110}', '\u{1f12e}'), ('\u{1f130}', '\u{1f16b}'), ('\u{1f170}',
-        '\u{1f19a}'), ('\u{1f1e6}', '\u{1f202}'), ('\u{1f210}', '\u{1f23a}'),
-        ('\u{1f240}', '\u{1f248}'), ('\u{1f250}', '\u{1f251}'), ('\u{1f300}',
-        '\u{1f3fa}'), ('\u{1f400}', '\u{1f579}'), ('\u{1f57b}', '\u{1f5a3}'),
-        ('\u{1f5a5}', '\u{1f6d0}'), ('\u{1f6e0}', '\u{1f6ec}'), ('\u{1f6f0}',
-        '\u{1f6f3}'), ('\u{1f700}', '\u{1f773}'), ('\u{1f780}', '\u{1f7d4}'),
-        ('\u{1f800}', '\u{1f80b}'), ('\u{1f810}', '\u{1f847}'), ('\u{1f850}',
-        '\u{1f859}'), ('\u{1f860}', '\u{1f887}'), ('\u{1f890}', '\u{1f8ad}'),
-        ('\u{1f910}', '\u{1f918}'), ('\u{1f980}', '\u{1f984}'), ('\u{1f9c0}',
-        '\u{1f9c0}')
-    ];
-
-    pub const Z_table: &'static [(char, char)] = &[
-        ('\u{20}', '\u{20}'), ('\u{a0}', '\u{a0}'), ('\u{1680}', '\u{1680}'),
-        ('\u{2000}', '\u{200a}'), ('\u{2028}', '\u{2029}'), ('\u{202f}',
-        '\u{202f}'), ('\u{205f}', '\u{205f}'), ('\u{3000}', '\u{3000}')
-    ];
-
-    pub const Zl_table: &'static [(char, char)] = &[
-        ('\u{2028}', '\u{2028}')
-    ];
-
-    pub const Zp_table: &'static [(char, char)] = &[
-        ('\u{2029}', '\u{2029}')
-    ];
-
-    pub const Zs_table: &'static [(char, char)] = &[
-        ('\u{20}', '\u{20}'), ('\u{a0}', '\u{a0}'), ('\u{1680}', '\u{1680}'),
-        ('\u{2000}', '\u{200a}'), ('\u{202f}', '\u{202f}'), ('\u{205f}',
-        '\u{205f}'), ('\u{3000}', '\u{3000}')
-    ];
-
+/// Like ClassQuery, but its parameters have been canonicalized. This also
+/// differentiates binary properties from flattened general categories and
+/// scripts.
+#[derive(Debug, Eq, PartialEq)]
+enum CanonicalClassQuery {
+    /// The canonical binary property name.
+    Binary(&'static str),
+    /// The canonical general category name.
+    GeneralCategory(&'static str),
+    /// The canonical script name.
+    Script(&'static str),
+    /// An arbitrary association between property and value, both of which
+    /// have been canonicalized.
+    ///
+    /// Note that by construction, the property name of ByValue will never
+    /// be General_Category or Script. Those two cases are subsumed by the
+    /// eponymous variants.
+    ByValue {
+        /// The canonical property name.
+        property_name: &'static str,
+        /// The canonical property value.
+        property_value: &'static str,
+    },
 }
 
-pub mod derived_property {
-    pub const Alphabetic_table: &'static [(char, char)] = &[
-        ('\u{41}', '\u{5a}'), ('\u{61}', '\u{7a}'), ('\u{aa}', '\u{aa}'),
-        ('\u{b5}', '\u{b5}'), ('\u{ba}', '\u{ba}'), ('\u{c0}', '\u{d6}'),
-        ('\u{d8}', '\u{f6}'), ('\u{f8}', '\u{2c1}'), ('\u{2c6}', '\u{2d1}'),
-        ('\u{2e0}', '\u{2e4}'), ('\u{2ec}', '\u{2ec}'), ('\u{2ee}', '\u{2ee}'),
-        ('\u{345}', '\u{345}'), ('\u{370}', '\u{374}'), ('\u{376}', '\u{377}'),
-        ('\u{37a}', '\u{37d}'), ('\u{37f}', '\u{37f}'), ('\u{386}', '\u{386}'),
-        ('\u{388}', '\u{38a}'), ('\u{38c}', '\u{38c}'), ('\u{38e}', '\u{3a1}'),
-        ('\u{3a3}', '\u{3f5}'), ('\u{3f7}', '\u{481}'), ('\u{48a}', '\u{52f}'),
-        ('\u{531}', '\u{556}'), ('\u{559}', '\u{559}'), ('\u{561}', '\u{587}'),
-        ('\u{5b0}', '\u{5bd}'), ('\u{5bf}', '\u{5bf}'), ('\u{5c1}', '\u{5c2}'),
-        ('\u{5c4}', '\u{5c5}'), ('\u{5c7}', '\u{5c7}'), ('\u{5d0}', '\u{5ea}'),
-        ('\u{5f0}', '\u{5f2}'), ('\u{610}', '\u{61a}'), ('\u{620}', '\u{657}'),
-        ('\u{659}', '\u{65f}'), ('\u{66e}', '\u{6d3}'), ('\u{6d5}', '\u{6dc}'),
-        ('\u{6e1}', '\u{6e8}'), ('\u{6ed}', '\u{6ef}'), ('\u{6fa}', '\u{6fc}'),
-        ('\u{6ff}', '\u{6ff}'), ('\u{710}', '\u{73f}'), ('\u{74d}', '\u{7b1}'),
-        ('\u{7ca}', '\u{7ea}'), ('\u{7f4}', '\u{7f5}'), ('\u{7fa}', '\u{7fa}'),
-        ('\u{800}', '\u{817}'), ('\u{81a}', '\u{82c}'), ('\u{840}', '\u{858}'),
-        ('\u{8a0}', '\u{8b4}'), ('\u{8e3}', '\u{8e9}'), ('\u{8f0}', '\u{93b}'),
-        ('\u{93d}', '\u{94c}'), ('\u{94e}', '\u{950}'), ('\u{955}', '\u{963}'),
-        ('\u{971}', '\u{983}'), ('\u{985}', '\u{98c}'), ('\u{98f}', '\u{990}'),
-        ('\u{993}', '\u{9a8}'), ('\u{9aa}', '\u{9b0}'), ('\u{9b2}', '\u{9b2}'),
-        ('\u{9b6}', '\u{9b9}'), ('\u{9bd}', '\u{9c4}'), ('\u{9c7}', '\u{9c8}'),
-        ('\u{9cb}', '\u{9cc}'), ('\u{9ce}', '\u{9ce}'), ('\u{9d7}', '\u{9d7}'),
-        ('\u{9dc}', '\u{9dd}'), ('\u{9df}', '\u{9e3}'), ('\u{9f0}', '\u{9f1}'),
-        ('\u{a01}', '\u{a03}'), ('\u{a05}', '\u{a0a}'), ('\u{a0f}', '\u{a10}'),
-        ('\u{a13}', '\u{a28}'), ('\u{a2a}', '\u{a30}'), ('\u{a32}', '\u{a33}'),
-        ('\u{a35}', '\u{a36}'), ('\u{a38}', '\u{a39}'), ('\u{a3e}', '\u{a42}'),
-        ('\u{a47}', '\u{a48}'), ('\u{a4b}', '\u{a4c}'), ('\u{a51}', '\u{a51}'),
-        ('\u{a59}', '\u{a5c}'), ('\u{a5e}', '\u{a5e}'), ('\u{a70}', '\u{a75}'),
-        ('\u{a81}', '\u{a83}'), ('\u{a85}', '\u{a8d}'), ('\u{a8f}', '\u{a91}'),
-        ('\u{a93}', '\u{aa8}'), ('\u{aaa}', '\u{ab0}'), ('\u{ab2}', '\u{ab3}'),
-        ('\u{ab5}', '\u{ab9}'), ('\u{abd}', '\u{ac5}'), ('\u{ac7}', '\u{ac9}'),
-        ('\u{acb}', '\u{acc}'), ('\u{ad0}', '\u{ad0}'), ('\u{ae0}', '\u{ae3}'),
-        ('\u{af9}', '\u{af9}'), ('\u{b01}', '\u{b03}'), ('\u{b05}', '\u{b0c}'),
-        ('\u{b0f}', '\u{b10}'), ('\u{b13}', '\u{b28}'), ('\u{b2a}', '\u{b30}'),
-        ('\u{b32}', '\u{b33}'), ('\u{b35}', '\u{b39}'), ('\u{b3d}', '\u{b44}'),
-        ('\u{b47}', '\u{b48}'), ('\u{b4b}', '\u{b4c}'), ('\u{b56}', '\u{b57}'),
-        ('\u{b5c}', '\u{b5d}'), ('\u{b5f}', '\u{b63}'), ('\u{b71}', '\u{b71}'),
-        ('\u{b82}', '\u{b83}'), ('\u{b85}', '\u{b8a}'), ('\u{b8e}', '\u{b90}'),
-        ('\u{b92}', '\u{b95}'), ('\u{b99}', '\u{b9a}'), ('\u{b9c}', '\u{b9c}'),
-        ('\u{b9e}', '\u{b9f}'), ('\u{ba3}', '\u{ba4}'), ('\u{ba8}', '\u{baa}'),
-        ('\u{bae}', '\u{bb9}'), ('\u{bbe}', '\u{bc2}'), ('\u{bc6}', '\u{bc8}'),
-        ('\u{bca}', '\u{bcc}'), ('\u{bd0}', '\u{bd0}'), ('\u{bd7}', '\u{bd7}'),
-        ('\u{c00}', '\u{c03}'), ('\u{c05}', '\u{c0c}'), ('\u{c0e}', '\u{c10}'),
-        ('\u{c12}', '\u{c28}'), ('\u{c2a}', '\u{c39}'), ('\u{c3d}', '\u{c44}'),
-        ('\u{c46}', '\u{c48}'), ('\u{c4a}', '\u{c4c}'), ('\u{c55}', '\u{c56}'),
-        ('\u{c58}', '\u{c5a}'), ('\u{c60}', '\u{c63}'), ('\u{c81}', '\u{c83}'),
-        ('\u{c85}', '\u{c8c}'), ('\u{c8e}', '\u{c90}'), ('\u{c92}', '\u{ca8}'),
-        ('\u{caa}', '\u{cb3}'), ('\u{cb5}', '\u{cb9}'), ('\u{cbd}', '\u{cc4}'),
-        ('\u{cc6}', '\u{cc8}'), ('\u{cca}', '\u{ccc}'), ('\u{cd5}', '\u{cd6}'),
-        ('\u{cde}', '\u{cde}'), ('\u{ce0}', '\u{ce3}'), ('\u{cf1}', '\u{cf2}'),
-        ('\u{d01}', '\u{d03}'), ('\u{d05}', '\u{d0c}'), ('\u{d0e}', '\u{d10}'),
-        ('\u{d12}', '\u{d3a}'), ('\u{d3d}', '\u{d44}'), ('\u{d46}', '\u{d48}'),
-        ('\u{d4a}', '\u{d4c}'), ('\u{d4e}', '\u{d4e}'), ('\u{d57}', '\u{d57}'),
-        ('\u{d5f}', '\u{d63}'), ('\u{d7a}', '\u{d7f}'), ('\u{d82}', '\u{d83}'),
-        ('\u{d85}', '\u{d96}'), ('\u{d9a}', '\u{db1}'), ('\u{db3}', '\u{dbb}'),
-        ('\u{dbd}', '\u{dbd}'), ('\u{dc0}', '\u{dc6}'), ('\u{dcf}', '\u{dd4}'),
-        ('\u{dd6}', '\u{dd6}'), ('\u{dd8}', '\u{ddf}'), ('\u{df2}', '\u{df3}'),
-        ('\u{e01}', '\u{e3a}'), ('\u{e40}', '\u{e46}'), ('\u{e4d}', '\u{e4d}'),
-        ('\u{e81}', '\u{e82}'), ('\u{e84}', '\u{e84}'), ('\u{e87}', '\u{e88}'),
-        ('\u{e8a}', '\u{e8a}'), ('\u{e8d}', '\u{e8d}'), ('\u{e94}', '\u{e97}'),
-        ('\u{e99}', '\u{e9f}'), ('\u{ea1}', '\u{ea3}'), ('\u{ea5}', '\u{ea5}'),
-        ('\u{ea7}', '\u{ea7}'), ('\u{eaa}', '\u{eab}'), ('\u{ead}', '\u{eb9}'),
-        ('\u{ebb}', '\u{ebd}'), ('\u{ec0}', '\u{ec4}'), ('\u{ec6}', '\u{ec6}'),
-        ('\u{ecd}', '\u{ecd}'), ('\u{edc}', '\u{edf}'), ('\u{f00}', '\u{f00}'),
-        ('\u{f40}', '\u{f47}'), ('\u{f49}', '\u{f6c}'), ('\u{f71}', '\u{f81}'),
-        ('\u{f88}', '\u{f97}'), ('\u{f99}', '\u{fbc}'), ('\u{1000}',
-        '\u{1036}'), ('\u{1038}', '\u{1038}'), ('\u{103b}', '\u{103f}'),
-        ('\u{1050}', '\u{1062}'), ('\u{1065}', '\u{1068}'), ('\u{106e}',
-        '\u{1086}'), ('\u{108e}', '\u{108e}'), ('\u{109c}', '\u{109d}'),
-        ('\u{10a0}', '\u{10c5}'), ('\u{10c7}', '\u{10c7}'), ('\u{10cd}',
-        '\u{10cd}'), ('\u{10d0}', '\u{10fa}'), ('\u{10fc}', '\u{1248}'),
-        ('\u{124a}', '\u{124d}'), ('\u{1250}', '\u{1256}'), ('\u{1258}',
-        '\u{1258}'), ('\u{125a}', '\u{125d}'), ('\u{1260}', '\u{1288}'),
-        ('\u{128a}', '\u{128d}'), ('\u{1290}', '\u{12b0}'), ('\u{12b2}',
-        '\u{12b5}'), ('\u{12b8}', '\u{12be}'), ('\u{12c0}', '\u{12c0}'),
-        ('\u{12c2}', '\u{12c5}'), ('\u{12c8}', '\u{12d6}'), ('\u{12d8}',
-        '\u{1310}'), ('\u{1312}', '\u{1315}'), ('\u{1318}', '\u{135a}'),
-        ('\u{135f}', '\u{135f}'), ('\u{1380}', '\u{138f}'), ('\u{13a0}',
-        '\u{13f5}'), ('\u{13f8}', '\u{13fd}'), ('\u{1401}', '\u{166c}'),
-        ('\u{166f}', '\u{167f}'), ('\u{1681}', '\u{169a}'), ('\u{16a0}',
-        '\u{16ea}'), ('\u{16ee}', '\u{16f8}'), ('\u{1700}', '\u{170c}'),
-        ('\u{170e}', '\u{1713}'), ('\u{1720}', '\u{1733}'), ('\u{1740}',
-        '\u{1753}'), ('\u{1760}', '\u{176c}'), ('\u{176e}', '\u{1770}'),
-        ('\u{1772}', '\u{1773}'), ('\u{1780}', '\u{17b3}'), ('\u{17b6}',
-        '\u{17c8}'), ('\u{17d7}', '\u{17d7}'), ('\u{17dc}', '\u{17dc}'),
-        ('\u{1820}', '\u{1877}'), ('\u{1880}', '\u{18aa}'), ('\u{18b0}',
-        '\u{18f5}'), ('\u{1900}', '\u{191e}'), ('\u{1920}', '\u{192b}'),
-        ('\u{1930}', '\u{1938}'), ('\u{1950}', '\u{196d}'), ('\u{1970}',
-        '\u{1974}'), ('\u{1980}', '\u{19ab}'), ('\u{19b0}', '\u{19c9}'),
-        ('\u{1a00}', '\u{1a1b}'), ('\u{1a20}', '\u{1a5e}'), ('\u{1a61}',
-        '\u{1a74}'), ('\u{1aa7}', '\u{1aa7}'), ('\u{1b00}', '\u{1b33}'),
-        ('\u{1b35}', '\u{1b43}'), ('\u{1b45}', '\u{1b4b}'), ('\u{1b80}',
-        '\u{1ba9}'), ('\u{1bac}', '\u{1baf}'), ('\u{1bba}', '\u{1be5}'),
-        ('\u{1be7}', '\u{1bf1}'), ('\u{1c00}', '\u{1c35}'), ('\u{1c4d}',
-        '\u{1c4f}'), ('\u{1c5a}', '\u{1c7d}'), ('\u{1ce9}', '\u{1cec}'),
-        ('\u{1cee}', '\u{1cf3}'), ('\u{1cf5}', '\u{1cf6}'), ('\u{1d00}',
-        '\u{1dbf}'), ('\u{1de7}', '\u{1df4}'), ('\u{1e00}', '\u{1f15}'),
-        ('\u{1f18}', '\u{1f1d}'), ('\u{1f20}', '\u{1f45}'), ('\u{1f48}',
-        '\u{1f4d}'), ('\u{1f50}', '\u{1f57}'), ('\u{1f59}', '\u{1f59}'),
-        ('\u{1f5b}', '\u{1f5b}'), ('\u{1f5d}', '\u{1f5d}'), ('\u{1f5f}',
-        '\u{1f7d}'), ('\u{1f80}', '\u{1fb4}'), ('\u{1fb6}', '\u{1fbc}'),
-        ('\u{1fbe}', '\u{1fbe}'), ('\u{1fc2}', '\u{1fc4}'), ('\u{1fc6}',
-        '\u{1fcc}'), ('\u{1fd0}', '\u{1fd3}'), ('\u{1fd6}', '\u{1fdb}'),
-        ('\u{1fe0}', '\u{1fec}'), ('\u{1ff2}', '\u{1ff4}'), ('\u{1ff6}',
-        '\u{1ffc}'), ('\u{2071}', '\u{2071}'), ('\u{207f}', '\u{207f}'),
-        ('\u{2090}', '\u{209c}'), ('\u{2102}', '\u{2102}'), ('\u{2107}',
-        '\u{2107}'), ('\u{210a}', '\u{2113}'), ('\u{2115}', '\u{2115}'),
-        ('\u{2119}', '\u{211d}'), ('\u{2124}', '\u{2124}'), ('\u{2126}',
-        '\u{2126}'), ('\u{2128}', '\u{2128}'), ('\u{212a}', '\u{212d}'),
-        ('\u{212f}', '\u{2139}'), ('\u{213c}', '\u{213f}'), ('\u{2145}',
-        '\u{2149}'), ('\u{214e}', '\u{214e}'), ('\u{2160}', '\u{2188}'),
-        ('\u{24b6}', '\u{24e9}'), ('\u{2c00}', '\u{2c2e}'), ('\u{2c30}',
-        '\u{2c5e}'), ('\u{2c60}', '\u{2ce4}'), ('\u{2ceb}', '\u{2cee}'),
-        ('\u{2cf2}', '\u{2cf3}'), ('\u{2d00}', '\u{2d25}'), ('\u{2d27}',
-        '\u{2d27}'), ('\u{2d2d}', '\u{2d2d}'), ('\u{2d30}', '\u{2d67}'),
-        ('\u{2d6f}', '\u{2d6f}'), ('\u{2d80}', '\u{2d96}'), ('\u{2da0}',
-        '\u{2da6}'), ('\u{2da8}', '\u{2dae}'), ('\u{2db0}', '\u{2db6}'),
-        ('\u{2db8}', '\u{2dbe}'), ('\u{2dc0}', '\u{2dc6}'), ('\u{2dc8}',
-        '\u{2dce}'), ('\u{2dd0}', '\u{2dd6}'), ('\u{2dd8}', '\u{2dde}'),
-        ('\u{2de0}', '\u{2dff}'), ('\u{2e2f}', '\u{2e2f}'), ('\u{3005}',
-        '\u{3007}'), ('\u{3021}', '\u{3029}'), ('\u{3031}', '\u{3035}'),
-        ('\u{3038}', '\u{303c}'), ('\u{3041}', '\u{3096}'), ('\u{309d}',
-        '\u{309f}'), ('\u{30a1}', '\u{30fa}'), ('\u{30fc}', '\u{30ff}'),
-        ('\u{3105}', '\u{312d}'), ('\u{3131}', '\u{318e}'), ('\u{31a0}',
-        '\u{31ba}'), ('\u{31f0}', '\u{31ff}'), ('\u{3400}', '\u{4db5}'),
-        ('\u{4e00}', '\u{9fd5}'), ('\u{a000}', '\u{a48c}'), ('\u{a4d0}',
-        '\u{a4fd}'), ('\u{a500}', '\u{a60c}'), ('\u{a610}', '\u{a61f}'),
-        ('\u{a62a}', '\u{a62b}'), ('\u{a640}', '\u{a66e}'), ('\u{a674}',
-        '\u{a67b}'), ('\u{a67f}', '\u{a6ef}'), ('\u{a717}', '\u{a71f}'),
-        ('\u{a722}', '\u{a788}'), ('\u{a78b}', '\u{a7ad}'), ('\u{a7b0}',
-        '\u{a7b7}'), ('\u{a7f7}', '\u{a801}'), ('\u{a803}', '\u{a805}'),
-        ('\u{a807}', '\u{a80a}'), ('\u{a80c}', '\u{a827}'), ('\u{a840}',
-        '\u{a873}'), ('\u{a880}', '\u{a8c3}'), ('\u{a8f2}', '\u{a8f7}'),
-        ('\u{a8fb}', '\u{a8fb}'), ('\u{a8fd}', '\u{a8fd}'), ('\u{a90a}',
-        '\u{a92a}'), ('\u{a930}', '\u{a952}'), ('\u{a960}', '\u{a97c}'),
-        ('\u{a980}', '\u{a9b2}'), ('\u{a9b4}', '\u{a9bf}'), ('\u{a9cf}',
-        '\u{a9cf}'), ('\u{a9e0}', '\u{a9e4}'), ('\u{a9e6}', '\u{a9ef}'),
-        ('\u{a9fa}', '\u{a9fe}'), ('\u{aa00}', '\u{aa36}'), ('\u{aa40}',
-        '\u{aa4d}'), ('\u{aa60}', '\u{aa76}'), ('\u{aa7a}', '\u{aa7a}'),
-        ('\u{aa7e}', '\u{aabe}'), ('\u{aac0}', '\u{aac0}'), ('\u{aac2}',
-        '\u{aac2}'), ('\u{aadb}', '\u{aadd}'), ('\u{aae0}', '\u{aaef}'),
-        ('\u{aaf2}', '\u{aaf5}'), ('\u{ab01}', '\u{ab06}'), ('\u{ab09}',
-        '\u{ab0e}'), ('\u{ab11}', '\u{ab16}'), ('\u{ab20}', '\u{ab26}'),
-        ('\u{ab28}', '\u{ab2e}'), ('\u{ab30}', '\u{ab5a}'), ('\u{ab5c}',
-        '\u{ab65}'), ('\u{ab70}', '\u{abea}'), ('\u{ac00}', '\u{d7a3}'),
-        ('\u{d7b0}', '\u{d7c6}'), ('\u{d7cb}', '\u{d7fb}'), ('\u{f900}',
-        '\u{fa6d}'), ('\u{fa70}', '\u{fad9}'), ('\u{fb00}', '\u{fb06}'),
-        ('\u{fb13}', '\u{fb17}'), ('\u{fb1d}', '\u{fb28}'), ('\u{fb2a}',
-        '\u{fb36}'), ('\u{fb38}', '\u{fb3c}'), ('\u{fb3e}', '\u{fb3e}'),
-        ('\u{fb40}', '\u{fb41}'), ('\u{fb43}', '\u{fb44}'), ('\u{fb46}',
-        '\u{fbb1}'), ('\u{fbd3}', '\u{fd3d}'), ('\u{fd50}', '\u{fd8f}'),
-        ('\u{fd92}', '\u{fdc7}'), ('\u{fdf0}', '\u{fdfb}'), ('\u{fe70}',
-        '\u{fe74}'), ('\u{fe76}', '\u{fefc}'), ('\u{ff21}', '\u{ff3a}'),
-        ('\u{ff41}', '\u{ff5a}'), ('\u{ff66}', '\u{ffbe}'), ('\u{ffc2}',
-        '\u{ffc7}'), ('\u{ffca}', '\u{ffcf}'), ('\u{ffd2}', '\u{ffd7}'),
-        ('\u{ffda}', '\u{ffdc}'), ('\u{10000}', '\u{1000b}'), ('\u{1000d}',
-        '\u{10026}'), ('\u{10028}', '\u{1003a}'), ('\u{1003c}', '\u{1003d}'),
-        ('\u{1003f}', '\u{1004d}'), ('\u{10050}', '\u{1005d}'), ('\u{10080}',
-        '\u{100fa}'), ('\u{10140}', '\u{10174}'), ('\u{10280}', '\u{1029c}'),
-        ('\u{102a0}', '\u{102d0}'), ('\u{10300}', '\u{1031f}'), ('\u{10330}',
-        '\u{1034a}'), ('\u{10350}', '\u{1037a}'), ('\u{10380}', '\u{1039d}'),
-        ('\u{103a0}', '\u{103c3}'), ('\u{103c8}', '\u{103cf}'), ('\u{103d1}',
-        '\u{103d5}'), ('\u{10400}', '\u{1049d}'), ('\u{10500}', '\u{10527}'),
-        ('\u{10530}', '\u{10563}'), ('\u{10600}', '\u{10736}'), ('\u{10740}',
-        '\u{10755}'), ('\u{10760}', '\u{10767}'), ('\u{10800}', '\u{10805}'),
-        ('\u{10808}', '\u{10808}'), ('\u{1080a}', '\u{10835}'), ('\u{10837}',
-        '\u{10838}'), ('\u{1083c}', '\u{1083c}'), ('\u{1083f}', '\u{10855}'),
-        ('\u{10860}', '\u{10876}'), ('\u{10880}', '\u{1089e}'), ('\u{108e0}',
-        '\u{108f2}'), ('\u{108f4}', '\u{108f5}'), ('\u{10900}', '\u{10915}'),
-        ('\u{10920}', '\u{10939}'), ('\u{10980}', '\u{109b7}'), ('\u{109be}',
-        '\u{109bf}'), ('\u{10a00}', '\u{10a03}'), ('\u{10a05}', '\u{10a06}'),
-        ('\u{10a0c}', '\u{10a13}'), ('\u{10a15}', '\u{10a17}'), ('\u{10a19}',
-        '\u{10a33}'), ('\u{10a60}', '\u{10a7c}'), ('\u{10a80}', '\u{10a9c}'),
-        ('\u{10ac0}', '\u{10ac7}'), ('\u{10ac9}', '\u{10ae4}'), ('\u{10b00}',
-        '\u{10b35}'), ('\u{10b40}', '\u{10b55}'), ('\u{10b60}', '\u{10b72}'),
-        ('\u{10b80}', '\u{10b91}'), ('\u{10c00}', '\u{10c48}'), ('\u{10c80}',
-        '\u{10cb2}'), ('\u{10cc0}', '\u{10cf2}'), ('\u{11000}', '\u{11045}'),
-        ('\u{11082}', '\u{110b8}'), ('\u{110d0}', '\u{110e8}'), ('\u{11100}',
-        '\u{11132}'), ('\u{11150}', '\u{11172}'), ('\u{11176}', '\u{11176}'),
-        ('\u{11180}', '\u{111bf}'), ('\u{111c1}', '\u{111c4}'), ('\u{111da}',
-        '\u{111da}'), ('\u{111dc}', '\u{111dc}'), ('\u{11200}', '\u{11211}'),
-        ('\u{11213}', '\u{11234}'), ('\u{11237}', '\u{11237}'), ('\u{11280}',
-        '\u{11286}'), ('\u{11288}', '\u{11288}'), ('\u{1128a}', '\u{1128d}'),
-        ('\u{1128f}', '\u{1129d}'), ('\u{1129f}', '\u{112a8}'), ('\u{112b0}',
-        '\u{112e8}'), ('\u{11300}', '\u{11303}'), ('\u{11305}', '\u{1130c}'),
-        ('\u{1130f}', '\u{11310}'), ('\u{11313}', '\u{11328}'), ('\u{1132a}',
-        '\u{11330}'), ('\u{11332}', '\u{11333}'), ('\u{11335}', '\u{11339}'),
-        ('\u{1133d}', '\u{11344}'), ('\u{11347}', '\u{11348}'), ('\u{1134b}',
-        '\u{1134c}'), ('\u{11350}', '\u{11350}'), ('\u{11357}', '\u{11357}'),
-        ('\u{1135d}', '\u{11363}'), ('\u{11480}', '\u{114c1}'), ('\u{114c4}',
-        '\u{114c5}'), ('\u{114c7}', '\u{114c7}'), ('\u{11580}', '\u{115b5}'),
-        ('\u{115b8}', '\u{115be}'), ('\u{115d8}', '\u{115dd}'), ('\u{11600}',
-        '\u{1163e}'), ('\u{11640}', '\u{11640}'), ('\u{11644}', '\u{11644}'),
-        ('\u{11680}', '\u{116b5}'), ('\u{11700}', '\u{11719}'), ('\u{1171d}',
-        '\u{1172a}'), ('\u{118a0}', '\u{118df}'), ('\u{118ff}', '\u{118ff}'),
-        ('\u{11ac0}', '\u{11af8}'), ('\u{12000}', '\u{12399}'), ('\u{12400}',
-        '\u{1246e}'), ('\u{12480}', '\u{12543}'), ('\u{13000}', '\u{1342e}'),
-        ('\u{14400}', '\u{14646}'), ('\u{16800}', '\u{16a38}'), ('\u{16a40}',
-        '\u{16a5e}'), ('\u{16ad0}', '\u{16aed}'), ('\u{16b00}', '\u{16b36}'),
-        ('\u{16b40}', '\u{16b43}'), ('\u{16b63}', '\u{16b77}'), ('\u{16b7d}',
-        '\u{16b8f}'), ('\u{16f00}', '\u{16f44}'), ('\u{16f50}', '\u{16f7e}'),
-        ('\u{16f93}', '\u{16f9f}'), ('\u{1b000}', '\u{1b001}'), ('\u{1bc00}',
-        '\u{1bc6a}'), ('\u{1bc70}', '\u{1bc7c}'), ('\u{1bc80}', '\u{1bc88}'),
-        ('\u{1bc90}', '\u{1bc99}'), ('\u{1bc9e}', '\u{1bc9e}'), ('\u{1d400}',
-        '\u{1d454}'), ('\u{1d456}', '\u{1d49c}'), ('\u{1d49e}', '\u{1d49f}'),
-        ('\u{1d4a2}', '\u{1d4a2}'), ('\u{1d4a5}', '\u{1d4a6}'), ('\u{1d4a9}',
-        '\u{1d4ac}'), ('\u{1d4ae}', '\u{1d4b9}'), ('\u{1d4bb}', '\u{1d4bb}'),
-        ('\u{1d4bd}', '\u{1d4c3}'), ('\u{1d4c5}', '\u{1d505}'), ('\u{1d507}',
-        '\u{1d50a}'), ('\u{1d50d}', '\u{1d514}'), ('\u{1d516}', '\u{1d51c}'),
-        ('\u{1d51e}', '\u{1d539}'), ('\u{1d53b}', '\u{1d53e}'), ('\u{1d540}',
-        '\u{1d544}'), ('\u{1d546}', '\u{1d546}'), ('\u{1d54a}', '\u{1d550}'),
-        ('\u{1d552}', '\u{1d6a5}'), ('\u{1d6a8}', '\u{1d6c0}'), ('\u{1d6c2}',
-        '\u{1d6da}'), ('\u{1d6dc}', '\u{1d6fa}'), ('\u{1d6fc}', '\u{1d714}'),
-        ('\u{1d716}', '\u{1d734}'), ('\u{1d736}', '\u{1d74e}'), ('\u{1d750}',
-        '\u{1d76e}'), ('\u{1d770}', '\u{1d788}'), ('\u{1d78a}', '\u{1d7a8}'),
-        ('\u{1d7aa}', '\u{1d7c2}'), ('\u{1d7c4}', '\u{1d7cb}'), ('\u{1e800}',
-        '\u{1e8c4}'), ('\u{1ee00}', '\u{1ee03}'), ('\u{1ee05}', '\u{1ee1f}'),
-        ('\u{1ee21}', '\u{1ee22}'), ('\u{1ee24}', '\u{1ee24}'), ('\u{1ee27}',
-        '\u{1ee27}'), ('\u{1ee29}', '\u{1ee32}'), ('\u{1ee34}', '\u{1ee37}'),
-        ('\u{1ee39}', '\u{1ee39}'), ('\u{1ee3b}', '\u{1ee3b}'), ('\u{1ee42}',
-        '\u{1ee42}'), ('\u{1ee47}', '\u{1ee47}'), ('\u{1ee49}', '\u{1ee49}'),
-        ('\u{1ee4b}', '\u{1ee4b}'), ('\u{1ee4d}', '\u{1ee4f}'), ('\u{1ee51}',
-        '\u{1ee52}'), ('\u{1ee54}', '\u{1ee54}'), ('\u{1ee57}', '\u{1ee57}'),
-        ('\u{1ee59}', '\u{1ee59}'), ('\u{1ee5b}', '\u{1ee5b}'), ('\u{1ee5d}',
-        '\u{1ee5d}'), ('\u{1ee5f}', '\u{1ee5f}'), ('\u{1ee61}', '\u{1ee62}'),
-        ('\u{1ee64}', '\u{1ee64}'), ('\u{1ee67}', '\u{1ee6a}'), ('\u{1ee6c}',
-        '\u{1ee72}'), ('\u{1ee74}', '\u{1ee77}'), ('\u{1ee79}', '\u{1ee7c}'),
-        ('\u{1ee7e}', '\u{1ee7e}'), ('\u{1ee80}', '\u{1ee89}'), ('\u{1ee8b}',
-        '\u{1ee9b}'), ('\u{1eea1}', '\u{1eea3}'), ('\u{1eea5}', '\u{1eea9}'),
-        ('\u{1eeab}', '\u{1eebb}'), ('\u{1f130}', '\u{1f149}'), ('\u{1f150}',
-        '\u{1f169}'), ('\u{1f170}', '\u{1f189}'), ('\u{20000}', '\u{2a6d6}'),
-        ('\u{2a700}', '\u{2b734}'), ('\u{2b740}', '\u{2b81d}'), ('\u{2b820}',
-        '\u{2cea1}'), ('\u{2f800}', '\u{2fa1d}')
-    ];
-
-    pub const Default_Ignorable_Code_Point_table: &'static [(char, char)] = &[
-        ('\u{ad}', '\u{ad}'), ('\u{34f}', '\u{34f}'), ('\u{61c}', '\u{61c}'),
-        ('\u{115f}', '\u{1160}'), ('\u{17b4}', '\u{17b5}'), ('\u{180b}',
-        '\u{180e}'), ('\u{200b}', '\u{200f}'), ('\u{202a}', '\u{202e}'),
-        ('\u{2060}', '\u{206f}'), ('\u{3164}', '\u{3164}'), ('\u{fe00}',
-        '\u{fe0f}'), ('\u{feff}', '\u{feff}'), ('\u{ffa0}', '\u{ffa0}'),
-        ('\u{fff0}', '\u{fff8}'), ('\u{1bca0}', '\u{1bca3}'), ('\u{1d173}',
-        '\u{1d17a}'), ('\u{e0000}', '\u{e0fff}')
-    ];
+/// Looks up a Unicode class given a query. If one doesn't exist, then an
+/// error is returned.
+pub fn class<'a>(query: ClassQuery<'a>) -> Result<hir::ClassUnicode> {
+    use self::CanonicalClassQuery::*;
 
-    pub const Grapheme_Extend_table: &'static [(char, char)] = &[
-        ('\u{300}', '\u{36f}'), ('\u{483}', '\u{489}'), ('\u{591}', '\u{5bd}'),
-        ('\u{5bf}', '\u{5bf}'), ('\u{5c1}', '\u{5c2}'), ('\u{5c4}', '\u{5c5}'),
-        ('\u{5c7}', '\u{5c7}'), ('\u{610}', '\u{61a}'), ('\u{64b}', '\u{65f}'),
-        ('\u{670}', '\u{670}'), ('\u{6d6}', '\u{6dc}'), ('\u{6df}', '\u{6e4}'),
-        ('\u{6e7}', '\u{6e8}'), ('\u{6ea}', '\u{6ed}'), ('\u{711}', '\u{711}'),
-        ('\u{730}', '\u{74a}'), ('\u{7a6}', '\u{7b0}'), ('\u{7eb}', '\u{7f3}'),
-        ('\u{816}', '\u{819}'), ('\u{81b}', '\u{823}'), ('\u{825}', '\u{827}'),
-        ('\u{829}', '\u{82d}'), ('\u{859}', '\u{85b}'), ('\u{8e3}', '\u{902}'),
-        ('\u{93a}', '\u{93a}'), ('\u{93c}', '\u{93c}'), ('\u{941}', '\u{948}'),
-        ('\u{94d}', '\u{94d}'), ('\u{951}', '\u{957}'), ('\u{962}', '\u{963}'),
-        ('\u{981}', '\u{981}'), ('\u{9bc}', '\u{9bc}'), ('\u{9be}', '\u{9be}'),
-        ('\u{9c1}', '\u{9c4}'), ('\u{9cd}', '\u{9cd}'), ('\u{9d7}', '\u{9d7}'),
-        ('\u{9e2}', '\u{9e3}'), ('\u{a01}', '\u{a02}'), ('\u{a3c}', '\u{a3c}'),
-        ('\u{a41}', '\u{a42}'), ('\u{a47}', '\u{a48}'), ('\u{a4b}', '\u{a4d}'),
-        ('\u{a51}', '\u{a51}'), ('\u{a70}', '\u{a71}'), ('\u{a75}', '\u{a75}'),
-        ('\u{a81}', '\u{a82}'), ('\u{abc}', '\u{abc}'), ('\u{ac1}', '\u{ac5}'),
-        ('\u{ac7}', '\u{ac8}'), ('\u{acd}', '\u{acd}'), ('\u{ae2}', '\u{ae3}'),
-        ('\u{b01}', '\u{b01}'), ('\u{b3c}', '\u{b3c}'), ('\u{b3e}', '\u{b3f}'),
-        ('\u{b41}', '\u{b44}'), ('\u{b4d}', '\u{b4d}'), ('\u{b56}', '\u{b57}'),
-        ('\u{b62}', '\u{b63}'), ('\u{b82}', '\u{b82}'), ('\u{bbe}', '\u{bbe}'),
-        ('\u{bc0}', '\u{bc0}'), ('\u{bcd}', '\u{bcd}'), ('\u{bd7}', '\u{bd7}'),
-        ('\u{c00}', '\u{c00}'), ('\u{c3e}', '\u{c40}'), ('\u{c46}', '\u{c48}'),
-        ('\u{c4a}', '\u{c4d}'), ('\u{c55}', '\u{c56}'), ('\u{c62}', '\u{c63}'),
-        ('\u{c81}', '\u{c81}'), ('\u{cbc}', '\u{cbc}'), ('\u{cbf}', '\u{cbf}'),
-        ('\u{cc2}', '\u{cc2}'), ('\u{cc6}', '\u{cc6}'), ('\u{ccc}', '\u{ccd}'),
-        ('\u{cd5}', '\u{cd6}'), ('\u{ce2}', '\u{ce3}'), ('\u{d01}', '\u{d01}'),
-        ('\u{d3e}', '\u{d3e}'), ('\u{d41}', '\u{d44}'), ('\u{d4d}', '\u{d4d}'),
-        ('\u{d57}', '\u{d57}'), ('\u{d62}', '\u{d63}'), ('\u{dca}', '\u{dca}'),
-        ('\u{dcf}', '\u{dcf}'), ('\u{dd2}', '\u{dd4}'), ('\u{dd6}', '\u{dd6}'),
-        ('\u{ddf}', '\u{ddf}'), ('\u{e31}', '\u{e31}'), ('\u{e34}', '\u{e3a}'),
-        ('\u{e47}', '\u{e4e}'), ('\u{eb1}', '\u{eb1}'), ('\u{eb4}', '\u{eb9}'),
-        ('\u{ebb}', '\u{ebc}'), ('\u{ec8}', '\u{ecd}'), ('\u{f18}', '\u{f19}'),
-        ('\u{f35}', '\u{f35}'), ('\u{f37}', '\u{f37}'), ('\u{f39}', '\u{f39}'),
-        ('\u{f71}', '\u{f7e}'), ('\u{f80}', '\u{f84}'), ('\u{f86}', '\u{f87}'),
-        ('\u{f8d}', '\u{f97}'), ('\u{f99}', '\u{fbc}'), ('\u{fc6}', '\u{fc6}'),
-        ('\u{102d}', '\u{1030}'), ('\u{1032}', '\u{1037}'), ('\u{1039}',
-        '\u{103a}'), ('\u{103d}', '\u{103e}'), ('\u{1058}', '\u{1059}'),
-        ('\u{105e}', '\u{1060}'), ('\u{1071}', '\u{1074}'), ('\u{1082}',
-        '\u{1082}'), ('\u{1085}', '\u{1086}'), ('\u{108d}', '\u{108d}'),
-        ('\u{109d}', '\u{109d}'), ('\u{135d}', '\u{135f}'), ('\u{1712}',
-        '\u{1714}'), ('\u{1732}', '\u{1734}'), ('\u{1752}', '\u{1753}'),
-        ('\u{1772}', '\u{1773}'), ('\u{17b4}', '\u{17b5}'), ('\u{17b7}',
-        '\u{17bd}'), ('\u{17c6}', '\u{17c6}'), ('\u{17c9}', '\u{17d3}'),
-        ('\u{17dd}', '\u{17dd}'), ('\u{180b}', '\u{180d}'), ('\u{18a9}',
-        '\u{18a9}'), ('\u{1920}', '\u{1922}'), ('\u{1927}', '\u{1928}'),
-        ('\u{1932}', '\u{1932}'), ('\u{1939}', '\u{193b}'), ('\u{1a17}',
-        '\u{1a18}'), ('\u{1a1b}', '\u{1a1b}'), ('\u{1a56}', '\u{1a56}'),
-        ('\u{1a58}', '\u{1a5e}'), ('\u{1a60}', '\u{1a60}'), ('\u{1a62}',
-        '\u{1a62}'), ('\u{1a65}', '\u{1a6c}'), ('\u{1a73}', '\u{1a7c}'),
-        ('\u{1a7f}', '\u{1a7f}'), ('\u{1ab0}', '\u{1abe}'), ('\u{1b00}',
-        '\u{1b03}'), ('\u{1b34}', '\u{1b34}'), ('\u{1b36}', '\u{1b3a}'),
-        ('\u{1b3c}', '\u{1b3c}'), ('\u{1b42}', '\u{1b42}'), ('\u{1b6b}',
-        '\u{1b73}'), ('\u{1b80}', '\u{1b81}'), ('\u{1ba2}', '\u{1ba5}'),
-        ('\u{1ba8}', '\u{1ba9}'), ('\u{1bab}', '\u{1bad}'), ('\u{1be6}',
-        '\u{1be6}'), ('\u{1be8}', '\u{1be9}'), ('\u{1bed}', '\u{1bed}'),
-        ('\u{1bef}', '\u{1bf1}'), ('\u{1c2c}', '\u{1c33}'), ('\u{1c36}',
-        '\u{1c37}'), ('\u{1cd0}', '\u{1cd2}'), ('\u{1cd4}', '\u{1ce0}'),
-        ('\u{1ce2}', '\u{1ce8}'), ('\u{1ced}', '\u{1ced}'), ('\u{1cf4}',
-        '\u{1cf4}'), ('\u{1cf8}', '\u{1cf9}'), ('\u{1dc0}', '\u{1df5}'),
-        ('\u{1dfc}', '\u{1dff}'), ('\u{200c}', '\u{200d}'), ('\u{20d0}',
-        '\u{20f0}'), ('\u{2cef}', '\u{2cf1}'), ('\u{2d7f}', '\u{2d7f}'),
-        ('\u{2de0}', '\u{2dff}'), ('\u{302a}', '\u{302f}'), ('\u{3099}',
-        '\u{309a}'), ('\u{a66f}', '\u{a672}'), ('\u{a674}', '\u{a67d}'),
-        ('\u{a69e}', '\u{a69f}'), ('\u{a6f0}', '\u{a6f1}'), ('\u{a802}',
-        '\u{a802}'), ('\u{a806}', '\u{a806}'), ('\u{a80b}', '\u{a80b}'),
-        ('\u{a825}', '\u{a826}'), ('\u{a8c4}', '\u{a8c4}'), ('\u{a8e0}',
-        '\u{a8f1}'), ('\u{a926}', '\u{a92d}'), ('\u{a947}', '\u{a951}'),
-        ('\u{a980}', '\u{a982}'), ('\u{a9b3}', '\u{a9b3}'), ('\u{a9b6}',
-        '\u{a9b9}'), ('\u{a9bc}', '\u{a9bc}'), ('\u{a9e5}', '\u{a9e5}'),
-        ('\u{aa29}', '\u{aa2e}'), ('\u{aa31}', '\u{aa32}'), ('\u{aa35}',
-        '\u{aa36}'), ('\u{aa43}', '\u{aa43}'), ('\u{aa4c}', '\u{aa4c}'),
-        ('\u{aa7c}', '\u{aa7c}'), ('\u{aab0}', '\u{aab0}'), ('\u{aab2}',
-        '\u{aab4}'), ('\u{aab7}', '\u{aab8}'), ('\u{aabe}', '\u{aabf}'),
-        ('\u{aac1}', '\u{aac1}'), ('\u{aaec}', '\u{aaed}'), ('\u{aaf6}',
-        '\u{aaf6}'), ('\u{abe5}', '\u{abe5}'), ('\u{abe8}', '\u{abe8}'),
-        ('\u{abed}', '\u{abed}'), ('\u{fb1e}', '\u{fb1e}'), ('\u{fe00}',
-        '\u{fe0f}'), ('\u{fe20}', '\u{fe2f}'), ('\u{ff9e}', '\u{ff9f}'),
-        ('\u{101fd}', '\u{101fd}'), ('\u{102e0}', '\u{102e0}'), ('\u{10376}',
-        '\u{1037a}'), ('\u{10a01}', '\u{10a03}'), ('\u{10a05}', '\u{10a06}'),
-        ('\u{10a0c}', '\u{10a0f}'), ('\u{10a38}', '\u{10a3a}'), ('\u{10a3f}',
-        '\u{10a3f}'), ('\u{10ae5}', '\u{10ae6}'), ('\u{11001}', '\u{11001}'),
-        ('\u{11038}', '\u{11046}'), ('\u{1107f}', '\u{11081}'), ('\u{110b3}',
-        '\u{110b6}'), ('\u{110b9}', '\u{110ba}'), ('\u{11100}', '\u{11102}'),
-        ('\u{11127}', '\u{1112b}'), ('\u{1112d}', '\u{11134}'), ('\u{11173}',
-        '\u{11173}'), ('\u{11180}', '\u{11181}'), ('\u{111b6}', '\u{111be}'),
-        ('\u{111ca}', '\u{111cc}'), ('\u{1122f}', '\u{11231}'), ('\u{11234}',
-        '\u{11234}'), ('\u{11236}', '\u{11237}'), ('\u{112df}', '\u{112df}'),
-        ('\u{112e3}', '\u{112ea}'), ('\u{11300}', '\u{11301}'), ('\u{1133c}',
-        '\u{1133c}'), ('\u{1133e}', '\u{1133e}'), ('\u{11340}', '\u{11340}'),
-        ('\u{11357}', '\u{11357}'), ('\u{11366}', '\u{1136c}'), ('\u{11370}',
-        '\u{11374}'), ('\u{114b0}', '\u{114b0}'), ('\u{114b3}', '\u{114b8}'),
-        ('\u{114ba}', '\u{114ba}'), ('\u{114bd}', '\u{114bd}'), ('\u{114bf}',
-        '\u{114c0}'), ('\u{114c2}', '\u{114c3}'), ('\u{115af}', '\u{115af}'),
-        ('\u{115b2}', '\u{115b5}'), ('\u{115bc}', '\u{115bd}'), ('\u{115bf}',
-        '\u{115c0}'), ('\u{115dc}', '\u{115dd}'), ('\u{11633}', '\u{1163a}'),
-        ('\u{1163d}', '\u{1163d}'), ('\u{1163f}', '\u{11640}'), ('\u{116ab}',
-        '\u{116ab}'), ('\u{116ad}', '\u{116ad}'), ('\u{116b0}', '\u{116b5}'),
-        ('\u{116b7}', '\u{116b7}'), ('\u{1171d}', '\u{1171f}'), ('\u{11722}',
-        '\u{11725}'), ('\u{11727}', '\u{1172b}'), ('\u{16af0}', '\u{16af4}'),
-        ('\u{16b30}', '\u{16b36}'), ('\u{16f8f}', '\u{16f92}'), ('\u{1bc9d}',
-        '\u{1bc9e}'), ('\u{1d165}', '\u{1d165}'), ('\u{1d167}', '\u{1d169}'),
-        ('\u{1d16e}', '\u{1d172}'), ('\u{1d17b}', '\u{1d182}'), ('\u{1d185}',
-        '\u{1d18b}'), ('\u{1d1aa}', '\u{1d1ad}'), ('\u{1d242}', '\u{1d244}'),
-        ('\u{1da00}', '\u{1da36}'), ('\u{1da3b}', '\u{1da6c}'), ('\u{1da75}',
-        '\u{1da75}'), ('\u{1da84}', '\u{1da84}'), ('\u{1da9b}', '\u{1da9f}'),
-        ('\u{1daa1}', '\u{1daaf}'), ('\u{1e8d0}', '\u{1e8d6}'), ('\u{e0100}',
-        '\u{e01ef}')
-    ];
-
-    pub const Lowercase_table: &'static [(char, char)] = &[
-        ('\u{61}', '\u{7a}'), ('\u{aa}', '\u{aa}'), ('\u{b5}', '\u{b5}'),
-        ('\u{ba}', '\u{ba}'), ('\u{df}', '\u{f6}'), ('\u{f8}', '\u{ff}'),
-        ('\u{101}', '\u{101}'), ('\u{103}', '\u{103}'), ('\u{105}', '\u{105}'),
-        ('\u{107}', '\u{107}'), ('\u{109}', '\u{109}'), ('\u{10b}', '\u{10b}'),
-        ('\u{10d}', '\u{10d}'), ('\u{10f}', '\u{10f}'), ('\u{111}', '\u{111}'),
-        ('\u{113}', '\u{113}'), ('\u{115}', '\u{115}'), ('\u{117}', '\u{117}'),
-        ('\u{119}', '\u{119}'), ('\u{11b}', '\u{11b}'), ('\u{11d}', '\u{11d}'),
-        ('\u{11f}', '\u{11f}'), ('\u{121}', '\u{121}'), ('\u{123}', '\u{123}'),
-        ('\u{125}', '\u{125}'), ('\u{127}', '\u{127}'), ('\u{129}', '\u{129}'),
-        ('\u{12b}', '\u{12b}'), ('\u{12d}', '\u{12d}'), ('\u{12f}', '\u{12f}'),
-        ('\u{131}', '\u{131}'), ('\u{133}', '\u{133}'), ('\u{135}', '\u{135}'),
-        ('\u{137}', '\u{138}'), ('\u{13a}', '\u{13a}'), ('\u{13c}', '\u{13c}'),
-        ('\u{13e}', '\u{13e}'), ('\u{140}', '\u{140}'), ('\u{142}', '\u{142}'),
-        ('\u{144}', '\u{144}'), ('\u{146}', '\u{146}'), ('\u{148}', '\u{149}'),
-        ('\u{14b}', '\u{14b}'), ('\u{14d}', '\u{14d}'), ('\u{14f}', '\u{14f}'),
-        ('\u{151}', '\u{151}'), ('\u{153}', '\u{153}'), ('\u{155}', '\u{155}'),
-        ('\u{157}', '\u{157}'), ('\u{159}', '\u{159}'), ('\u{15b}', '\u{15b}'),
-        ('\u{15d}', '\u{15d}'), ('\u{15f}', '\u{15f}'), ('\u{161}', '\u{161}'),
-        ('\u{163}', '\u{163}'), ('\u{165}', '\u{165}'), ('\u{167}', '\u{167}'),
-        ('\u{169}', '\u{169}'), ('\u{16b}', '\u{16b}'), ('\u{16d}', '\u{16d}'),
-        ('\u{16f}', '\u{16f}'), ('\u{171}', '\u{171}'), ('\u{173}', '\u{173}'),
-        ('\u{175}', '\u{175}'), ('\u{177}', '\u{177}'), ('\u{17a}', '\u{17a}'),
-        ('\u{17c}', '\u{17c}'), ('\u{17e}', '\u{180}'), ('\u{183}', '\u{183}'),
-        ('\u{185}', '\u{185}'), ('\u{188}', '\u{188}'), ('\u{18c}', '\u{18d}'),
-        ('\u{192}', '\u{192}'), ('\u{195}', '\u{195}'), ('\u{199}', '\u{19b}'),
-        ('\u{19e}', '\u{19e}'), ('\u{1a1}', '\u{1a1}'), ('\u{1a3}', '\u{1a3}'),
-        ('\u{1a5}', '\u{1a5}'), ('\u{1a8}', '\u{1a8}'), ('\u{1aa}', '\u{1ab}'),
-        ('\u{1ad}', '\u{1ad}'), ('\u{1b0}', '\u{1b0}'), ('\u{1b4}', '\u{1b4}'),
-        ('\u{1b6}', '\u{1b6}'), ('\u{1b9}', '\u{1ba}'), ('\u{1bd}', '\u{1bf}'),
-        ('\u{1c6}', '\u{1c6}'), ('\u{1c9}', '\u{1c9}'), ('\u{1cc}', '\u{1cc}'),
-        ('\u{1ce}', '\u{1ce}'), ('\u{1d0}', '\u{1d0}'), ('\u{1d2}', '\u{1d2}'),
-        ('\u{1d4}', '\u{1d4}'), ('\u{1d6}', '\u{1d6}'), ('\u{1d8}', '\u{1d8}'),
-        ('\u{1da}', '\u{1da}'), ('\u{1dc}', '\u{1dd}'), ('\u{1df}', '\u{1df}'),
-        ('\u{1e1}', '\u{1e1}'), ('\u{1e3}', '\u{1e3}'), ('\u{1e5}', '\u{1e5}'),
-        ('\u{1e7}', '\u{1e7}'), ('\u{1e9}', '\u{1e9}'), ('\u{1eb}', '\u{1eb}'),
-        ('\u{1ed}', '\u{1ed}'), ('\u{1ef}', '\u{1f0}'), ('\u{1f3}', '\u{1f3}'),
-        ('\u{1f5}', '\u{1f5}'), ('\u{1f9}', '\u{1f9}'), ('\u{1fb}', '\u{1fb}'),
-        ('\u{1fd}', '\u{1fd}'), ('\u{1ff}', '\u{1ff}'), ('\u{201}', '\u{201}'),
-        ('\u{203}', '\u{203}'), ('\u{205}', '\u{205}'), ('\u{207}', '\u{207}'),
-        ('\u{209}', '\u{209}'), ('\u{20b}', '\u{20b}'), ('\u{20d}', '\u{20d}'),
-        ('\u{20f}', '\u{20f}'), ('\u{211}', '\u{211}'), ('\u{213}', '\u{213}'),
-        ('\u{215}', '\u{215}'), ('\u{217}', '\u{217}'), ('\u{219}', '\u{219}'),
-        ('\u{21b}', '\u{21b}'), ('\u{21d}', '\u{21d}'), ('\u{21f}', '\u{21f}'),
-        ('\u{221}', '\u{221}'), ('\u{223}', '\u{223}'), ('\u{225}', '\u{225}'),
-        ('\u{227}', '\u{227}'), ('\u{229}', '\u{229}'), ('\u{22b}', '\u{22b}'),
-        ('\u{22d}', '\u{22d}'), ('\u{22f}', '\u{22f}'), ('\u{231}', '\u{231}'),
-        ('\u{233}', '\u{239}'), ('\u{23c}', '\u{23c}'), ('\u{23f}', '\u{240}'),
-        ('\u{242}', '\u{242}'), ('\u{247}', '\u{247}'), ('\u{249}', '\u{249}'),
-        ('\u{24b}', '\u{24b}'), ('\u{24d}', '\u{24d}'), ('\u{24f}', '\u{293}'),
-        ('\u{295}', '\u{2b8}'), ('\u{2c0}', '\u{2c1}'), ('\u{2e0}', '\u{2e4}'),
-        ('\u{345}', '\u{345}'), ('\u{371}', '\u{371}'), ('\u{373}', '\u{373}'),
-        ('\u{377}', '\u{377}'), ('\u{37a}', '\u{37d}'), ('\u{390}', '\u{390}'),
-        ('\u{3ac}', '\u{3ce}'), ('\u{3d0}', '\u{3d1}'), ('\u{3d5}', '\u{3d7}'),
-        ('\u{3d9}', '\u{3d9}'), ('\u{3db}', '\u{3db}'), ('\u{3dd}', '\u{3dd}'),
-        ('\u{3df}', '\u{3df}'), ('\u{3e1}', '\u{3e1}'), ('\u{3e3}', '\u{3e3}'),
-        ('\u{3e5}', '\u{3e5}'), ('\u{3e7}', '\u{3e7}'), ('\u{3e9}', '\u{3e9}'),
-        ('\u{3eb}', '\u{3eb}'), ('\u{3ed}', '\u{3ed}'), ('\u{3ef}', '\u{3f3}'),
-        ('\u{3f5}', '\u{3f5}'), ('\u{3f8}', '\u{3f8}'), ('\u{3fb}', '\u{3fc}'),
-        ('\u{430}', '\u{45f}'), ('\u{461}', '\u{461}'), ('\u{463}', '\u{463}'),
-        ('\u{465}', '\u{465}'), ('\u{467}', '\u{467}'), ('\u{469}', '\u{469}'),
-        ('\u{46b}', '\u{46b}'), ('\u{46d}', '\u{46d}'), ('\u{46f}', '\u{46f}'),
-        ('\u{471}', '\u{471}'), ('\u{473}', '\u{473}'), ('\u{475}', '\u{475}'),
-        ('\u{477}', '\u{477}'), ('\u{479}', '\u{479}'), ('\u{47b}', '\u{47b}'),
-        ('\u{47d}', '\u{47d}'), ('\u{47f}', '\u{47f}'), ('\u{481}', '\u{481}'),
-        ('\u{48b}', '\u{48b}'), ('\u{48d}', '\u{48d}'), ('\u{48f}', '\u{48f}'),
-        ('\u{491}', '\u{491}'), ('\u{493}', '\u{493}'), ('\u{495}', '\u{495}'),
-        ('\u{497}', '\u{497}'), ('\u{499}', '\u{499}'), ('\u{49b}', '\u{49b}'),
-        ('\u{49d}', '\u{49d}'), ('\u{49f}', '\u{49f}'), ('\u{4a1}', '\u{4a1}'),
-        ('\u{4a3}', '\u{4a3}'), ('\u{4a5}', '\u{4a5}'), ('\u{4a7}', '\u{4a7}'),
-        ('\u{4a9}', '\u{4a9}'), ('\u{4ab}', '\u{4ab}'), ('\u{4ad}', '\u{4ad}'),
-        ('\u{4af}', '\u{4af}'), ('\u{4b1}', '\u{4b1}'), ('\u{4b3}', '\u{4b3}'),
-        ('\u{4b5}', '\u{4b5}'), ('\u{4b7}', '\u{4b7}'), ('\u{4b9}', '\u{4b9}'),
-        ('\u{4bb}', '\u{4bb}'), ('\u{4bd}', '\u{4bd}'), ('\u{4bf}', '\u{4bf}'),
-        ('\u{4c2}', '\u{4c2}'), ('\u{4c4}', '\u{4c4}'), ('\u{4c6}', '\u{4c6}'),
-        ('\u{4c8}', '\u{4c8}'), ('\u{4ca}', '\u{4ca}'), ('\u{4cc}', '\u{4cc}'),
-        ('\u{4ce}', '\u{4cf}'), ('\u{4d1}', '\u{4d1}'), ('\u{4d3}', '\u{4d3}'),
-        ('\u{4d5}', '\u{4d5}'), ('\u{4d7}', '\u{4d7}'), ('\u{4d9}', '\u{4d9}'),
-        ('\u{4db}', '\u{4db}'), ('\u{4dd}', '\u{4dd}'), ('\u{4df}', '\u{4df}'),
-        ('\u{4e1}', '\u{4e1}'), ('\u{4e3}', '\u{4e3}'), ('\u{4e5}', '\u{4e5}'),
-        ('\u{4e7}', '\u{4e7}'), ('\u{4e9}', '\u{4e9}'), ('\u{4eb}', '\u{4eb}'),
-        ('\u{4ed}', '\u{4ed}'), ('\u{4ef}', '\u{4ef}'), ('\u{4f1}', '\u{4f1}'),
-        ('\u{4f3}', '\u{4f3}'), ('\u{4f5}', '\u{4f5}'), ('\u{4f7}', '\u{4f7}'),
-        ('\u{4f9}', '\u{4f9}'), ('\u{4fb}', '\u{4fb}'), ('\u{4fd}', '\u{4fd}'),
-        ('\u{4ff}', '\u{4ff}'), ('\u{501}', '\u{501}'), ('\u{503}', '\u{503}'),
-        ('\u{505}', '\u{505}'), ('\u{507}', '\u{507}'), ('\u{509}', '\u{509}'),
-        ('\u{50b}', '\u{50b}'), ('\u{50d}', '\u{50d}'), ('\u{50f}', '\u{50f}'),
-        ('\u{511}', '\u{511}'), ('\u{513}', '\u{513}'), ('\u{515}', '\u{515}'),
-        ('\u{517}', '\u{517}'), ('\u{519}', '\u{519}'), ('\u{51b}', '\u{51b}'),
-        ('\u{51d}', '\u{51d}'), ('\u{51f}', '\u{51f}'), ('\u{521}', '\u{521}'),
-        ('\u{523}', '\u{523}'), ('\u{525}', '\u{525}'), ('\u{527}', '\u{527}'),
-        ('\u{529}', '\u{529}'), ('\u{52b}', '\u{52b}'), ('\u{52d}', '\u{52d}'),
-        ('\u{52f}', '\u{52f}'), ('\u{561}', '\u{587}'), ('\u{13f8}',
-        '\u{13fd}'), ('\u{1d00}', '\u{1dbf}'), ('\u{1e01}', '\u{1e01}'),
-        ('\u{1e03}', '\u{1e03}'), ('\u{1e05}', '\u{1e05}'), ('\u{1e07}',
-        '\u{1e07}'), ('\u{1e09}', '\u{1e09}'), ('\u{1e0b}', '\u{1e0b}'),
-        ('\u{1e0d}', '\u{1e0d}'), ('\u{1e0f}', '\u{1e0f}'), ('\u{1e11}',
-        '\u{1e11}'), ('\u{1e13}', '\u{1e13}'), ('\u{1e15}', '\u{1e15}'),
-        ('\u{1e17}', '\u{1e17}'), ('\u{1e19}', '\u{1e19}'), ('\u{1e1b}',
-        '\u{1e1b}'), ('\u{1e1d}', '\u{1e1d}'), ('\u{1e1f}', '\u{1e1f}'),
-        ('\u{1e21}', '\u{1e21}'), ('\u{1e23}', '\u{1e23}'), ('\u{1e25}',
-        '\u{1e25}'), ('\u{1e27}', '\u{1e27}'), ('\u{1e29}', '\u{1e29}'),
-        ('\u{1e2b}', '\u{1e2b}'), ('\u{1e2d}', '\u{1e2d}'), ('\u{1e2f}',
-        '\u{1e2f}'), ('\u{1e31}', '\u{1e31}'), ('\u{1e33}', '\u{1e33}'),
-        ('\u{1e35}', '\u{1e35}'), ('\u{1e37}', '\u{1e37}'), ('\u{1e39}',
-        '\u{1e39}'), ('\u{1e3b}', '\u{1e3b}'), ('\u{1e3d}', '\u{1e3d}'),
-        ('\u{1e3f}', '\u{1e3f}'), ('\u{1e41}', '\u{1e41}'), ('\u{1e43}',
-        '\u{1e43}'), ('\u{1e45}', '\u{1e45}'), ('\u{1e47}', '\u{1e47}'),
-        ('\u{1e49}', '\u{1e49}'), ('\u{1e4b}', '\u{1e4b}'), ('\u{1e4d}',
-        '\u{1e4d}'), ('\u{1e4f}', '\u{1e4f}'), ('\u{1e51}', '\u{1e51}'),
-        ('\u{1e53}', '\u{1e53}'), ('\u{1e55}', '\u{1e55}'), ('\u{1e57}',
-        '\u{1e57}'), ('\u{1e59}', '\u{1e59}'), ('\u{1e5b}', '\u{1e5b}'),
-        ('\u{1e5d}', '\u{1e5d}'), ('\u{1e5f}', '\u{1e5f}'), ('\u{1e61}',
-        '\u{1e61}'), ('\u{1e63}', '\u{1e63}'), ('\u{1e65}', '\u{1e65}'),
-        ('\u{1e67}', '\u{1e67}'), ('\u{1e69}', '\u{1e69}'), ('\u{1e6b}',
-        '\u{1e6b}'), ('\u{1e6d}', '\u{1e6d}'), ('\u{1e6f}', '\u{1e6f}'),
-        ('\u{1e71}', '\u{1e71}'), ('\u{1e73}', '\u{1e73}'), ('\u{1e75}',
-        '\u{1e75}'), ('\u{1e77}', '\u{1e77}'), ('\u{1e79}', '\u{1e79}'),
-        ('\u{1e7b}', '\u{1e7b}'), ('\u{1e7d}', '\u{1e7d}'), ('\u{1e7f}',
-        '\u{1e7f}'), ('\u{1e81}', '\u{1e81}'), ('\u{1e83}', '\u{1e83}'),
-        ('\u{1e85}', '\u{1e85}'), ('\u{1e87}', '\u{1e87}'), ('\u{1e89}',
-        '\u{1e89}'), ('\u{1e8b}', '\u{1e8b}'), ('\u{1e8d}', '\u{1e8d}'),
-        ('\u{1e8f}', '\u{1e8f}'), ('\u{1e91}', '\u{1e91}'), ('\u{1e93}',
-        '\u{1e93}'), ('\u{1e95}', '\u{1e9d}'), ('\u{1e9f}', '\u{1e9f}'),
-        ('\u{1ea1}', '\u{1ea1}'), ('\u{1ea3}', '\u{1ea3}'), ('\u{1ea5}',
-        '\u{1ea5}'), ('\u{1ea7}', '\u{1ea7}'), ('\u{1ea9}', '\u{1ea9}'),
-        ('\u{1eab}', '\u{1eab}'), ('\u{1ead}', '\u{1ead}'), ('\u{1eaf}',
-        '\u{1eaf}'), ('\u{1eb1}', '\u{1eb1}'), ('\u{1eb3}', '\u{1eb3}'),
-        ('\u{1eb5}', '\u{1eb5}'), ('\u{1eb7}', '\u{1eb7}'), ('\u{1eb9}',
-        '\u{1eb9}'), ('\u{1ebb}', '\u{1ebb}'), ('\u{1ebd}', '\u{1ebd}'),
-        ('\u{1ebf}', '\u{1ebf}'), ('\u{1ec1}', '\u{1ec1}'), ('\u{1ec3}',
-        '\u{1ec3}'), ('\u{1ec5}', '\u{1ec5}'), ('\u{1ec7}', '\u{1ec7}'),
-        ('\u{1ec9}', '\u{1ec9}'), ('\u{1ecb}', '\u{1ecb}'), ('\u{1ecd}',
-        '\u{1ecd}'), ('\u{1ecf}', '\u{1ecf}'), ('\u{1ed1}', '\u{1ed1}'),
-        ('\u{1ed3}', '\u{1ed3}'), ('\u{1ed5}', '\u{1ed5}'), ('\u{1ed7}',
-        '\u{1ed7}'), ('\u{1ed9}', '\u{1ed9}'), ('\u{1edb}', '\u{1edb}'),
-        ('\u{1edd}', '\u{1edd}'), ('\u{1edf}', '\u{1edf}'), ('\u{1ee1}',
-        '\u{1ee1}'), ('\u{1ee3}', '\u{1ee3}'), ('\u{1ee5}', '\u{1ee5}'),
-        ('\u{1ee7}', '\u{1ee7}'), ('\u{1ee9}', '\u{1ee9}'), ('\u{1eeb}',
-        '\u{1eeb}'), ('\u{1eed}', '\u{1eed}'), ('\u{1eef}', '\u{1eef}'),
-        ('\u{1ef1}', '\u{1ef1}'), ('\u{1ef3}', '\u{1ef3}'), ('\u{1ef5}',
-        '\u{1ef5}'), ('\u{1ef7}', '\u{1ef7}'), ('\u{1ef9}', '\u{1ef9}'),
-        ('\u{1efb}', '\u{1efb}'), ('\u{1efd}', '\u{1efd}'), ('\u{1eff}',
-        '\u{1f07}'), ('\u{1f10}', '\u{1f15}'), ('\u{1f20}', '\u{1f27}'),
-        ('\u{1f30}', '\u{1f37}'), ('\u{1f40}', '\u{1f45}'), ('\u{1f50}',
-        '\u{1f57}'), ('\u{1f60}', '\u{1f67}'), ('\u{1f70}', '\u{1f7d}'),
-        ('\u{1f80}', '\u{1f87}'), ('\u{1f90}', '\u{1f97}'), ('\u{1fa0}',
-        '\u{1fa7}'), ('\u{1fb0}', '\u{1fb4}'), ('\u{1fb6}', '\u{1fb7}'),
-        ('\u{1fbe}', '\u{1fbe}'), ('\u{1fc2}', '\u{1fc4}'), ('\u{1fc6}',
-        '\u{1fc7}'), ('\u{1fd0}', '\u{1fd3}'), ('\u{1fd6}', '\u{1fd7}'),
-        ('\u{1fe0}', '\u{1fe7}'), ('\u{1ff2}', '\u{1ff4}'), ('\u{1ff6}',
-        '\u{1ff7}'), ('\u{2071}', '\u{2071}'), ('\u{207f}', '\u{207f}'),
-        ('\u{2090}', '\u{209c}'), ('\u{210a}', '\u{210a}'), ('\u{210e}',
-        '\u{210f}'), ('\u{2113}', '\u{2113}'), ('\u{212f}', '\u{212f}'),
-        ('\u{2134}', '\u{2134}'), ('\u{2139}', '\u{2139}'), ('\u{213c}',
-        '\u{213d}'), ('\u{2146}', '\u{2149}'), ('\u{214e}', '\u{214e}'),
-        ('\u{2170}', '\u{217f}'), ('\u{2184}', '\u{2184}'), ('\u{24d0}',
-        '\u{24e9}'), ('\u{2c30}', '\u{2c5e}'), ('\u{2c61}', '\u{2c61}'),
-        ('\u{2c65}', '\u{2c66}'), ('\u{2c68}', '\u{2c68}'), ('\u{2c6a}',
-        '\u{2c6a}'), ('\u{2c6c}', '\u{2c6c}'), ('\u{2c71}', '\u{2c71}'),
-        ('\u{2c73}', '\u{2c74}'), ('\u{2c76}', '\u{2c7d}'), ('\u{2c81}',
-        '\u{2c81}'), ('\u{2c83}', '\u{2c83}'), ('\u{2c85}', '\u{2c85}'),
-        ('\u{2c87}', '\u{2c87}'), ('\u{2c89}', '\u{2c89}'), ('\u{2c8b}',
-        '\u{2c8b}'), ('\u{2c8d}', '\u{2c8d}'), ('\u{2c8f}', '\u{2c8f}'),
-        ('\u{2c91}', '\u{2c91}'), ('\u{2c93}', '\u{2c93}'), ('\u{2c95}',
-        '\u{2c95}'), ('\u{2c97}', '\u{2c97}'), ('\u{2c99}', '\u{2c99}'),
-        ('\u{2c9b}', '\u{2c9b}'), ('\u{2c9d}', '\u{2c9d}'), ('\u{2c9f}',
-        '\u{2c9f}'), ('\u{2ca1}', '\u{2ca1}'), ('\u{2ca3}', '\u{2ca3}'),
-        ('\u{2ca5}', '\u{2ca5}'), ('\u{2ca7}', '\u{2ca7}'), ('\u{2ca9}',
-        '\u{2ca9}'), ('\u{2cab}', '\u{2cab}'), ('\u{2cad}', '\u{2cad}'),
-        ('\u{2caf}', '\u{2caf}'), ('\u{2cb1}', '\u{2cb1}'), ('\u{2cb3}',
-        '\u{2cb3}'), ('\u{2cb5}', '\u{2cb5}'), ('\u{2cb7}', '\u{2cb7}'),
-        ('\u{2cb9}', '\u{2cb9}'), ('\u{2cbb}', '\u{2cbb}'), ('\u{2cbd}',
-        '\u{2cbd}'), ('\u{2cbf}', '\u{2cbf}'), ('\u{2cc1}', '\u{2cc1}'),
-        ('\u{2cc3}', '\u{2cc3}'), ('\u{2cc5}', '\u{2cc5}'), ('\u{2cc7}',
-        '\u{2cc7}'), ('\u{2cc9}', '\u{2cc9}'), ('\u{2ccb}', '\u{2ccb}'),
-        ('\u{2ccd}', '\u{2ccd}'), ('\u{2ccf}', '\u{2ccf}'), ('\u{2cd1}',
-        '\u{2cd1}'), ('\u{2cd3}', '\u{2cd3}'), ('\u{2cd5}', '\u{2cd5}'),
-        ('\u{2cd7}', '\u{2cd7}'), ('\u{2cd9}', '\u{2cd9}'), ('\u{2cdb}',
-        '\u{2cdb}'), ('\u{2cdd}', '\u{2cdd}'), ('\u{2cdf}', '\u{2cdf}'),
-        ('\u{2ce1}', '\u{2ce1}'), ('\u{2ce3}', '\u{2ce4}'), ('\u{2cec}',
-        '\u{2cec}'), ('\u{2cee}', '\u{2cee}'), ('\u{2cf3}', '\u{2cf3}'),
-        ('\u{2d00}', '\u{2d25}'), ('\u{2d27}', '\u{2d27}'), ('\u{2d2d}',
-        '\u{2d2d}'), ('\u{a641}', '\u{a641}'), ('\u{a643}', '\u{a643}'),
-        ('\u{a645}', '\u{a645}'), ('\u{a647}', '\u{a647}'), ('\u{a649}',
-        '\u{a649}'), ('\u{a64b}', '\u{a64b}'), ('\u{a64d}', '\u{a64d}'),
-        ('\u{a64f}', '\u{a64f}'), ('\u{a651}', '\u{a651}'), ('\u{a653}',
-        '\u{a653}'), ('\u{a655}', '\u{a655}'), ('\u{a657}', '\u{a657}'),
-        ('\u{a659}', '\u{a659}'), ('\u{a65b}', '\u{a65b}'), ('\u{a65d}',
-        '\u{a65d}'), ('\u{a65f}', '\u{a65f}'), ('\u{a661}', '\u{a661}'),
-        ('\u{a663}', '\u{a663}'), ('\u{a665}', '\u{a665}'), ('\u{a667}',
-        '\u{a667}'), ('\u{a669}', '\u{a669}'), ('\u{a66b}', '\u{a66b}'),
-        ('\u{a66d}', '\u{a66d}'), ('\u{a681}', '\u{a681}'), ('\u{a683}',
-        '\u{a683}'), ('\u{a685}', '\u{a685}'), ('\u{a687}', '\u{a687}'),
-        ('\u{a689}', '\u{a689}'), ('\u{a68b}', '\u{a68b}'), ('\u{a68d}',
-        '\u{a68d}'), ('\u{a68f}', '\u{a68f}'), ('\u{a691}', '\u{a691}'),
-        ('\u{a693}', '\u{a693}'), ('\u{a695}', '\u{a695}'), ('\u{a697}',
-        '\u{a697}'), ('\u{a699}', '\u{a699}'), ('\u{a69b}', '\u{a69d}'),
-        ('\u{a723}', '\u{a723}'), ('\u{a725}', '\u{a725}'), ('\u{a727}',
-        '\u{a727}'), ('\u{a729}', '\u{a729}'), ('\u{a72b}', '\u{a72b}'),
-        ('\u{a72d}', '\u{a72d}'), ('\u{a72f}', '\u{a731}'), ('\u{a733}',
-        '\u{a733}'), ('\u{a735}', '\u{a735}'), ('\u{a737}', '\u{a737}'),
-        ('\u{a739}', '\u{a739}'), ('\u{a73b}', '\u{a73b}'), ('\u{a73d}',
-        '\u{a73d}'), ('\u{a73f}', '\u{a73f}'), ('\u{a741}', '\u{a741}'),
-        ('\u{a743}', '\u{a743}'), ('\u{a745}', '\u{a745}'), ('\u{a747}',
-        '\u{a747}'), ('\u{a749}', '\u{a749}'), ('\u{a74b}', '\u{a74b}'),
-        ('\u{a74d}', '\u{a74d}'), ('\u{a74f}', '\u{a74f}'), ('\u{a751}',
-        '\u{a751}'), ('\u{a753}', '\u{a753}'), ('\u{a755}', '\u{a755}'),
-        ('\u{a757}', '\u{a757}'), ('\u{a759}', '\u{a759}'), ('\u{a75b}',
-        '\u{a75b}'), ('\u{a75d}', '\u{a75d}'), ('\u{a75f}', '\u{a75f}'),
-        ('\u{a761}', '\u{a761}'), ('\u{a763}', '\u{a763}'), ('\u{a765}',
-        '\u{a765}'), ('\u{a767}', '\u{a767}'), ('\u{a769}', '\u{a769}'),
-        ('\u{a76b}', '\u{a76b}'), ('\u{a76d}', '\u{a76d}'), ('\u{a76f}',
-        '\u{a778}'), ('\u{a77a}', '\u{a77a}'), ('\u{a77c}', '\u{a77c}'),
-        ('\u{a77f}', '\u{a77f}'), ('\u{a781}', '\u{a781}'), ('\u{a783}',
-        '\u{a783}'), ('\u{a785}', '\u{a785}'), ('\u{a787}', '\u{a787}'),
-        ('\u{a78c}', '\u{a78c}'), ('\u{a78e}', '\u{a78e}'), ('\u{a791}',
-        '\u{a791}'), ('\u{a793}', '\u{a795}'), ('\u{a797}', '\u{a797}'),
-        ('\u{a799}', '\u{a799}'), ('\u{a79b}', '\u{a79b}'), ('\u{a79d}',
-        '\u{a79d}'), ('\u{a79f}', '\u{a79f}'), ('\u{a7a1}', '\u{a7a1}'),
-        ('\u{a7a3}', '\u{a7a3}'), ('\u{a7a5}', '\u{a7a5}'), ('\u{a7a7}',
-        '\u{a7a7}'), ('\u{a7a9}', '\u{a7a9}'), ('\u{a7b5}', '\u{a7b5}'),
-        ('\u{a7b7}', '\u{a7b7}'), ('\u{a7f8}', '\u{a7fa}'), ('\u{ab30}',
-        '\u{ab5a}'), ('\u{ab5c}', '\u{ab65}'), ('\u{ab70}', '\u{abbf}'),
-        ('\u{fb00}', '\u{fb06}'), ('\u{fb13}', '\u{fb17}'), ('\u{ff41}',
-        '\u{ff5a}'), ('\u{10428}', '\u{1044f}'), ('\u{10cc0}', '\u{10cf2}'),
-        ('\u{118c0}', '\u{118df}'), ('\u{1d41a}', '\u{1d433}'), ('\u{1d44e}',
-        '\u{1d454}'), ('\u{1d456}', '\u{1d467}'), ('\u{1d482}', '\u{1d49b}'),
-        ('\u{1d4b6}', '\u{1d4b9}'), ('\u{1d4bb}', '\u{1d4bb}'), ('\u{1d4bd}',
-        '\u{1d4c3}'), ('\u{1d4c5}', '\u{1d4cf}'), ('\u{1d4ea}', '\u{1d503}'),
-        ('\u{1d51e}', '\u{1d537}'), ('\u{1d552}', '\u{1d56b}'), ('\u{1d586}',
-        '\u{1d59f}'), ('\u{1d5ba}', '\u{1d5d3}'), ('\u{1d5ee}', '\u{1d607}'),
-        ('\u{1d622}', '\u{1d63b}'), ('\u{1d656}', '\u{1d66f}'), ('\u{1d68a}',
-        '\u{1d6a5}'), ('\u{1d6c2}', '\u{1d6da}'), ('\u{1d6dc}', '\u{1d6e1}'),
-        ('\u{1d6fc}', '\u{1d714}'), ('\u{1d716}', '\u{1d71b}'), ('\u{1d736}',
-        '\u{1d74e}'), ('\u{1d750}', '\u{1d755}'), ('\u{1d770}', '\u{1d788}'),
-        ('\u{1d78a}', '\u{1d78f}'), ('\u{1d7aa}', '\u{1d7c2}'), ('\u{1d7c4}',
-        '\u{1d7c9}'), ('\u{1d7cb}', '\u{1d7cb}')
-    ];
+    match query.canonicalize()? {
+        Binary(name) => {
+            property_set(property_bool::BY_NAME, name)
+                .map(hir_class)
+                .ok_or(Error::PropertyNotFound)
+        }
+        GeneralCategory("Any") => {
+            Ok(hir_class(&[('\0', '\u{10FFFF}')]))
+        }
+        GeneralCategory("Assigned") => {
+            let mut cls =
+                property_set(general_category::BY_NAME, "Unassigned")
+                    .map(hir_class)
+                    .ok_or(Error::PropertyNotFound)?;
+            cls.negate();
+            Ok(cls)
+        }
+        GeneralCategory("ASCII") => {
+            Ok(hir_class(&[('\0', '\x7F')]))
+        }
+        GeneralCategory(name) => {
+            property_set(general_category::BY_NAME, name)
+                .map(hir_class)
+                .ok_or(Error::PropertyValueNotFound)
+        }
+        Script(name) => {
+            property_set(script::BY_NAME, name)
+                .map(hir_class)
+                .ok_or(Error::PropertyValueNotFound)
+        }
+        ByValue { property_name: "Age", property_value } => {
+            let mut class = hir::ClassUnicode::empty();
+            for set in ages(property_value)? {
+                class.union(&hir_class(set));
+            }
+            Ok(class)
+        }
+        ByValue { property_name: "Script_Extensions", property_value } => {
+            property_set(script_extension::BY_NAME, property_value)
+                .map(hir_class)
+                .ok_or(Error::PropertyValueNotFound)
+        }
+        _ => {
+            // What else should we support?
+            Err(Error::PropertyNotFound)
+        }
+    }
+}
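For illustration, the complement trick used for the `Assigned` arm above can be restated with only the items visible in this hunk (`hir_class` and `ClassUnicode::negate`); the function below is a hypothetical sketch, not part of the vendored file.

// Hypothetical sketch: build a class from explicit ranges, then negate it
// to obtain its complement, mirroring the `Assigned` handling above.
fn complement_of_ascii() -> hir::ClassUnicode {
    let mut cls = hir_class(&[('\0', '\x7F')]); // all ASCII scalar values
    cls.negate();                               // now every non-ASCII scalar value
    cls
}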
 
-    pub const Uppercase_table: &'static [(char, char)] = &[
-        ('\u{41}', '\u{5a}'), ('\u{c0}', '\u{d6}'), ('\u{d8}', '\u{de}'),
-        ('\u{100}', '\u{100}'), ('\u{102}', '\u{102}'), ('\u{104}', '\u{104}'),
-        ('\u{106}', '\u{106}'), ('\u{108}', '\u{108}'), ('\u{10a}', '\u{10a}'),
-        ('\u{10c}', '\u{10c}'), ('\u{10e}', '\u{10e}'), ('\u{110}', '\u{110}'),
-        ('\u{112}', '\u{112}'), ('\u{114}', '\u{114}'), ('\u{116}', '\u{116}'),
-        ('\u{118}', '\u{118}'), ('\u{11a}', '\u{11a}'), ('\u{11c}', '\u{11c}'),
-        ('\u{11e}', '\u{11e}'), ('\u{120}', '\u{120}'), ('\u{122}', '\u{122}'),
-        ('\u{124}', '\u{124}'), ('\u{126}', '\u{126}'), ('\u{128}', '\u{128}'),
-        ('\u{12a}', '\u{12a}'), ('\u{12c}', '\u{12c}'), ('\u{12e}', '\u{12e}'),
-        ('\u{130}', '\u{130}'), ('\u{132}', '\u{132}'), ('\u{134}', '\u{134}'),
-        ('\u{136}', '\u{136}'), ('\u{139}', '\u{139}'), ('\u{13b}', '\u{13b}'),
-        ('\u{13d}', '\u{13d}'), ('\u{13f}', '\u{13f}'), ('\u{141}', '\u{141}'),
-        ('\u{143}', '\u{143}'), ('\u{145}', '\u{145}'), ('\u{147}', '\u{147}'),
-        ('\u{14a}', '\u{14a}'), ('\u{14c}', '\u{14c}'), ('\u{14e}', '\u{14e}'),
-        ('\u{150}', '\u{150}'), ('\u{152}', '\u{152}'), ('\u{154}', '\u{154}'),
-        ('\u{156}', '\u{156}'), ('\u{158}', '\u{158}'), ('\u{15a}', '\u{15a}'),
-        ('\u{15c}', '\u{15c}'), ('\u{15e}', '\u{15e}'), ('\u{160}', '\u{160}'),
-        ('\u{162}', '\u{162}'), ('\u{164}', '\u{164}'), ('\u{166}', '\u{166}'),
-        ('\u{168}', '\u{168}'), ('\u{16a}', '\u{16a}'), ('\u{16c}', '\u{16c}'),
-        ('\u{16e}', '\u{16e}'), ('\u{170}', '\u{170}'), ('\u{172}', '\u{172}'),
-        ('\u{174}', '\u{174}'), ('\u{176}', '\u{176}'), ('\u{178}', '\u{179}'),
-        ('\u{17b}', '\u{17b}'), ('\u{17d}', '\u{17d}'), ('\u{181}', '\u{182}'),
-        ('\u{184}', '\u{184}'), ('\u{186}', '\u{187}'), ('\u{189}', '\u{18b}'),
-        ('\u{18e}', '\u{191}'), ('\u{193}', '\u{194}'), ('\u{196}', '\u{198}'),
-        ('\u{19c}', '\u{19d}'), ('\u{19f}', '\u{1a0}'), ('\u{1a2}', '\u{1a2}'),
-        ('\u{1a4}', '\u{1a4}'), ('\u{1a6}', '\u{1a7}'), ('\u{1a9}', '\u{1a9}'),
-        ('\u{1ac}', '\u{1ac}'), ('\u{1ae}', '\u{1af}'), ('\u{1b1}', '\u{1b3}'),
-        ('\u{1b5}', '\u{1b5}'), ('\u{1b7}', '\u{1b8}'), ('\u{1bc}', '\u{1bc}'),
-        ('\u{1c4}', '\u{1c4}'), ('\u{1c7}', '\u{1c7}'), ('\u{1ca}', '\u{1ca}'),
-        ('\u{1cd}', '\u{1cd}'), ('\u{1cf}', '\u{1cf}'), ('\u{1d1}', '\u{1d1}'),
-        ('\u{1d3}', '\u{1d3}'), ('\u{1d5}', '\u{1d5}'), ('\u{1d7}', '\u{1d7}'),
-        ('\u{1d9}', '\u{1d9}'), ('\u{1db}', '\u{1db}'), ('\u{1de}', '\u{1de}'),
-        ('\u{1e0}', '\u{1e0}'), ('\u{1e2}', '\u{1e2}'), ('\u{1e4}', '\u{1e4}'),
-        ('\u{1e6}', '\u{1e6}'), ('\u{1e8}', '\u{1e8}'), ('\u{1ea}', '\u{1ea}'),
-        ('\u{1ec}', '\u{1ec}'), ('\u{1ee}', '\u{1ee}'), ('\u{1f1}', '\u{1f1}'),
-        ('\u{1f4}', '\u{1f4}'), ('\u{1f6}', '\u{1f8}'), ('\u{1fa}', '\u{1fa}'),
-        ('\u{1fc}', '\u{1fc}'), ('\u{1fe}', '\u{1fe}'), ('\u{200}', '\u{200}'),
-        ('\u{202}', '\u{202}'), ('\u{204}', '\u{204}'), ('\u{206}', '\u{206}'),
-        ('\u{208}', '\u{208}'), ('\u{20a}', '\u{20a}'), ('\u{20c}', '\u{20c}'),
-        ('\u{20e}', '\u{20e}'), ('\u{210}', '\u{210}'), ('\u{212}', '\u{212}'),
-        ('\u{214}', '\u{214}'), ('\u{216}', '\u{216}'), ('\u{218}', '\u{218}'),
-        ('\u{21a}', '\u{21a}'), ('\u{21c}', '\u{21c}'), ('\u{21e}', '\u{21e}'),
-        ('\u{220}', '\u{220}'), ('\u{222}', '\u{222}'), ('\u{224}', '\u{224}'),
-        ('\u{226}', '\u{226}'), ('\u{228}', '\u{228}'), ('\u{22a}', '\u{22a}'),
-        ('\u{22c}', '\u{22c}'), ('\u{22e}', '\u{22e}'), ('\u{230}', '\u{230}'),
-        ('\u{232}', '\u{232}'), ('\u{23a}', '\u{23b}'), ('\u{23d}', '\u{23e}'),
-        ('\u{241}', '\u{241}'), ('\u{243}', '\u{246}'), ('\u{248}', '\u{248}'),
-        ('\u{24a}', '\u{24a}'), ('\u{24c}', '\u{24c}'), ('\u{24e}', '\u{24e}'),
-        ('\u{370}', '\u{370}'), ('\u{372}', '\u{372}'), ('\u{376}', '\u{376}'),
-        ('\u{37f}', '\u{37f}'), ('\u{386}', '\u{386}'), ('\u{388}', '\u{38a}'),
-        ('\u{38c}', '\u{38c}'), ('\u{38e}', '\u{38f}'), ('\u{391}', '\u{3a1}'),
-        ('\u{3a3}', '\u{3ab}'), ('\u{3cf}', '\u{3cf}'), ('\u{3d2}', '\u{3d4}'),
-        ('\u{3d8}', '\u{3d8}'), ('\u{3da}', '\u{3da}'), ('\u{3dc}', '\u{3dc}'),
-        ('\u{3de}', '\u{3de}'), ('\u{3e0}', '\u{3e0}'), ('\u{3e2}', '\u{3e2}'),
-        ('\u{3e4}', '\u{3e4}'), ('\u{3e6}', '\u{3e6}'), ('\u{3e8}', '\u{3e8}'),
-        ('\u{3ea}', '\u{3ea}'), ('\u{3ec}', '\u{3ec}'), ('\u{3ee}', '\u{3ee}'),
-        ('\u{3f4}', '\u{3f4}'), ('\u{3f7}', '\u{3f7}'), ('\u{3f9}', '\u{3fa}'),
-        ('\u{3fd}', '\u{42f}'), ('\u{460}', '\u{460}'), ('\u{462}', '\u{462}'),
-        ('\u{464}', '\u{464}'), ('\u{466}', '\u{466}'), ('\u{468}', '\u{468}'),
-        ('\u{46a}', '\u{46a}'), ('\u{46c}', '\u{46c}'), ('\u{46e}', '\u{46e}'),
-        ('\u{470}', '\u{470}'), ('\u{472}', '\u{472}'), ('\u{474}', '\u{474}'),
-        ('\u{476}', '\u{476}'), ('\u{478}', '\u{478}'), ('\u{47a}', '\u{47a}'),
-        ('\u{47c}', '\u{47c}'), ('\u{47e}', '\u{47e}'), ('\u{480}', '\u{480}'),
-        ('\u{48a}', '\u{48a}'), ('\u{48c}', '\u{48c}'), ('\u{48e}', '\u{48e}'),
-        ('\u{490}', '\u{490}'), ('\u{492}', '\u{492}'), ('\u{494}', '\u{494}'),
-        ('\u{496}', '\u{496}'), ('\u{498}', '\u{498}'), ('\u{49a}', '\u{49a}'),
-        ('\u{49c}', '\u{49c}'), ('\u{49e}', '\u{49e}'), ('\u{4a0}', '\u{4a0}'),
-        ('\u{4a2}', '\u{4a2}'), ('\u{4a4}', '\u{4a4}'), ('\u{4a6}', '\u{4a6}'),
-        ('\u{4a8}', '\u{4a8}'), ('\u{4aa}', '\u{4aa}'), ('\u{4ac}', '\u{4ac}'),
-        ('\u{4ae}', '\u{4ae}'), ('\u{4b0}', '\u{4b0}'), ('\u{4b2}', '\u{4b2}'),
-        ('\u{4b4}', '\u{4b4}'), ('\u{4b6}', '\u{4b6}'), ('\u{4b8}', '\u{4b8}'),
-        ('\u{4ba}', '\u{4ba}'), ('\u{4bc}', '\u{4bc}'), ('\u{4be}', '\u{4be}'),
-        ('\u{4c0}', '\u{4c1}'), ('\u{4c3}', '\u{4c3}'), ('\u{4c5}', '\u{4c5}'),
-        ('\u{4c7}', '\u{4c7}'), ('\u{4c9}', '\u{4c9}'), ('\u{4cb}', '\u{4cb}'),
-        ('\u{4cd}', '\u{4cd}'), ('\u{4d0}', '\u{4d0}'), ('\u{4d2}', '\u{4d2}'),
-        ('\u{4d4}', '\u{4d4}'), ('\u{4d6}', '\u{4d6}'), ('\u{4d8}', '\u{4d8}'),
-        ('\u{4da}', '\u{4da}'), ('\u{4dc}', '\u{4dc}'), ('\u{4de}', '\u{4de}'),
-        ('\u{4e0}', '\u{4e0}'), ('\u{4e2}', '\u{4e2}'), ('\u{4e4}', '\u{4e4}'),
-        ('\u{4e6}', '\u{4e6}'), ('\u{4e8}', '\u{4e8}'), ('\u{4ea}', '\u{4ea}'),
-        ('\u{4ec}', '\u{4ec}'), ('\u{4ee}', '\u{4ee}'), ('\u{4f0}', '\u{4f0}'),
-        ('\u{4f2}', '\u{4f2}'), ('\u{4f4}', '\u{4f4}'), ('\u{4f6}', '\u{4f6}'),
-        ('\u{4f8}', '\u{4f8}'), ('\u{4fa}', '\u{4fa}'), ('\u{4fc}', '\u{4fc}'),
-        ('\u{4fe}', '\u{4fe}'), ('\u{500}', '\u{500}'), ('\u{502}', '\u{502}'),
-        ('\u{504}', '\u{504}'), ('\u{506}', '\u{506}'), ('\u{508}', '\u{508}'),
-        ('\u{50a}', '\u{50a}'), ('\u{50c}', '\u{50c}'), ('\u{50e}', '\u{50e}'),
-        ('\u{510}', '\u{510}'), ('\u{512}', '\u{512}'), ('\u{514}', '\u{514}'),
-        ('\u{516}', '\u{516}'), ('\u{518}', '\u{518}'), ('\u{51a}', '\u{51a}'),
-        ('\u{51c}', '\u{51c}'), ('\u{51e}', '\u{51e}'), ('\u{520}', '\u{520}'),
-        ('\u{522}', '\u{522}'), ('\u{524}', '\u{524}'), ('\u{526}', '\u{526}'),
-        ('\u{528}', '\u{528}'), ('\u{52a}', '\u{52a}'), ('\u{52c}', '\u{52c}'),
-        ('\u{52e}', '\u{52e}'), ('\u{531}', '\u{556}'), ('\u{10a0}',
-        '\u{10c5}'), ('\u{10c7}', '\u{10c7}'), ('\u{10cd}', '\u{10cd}'),
-        ('\u{13a0}', '\u{13f5}'), ('\u{1e00}', '\u{1e00}'), ('\u{1e02}',
-        '\u{1e02}'), ('\u{1e04}', '\u{1e04}'), ('\u{1e06}', '\u{1e06}'),
-        ('\u{1e08}', '\u{1e08}'), ('\u{1e0a}', '\u{1e0a}'), ('\u{1e0c}',
-        '\u{1e0c}'), ('\u{1e0e}', '\u{1e0e}'), ('\u{1e10}', '\u{1e10}'),
-        ('\u{1e12}', '\u{1e12}'), ('\u{1e14}', '\u{1e14}'), ('\u{1e16}',
-        '\u{1e16}'), ('\u{1e18}', '\u{1e18}'), ('\u{1e1a}', '\u{1e1a}'),
-        ('\u{1e1c}', '\u{1e1c}'), ('\u{1e1e}', '\u{1e1e}'), ('\u{1e20}',
-        '\u{1e20}'), ('\u{1e22}', '\u{1e22}'), ('\u{1e24}', '\u{1e24}'),
-        ('\u{1e26}', '\u{1e26}'), ('\u{1e28}', '\u{1e28}'), ('\u{1e2a}',
-        '\u{1e2a}'), ('\u{1e2c}', '\u{1e2c}'), ('\u{1e2e}', '\u{1e2e}'),
-        ('\u{1e30}', '\u{1e30}'), ('\u{1e32}', '\u{1e32}'), ('\u{1e34}',
-        '\u{1e34}'), ('\u{1e36}', '\u{1e36}'), ('\u{1e38}', '\u{1e38}'),
-        ('\u{1e3a}', '\u{1e3a}'), ('\u{1e3c}', '\u{1e3c}'), ('\u{1e3e}',
-        '\u{1e3e}'), ('\u{1e40}', '\u{1e40}'), ('\u{1e42}', '\u{1e42}'),
-        ('\u{1e44}', '\u{1e44}'), ('\u{1e46}', '\u{1e46}'), ('\u{1e48}',
-        '\u{1e48}'), ('\u{1e4a}', '\u{1e4a}'), ('\u{1e4c}', '\u{1e4c}'),
-        ('\u{1e4e}', '\u{1e4e}'), ('\u{1e50}', '\u{1e50}'), ('\u{1e52}',
-        '\u{1e52}'), ('\u{1e54}', '\u{1e54}'), ('\u{1e56}', '\u{1e56}'),
-        ('\u{1e58}', '\u{1e58}'), ('\u{1e5a}', '\u{1e5a}'), ('\u{1e5c}',
-        '\u{1e5c}'), ('\u{1e5e}', '\u{1e5e}'), ('\u{1e60}', '\u{1e60}'),
-        ('\u{1e62}', '\u{1e62}'), ('\u{1e64}', '\u{1e64}'), ('\u{1e66}',
-        '\u{1e66}'), ('\u{1e68}', '\u{1e68}'), ('\u{1e6a}', '\u{1e6a}'),
-        ('\u{1e6c}', '\u{1e6c}'), ('\u{1e6e}', '\u{1e6e}'), ('\u{1e70}',
-        '\u{1e70}'), ('\u{1e72}', '\u{1e72}'), ('\u{1e74}', '\u{1e74}'),
-        ('\u{1e76}', '\u{1e76}'), ('\u{1e78}', '\u{1e78}'), ('\u{1e7a}',
-        '\u{1e7a}'), ('\u{1e7c}', '\u{1e7c}'), ('\u{1e7e}', '\u{1e7e}'),
-        ('\u{1e80}', '\u{1e80}'), ('\u{1e82}', '\u{1e82}'), ('\u{1e84}',
-        '\u{1e84}'), ('\u{1e86}', '\u{1e86}'), ('\u{1e88}', '\u{1e88}'),
-        ('\u{1e8a}', '\u{1e8a}'), ('\u{1e8c}', '\u{1e8c}'), ('\u{1e8e}',
-        '\u{1e8e}'), ('\u{1e90}', '\u{1e90}'), ('\u{1e92}', '\u{1e92}'),
-        ('\u{1e94}', '\u{1e94}'), ('\u{1e9e}', '\u{1e9e}'), ('\u{1ea0}',
-        '\u{1ea0}'), ('\u{1ea2}', '\u{1ea2}'), ('\u{1ea4}', '\u{1ea4}'),
-        ('\u{1ea6}', '\u{1ea6}'), ('\u{1ea8}', '\u{1ea8}'), ('\u{1eaa}',
-        '\u{1eaa}'), ('\u{1eac}', '\u{1eac}'), ('\u{1eae}', '\u{1eae}'),
-        ('\u{1eb0}', '\u{1eb0}'), ('\u{1eb2}', '\u{1eb2}'), ('\u{1eb4}',
-        '\u{1eb4}'), ('\u{1eb6}', '\u{1eb6}'), ('\u{1eb8}', '\u{1eb8}'),
-        ('\u{1eba}', '\u{1eba}'), ('\u{1ebc}', '\u{1ebc}'), ('\u{1ebe}',
-        '\u{1ebe}'), ('\u{1ec0}', '\u{1ec0}'), ('\u{1ec2}', '\u{1ec2}'),
-        ('\u{1ec4}', '\u{1ec4}'), ('\u{1ec6}', '\u{1ec6}'), ('\u{1ec8}',
-        '\u{1ec8}'), ('\u{1eca}', '\u{1eca}'), ('\u{1ecc}', '\u{1ecc}'),
-        ('\u{1ece}', '\u{1ece}'), ('\u{1ed0}', '\u{1ed0}'), ('\u{1ed2}',
-        '\u{1ed2}'), ('\u{1ed4}', '\u{1ed4}'), ('\u{1ed6}', '\u{1ed6}'),
-        ('\u{1ed8}', '\u{1ed8}'), ('\u{1eda}', '\u{1eda}'), ('\u{1edc}',
-        '\u{1edc}'), ('\u{1ede}', '\u{1ede}'), ('\u{1ee0}', '\u{1ee0}'),
-        ('\u{1ee2}', '\u{1ee2}'), ('\u{1ee4}', '\u{1ee4}'), ('\u{1ee6}',
-        '\u{1ee6}'), ('\u{1ee8}', '\u{1ee8}'), ('\u{1eea}', '\u{1eea}'),
-        ('\u{1eec}', '\u{1eec}'), ('\u{1eee}', '\u{1eee}'), ('\u{1ef0}',
-        '\u{1ef0}'), ('\u{1ef2}', '\u{1ef2}'), ('\u{1ef4}', '\u{1ef4}'),
-        ('\u{1ef6}', '\u{1ef6}'), ('\u{1ef8}', '\u{1ef8}'), ('\u{1efa}',
-        '\u{1efa}'), ('\u{1efc}', '\u{1efc}'), ('\u{1efe}', '\u{1efe}'),
-        ('\u{1f08}', '\u{1f0f}'), ('\u{1f18}', '\u{1f1d}'), ('\u{1f28}',
-        '\u{1f2f}'), ('\u{1f38}', '\u{1f3f}'), ('\u{1f48}', '\u{1f4d}'),
-        ('\u{1f59}', '\u{1f59}'), ('\u{1f5b}', '\u{1f5b}'), ('\u{1f5d}',
-        '\u{1f5d}'), ('\u{1f5f}', '\u{1f5f}'), ('\u{1f68}', '\u{1f6f}'),
-        ('\u{1fb8}', '\u{1fbb}'), ('\u{1fc8}', '\u{1fcb}'), ('\u{1fd8}',
-        '\u{1fdb}'), ('\u{1fe8}', '\u{1fec}'), ('\u{1ff8}', '\u{1ffb}'),
-        ('\u{2102}', '\u{2102}'), ('\u{2107}', '\u{2107}'), ('\u{210b}',
-        '\u{210d}'), ('\u{2110}', '\u{2112}'), ('\u{2115}', '\u{2115}'),
-        ('\u{2119}', '\u{211d}'), ('\u{2124}', '\u{2124}'), ('\u{2126}',
-        '\u{2126}'), ('\u{2128}', '\u{2128}'), ('\u{212a}', '\u{212d}'),
-        ('\u{2130}', '\u{2133}'), ('\u{213e}', '\u{213f}'), ('\u{2145}',
-        '\u{2145}'), ('\u{2160}', '\u{216f}'), ('\u{2183}', '\u{2183}'),
-        ('\u{24b6}', '\u{24cf}'), ('\u{2c00}', '\u{2c2e}'), ('\u{2c60}',
-        '\u{2c60}'), ('\u{2c62}', '\u{2c64}'), ('\u{2c67}', '\u{2c67}'),
-        ('\u{2c69}', '\u{2c69}'), ('\u{2c6b}', '\u{2c6b}'), ('\u{2c6d}',
-        '\u{2c70}'), ('\u{2c72}', '\u{2c72}'), ('\u{2c75}', '\u{2c75}'),
-        ('\u{2c7e}', '\u{2c80}'), ('\u{2c82}', '\u{2c82}'), ('\u{2c84}',
-        '\u{2c84}'), ('\u{2c86}', '\u{2c86}'), ('\u{2c88}', '\u{2c88}'),
-        ('\u{2c8a}', '\u{2c8a}'), ('\u{2c8c}', '\u{2c8c}'), ('\u{2c8e}',
-        '\u{2c8e}'), ('\u{2c90}', '\u{2c90}'), ('\u{2c92}', '\u{2c92}'),
-        ('\u{2c94}', '\u{2c94}'), ('\u{2c96}', '\u{2c96}'), ('\u{2c98}',
-        '\u{2c98}'), ('\u{2c9a}', '\u{2c9a}'), ('\u{2c9c}', '\u{2c9c}'),
-        ('\u{2c9e}', '\u{2c9e}'), ('\u{2ca0}', '\u{2ca0}'), ('\u{2ca2}',
-        '\u{2ca2}'), ('\u{2ca4}', '\u{2ca4}'), ('\u{2ca6}', '\u{2ca6}'),
-        ('\u{2ca8}', '\u{2ca8}'), ('\u{2caa}', '\u{2caa}'), ('\u{2cac}',
-        '\u{2cac}'), ('\u{2cae}', '\u{2cae}'), ('\u{2cb0}', '\u{2cb0}'),
-        ('\u{2cb2}', '\u{2cb2}'), ('\u{2cb4}', '\u{2cb4}'), ('\u{2cb6}',
-        '\u{2cb6}'), ('\u{2cb8}', '\u{2cb8}'), ('\u{2cba}', '\u{2cba}'),
-        ('\u{2cbc}', '\u{2cbc}'), ('\u{2cbe}', '\u{2cbe}'), ('\u{2cc0}',
-        '\u{2cc0}'), ('\u{2cc2}', '\u{2cc2}'), ('\u{2cc4}', '\u{2cc4}'),
-        ('\u{2cc6}', '\u{2cc6}'), ('\u{2cc8}', '\u{2cc8}'), ('\u{2cca}',
-        '\u{2cca}'), ('\u{2ccc}', '\u{2ccc}'), ('\u{2cce}', '\u{2cce}'),
-        ('\u{2cd0}', '\u{2cd0}'), ('\u{2cd2}', '\u{2cd2}'), ('\u{2cd4}',
-        '\u{2cd4}'), ('\u{2cd6}', '\u{2cd6}'), ('\u{2cd8}', '\u{2cd8}'),
-        ('\u{2cda}', '\u{2cda}'), ('\u{2cdc}', '\u{2cdc}'), ('\u{2cde}',
-        '\u{2cde}'), ('\u{2ce0}', '\u{2ce0}'), ('\u{2ce2}', '\u{2ce2}'),
-        ('\u{2ceb}', '\u{2ceb}'), ('\u{2ced}', '\u{2ced}'), ('\u{2cf2}',
-        '\u{2cf2}'), ('\u{a640}', '\u{a640}'), ('\u{a642}', '\u{a642}'),
-        ('\u{a644}', '\u{a644}'), ('\u{a646}', '\u{a646}'), ('\u{a648}',
-        '\u{a648}'), ('\u{a64a}', '\u{a64a}'), ('\u{a64c}', '\u{a64c}'),
-        ('\u{a64e}', '\u{a64e}'), ('\u{a650}', '\u{a650}'), ('\u{a652}',
-        '\u{a652}'), ('\u{a654}', '\u{a654}'), ('\u{a656}', '\u{a656}'),
-        ('\u{a658}', '\u{a658}'), ('\u{a65a}', '\u{a65a}'), ('\u{a65c}',
-        '\u{a65c}'), ('\u{a65e}', '\u{a65e}'), ('\u{a660}', '\u{a660}'),
-        ('\u{a662}', '\u{a662}'), ('\u{a664}', '\u{a664}'), ('\u{a666}',
-        '\u{a666}'), ('\u{a668}', '\u{a668}'), ('\u{a66a}', '\u{a66a}'),
-        ('\u{a66c}', '\u{a66c}'), ('\u{a680}', '\u{a680}'), ('\u{a682}',
-        '\u{a682}'), ('\u{a684}', '\u{a684}'), ('\u{a686}', '\u{a686}'),
-        ('\u{a688}', '\u{a688}'), ('\u{a68a}', '\u{a68a}'), ('\u{a68c}',
-        '\u{a68c}'), ('\u{a68e}', '\u{a68e}'), ('\u{a690}', '\u{a690}'),
-        ('\u{a692}', '\u{a692}'), ('\u{a694}', '\u{a694}'), ('\u{a696}',
-        '\u{a696}'), ('\u{a698}', '\u{a698}'), ('\u{a69a}', '\u{a69a}'),
-        ('\u{a722}', '\u{a722}'), ('\u{a724}', '\u{a724}'), ('\u{a726}',
-        '\u{a726}'), ('\u{a728}', '\u{a728}'), ('\u{a72a}', '\u{a72a}'),
-        ('\u{a72c}', '\u{a72c}'), ('\u{a72e}', '\u{a72e}'), ('\u{a732}',
-        '\u{a732}'), ('\u{a734}', '\u{a734}'), ('\u{a736}', '\u{a736}'),
-        ('\u{a738}', '\u{a738}'), ('\u{a73a}', '\u{a73a}'), ('\u{a73c}',
-        '\u{a73c}'), ('\u{a73e}', '\u{a73e}'), ('\u{a740}', '\u{a740}'),
-        ('\u{a742}', '\u{a742}'), ('\u{a744}', '\u{a744}'), ('\u{a746}',
-        '\u{a746}'), ('\u{a748}', '\u{a748}'), ('\u{a74a}', '\u{a74a}'),
-        ('\u{a74c}', '\u{a74c}'), ('\u{a74e}', '\u{a74e}'), ('\u{a750}',
-        '\u{a750}'), ('\u{a752}', '\u{a752}'), ('\u{a754}', '\u{a754}'),
-        ('\u{a756}', '\u{a756}'), ('\u{a758}', '\u{a758}'), ('\u{a75a}',
-        '\u{a75a}'), ('\u{a75c}', '\u{a75c}'), ('\u{a75e}', '\u{a75e}'),
-        ('\u{a760}', '\u{a760}'), ('\u{a762}', '\u{a762}'), ('\u{a764}',
-        '\u{a764}'), ('\u{a766}', '\u{a766}'), ('\u{a768}', '\u{a768}'),
-        ('\u{a76a}', '\u{a76a}'), ('\u{a76c}', '\u{a76c}'), ('\u{a76e}',
-        '\u{a76e}'), ('\u{a779}', '\u{a779}'), ('\u{a77b}', '\u{a77b}'),
-        ('\u{a77d}', '\u{a77e}'), ('\u{a780}', '\u{a780}'), ('\u{a782}',
-        '\u{a782}'), ('\u{a784}', '\u{a784}'), ('\u{a786}', '\u{a786}'),
-        ('\u{a78b}', '\u{a78b}'), ('\u{a78d}', '\u{a78d}'), ('\u{a790}',
-        '\u{a790}'), ('\u{a792}', '\u{a792}'), ('\u{a796}', '\u{a796}'),
-        ('\u{a798}', '\u{a798}'), ('\u{a79a}', '\u{a79a}'), ('\u{a79c}',
-        '\u{a79c}'), ('\u{a79e}', '\u{a79e}'), ('\u{a7a0}', '\u{a7a0}'),
-        ('\u{a7a2}', '\u{a7a2}'), ('\u{a7a4}', '\u{a7a4}'), ('\u{a7a6}',
-        '\u{a7a6}'), ('\u{a7a8}', '\u{a7a8}'), ('\u{a7aa}', '\u{a7ad}'),
-        ('\u{a7b0}', '\u{a7b4}'), ('\u{a7b6}', '\u{a7b6}'), ('\u{ff21}',
-        '\u{ff3a}'), ('\u{10400}', '\u{10427}'), ('\u{10c80}', '\u{10cb2}'),
-        ('\u{118a0}', '\u{118bf}'), ('\u{1d400}', '\u{1d419}'), ('\u{1d434}',
-        '\u{1d44d}'), ('\u{1d468}', '\u{1d481}'), ('\u{1d49c}', '\u{1d49c}'),
-        ('\u{1d49e}', '\u{1d49f}'), ('\u{1d4a2}', '\u{1d4a2}'), ('\u{1d4a5}',
-        '\u{1d4a6}'), ('\u{1d4a9}', '\u{1d4ac}'), ('\u{1d4ae}', '\u{1d4b5}'),
-        ('\u{1d4d0}', '\u{1d4e9}'), ('\u{1d504}', '\u{1d505}'), ('\u{1d507}',
-        '\u{1d50a}'), ('\u{1d50d}', '\u{1d514}'), ('\u{1d516}', '\u{1d51c}'),
-        ('\u{1d538}', '\u{1d539}'), ('\u{1d53b}', '\u{1d53e}'), ('\u{1d540}',
-        '\u{1d544}'), ('\u{1d546}', '\u{1d546}'), ('\u{1d54a}', '\u{1d550}'),
-        ('\u{1d56c}', '\u{1d585}'), ('\u{1d5a0}', '\u{1d5b9}'), ('\u{1d5d4}',
-        '\u{1d5ed}'), ('\u{1d608}', '\u{1d621}'), ('\u{1d63c}', '\u{1d655}'),
-        ('\u{1d670}', '\u{1d689}'), ('\u{1d6a8}', '\u{1d6c0}'), ('\u{1d6e2}',
-        '\u{1d6fa}'), ('\u{1d71c}', '\u{1d734}'), ('\u{1d756}', '\u{1d76e}'),
-        ('\u{1d790}', '\u{1d7a8}'), ('\u{1d7ca}', '\u{1d7ca}'), ('\u{1f130}',
-        '\u{1f149}'), ('\u{1f150}', '\u{1f169}'), ('\u{1f170}', '\u{1f189}')
-    ];
+/// Build a Unicode HIR class from a sequence of Unicode scalar value ranges.
+pub fn hir_class(ranges: &[(char, char)]) -> hir::ClassUnicode {
+    let hir_ranges: Vec<hir::ClassUnicodeRange> = ranges
+        .iter()
+        .map(|&(s, e)| hir::ClassUnicodeRange::new(s, e))
+        .collect();
+    hir::ClassUnicode::new(hir_ranges)
+}
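A brief usage sketch (the function name is hypothetical): `hir_class` simply lifts a slice of inclusive scalar-value ranges into a `ClassUnicode`, so a small hand-written class looks like this.

// Hypothetical sketch: the ASCII hexadecimal digits as a Unicode class,
// built from three inclusive ranges.
fn ascii_hex_digit_class() -> hir::ClassUnicode {
    hir_class(&[('0', '9'), ('A', 'F'), ('a', 'f')])
}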
+
+fn canonical_prop(normalized_name: &str) -> Option<&'static str> {
+    ucd_util::canonical_property_name(PROPERTY_NAMES, normalized_name)
+}
+
+fn canonical_gencat(normalized_value: &str) -> Option<&'static str> {
+    match normalized_value {
+        "any" => Some("Any"),
+        "assigned" => Some("Assigned"),
+        "ascii" => Some("ASCII"),
+        _ => {
+            let gencats = property_values("General_Category").unwrap();
+            canonical_value(gencats, normalized_value)
+        }
+    }
+}
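As a sketch of the contract here (function name hypothetical): callers pass values that are already normalized, and the three aliases above resolve without consulting the General_Category value table.

// Hypothetical sketch: the special-cased aliases map straight to their
// canonical spellings.
fn canonical_gencat_aliases() {
    assert_eq!(canonical_gencat("any"), Some("Any"));
    assert_eq!(canonical_gencat("assigned"), Some("Assigned"));
    assert_eq!(canonical_gencat("ascii"), Some("ASCII"));
}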
 
-    pub const XID_Continue_table: &'static [(char, char)] = &[
-        ('\u{30}', '\u{39}'), ('\u{41}', '\u{5a}'), ('\u{5f}', '\u{5f}'),
-        ('\u{61}', '\u{7a}'), ('\u{aa}', '\u{aa}'), ('\u{b5}', '\u{b5}'),
-        ('\u{b7}', '\u{b7}'), ('\u{ba}', '\u{ba}'), ('\u{c0}', '\u{d6}'),
-        ('\u{d8}', '\u{f6}'), ('\u{f8}', '\u{2c1}'), ('\u{2c6}', '\u{2d1}'),
-        ('\u{2e0}', '\u{2e4}'), ('\u{2ec}', '\u{2ec}'), ('\u{2ee}', '\u{2ee}'),
-        ('\u{300}', '\u{374}'), ('\u{376}', '\u{377}'), ('\u{37b}', '\u{37d}'),
-        ('\u{37f}', '\u{37f}'), ('\u{386}', '\u{38a}'), ('\u{38c}', '\u{38c}'),
-        ('\u{38e}', '\u{3a1}'), ('\u{3a3}', '\u{3f5}'), ('\u{3f7}', '\u{481}'),
-        ('\u{483}', '\u{487}'), ('\u{48a}', '\u{52f}'), ('\u{531}', '\u{556}'),
-        ('\u{559}', '\u{559}'), ('\u{561}', '\u{587}'), ('\u{591}', '\u{5bd}'),
-        ('\u{5bf}', '\u{5bf}'), ('\u{5c1}', '\u{5c2}'), ('\u{5c4}', '\u{5c5}'),
-        ('\u{5c7}', '\u{5c7}'), ('\u{5d0}', '\u{5ea}'), ('\u{5f0}', '\u{5f2}'),
-        ('\u{610}', '\u{61a}'), ('\u{620}', '\u{669}'), ('\u{66e}', '\u{6d3}'),
-        ('\u{6d5}', '\u{6dc}'), ('\u{6df}', '\u{6e8}'), ('\u{6ea}', '\u{6fc}'),
-        ('\u{6ff}', '\u{6ff}'), ('\u{710}', '\u{74a}'), ('\u{74d}', '\u{7b1}'),
-        ('\u{7c0}', '\u{7f5}'), ('\u{7fa}', '\u{7fa}'), ('\u{800}', '\u{82d}'),
-        ('\u{840}', '\u{85b}'), ('\u{8a0}', '\u{8b4}'), ('\u{8e3}', '\u{963}'),
-        ('\u{966}', '\u{96f}'), ('\u{971}', '\u{983}'), ('\u{985}', '\u{98c}'),
-        ('\u{98f}', '\u{990}'), ('\u{993}', '\u{9a8}'), ('\u{9aa}', '\u{9b0}'),
-        ('\u{9b2}', '\u{9b2}'), ('\u{9b6}', '\u{9b9}'), ('\u{9bc}', '\u{9c4}'),
-        ('\u{9c7}', '\u{9c8}'), ('\u{9cb}', '\u{9ce}'), ('\u{9d7}', '\u{9d7}'),
-        ('\u{9dc}', '\u{9dd}'), ('\u{9df}', '\u{9e3}'), ('\u{9e6}', '\u{9f1}'),
-        ('\u{a01}', '\u{a03}'), ('\u{a05}', '\u{a0a}'), ('\u{a0f}', '\u{a10}'),
-        ('\u{a13}', '\u{a28}'), ('\u{a2a}', '\u{a30}'), ('\u{a32}', '\u{a33}'),
-        ('\u{a35}', '\u{a36}'), ('\u{a38}', '\u{a39}'), ('\u{a3c}', '\u{a3c}'),
-        ('\u{a3e}', '\u{a42}'), ('\u{a47}', '\u{a48}'), ('\u{a4b}', '\u{a4d}'),
-        ('\u{a51}', '\u{a51}'), ('\u{a59}', '\u{a5c}'), ('\u{a5e}', '\u{a5e}'),
-        ('\u{a66}', '\u{a75}'), ('\u{a81}', '\u{a83}'), ('\u{a85}', '\u{a8d}'),
-        ('\u{a8f}', '\u{a91}'), ('\u{a93}', '\u{aa8}'), ('\u{aaa}', '\u{ab0}'),
-        ('\u{ab2}', '\u{ab3}'), ('\u{ab5}', '\u{ab9}'), ('\u{abc}', '\u{ac5}'),
-        ('\u{ac7}', '\u{ac9}'), ('\u{acb}', '\u{acd}'), ('\u{ad0}', '\u{ad0}'),
-        ('\u{ae0}', '\u{ae3}'), ('\u{ae6}', '\u{aef}'), ('\u{af9}', '\u{af9}'),
-        ('\u{b01}', '\u{b03}'), ('\u{b05}', '\u{b0c}'), ('\u{b0f}', '\u{b10}'),
-        ('\u{b13}', '\u{b28}'), ('\u{b2a}', '\u{b30}'), ('\u{b32}', '\u{b33}'),
-        ('\u{b35}', '\u{b39}'), ('\u{b3c}', '\u{b44}'), ('\u{b47}', '\u{b48}'),
-        ('\u{b4b}', '\u{b4d}'), ('\u{b56}', '\u{b57}'), ('\u{b5c}', '\u{b5d}'),
-        ('\u{b5f}', '\u{b63}'), ('\u{b66}', '\u{b6f}'), ('\u{b71}', '\u{b71}'),
-        ('\u{b82}', '\u{b83}'), ('\u{b85}', '\u{b8a}'), ('\u{b8e}', '\u{b90}'),
-        ('\u{b92}', '\u{b95}'), ('\u{b99}', '\u{b9a}'), ('\u{b9c}', '\u{b9c}'),
-        ('\u{b9e}', '\u{b9f}'), ('\u{ba3}', '\u{ba4}'), ('\u{ba8}', '\u{baa}'),
-        ('\u{bae}', '\u{bb9}'), ('\u{bbe}', '\u{bc2}'), ('\u{bc6}', '\u{bc8}'),
-        ('\u{bca}', '\u{bcd}'), ('\u{bd0}', '\u{bd0}'), ('\u{bd7}', '\u{bd7}'),
-        ('\u{be6}', '\u{bef}'), ('\u{c00}', '\u{c03}'), ('\u{c05}', '\u{c0c}'),
-        ('\u{c0e}', '\u{c10}'), ('\u{c12}', '\u{c28}'), ('\u{c2a}', '\u{c39}'),
-        ('\u{c3d}', '\u{c44}'), ('\u{c46}', '\u{c48}'), ('\u{c4a}', '\u{c4d}'),
-        ('\u{c55}', '\u{c56}'), ('\u{c58}', '\u{c5a}'), ('\u{c60}', '\u{c63}'),
-        ('\u{c66}', '\u{c6f}'), ('\u{c81}', '\u{c83}'), ('\u{c85}', '\u{c8c}'),
-        ('\u{c8e}', '\u{c90}'), ('\u{c92}', '\u{ca8}'), ('\u{caa}', '\u{cb3}'),
-        ('\u{cb5}', '\u{cb9}'), ('\u{cbc}', '\u{cc4}'), ('\u{cc6}', '\u{cc8}'),
-        ('\u{cca}', '\u{ccd}'), ('\u{cd5}', '\u{cd6}'), ('\u{cde}', '\u{cde}'),
-        ('\u{ce0}', '\u{ce3}'), ('\u{ce6}', '\u{cef}'), ('\u{cf1}', '\u{cf2}'),
-        ('\u{d01}', '\u{d03}'), ('\u{d05}', '\u{d0c}'), ('\u{d0e}', '\u{d10}'),
-        ('\u{d12}', '\u{d3a}'), ('\u{d3d}', '\u{d44}'), ('\u{d46}', '\u{d48}'),
-        ('\u{d4a}', '\u{d4e}'), ('\u{d57}', '\u{d57}'), ('\u{d5f}', '\u{d63}'),
-        ('\u{d66}', '\u{d6f}'), ('\u{d7a}', '\u{d7f}'), ('\u{d82}', '\u{d83}'),
-        ('\u{d85}', '\u{d96}'), ('\u{d9a}', '\u{db1}'), ('\u{db3}', '\u{dbb}'),
-        ('\u{dbd}', '\u{dbd}'), ('\u{dc0}', '\u{dc6}'), ('\u{dca}', '\u{dca}'),
-        ('\u{dcf}', '\u{dd4}'), ('\u{dd6}', '\u{dd6}'), ('\u{dd8}', '\u{ddf}'),
-        ('\u{de6}', '\u{def}'), ('\u{df2}', '\u{df3}'), ('\u{e01}', '\u{e3a}'),
-        ('\u{e40}', '\u{e4e}'), ('\u{e50}', '\u{e59}'), ('\u{e81}', '\u{e82}'),
-        ('\u{e84}', '\u{e84}'), ('\u{e87}', '\u{e88}'), ('\u{e8a}', '\u{e8a}'),
-        ('\u{e8d}', '\u{e8d}'), ('\u{e94}', '\u{e97}'), ('\u{e99}', '\u{e9f}'),
-        ('\u{ea1}', '\u{ea3}'), ('\u{ea5}', '\u{ea5}'), ('\u{ea7}', '\u{ea7}'),
-        ('\u{eaa}', '\u{eab}'), ('\u{ead}', '\u{eb9}'), ('\u{ebb}', '\u{ebd}'),
-        ('\u{ec0}', '\u{ec4}'), ('\u{ec6}', '\u{ec6}'), ('\u{ec8}', '\u{ecd}'),
-        ('\u{ed0}', '\u{ed9}'), ('\u{edc}', '\u{edf}'), ('\u{f00}', '\u{f00}'),
-        ('\u{f18}', '\u{f19}'), ('\u{f20}', '\u{f29}'), ('\u{f35}', '\u{f35}'),
-        ('\u{f37}', '\u{f37}'), ('\u{f39}', '\u{f39}'), ('\u{f3e}', '\u{f47}'),
-        ('\u{f49}', '\u{f6c}'), ('\u{f71}', '\u{f84}'), ('\u{f86}', '\u{f97}'),
-        ('\u{f99}', '\u{fbc}'), ('\u{fc6}', '\u{fc6}'), ('\u{1000}',
-        '\u{1049}'), ('\u{1050}', '\u{109d}'), ('\u{10a0}', '\u{10c5}'),
-        ('\u{10c7}', '\u{10c7}'), ('\u{10cd}', '\u{10cd}'), ('\u{10d0}',
-        '\u{10fa}'), ('\u{10fc}', '\u{1248}'), ('\u{124a}', '\u{124d}'),
-        ('\u{1250}', '\u{1256}'), ('\u{1258}', '\u{1258}'), ('\u{125a}',
-        '\u{125d}'), ('\u{1260}', '\u{1288}'), ('\u{128a}', '\u{128d}'),
-        ('\u{1290}', '\u{12b0}'), ('\u{12b2}', '\u{12b5}'), ('\u{12b8}',
-        '\u{12be}'), ('\u{12c0}', '\u{12c0}'), ('\u{12c2}', '\u{12c5}'),
-        ('\u{12c8}', '\u{12d6}'), ('\u{12d8}', '\u{1310}'), ('\u{1312}',
-        '\u{1315}'), ('\u{1318}', '\u{135a}'), ('\u{135d}', '\u{135f}'),
-        ('\u{1369}', '\u{1371}'), ('\u{1380}', '\u{138f}'), ('\u{13a0}',
-        '\u{13f5}'), ('\u{13f8}', '\u{13fd}'), ('\u{1401}', '\u{166c}'),
-        ('\u{166f}', '\u{167f}'), ('\u{1681}', '\u{169a}'), ('\u{16a0}',
-        '\u{16ea}'), ('\u{16ee}', '\u{16f8}'), ('\u{1700}', '\u{170c}'),
-        ('\u{170e}', '\u{1714}'), ('\u{1720}', '\u{1734}'), ('\u{1740}',
-        '\u{1753}'), ('\u{1760}', '\u{176c}'), ('\u{176e}', '\u{1770}'),
-        ('\u{1772}', '\u{1773}'), ('\u{1780}', '\u{17d3}'), ('\u{17d7}',
-        '\u{17d7}'), ('\u{17dc}', '\u{17dd}'), ('\u{17e0}', '\u{17e9}'),
-        ('\u{180b}', '\u{180d}'), ('\u{1810}', '\u{1819}'), ('\u{1820}',
-        '\u{1877}'), ('\u{1880}', '\u{18aa}'), ('\u{18b0}', '\u{18f5}'),
-        ('\u{1900}', '\u{191e}'), ('\u{1920}', '\u{192b}'), ('\u{1930}',
-        '\u{193b}'), ('\u{1946}', '\u{196d}'), ('\u{1970}', '\u{1974}'),
-        ('\u{1980}', '\u{19ab}'), ('\u{19b0}', '\u{19c9}'), ('\u{19d0}',
-        '\u{19da}'), ('\u{1a00}', '\u{1a1b}'), ('\u{1a20}', '\u{1a5e}'),
-        ('\u{1a60}', '\u{1a7c}'), ('\u{1a7f}', '\u{1a89}'), ('\u{1a90}',
-        '\u{1a99}'), ('\u{1aa7}', '\u{1aa7}'), ('\u{1ab0}', '\u{1abd}'),
-        ('\u{1b00}', '\u{1b4b}'), ('\u{1b50}', '\u{1b59}'), ('\u{1b6b}',
-        '\u{1b73}'), ('\u{1b80}', '\u{1bf3}'), ('\u{1c00}', '\u{1c37}'),
-        ('\u{1c40}', '\u{1c49}'), ('\u{1c4d}', '\u{1c7d}'), ('\u{1cd0}',
-        '\u{1cd2}'), ('\u{1cd4}', '\u{1cf6}'), ('\u{1cf8}', '\u{1cf9}'),
-        ('\u{1d00}', '\u{1df5}'), ('\u{1dfc}', '\u{1f15}'), ('\u{1f18}',
-        '\u{1f1d}'), ('\u{1f20}', '\u{1f45}'), ('\u{1f48}', '\u{1f4d}'),
-        ('\u{1f50}', '\u{1f57}'), ('\u{1f59}', '\u{1f59}'), ('\u{1f5b}',
-        '\u{1f5b}'), ('\u{1f5d}', '\u{1f5d}'), ('\u{1f5f}', '\u{1f7d}'),
-        ('\u{1f80}', '\u{1fb4}'), ('\u{1fb6}', '\u{1fbc}'), ('\u{1fbe}',
-        '\u{1fbe}'), ('\u{1fc2}', '\u{1fc4}'), ('\u{1fc6}', '\u{1fcc}'),
-        ('\u{1fd0}', '\u{1fd3}'), ('\u{1fd6}', '\u{1fdb}'), ('\u{1fe0}',
-        '\u{1fec}'), ('\u{1ff2}', '\u{1ff4}'), ('\u{1ff6}', '\u{1ffc}'),
-        ('\u{203f}', '\u{2040}'), ('\u{2054}', '\u{2054}'), ('\u{2071}',
-        '\u{2071}'), ('\u{207f}', '\u{207f}'), ('\u{2090}', '\u{209c}'),
-        ('\u{20d0}', '\u{20dc}'), ('\u{20e1}', '\u{20e1}'), ('\u{20e5}',
-        '\u{20f0}'), ('\u{2102}', '\u{2102}'), ('\u{2107}', '\u{2107}'),
-        ('\u{210a}', '\u{2113}'), ('\u{2115}', '\u{2115}'), ('\u{2118}',
-        '\u{211d}'), ('\u{2124}', '\u{2124}'), ('\u{2126}', '\u{2126}'),
-        ('\u{2128}', '\u{2128}'), ('\u{212a}', '\u{2139}'), ('\u{213c}',
-        '\u{213f}'), ('\u{2145}', '\u{2149}'), ('\u{214e}', '\u{214e}'),
-        ('\u{2160}', '\u{2188}'), ('\u{2c00}', '\u{2c2e}'), ('\u{2c30}',
-        '\u{2c5e}'), ('\u{2c60}', '\u{2ce4}'), ('\u{2ceb}', '\u{2cf3}'),
-        ('\u{2d00}', '\u{2d25}'), ('\u{2d27}', '\u{2d27}'), ('\u{2d2d}',
-        '\u{2d2d}'), ('\u{2d30}', '\u{2d67}'), ('\u{2d6f}', '\u{2d6f}'),
-        ('\u{2d7f}', '\u{2d96}'), ('\u{2da0}', '\u{2da6}'), ('\u{2da8}',
-        '\u{2dae}'), ('\u{2db0}', '\u{2db6}'), ('\u{2db8}', '\u{2dbe}'),
-        ('\u{2dc0}', '\u{2dc6}'), ('\u{2dc8}', '\u{2dce}'), ('\u{2dd0}',
-        '\u{2dd6}'), ('\u{2dd8}', '\u{2dde}'), ('\u{2de0}', '\u{2dff}'),
-        ('\u{3005}', '\u{3007}'), ('\u{3021}', '\u{302f}'), ('\u{3031}',
-        '\u{3035}'), ('\u{3038}', '\u{303c}'), ('\u{3041}', '\u{3096}'),
-        ('\u{3099}', '\u{309a}'), ('\u{309d}', '\u{309f}'), ('\u{30a1}',
-        '\u{30fa}'), ('\u{30fc}', '\u{30ff}'), ('\u{3105}', '\u{312d}'),
-        ('\u{3131}', '\u{318e}'), ('\u{31a0}', '\u{31ba}'), ('\u{31f0}',
-        '\u{31ff}'), ('\u{3400}', '\u{4db5}'), ('\u{4e00}', '\u{9fd5}'),
-        ('\u{a000}', '\u{a48c}'), ('\u{a4d0}', '\u{a4fd}'), ('\u{a500}',
-        '\u{a60c}'), ('\u{a610}', '\u{a62b}'), ('\u{a640}', '\u{a66f}'),
-        ('\u{a674}', '\u{a67d}'), ('\u{a67f}', '\u{a6f1}'), ('\u{a717}',
-        '\u{a71f}'), ('\u{a722}', '\u{a788}'), ('\u{a78b}', '\u{a7ad}'),
-        ('\u{a7b0}', '\u{a7b7}'), ('\u{a7f7}', '\u{a827}'), ('\u{a840}',
-        '\u{a873}'), ('\u{a880}', '\u{a8c4}'), ('\u{a8d0}', '\u{a8d9}'),
-        ('\u{a8e0}', '\u{a8f7}'), ('\u{a8fb}', '\u{a8fb}'), ('\u{a8fd}',
-        '\u{a8fd}'), ('\u{a900}', '\u{a92d}'), ('\u{a930}', '\u{a953}'),
-        ('\u{a960}', '\u{a97c}'), ('\u{a980}', '\u{a9c0}'), ('\u{a9cf}',
-        '\u{a9d9}'), ('\u{a9e0}', '\u{a9fe}'), ('\u{aa00}', '\u{aa36}'),
-        ('\u{aa40}', '\u{aa4d}'), ('\u{aa50}', '\u{aa59}'), ('\u{aa60}',
-        '\u{aa76}'), ('\u{aa7a}', '\u{aac2}'), ('\u{aadb}', '\u{aadd}'),
-        ('\u{aae0}', '\u{aaef}'), ('\u{aaf2}', '\u{aaf6}'), ('\u{ab01}',
-        '\u{ab06}'), ('\u{ab09}', '\u{ab0e}'), ('\u{ab11}', '\u{ab16}'),
-        ('\u{ab20}', '\u{ab26}'), ('\u{ab28}', '\u{ab2e}'), ('\u{ab30}',
-        '\u{ab5a}'), ('\u{ab5c}', '\u{ab65}'), ('\u{ab70}', '\u{abea}'),
-        ('\u{abec}', '\u{abed}'), ('\u{abf0}', '\u{abf9}'), ('\u{ac00}',
-        '\u{d7a3}'), ('\u{d7b0}', '\u{d7c6}'), ('\u{d7cb}', '\u{d7fb}'),
-        ('\u{f900}', '\u{fa6d}'), ('\u{fa70}', '\u{fad9}'), ('\u{fb00}',
-        '\u{fb06}'), ('\u{fb13}', '\u{fb17}'), ('\u{fb1d}', '\u{fb28}'),
-        ('\u{fb2a}', '\u{fb36}'), ('\u{fb38}', '\u{fb3c}'), ('\u{fb3e}',
-        '\u{fb3e}'), ('\u{fb40}', '\u{fb41}'), ('\u{fb43}', '\u{fb44}'),
-        ('\u{fb46}', '\u{fbb1}'), ('\u{fbd3}', '\u{fc5d}'), ('\u{fc64}',
-        '\u{fd3d}'), ('\u{fd50}', '\u{fd8f}'), ('\u{fd92}', '\u{fdc7}'),
-        ('\u{fdf0}', '\u{fdf9}'), ('\u{fe00}', '\u{fe0f}'), ('\u{fe20}',
-        '\u{fe2f}'), ('\u{fe33}', '\u{fe34}'), ('\u{fe4d}', '\u{fe4f}'),
-        ('\u{fe71}', '\u{fe71}'), ('\u{fe73}', '\u{fe73}'), ('\u{fe77}',
-        '\u{fe77}'), ('\u{fe79}', '\u{fe79}'), ('\u{fe7b}', '\u{fe7b}'),
-        ('\u{fe7d}', '\u{fe7d}'), ('\u{fe7f}', '\u{fefc}'), ('\u{ff10}',
-        '\u{ff19}'), ('\u{ff21}', '\u{ff3a}'), ('\u{ff3f}', '\u{ff3f}'),
-        ('\u{ff41}', '\u{ff5a}'), ('\u{ff66}', '\u{ffbe}'), ('\u{ffc2}',
-        '\u{ffc7}'), ('\u{ffca}', '\u{ffcf}'), ('\u{ffd2}', '\u{ffd7}'),
-        ('\u{ffda}', '\u{ffdc}'), ('\u{10000}', '\u{1000b}'), ('\u{1000d}',
-        '\u{10026}'), ('\u{10028}', '\u{1003a}'), ('\u{1003c}', '\u{1003d}'),
-        ('\u{1003f}', '\u{1004d}'), ('\u{10050}', '\u{1005d}'), ('\u{10080}',
-        '\u{100fa}'), ('\u{10140}', '\u{10174}'), ('\u{101fd}', '\u{101fd}'),
-        ('\u{10280}', '\u{1029c}'), ('\u{102a0}', '\u{102d0}'), ('\u{102e0}',
-        '\u{102e0}'), ('\u{10300}', '\u{1031f}'), ('\u{10330}', '\u{1034a}'),
-        ('\u{10350}', '\u{1037a}'), ('\u{10380}', '\u{1039d}'), ('\u{103a0}',
-        '\u{103c3}'), ('\u{103c8}', '\u{103cf}'), ('\u{103d1}', '\u{103d5}'),
-        ('\u{10400}', '\u{1049d}'), ('\u{104a0}', '\u{104a9}'), ('\u{10500}',
-        '\u{10527}'), ('\u{10530}', '\u{10563}'), ('\u{10600}', '\u{10736}'),
-        ('\u{10740}', '\u{10755}'), ('\u{10760}', '\u{10767}'), ('\u{10800}',
-        '\u{10805}'), ('\u{10808}', '\u{10808}'), ('\u{1080a}', '\u{10835}'),
-        ('\u{10837}', '\u{10838}'), ('\u{1083c}', '\u{1083c}'), ('\u{1083f}',
-        '\u{10855}'), ('\u{10860}', '\u{10876}'), ('\u{10880}', '\u{1089e}'),
-        ('\u{108e0}', '\u{108f2}'), ('\u{108f4}', '\u{108f5}'), ('\u{10900}',
-        '\u{10915}'), ('\u{10920}', '\u{10939}'), ('\u{10980}', '\u{109b7}'),
-        ('\u{109be}', '\u{109bf}'), ('\u{10a00}', '\u{10a03}'), ('\u{10a05}',
-        '\u{10a06}'), ('\u{10a0c}', '\u{10a13}'), ('\u{10a15}', '\u{10a17}'),
-        ('\u{10a19}', '\u{10a33}'), ('\u{10a38}', '\u{10a3a}'), ('\u{10a3f}',
-        '\u{10a3f}'), ('\u{10a60}', '\u{10a7c}'), ('\u{10a80}', '\u{10a9c}'),
-        ('\u{10ac0}', '\u{10ac7}'), ('\u{10ac9}', '\u{10ae6}'), ('\u{10b00}',
-        '\u{10b35}'), ('\u{10b40}', '\u{10b55}'), ('\u{10b60}', '\u{10b72}'),
-        ('\u{10b80}', '\u{10b91}'), ('\u{10c00}', '\u{10c48}'), ('\u{10c80}',
-        '\u{10cb2}'), ('\u{10cc0}', '\u{10cf2}'), ('\u{11000}', '\u{11046}'),
-        ('\u{11066}', '\u{1106f}'), ('\u{1107f}', '\u{110ba}'), ('\u{110d0}',
-        '\u{110e8}'), ('\u{110f0}', '\u{110f9}'), ('\u{11100}', '\u{11134}'),
-        ('\u{11136}', '\u{1113f}'), ('\u{11150}', '\u{11173}'), ('\u{11176}',
-        '\u{11176}'), ('\u{11180}', '\u{111c4}'), ('\u{111ca}', '\u{111cc}'),
-        ('\u{111d0}', '\u{111da}'), ('\u{111dc}', '\u{111dc}'), ('\u{11200}',
-        '\u{11211}'), ('\u{11213}', '\u{11237}'), ('\u{11280}', '\u{11286}'),
-        ('\u{11288}', '\u{11288}'), ('\u{1128a}', '\u{1128d}'), ('\u{1128f}',
-        '\u{1129d}'), ('\u{1129f}', '\u{112a8}'), ('\u{112b0}', '\u{112ea}'),
-        ('\u{112f0}', '\u{112f9}'), ('\u{11300}', '\u{11303}'), ('\u{11305}',
-        '\u{1130c}'), ('\u{1130f}', '\u{11310}'), ('\u{11313}', '\u{11328}'),
-        ('\u{1132a}', '\u{11330}'), ('\u{11332}', '\u{11333}'), ('\u{11335}',
-        '\u{11339}'), ('\u{1133c}', '\u{11344}'), ('\u{11347}', '\u{11348}'),
-        ('\u{1134b}', '\u{1134d}'), ('\u{11350}', '\u{11350}'), ('\u{11357}',
-        '\u{11357}'), ('\u{1135d}', '\u{11363}'), ('\u{11366}', '\u{1136c}'),
-        ('\u{11370}', '\u{11374}'), ('\u{11480}', '\u{114c5}'), ('\u{114c7}',
-        '\u{114c7}'), ('\u{114d0}', '\u{114d9}'), ('\u{11580}', '\u{115b5}'),
-        ('\u{115b8}', '\u{115c0}'), ('\u{115d8}', '\u{115dd}'), ('\u{11600}',
-        '\u{11640}'), ('\u{11644}', '\u{11644}'), ('\u{11650}', '\u{11659}'),
-        ('\u{11680}', '\u{116b7}'), ('\u{116c0}', '\u{116c9}'), ('\u{11700}',
-        '\u{11719}'), ('\u{1171d}', '\u{1172b}'), ('\u{11730}', '\u{11739}'),
-        ('\u{118a0}', '\u{118e9}'), ('\u{118ff}', '\u{118ff}'), ('\u{11ac0}',
-        '\u{11af8}'), ('\u{12000}', '\u{12399}'), ('\u{12400}', '\u{1246e}'),
-        ('\u{12480}', '\u{12543}'), ('\u{13000}', '\u{1342e}'), ('\u{14400}',
-        '\u{14646}'), ('\u{16800}', '\u{16a38}'), ('\u{16a40}', '\u{16a5e}'),
-        ('\u{16a60}', '\u{16a69}'), ('\u{16ad0}', '\u{16aed}'), ('\u{16af0}',
-        '\u{16af4}'), ('\u{16b00}', '\u{16b36}'), ('\u{16b40}', '\u{16b43}'),
-        ('\u{16b50}', '\u{16b59}'), ('\u{16b63}', '\u{16b77}'), ('\u{16b7d}',
-        '\u{16b8f}'), ('\u{16f00}', '\u{16f44}'), ('\u{16f50}', '\u{16f7e}'),
-        ('\u{16f8f}', '\u{16f9f}'), ('\u{1b000}', '\u{1b001}'), ('\u{1bc00}',
-        '\u{1bc6a}'), ('\u{1bc70}', '\u{1bc7c}'), ('\u{1bc80}', '\u{1bc88}'),
-        ('\u{1bc90}', '\u{1bc99}'), ('\u{1bc9d}', '\u{1bc9e}'), ('\u{1d165}',
-        '\u{1d169}'), ('\u{1d16d}', '\u{1d172}'), ('\u{1d17b}', '\u{1d182}'),
-        ('\u{1d185}', '\u{1d18b}'), ('\u{1d1aa}', '\u{1d1ad}'), ('\u{1d242}',
-        '\u{1d244}'), ('\u{1d400}', '\u{1d454}'), ('\u{1d456}', '\u{1d49c}'),
-        ('\u{1d49e}', '\u{1d49f}'), ('\u{1d4a2}', '\u{1d4a2}'), ('\u{1d4a5}',
-        '\u{1d4a6}'), ('\u{1d4a9}', '\u{1d4ac}'), ('\u{1d4ae}', '\u{1d4b9}'),
-        ('\u{1d4bb}', '\u{1d4bb}'), ('\u{1d4bd}', '\u{1d4c3}'), ('\u{1d4c5}',
-        '\u{1d505}'), ('\u{1d507}', '\u{1d50a}'), ('\u{1d50d}', '\u{1d514}'),
-        ('\u{1d516}', '\u{1d51c}'), ('\u{1d51e}', '\u{1d539}'), ('\u{1d53b}',
-        '\u{1d53e}'), ('\u{1d540}', '\u{1d544}'), ('\u{1d546}', '\u{1d546}'),
-        ('\u{1d54a}', '\u{1d550}'), ('\u{1d552}', '\u{1d6a5}'), ('\u{1d6a8}',
-        '\u{1d6c0}'), ('\u{1d6c2}', '\u{1d6da}'), ('\u{1d6dc}', '\u{1d6fa}'),
-        ('\u{1d6fc}', '\u{1d714}'), ('\u{1d716}', '\u{1d734}'), ('\u{1d736}',
-        '\u{1d74e}'), ('\u{1d750}', '\u{1d76e}'), ('\u{1d770}', '\u{1d788}'),
-        ('\u{1d78a}', '\u{1d7a8}'), ('\u{1d7aa}', '\u{1d7c2}'), ('\u{1d7c4}',
-        '\u{1d7cb}'), ('\u{1d7ce}', '\u{1d7ff}'), ('\u{1da00}', '\u{1da36}'),
-        ('\u{1da3b}', '\u{1da6c}'), ('\u{1da75}', '\u{1da75}'), ('\u{1da84}',
-        '\u{1da84}'), ('\u{1da9b}', '\u{1da9f}'), ('\u{1daa1}', '\u{1daaf}'),
-        ('\u{1e800}', '\u{1e8c4}'), ('\u{1e8d0}', '\u{1e8d6}'), ('\u{1ee00}',
-        '\u{1ee03}'), ('\u{1ee05}', '\u{1ee1f}'), ('\u{1ee21}', '\u{1ee22}'),
-        ('\u{1ee24}', '\u{1ee24}'), ('\u{1ee27}', '\u{1ee27}'), ('\u{1ee29}',
-        '\u{1ee32}'), ('\u{1ee34}', '\u{1ee37}'), ('\u{1ee39}', '\u{1ee39}'),
-        ('\u{1ee3b}', '\u{1ee3b}'), ('\u{1ee42}', '\u{1ee42}'), ('\u{1ee47}',
-        '\u{1ee47}'), ('\u{1ee49}', '\u{1ee49}'), ('\u{1ee4b}', '\u{1ee4b}'),
-        ('\u{1ee4d}', '\u{1ee4f}'), ('\u{1ee51}', '\u{1ee52}'), ('\u{1ee54}',
-        '\u{1ee54}'), ('\u{1ee57}', '\u{1ee57}'), ('\u{1ee59}', '\u{1ee59}'),
-        ('\u{1ee5b}', '\u{1ee5b}'), ('\u{1ee5d}', '\u{1ee5d}'), ('\u{1ee5f}',
-        '\u{1ee5f}'), ('\u{1ee61}', '\u{1ee62}'), ('\u{1ee64}', '\u{1ee64}'),
-        ('\u{1ee67}', '\u{1ee6a}'), ('\u{1ee6c}', '\u{1ee72}'), ('\u{1ee74}',
-        '\u{1ee77}'), ('\u{1ee79}', '\u{1ee7c}'), ('\u{1ee7e}', '\u{1ee7e}'),
-        ('\u{1ee80}', '\u{1ee89}'), ('\u{1ee8b}', '\u{1ee9b}'), ('\u{1eea1}',
-        '\u{1eea3}'), ('\u{1eea5}', '\u{1eea9}'), ('\u{1eeab}', '\u{1eebb}'),
-        ('\u{20000}', '\u{2a6d6}'), ('\u{2a700}', '\u{2b734}'), ('\u{2b740}',
-        '\u{2b81d}'), ('\u{2b820}', '\u{2cea1}'), ('\u{2f800}', '\u{2fa1d}'),
-        ('\u{e0100}', '\u{e01ef}')
-    ];
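+/// Returns the canonical Script value for the given normalized value, if
+/// one exists.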
+fn canonical_script(normalized_value: &str) -> Option<&'static str> {
+    let scripts = property_values("Script").unwrap();
+    canonical_value(scripts, normalized_value)
+}
+
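+/// Returns the canonical form of a property value from the given table of
+/// values, if one exists.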
+fn canonical_value(
+    vals: PropertyValues,
+    normalized_value: &str,
+) -> Option<&'static str> {
+    ucd_util::canonical_property_value(vals, normalized_value)
+}
 
-    pub const XID_Start_table: &'static [(char, char)] = &[
-        ('\u{41}', '\u{5a}'), ('\u{61}', '\u{7a}'), ('\u{aa}', '\u{aa}'),
-        ('\u{b5}', '\u{b5}'), ('\u{ba}', '\u{ba}'), ('\u{c0}', '\u{d6}'),
-        ('\u{d8}', '\u{f6}'), ('\u{f8}', '\u{2c1}'), ('\u{2c6}', '\u{2d1}'),
-        ('\u{2e0}', '\u{2e4}'), ('\u{2ec}', '\u{2ec}'), ('\u{2ee}', '\u{2ee}'),
-        ('\u{370}', '\u{374}'), ('\u{376}', '\u{377}'), ('\u{37b}', '\u{37d}'),
-        ('\u{37f}', '\u{37f}'), ('\u{386}', '\u{386}'), ('\u{388}', '\u{38a}'),
-        ('\u{38c}', '\u{38c}'), ('\u{38e}', '\u{3a1}'), ('\u{3a3}', '\u{3f5}'),
-        ('\u{3f7}', '\u{481}'), ('\u{48a}', '\u{52f}'), ('\u{531}', '\u{556}'),
-        ('\u{559}', '\u{559}'), ('\u{561}', '\u{587}'), ('\u{5d0}', '\u{5ea}'),
-        ('\u{5f0}', '\u{5f2}'), ('\u{620}', '\u{64a}'), ('\u{66e}', '\u{66f}'),
-        ('\u{671}', '\u{6d3}'), ('\u{6d5}', '\u{6d5}'), ('\u{6e5}', '\u{6e6}'),
-        ('\u{6ee}', '\u{6ef}'), ('\u{6fa}', '\u{6fc}'), ('\u{6ff}', '\u{6ff}'),
-        ('\u{710}', '\u{710}'), ('\u{712}', '\u{72f}'), ('\u{74d}', '\u{7a5}'),
-        ('\u{7b1}', '\u{7b1}'), ('\u{7ca}', '\u{7ea}'), ('\u{7f4}', '\u{7f5}'),
-        ('\u{7fa}', '\u{7fa}'), ('\u{800}', '\u{815}'), ('\u{81a}', '\u{81a}'),
-        ('\u{824}', '\u{824}'), ('\u{828}', '\u{828}'), ('\u{840}', '\u{858}'),
-        ('\u{8a0}', '\u{8b4}'), ('\u{904}', '\u{939}'), ('\u{93d}', '\u{93d}'),
-        ('\u{950}', '\u{950}'), ('\u{958}', '\u{961}'), ('\u{971}', '\u{980}'),
-        ('\u{985}', '\u{98c}'), ('\u{98f}', '\u{990}'), ('\u{993}', '\u{9a8}'),
-        ('\u{9aa}', '\u{9b0}'), ('\u{9b2}', '\u{9b2}'), ('\u{9b6}', '\u{9b9}'),
-        ('\u{9bd}', '\u{9bd}'), ('\u{9ce}', '\u{9ce}'), ('\u{9dc}', '\u{9dd}'),
-        ('\u{9df}', '\u{9e1}'), ('\u{9f0}', '\u{9f1}'), ('\u{a05}', '\u{a0a}'),
-        ('\u{a0f}', '\u{a10}'), ('\u{a13}', '\u{a28}'), ('\u{a2a}', '\u{a30}'),
-        ('\u{a32}', '\u{a33}'), ('\u{a35}', '\u{a36}'), ('\u{a38}', '\u{a39}'),
-        ('\u{a59}', '\u{a5c}'), ('\u{a5e}', '\u{a5e}'), ('\u{a72}', '\u{a74}'),
-        ('\u{a85}', '\u{a8d}'), ('\u{a8f}', '\u{a91}'), ('\u{a93}', '\u{aa8}'),
-        ('\u{aaa}', '\u{ab0}'), ('\u{ab2}', '\u{ab3}'), ('\u{ab5}', '\u{ab9}'),
-        ('\u{abd}', '\u{abd}'), ('\u{ad0}', '\u{ad0}'), ('\u{ae0}', '\u{ae1}'),
-        ('\u{af9}', '\u{af9}'), ('\u{b05}', '\u{b0c}'), ('\u{b0f}', '\u{b10}'),
-        ('\u{b13}', '\u{b28}'), ('\u{b2a}', '\u{b30}'), ('\u{b32}', '\u{b33}'),
-        ('\u{b35}', '\u{b39}'), ('\u{b3d}', '\u{b3d}'), ('\u{b5c}', '\u{b5d}'),
-        ('\u{b5f}', '\u{b61}'), ('\u{b71}', '\u{b71}'), ('\u{b83}', '\u{b83}'),
-        ('\u{b85}', '\u{b8a}'), ('\u{b8e}', '\u{b90}'), ('\u{b92}', '\u{b95}'),
-        ('\u{b99}', '\u{b9a}'), ('\u{b9c}', '\u{b9c}'), ('\u{b9e}', '\u{b9f}'),
-        ('\u{ba3}', '\u{ba4}'), ('\u{ba8}', '\u{baa}'), ('\u{bae}', '\u{bb9}'),
-        ('\u{bd0}', '\u{bd0}'), ('\u{c05}', '\u{c0c}'), ('\u{c0e}', '\u{c10}'),
-        ('\u{c12}', '\u{c28}'), ('\u{c2a}', '\u{c39}'), ('\u{c3d}', '\u{c3d}'),
-        ('\u{c58}', '\u{c5a}'), ('\u{c60}', '\u{c61}'), ('\u{c85}', '\u{c8c}'),
-        ('\u{c8e}', '\u{c90}'), ('\u{c92}', '\u{ca8}'), ('\u{caa}', '\u{cb3}'),
-        ('\u{cb5}', '\u{cb9}'), ('\u{cbd}', '\u{cbd}'), ('\u{cde}', '\u{cde}'),
-        ('\u{ce0}', '\u{ce1}'), ('\u{cf1}', '\u{cf2}'), ('\u{d05}', '\u{d0c}'),
-        ('\u{d0e}', '\u{d10}'), ('\u{d12}', '\u{d3a}'), ('\u{d3d}', '\u{d3d}'),
-        ('\u{d4e}', '\u{d4e}'), ('\u{d5f}', '\u{d61}'), ('\u{d7a}', '\u{d7f}'),
-        ('\u{d85}', '\u{d96}'), ('\u{d9a}', '\u{db1}'), ('\u{db3}', '\u{dbb}'),
-        ('\u{dbd}', '\u{dbd}'), ('\u{dc0}', '\u{dc6}'), ('\u{e01}', '\u{e30}'),
-        ('\u{e32}', '\u{e32}'), ('\u{e40}', '\u{e46}'), ('\u{e81}', '\u{e82}'),
-        ('\u{e84}', '\u{e84}'), ('\u{e87}', '\u{e88}'), ('\u{e8a}', '\u{e8a}'),
-        ('\u{e8d}', '\u{e8d}'), ('\u{e94}', '\u{e97}'), ('\u{e99}', '\u{e9f}'),
-        ('\u{ea1}', '\u{ea3}'), ('\u{ea5}', '\u{ea5}'), ('\u{ea7}', '\u{ea7}'),
-        ('\u{eaa}', '\u{eab}'), ('\u{ead}', '\u{eb0}'), ('\u{eb2}', '\u{eb2}'),
-        ('\u{ebd}', '\u{ebd}'), ('\u{ec0}', '\u{ec4}'), ('\u{ec6}', '\u{ec6}'),
-        ('\u{edc}', '\u{edf}'), ('\u{f00}', '\u{f00}'), ('\u{f40}', '\u{f47}'),
-        ('\u{f49}', '\u{f6c}'), ('\u{f88}', '\u{f8c}'), ('\u{1000}',
-        '\u{102a}'), ('\u{103f}', '\u{103f}'), ('\u{1050}', '\u{1055}'),
-        ('\u{105a}', '\u{105d}'), ('\u{1061}', '\u{1061}'), ('\u{1065}',
-        '\u{1066}'), ('\u{106e}', '\u{1070}'), ('\u{1075}', '\u{1081}'),
-        ('\u{108e}', '\u{108e}'), ('\u{10a0}', '\u{10c5}'), ('\u{10c7}',
-        '\u{10c7}'), ('\u{10cd}', '\u{10cd}'), ('\u{10d0}', '\u{10fa}'),
-        ('\u{10fc}', '\u{1248}'), ('\u{124a}', '\u{124d}'), ('\u{1250}',
-        '\u{1256}'), ('\u{1258}', '\u{1258}'), ('\u{125a}', '\u{125d}'),
-        ('\u{1260}', '\u{1288}'), ('\u{128a}', '\u{128d}'), ('\u{1290}',
-        '\u{12b0}'), ('\u{12b2}', '\u{12b5}'), ('\u{12b8}', '\u{12be}'),
-        ('\u{12c0}', '\u{12c0}'), ('\u{12c2}', '\u{12c5}'), ('\u{12c8}',
-        '\u{12d6}'), ('\u{12d8}', '\u{1310}'), ('\u{1312}', '\u{1315}'),
-        ('\u{1318}', '\u{135a}'), ('\u{1380}', '\u{138f}'), ('\u{13a0}',
-        '\u{13f5}'), ('\u{13f8}', '\u{13fd}'), ('\u{1401}', '\u{166c}'),
-        ('\u{166f}', '\u{167f}'), ('\u{1681}', '\u{169a}'), ('\u{16a0}',
-        '\u{16ea}'), ('\u{16ee}', '\u{16f8}'), ('\u{1700}', '\u{170c}'),
-        ('\u{170e}', '\u{1711}'), ('\u{1720}', '\u{1731}'), ('\u{1740}',
-        '\u{1751}'), ('\u{1760}', '\u{176c}'), ('\u{176e}', '\u{1770}'),
-        ('\u{1780}', '\u{17b3}'), ('\u{17d7}', '\u{17d7}'), ('\u{17dc}',
-        '\u{17dc}'), ('\u{1820}', '\u{1877}'), ('\u{1880}', '\u{18a8}'),
-        ('\u{18aa}', '\u{18aa}'), ('\u{18b0}', '\u{18f5}'), ('\u{1900}',
-        '\u{191e}'), ('\u{1950}', '\u{196d}'), ('\u{1970}', '\u{1974}'),
-        ('\u{1980}', '\u{19ab}'), ('\u{19b0}', '\u{19c9}'), ('\u{1a00}',
-        '\u{1a16}'), ('\u{1a20}', '\u{1a54}'), ('\u{1aa7}', '\u{1aa7}'),
-        ('\u{1b05}', '\u{1b33}'), ('\u{1b45}', '\u{1b4b}'), ('\u{1b83}',
-        '\u{1ba0}'), ('\u{1bae}', '\u{1baf}'), ('\u{1bba}', '\u{1be5}'),
-        ('\u{1c00}', '\u{1c23}'), ('\u{1c4d}', '\u{1c4f}'), ('\u{1c5a}',
-        '\u{1c7d}'), ('\u{1ce9}', '\u{1cec}'), ('\u{1cee}', '\u{1cf1}'),
-        ('\u{1cf5}', '\u{1cf6}'), ('\u{1d00}', '\u{1dbf}'), ('\u{1e00}',
-        '\u{1f15}'), ('\u{1f18}', '\u{1f1d}'), ('\u{1f20}', '\u{1f45}'),
-        ('\u{1f48}', '\u{1f4d}'), ('\u{1f50}', '\u{1f57}'), ('\u{1f59}',
-        '\u{1f59}'), ('\u{1f5b}', '\u{1f5b}'), ('\u{1f5d}', '\u{1f5d}'),
-        ('\u{1f5f}', '\u{1f7d}'), ('\u{1f80}', '\u{1fb4}'), ('\u{1fb6}',
-        '\u{1fbc}'), ('\u{1fbe}', '\u{1fbe}'), ('\u{1fc2}', '\u{1fc4}'),
-        ('\u{1fc6}', '\u{1fcc}'), ('\u{1fd0}', '\u{1fd3}'), ('\u{1fd6}',
-        '\u{1fdb}'), ('\u{1fe0}', '\u{1fec}'), ('\u{1ff2}', '\u{1ff4}'),
-        ('\u{1ff6}', '\u{1ffc}'), ('\u{2071}', '\u{2071}'), ('\u{207f}',
-        '\u{207f}'), ('\u{2090}', '\u{209c}'), ('\u{2102}', '\u{2102}'),
-        ('\u{2107}', '\u{2107}'), ('\u{210a}', '\u{2113}'), ('\u{2115}',
-        '\u{2115}'), ('\u{2118}', '\u{211d}'), ('\u{2124}', '\u{2124}'),
-        ('\u{2126}', '\u{2126}'), ('\u{2128}', '\u{2128}'), ('\u{212a}',
-        '\u{2139}'), ('\u{213c}', '\u{213f}'), ('\u{2145}', '\u{2149}'),
-        ('\u{214e}', '\u{214e}'), ('\u{2160}', '\u{2188}'), ('\u{2c00}',
-        '\u{2c2e}'), ('\u{2c30}', '\u{2c5e}'), ('\u{2c60}', '\u{2ce4}'),
-        ('\u{2ceb}', '\u{2cee}'), ('\u{2cf2}', '\u{2cf3}'), ('\u{2d00}',
-        '\u{2d25}'), ('\u{2d27}', '\u{2d27}'), ('\u{2d2d}', '\u{2d2d}'),
-        ('\u{2d30}', '\u{2d67}'), ('\u{2d6f}', '\u{2d6f}'), ('\u{2d80}',
-        '\u{2d96}'), ('\u{2da0}', '\u{2da6}'), ('\u{2da8}', '\u{2dae}'),
-        ('\u{2db0}', '\u{2db6}'), ('\u{2db8}', '\u{2dbe}'), ('\u{2dc0}',
-        '\u{2dc6}'), ('\u{2dc8}', '\u{2dce}'), ('\u{2dd0}', '\u{2dd6}'),
-        ('\u{2dd8}', '\u{2dde}'), ('\u{3005}', '\u{3007}'), ('\u{3021}',
-        '\u{3029}'), ('\u{3031}', '\u{3035}'), ('\u{3038}', '\u{303c}'),
-        ('\u{3041}', '\u{3096}'), ('\u{309d}', '\u{309f}'), ('\u{30a1}',
-        '\u{30fa}'), ('\u{30fc}', '\u{30ff}'), ('\u{3105}', '\u{312d}'),
-        ('\u{3131}', '\u{318e}'), ('\u{31a0}', '\u{31ba}'), ('\u{31f0}',
-        '\u{31ff}'), ('\u{3400}', '\u{4db5}'), ('\u{4e00}', '\u{9fd5}'),
-        ('\u{a000}', '\u{a48c}'), ('\u{a4d0}', '\u{a4fd}'), ('\u{a500}',
-        '\u{a60c}'), ('\u{a610}', '\u{a61f}'), ('\u{a62a}', '\u{a62b}'),
-        ('\u{a640}', '\u{a66e}'), ('\u{a67f}', '\u{a69d}'), ('\u{a6a0}',
-        '\u{a6ef}'), ('\u{a717}', '\u{a71f}'), ('\u{a722}', '\u{a788}'),
-        ('\u{a78b}', '\u{a7ad}'), ('\u{a7b0}', '\u{a7b7}'), ('\u{a7f7}',
-        '\u{a801}'), ('\u{a803}', '\u{a805}'), ('\u{a807}', '\u{a80a}'),
-        ('\u{a80c}', '\u{a822}'), ('\u{a840}', '\u{a873}'), ('\u{a882}',
-        '\u{a8b3}'), ('\u{a8f2}', '\u{a8f7}'), ('\u{a8fb}', '\u{a8fb}'),
-        ('\u{a8fd}', '\u{a8fd}'), ('\u{a90a}', '\u{a925}'), ('\u{a930}',
-        '\u{a946}'), ('\u{a960}', '\u{a97c}'), ('\u{a984}', '\u{a9b2}'),
-        ('\u{a9cf}', '\u{a9cf}'), ('\u{a9e0}', '\u{a9e4}'), ('\u{a9e6}',
-        '\u{a9ef}'), ('\u{a9fa}', '\u{a9fe}'), ('\u{aa00}', '\u{aa28}'),
-        ('\u{aa40}', '\u{aa42}'), ('\u{aa44}', '\u{aa4b}'), ('\u{aa60}',
-        '\u{aa76}'), ('\u{aa7a}', '\u{aa7a}'), ('\u{aa7e}', '\u{aaaf}'),
-        ('\u{aab1}', '\u{aab1}'), ('\u{aab5}', '\u{aab6}'), ('\u{aab9}',
-        '\u{aabd}'), ('\u{aac0}', '\u{aac0}'), ('\u{aac2}', '\u{aac2}'),
-        ('\u{aadb}', '\u{aadd}'), ('\u{aae0}', '\u{aaea}'), ('\u{aaf2}',
-        '\u{aaf4}'), ('\u{ab01}', '\u{ab06}'), ('\u{ab09}', '\u{ab0e}'),
-        ('\u{ab11}', '\u{ab16}'), ('\u{ab20}', '\u{ab26}'), ('\u{ab28}',
-        '\u{ab2e}'), ('\u{ab30}', '\u{ab5a}'), ('\u{ab5c}', '\u{ab65}'),
-        ('\u{ab70}', '\u{abe2}'), ('\u{ac00}', '\u{d7a3}'), ('\u{d7b0}',
-        '\u{d7c6}'), ('\u{d7cb}', '\u{d7fb}'), ('\u{f900}', '\u{fa6d}'),
-        ('\u{fa70}', '\u{fad9}'), ('\u{fb00}', '\u{fb06}'), ('\u{fb13}',
-        '\u{fb17}'), ('\u{fb1d}', '\u{fb1d}'), ('\u{fb1f}', '\u{fb28}'),
-        ('\u{fb2a}', '\u{fb36}'), ('\u{fb38}', '\u{fb3c}'), ('\u{fb3e}',
-        '\u{fb3e}'), ('\u{fb40}', '\u{fb41}'), ('\u{fb43}', '\u{fb44}'),
-        ('\u{fb46}', '\u{fbb1}'), ('\u{fbd3}', '\u{fc5d}'), ('\u{fc64}',
-        '\u{fd3d}'), ('\u{fd50}', '\u{fd8f}'), ('\u{fd92}', '\u{fdc7}'),
-        ('\u{fdf0}', '\u{fdf9}'), ('\u{fe71}', '\u{fe71}'), ('\u{fe73}',
-        '\u{fe73}'), ('\u{fe77}', '\u{fe77}'), ('\u{fe79}', '\u{fe79}'),
-        ('\u{fe7b}', '\u{fe7b}'), ('\u{fe7d}', '\u{fe7d}'), ('\u{fe7f}',
-        '\u{fefc}'), ('\u{ff21}', '\u{ff3a}'), ('\u{ff41}', '\u{ff5a}'),
-        ('\u{ff66}', '\u{ff9d}'), ('\u{ffa0}', '\u{ffbe}'), ('\u{ffc2}',
-        '\u{ffc7}'), ('\u{ffca}', '\u{ffcf}'), ('\u{ffd2}', '\u{ffd7}'),
-        ('\u{ffda}', '\u{ffdc}'), ('\u{10000}', '\u{1000b}'), ('\u{1000d}',
-        '\u{10026}'), ('\u{10028}', '\u{1003a}'), ('\u{1003c}', '\u{1003d}'),
-        ('\u{1003f}', '\u{1004d}'), ('\u{10050}', '\u{1005d}'), ('\u{10080}',
-        '\u{100fa}'), ('\u{10140}', '\u{10174}'), ('\u{10280}', '\u{1029c}'),
-        ('\u{102a0}', '\u{102d0}'), ('\u{10300}', '\u{1031f}'), ('\u{10330}',
-        '\u{1034a}'), ('\u{10350}', '\u{10375}'), ('\u{10380}', '\u{1039d}'),
-        ('\u{103a0}', '\u{103c3}'), ('\u{103c8}', '\u{103cf}'), ('\u{103d1}',
-        '\u{103d5}'), ('\u{10400}', '\u{1049d}'), ('\u{10500}', '\u{10527}'),
-        ('\u{10530}', '\u{10563}'), ('\u{10600}', '\u{10736}'), ('\u{10740}',
-        '\u{10755}'), ('\u{10760}', '\u{10767}'), ('\u{10800}', '\u{10805}'),
-        ('\u{10808}', '\u{10808}'), ('\u{1080a}', '\u{10835}'), ('\u{10837}',
-        '\u{10838}'), ('\u{1083c}', '\u{1083c}'), ('\u{1083f}', '\u{10855}'),
-        ('\u{10860}', '\u{10876}'), ('\u{10880}', '\u{1089e}'), ('\u{108e0}',
-        '\u{108f2}'), ('\u{108f4}', '\u{108f5}'), ('\u{10900}', '\u{10915}'),
-        ('\u{10920}', '\u{10939}'), ('\u{10980}', '\u{109b7}'), ('\u{109be}',
-        '\u{109bf}'), ('\u{10a00}', '\u{10a00}'), ('\u{10a10}', '\u{10a13}'),
-        ('\u{10a15}', '\u{10a17}'), ('\u{10a19}', '\u{10a33}'), ('\u{10a60}',
-        '\u{10a7c}'), ('\u{10a80}', '\u{10a9c}'), ('\u{10ac0}', '\u{10ac7}'),
-        ('\u{10ac9}', '\u{10ae4}'), ('\u{10b00}', '\u{10b35}'), ('\u{10b40}',
-        '\u{10b55}'), ('\u{10b60}', '\u{10b72}'), ('\u{10b80}', '\u{10b91}'),
-        ('\u{10c00}', '\u{10c48}'), ('\u{10c80}', '\u{10cb2}'), ('\u{10cc0}',
-        '\u{10cf2}'), ('\u{11003}', '\u{11037}'), ('\u{11083}', '\u{110af}'),
-        ('\u{110d0}', '\u{110e8}'), ('\u{11103}', '\u{11126}'), ('\u{11150}',
-        '\u{11172}'), ('\u{11176}', '\u{11176}'), ('\u{11183}', '\u{111b2}'),
-        ('\u{111c1}', '\u{111c4}'), ('\u{111da}', '\u{111da}'), ('\u{111dc}',
-        '\u{111dc}'), ('\u{11200}', '\u{11211}'), ('\u{11213}', '\u{1122b}'),
-        ('\u{11280}', '\u{11286}'), ('\u{11288}', '\u{11288}'), ('\u{1128a}',
-        '\u{1128d}'), ('\u{1128f}', '\u{1129d}'), ('\u{1129f}', '\u{112a8}'),
-        ('\u{112b0}', '\u{112de}'), ('\u{11305}', '\u{1130c}'), ('\u{1130f}',
-        '\u{11310}'), ('\u{11313}', '\u{11328}'), ('\u{1132a}', '\u{11330}'),
-        ('\u{11332}', '\u{11333}'), ('\u{11335}', '\u{11339}'), ('\u{1133d}',
-        '\u{1133d}'), ('\u{11350}', '\u{11350}'), ('\u{1135d}', '\u{11361}'),
-        ('\u{11480}', '\u{114af}'), ('\u{114c4}', '\u{114c5}'), ('\u{114c7}',
-        '\u{114c7}'), ('\u{11580}', '\u{115ae}'), ('\u{115d8}', '\u{115db}'),
-        ('\u{11600}', '\u{1162f}'), ('\u{11644}', '\u{11644}'), ('\u{11680}',
-        '\u{116aa}'), ('\u{11700}', '\u{11719}'), ('\u{118a0}', '\u{118df}'),
-        ('\u{118ff}', '\u{118ff}'), ('\u{11ac0}', '\u{11af8}'), ('\u{12000}',
-        '\u{12399}'), ('\u{12400}', '\u{1246e}'), ('\u{12480}', '\u{12543}'),
-        ('\u{13000}', '\u{1342e}'), ('\u{14400}', '\u{14646}'), ('\u{16800}',
-        '\u{16a38}'), ('\u{16a40}', '\u{16a5e}'), ('\u{16ad0}', '\u{16aed}'),
-        ('\u{16b00}', '\u{16b2f}'), ('\u{16b40}', '\u{16b43}'), ('\u{16b63}',
-        '\u{16b77}'), ('\u{16b7d}', '\u{16b8f}'), ('\u{16f00}', '\u{16f44}'),
-        ('\u{16f50}', '\u{16f50}'), ('\u{16f93}', '\u{16f9f}'), ('\u{1b000}',
-        '\u{1b001}'), ('\u{1bc00}', '\u{1bc6a}'), ('\u{1bc70}', '\u{1bc7c}'),
-        ('\u{1bc80}', '\u{1bc88}'), ('\u{1bc90}', '\u{1bc99}'), ('\u{1d400}',
-        '\u{1d454}'), ('\u{1d456}', '\u{1d49c}'), ('\u{1d49e}', '\u{1d49f}'),
-        ('\u{1d4a2}', '\u{1d4a2}'), ('\u{1d4a5}', '\u{1d4a6}'), ('\u{1d4a9}',
-        '\u{1d4ac}'), ('\u{1d4ae}', '\u{1d4b9}'), ('\u{1d4bb}', '\u{1d4bb}'),
-        ('\u{1d4bd}', '\u{1d4c3}'), ('\u{1d4c5}', '\u{1d505}'), ('\u{1d507}',
-        '\u{1d50a}'), ('\u{1d50d}', '\u{1d514}'), ('\u{1d516}', '\u{1d51c}'),
-        ('\u{1d51e}', '\u{1d539}'), ('\u{1d53b}', '\u{1d53e}'), ('\u{1d540}',
-        '\u{1d544}'), ('\u{1d546}', '\u{1d546}'), ('\u{1d54a}', '\u{1d550}'),
-        ('\u{1d552}', '\u{1d6a5}'), ('\u{1d6a8}', '\u{1d6c0}'), ('\u{1d6c2}',
-        '\u{1d6da}'), ('\u{1d6dc}', '\u{1d6fa}'), ('\u{1d6fc}', '\u{1d714}'),
-        ('\u{1d716}', '\u{1d734}'), ('\u{1d736}', '\u{1d74e}'), ('\u{1d750}',
-        '\u{1d76e}'), ('\u{1d770}', '\u{1d788}'), ('\u{1d78a}', '\u{1d7a8}'),
-        ('\u{1d7aa}', '\u{1d7c2}'), ('\u{1d7c4}', '\u{1d7cb}'), ('\u{1e800}',
-        '\u{1e8c4}'), ('\u{1ee00}', '\u{1ee03}'), ('\u{1ee05}', '\u{1ee1f}'),
-        ('\u{1ee21}', '\u{1ee22}'), ('\u{1ee24}', '\u{1ee24}'), ('\u{1ee27}',
-        '\u{1ee27}'), ('\u{1ee29}', '\u{1ee32}'), ('\u{1ee34}', '\u{1ee37}'),
-        ('\u{1ee39}', '\u{1ee39}'), ('\u{1ee3b}', '\u{1ee3b}'), ('\u{1ee42}',
-        '\u{1ee42}'), ('\u{1ee47}', '\u{1ee47}'), ('\u{1ee49}', '\u{1ee49}'),
-        ('\u{1ee4b}', '\u{1ee4b}'), ('\u{1ee4d}', '\u{1ee4f}'), ('\u{1ee51}',
-        '\u{1ee52}'), ('\u{1ee54}', '\u{1ee54}'), ('\u{1ee57}', '\u{1ee57}'),
-        ('\u{1ee59}', '\u{1ee59}'), ('\u{1ee5b}', '\u{1ee5b}'), ('\u{1ee5d}',
-        '\u{1ee5d}'), ('\u{1ee5f}', '\u{1ee5f}'), ('\u{1ee61}', '\u{1ee62}'),
-        ('\u{1ee64}', '\u{1ee64}'), ('\u{1ee67}', '\u{1ee6a}'), ('\u{1ee6c}',
-        '\u{1ee72}'), ('\u{1ee74}', '\u{1ee77}'), ('\u{1ee79}', '\u{1ee7c}'),
-        ('\u{1ee7e}', '\u{1ee7e}'), ('\u{1ee80}', '\u{1ee89}'), ('\u{1ee8b}',
-        '\u{1ee9b}'), ('\u{1eea1}', '\u{1eea3}'), ('\u{1eea5}', '\u{1eea9}'),
-        ('\u{1eeab}', '\u{1eebb}'), ('\u{20000}', '\u{2a6d6}'), ('\u{2a700}',
-        '\u{2b734}'), ('\u{2b740}', '\u{2b81d}'), ('\u{2b820}', '\u{2cea1}'),
-        ('\u{2f800}', '\u{2fa1d}')
-    ];
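+/// Normalizes a property name or value using UCD symbolic name
+/// normalization.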
+fn normalize(x: &str) -> String {
+    let mut x = x.to_string();
+    ucd_util::symbolic_name_normalize(&mut x);
+    x
+}
 
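+/// Returns the table of valid values for the given canonical property name,
+/// if one exists.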
+fn property_values(
+    canonical_property_name: &'static str,
+) -> Option<PropertyValues> {
+    ucd_util::property_values(PROPERTY_VALUES, canonical_property_name)
+}
+
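+/// Looks up the range table for `canonical` in a name-to-ranges map sorted
+/// by name.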
+fn property_set(
+    name_map: &'static [(&'static str, &'static [(char, char)])],
+    canonical: &'static str,
+) -> Option<&'static [(char, char)]> {
+    name_map
+        .binary_search_by_key(&canonical, |x| x.0)
+        .ok()
+        .map(|i| name_map[i].1)
 }
 
-pub mod script {
-    pub const Ahom_table: &'static [(char, char)] = &[
-        ('\u{11700}', '\u{11719}'), ('\u{1171d}', '\u{1172b}'), ('\u{11730}',
-        '\u{1173f}')
-    ];
-
-    pub const Anatolian_Hieroglyphs_table: &'static [(char, char)] = &[
-        ('\u{14400}', '\u{14646}')
-    ];
-
-    pub const Arabic_table: &'static [(char, char)] = &[
-        ('\u{600}', '\u{604}'), ('\u{606}', '\u{60b}'), ('\u{60d}', '\u{61a}'),
-        ('\u{61e}', '\u{61e}'), ('\u{620}', '\u{63f}'), ('\u{641}', '\u{64a}'),
-        ('\u{656}', '\u{66f}'), ('\u{671}', '\u{6dc}'), ('\u{6de}', '\u{6ff}'),
-        ('\u{750}', '\u{77f}'), ('\u{8a0}', '\u{8b4}'), ('\u{8e3}', '\u{8ff}'),
-        ('\u{fb50}', '\u{fbc1}'), ('\u{fbd3}', '\u{fd3d}'), ('\u{fd50}',
-        '\u{fd8f}'), ('\u{fd92}', '\u{fdc7}'), ('\u{fdf0}', '\u{fdfd}'),
-        ('\u{fe70}', '\u{fe74}'), ('\u{fe76}', '\u{fefc}'), ('\u{10e60}',
-        '\u{10e7e}'), ('\u{1ee00}', '\u{1ee03}'), ('\u{1ee05}', '\u{1ee1f}'),
-        ('\u{1ee21}', '\u{1ee22}'), ('\u{1ee24}', '\u{1ee24}'), ('\u{1ee27}',
-        '\u{1ee27}'), ('\u{1ee29}', '\u{1ee32}'), ('\u{1ee34}', '\u{1ee37}'),
-        ('\u{1ee39}', '\u{1ee39}'), ('\u{1ee3b}', '\u{1ee3b}'), ('\u{1ee42}',
-        '\u{1ee42}'), ('\u{1ee47}', '\u{1ee47}'), ('\u{1ee49}', '\u{1ee49}'),
-        ('\u{1ee4b}', '\u{1ee4b}'), ('\u{1ee4d}', '\u{1ee4f}'), ('\u{1ee51}',
-        '\u{1ee52}'), ('\u{1ee54}', '\u{1ee54}'), ('\u{1ee57}', '\u{1ee57}'),
-        ('\u{1ee59}', '\u{1ee59}'), ('\u{1ee5b}', '\u{1ee5b}'), ('\u{1ee5d}',
-        '\u{1ee5d}'), ('\u{1ee5f}', '\u{1ee5f}'), ('\u{1ee61}', '\u{1ee62}'),
-        ('\u{1ee64}', '\u{1ee64}'), ('\u{1ee67}', '\u{1ee6a}'), ('\u{1ee6c}',
-        '\u{1ee72}'), ('\u{1ee74}', '\u{1ee77}'), ('\u{1ee79}', '\u{1ee7c}'),
-        ('\u{1ee7e}', '\u{1ee7e}'), ('\u{1ee80}', '\u{1ee89}'), ('\u{1ee8b}',
-        '\u{1ee9b}'), ('\u{1eea1}', '\u{1eea3}'), ('\u{1eea5}', '\u{1eea9}'),
-        ('\u{1eeab}', '\u{1eebb}'), ('\u{1eef0}', '\u{1eef1}')
-    ];
-
-    pub const Armenian_table: &'static [(char, char)] = &[
-        ('\u{531}', '\u{556}'), ('\u{559}', '\u{55f}'), ('\u{561}', '\u{587}'),
-        ('\u{58a}', '\u{58a}'), ('\u{58d}', '\u{58f}'), ('\u{fb13}',
-        '\u{fb17}')
-    ];
-
-    pub const Avestan_table: &'static [(char, char)] = &[
-        ('\u{10b00}', '\u{10b35}'), ('\u{10b39}', '\u{10b3f}')
-    ];
-
-    pub const Balinese_table: &'static [(char, char)] = &[
-        ('\u{1b00}', '\u{1b4b}'), ('\u{1b50}', '\u{1b7c}')
-    ];
-
-    pub const Bamum_table: &'static [(char, char)] = &[
-        ('\u{a6a0}', '\u{a6f7}'), ('\u{16800}', '\u{16a38}')
-    ];
-
-    pub const Bassa_Vah_table: &'static [(char, char)] = &[
-        ('\u{16ad0}', '\u{16aed}'), ('\u{16af0}', '\u{16af5}')
-    ];
-
-    pub const Batak_table: &'static [(char, char)] = &[
-        ('\u{1bc0}', '\u{1bf3}'), ('\u{1bfc}', '\u{1bff}')
-    ];
-
-    pub const Bengali_table: &'static [(char, char)] = &[
-        ('\u{980}', '\u{983}'), ('\u{985}', '\u{98c}'), ('\u{98f}', '\u{990}'),
-        ('\u{993}', '\u{9a8}'), ('\u{9aa}', '\u{9b0}'), ('\u{9b2}', '\u{9b2}'),
-        ('\u{9b6}', '\u{9b9}'), ('\u{9bc}', '\u{9c4}'), ('\u{9c7}', '\u{9c8}'),
-        ('\u{9cb}', '\u{9ce}'), ('\u{9d7}', '\u{9d7}'), ('\u{9dc}', '\u{9dd}'),
-        ('\u{9df}', '\u{9e3}'), ('\u{9e6}', '\u{9fb}')
-    ];
-
-    pub const Bopomofo_table: &'static [(char, char)] = &[
-        ('\u{2ea}', '\u{2eb}'), ('\u{3105}', '\u{312d}'), ('\u{31a0}',
-        '\u{31ba}')
-    ];
-
-    pub const Brahmi_table: &'static [(char, char)] = &[
-        ('\u{11000}', '\u{1104d}'), ('\u{11052}', '\u{1106f}'), ('\u{1107f}',
-        '\u{1107f}')
-    ];
-
-    pub const Braille_table: &'static [(char, char)] = &[
-        ('\u{2800}', '\u{28ff}')
-    ];
-
-    pub const Buginese_table: &'static [(char, char)] = &[
-        ('\u{1a00}', '\u{1a1b}'), ('\u{1a1e}', '\u{1a1f}')
-    ];
-
-    pub const Buhid_table: &'static [(char, char)] = &[
-        ('\u{1740}', '\u{1753}')
-    ];
-
-    pub const Canadian_Aboriginal_table: &'static [(char, char)] = &[
-        ('\u{1400}', '\u{167f}'), ('\u{18b0}', '\u{18f5}')
-    ];
-
-    pub const Carian_table: &'static [(char, char)] = &[
-        ('\u{102a0}', '\u{102d0}')
-    ];
-
-    pub const Caucasian_Albanian_table: &'static [(char, char)] = &[
-        ('\u{10530}', '\u{10563}'), ('\u{1056f}', '\u{1056f}')
-    ];
-
-    pub const Chakma_table: &'static [(char, char)] = &[
-        ('\u{11100}', '\u{11134}'), ('\u{11136}', '\u{11143}')
-    ];
-
-    pub const Cham_table: &'static [(char, char)] = &[
-        ('\u{aa00}', '\u{aa36}'), ('\u{aa40}', '\u{aa4d}'), ('\u{aa50}',
-        '\u{aa59}'), ('\u{aa5c}', '\u{aa5f}')
-    ];
-
-    pub const Cherokee_table: &'static [(char, char)] = &[
-        ('\u{13a0}', '\u{13f5}'), ('\u{13f8}', '\u{13fd}'), ('\u{ab70}',
-        '\u{abbf}')
-    ];
-
-    pub const Common_table: &'static [(char, char)] = &[
-        ('\u{0}', '\u{40}'), ('\u{5b}', '\u{60}'), ('\u{7b}', '\u{a9}'),
-        ('\u{ab}', '\u{b9}'), ('\u{bb}', '\u{bf}'), ('\u{d7}', '\u{d7}'),
-        ('\u{f7}', '\u{f7}'), ('\u{2b9}', '\u{2df}'), ('\u{2e5}', '\u{2e9}'),
-        ('\u{2ec}', '\u{2ff}'), ('\u{374}', '\u{374}'), ('\u{37e}', '\u{37e}'),
-        ('\u{385}', '\u{385}'), ('\u{387}', '\u{387}'), ('\u{589}', '\u{589}'),
-        ('\u{605}', '\u{605}'), ('\u{60c}', '\u{60c}'), ('\u{61b}', '\u{61c}'),
-        ('\u{61f}', '\u{61f}'), ('\u{640}', '\u{640}'), ('\u{6dd}', '\u{6dd}'),
-        ('\u{964}', '\u{965}'), ('\u{e3f}', '\u{e3f}'), ('\u{fd5}', '\u{fd8}'),
-        ('\u{10fb}', '\u{10fb}'), ('\u{16eb}', '\u{16ed}'), ('\u{1735}',
-        '\u{1736}'), ('\u{1802}', '\u{1803}'), ('\u{1805}', '\u{1805}'),
-        ('\u{1cd3}', '\u{1cd3}'), ('\u{1ce1}', '\u{1ce1}'), ('\u{1ce9}',
-        '\u{1cec}'), ('\u{1cee}', '\u{1cf3}'), ('\u{1cf5}', '\u{1cf6}'),
-        ('\u{2000}', '\u{200b}'), ('\u{200e}', '\u{2064}'), ('\u{2066}',
-        '\u{2070}'), ('\u{2074}', '\u{207e}'), ('\u{2080}', '\u{208e}'),
-        ('\u{20a0}', '\u{20be}'), ('\u{2100}', '\u{2125}'), ('\u{2127}',
-        '\u{2129}'), ('\u{212c}', '\u{2131}'), ('\u{2133}', '\u{214d}'),
-        ('\u{214f}', '\u{215f}'), ('\u{2189}', '\u{218b}'), ('\u{2190}',
-        '\u{23fa}'), ('\u{2400}', '\u{2426}'), ('\u{2440}', '\u{244a}'),
-        ('\u{2460}', '\u{27ff}'), ('\u{2900}', '\u{2b73}'), ('\u{2b76}',
-        '\u{2b95}'), ('\u{2b98}', '\u{2bb9}'), ('\u{2bbd}', '\u{2bc8}'),
-        ('\u{2bca}', '\u{2bd1}'), ('\u{2bec}', '\u{2bef}'), ('\u{2e00}',
-        '\u{2e42}'), ('\u{2ff0}', '\u{2ffb}'), ('\u{3000}', '\u{3004}'),
-        ('\u{3006}', '\u{3006}'), ('\u{3008}', '\u{3020}'), ('\u{3030}',
-        '\u{3037}'), ('\u{303c}', '\u{303f}'), ('\u{309b}', '\u{309c}'),
-        ('\u{30a0}', '\u{30a0}'), ('\u{30fb}', '\u{30fc}'), ('\u{3190}',
-        '\u{319f}'), ('\u{31c0}', '\u{31e3}'), ('\u{3220}', '\u{325f}'),
-        ('\u{327f}', '\u{32cf}'), ('\u{3358}', '\u{33ff}'), ('\u{4dc0}',
-        '\u{4dff}'), ('\u{a700}', '\u{a721}'), ('\u{a788}', '\u{a78a}'),
-        ('\u{a830}', '\u{a839}'), ('\u{a92e}', '\u{a92e}'), ('\u{a9cf}',
-        '\u{a9cf}'), ('\u{ab5b}', '\u{ab5b}'), ('\u{fd3e}', '\u{fd3f}'),
-        ('\u{fe10}', '\u{fe19}'), ('\u{fe30}', '\u{fe52}'), ('\u{fe54}',
-        '\u{fe66}'), ('\u{fe68}', '\u{fe6b}'), ('\u{feff}', '\u{feff}'),
-        ('\u{ff01}', '\u{ff20}'), ('\u{ff3b}', '\u{ff40}'), ('\u{ff5b}',
-        '\u{ff65}'), ('\u{ff70}', '\u{ff70}'), ('\u{ff9e}', '\u{ff9f}'),
-        ('\u{ffe0}', '\u{ffe6}'), ('\u{ffe8}', '\u{ffee}'), ('\u{fff9}',
-        '\u{fffd}'), ('\u{10100}', '\u{10102}'), ('\u{10107}', '\u{10133}'),
-        ('\u{10137}', '\u{1013f}'), ('\u{10190}', '\u{1019b}'), ('\u{101d0}',
-        '\u{101fc}'), ('\u{102e1}', '\u{102fb}'), ('\u{1bca0}', '\u{1bca3}'),
-        ('\u{1d000}', '\u{1d0f5}'), ('\u{1d100}', '\u{1d126}'), ('\u{1d129}',
-        '\u{1d166}'), ('\u{1d16a}', '\u{1d17a}'), ('\u{1d183}', '\u{1d184}'),
-        ('\u{1d18c}', '\u{1d1a9}'), ('\u{1d1ae}', '\u{1d1e8}'), ('\u{1d300}',
-        '\u{1d356}'), ('\u{1d360}', '\u{1d371}'), ('\u{1d400}', '\u{1d454}'),
-        ('\u{1d456}', '\u{1d49c}'), ('\u{1d49e}', '\u{1d49f}'), ('\u{1d4a2}',
-        '\u{1d4a2}'), ('\u{1d4a5}', '\u{1d4a6}'), ('\u{1d4a9}', '\u{1d4ac}'),
-        ('\u{1d4ae}', '\u{1d4b9}'), ('\u{1d4bb}', '\u{1d4bb}'), ('\u{1d4bd}',
-        '\u{1d4c3}'), ('\u{1d4c5}', '\u{1d505}'), ('\u{1d507}', '\u{1d50a}'),
-        ('\u{1d50d}', '\u{1d514}'), ('\u{1d516}', '\u{1d51c}'), ('\u{1d51e}',
-        '\u{1d539}'), ('\u{1d53b}', '\u{1d53e}'), ('\u{1d540}', '\u{1d544}'),
-        ('\u{1d546}', '\u{1d546}'), ('\u{1d54a}', '\u{1d550}'), ('\u{1d552}',
-        '\u{1d6a5}'), ('\u{1d6a8}', '\u{1d7cb}'), ('\u{1d7ce}', '\u{1d7ff}'),
-        ('\u{1f000}', '\u{1f02b}'), ('\u{1f030}', '\u{1f093}'), ('\u{1f0a0}',
-        '\u{1f0ae}'), ('\u{1f0b1}', '\u{1f0bf}'), ('\u{1f0c1}', '\u{1f0cf}'),
-        ('\u{1f0d1}', '\u{1f0f5}'), ('\u{1f100}', '\u{1f10c}'), ('\u{1f110}',
-        '\u{1f12e}'), ('\u{1f130}', '\u{1f16b}'), ('\u{1f170}', '\u{1f19a}'),
-        ('\u{1f1e6}', '\u{1f1ff}'), ('\u{1f201}', '\u{1f202}'), ('\u{1f210}',
-        '\u{1f23a}'), ('\u{1f240}', '\u{1f248}'), ('\u{1f250}', '\u{1f251}'),
-        ('\u{1f300}', '\u{1f579}'), ('\u{1f57b}', '\u{1f5a3}'), ('\u{1f5a5}',
-        '\u{1f6d0}'), ('\u{1f6e0}', '\u{1f6ec}'), ('\u{1f6f0}', '\u{1f6f3}'),
-        ('\u{1f700}', '\u{1f773}'), ('\u{1f780}', '\u{1f7d4}'), ('\u{1f800}',
-        '\u{1f80b}'), ('\u{1f810}', '\u{1f847}'), ('\u{1f850}', '\u{1f859}'),
-        ('\u{1f860}', '\u{1f887}'), ('\u{1f890}', '\u{1f8ad}'), ('\u{1f910}',
-        '\u{1f918}'), ('\u{1f980}', '\u{1f984}'), ('\u{1f9c0}', '\u{1f9c0}'),
-        ('\u{e0001}', '\u{e0001}'), ('\u{e0020}', '\u{e007f}')
-    ];
-
-    pub const Coptic_table: &'static [(char, char)] = &[
-        ('\u{3e2}', '\u{3ef}'), ('\u{2c80}', '\u{2cf3}'), ('\u{2cf9}',
-        '\u{2cff}')
-    ];
-
-    pub const Cuneiform_table: &'static [(char, char)] = &[
-        ('\u{12000}', '\u{12399}'), ('\u{12400}', '\u{1246e}'), ('\u{12470}',
-        '\u{12474}'), ('\u{12480}', '\u{12543}')
-    ];
-
-    pub const Cypriot_table: &'static [(char, char)] = &[
-        ('\u{10800}', '\u{10805}'), ('\u{10808}', '\u{10808}'), ('\u{1080a}',
-        '\u{10835}'), ('\u{10837}', '\u{10838}'), ('\u{1083c}', '\u{1083c}'),
-        ('\u{1083f}', '\u{1083f}')
-    ];
-
-    pub const Cyrillic_table: &'static [(char, char)] = &[
-        ('\u{400}', '\u{484}'), ('\u{487}', '\u{52f}'), ('\u{1d2b}',
-        '\u{1d2b}'), ('\u{1d78}', '\u{1d78}'), ('\u{2de0}', '\u{2dff}'),
-        ('\u{a640}', '\u{a69f}'), ('\u{fe2e}', '\u{fe2f}')
-    ];
-
-    pub const Deseret_table: &'static [(char, char)] = &[
-        ('\u{10400}', '\u{1044f}')
-    ];
-
-    pub const Devanagari_table: &'static [(char, char)] = &[
-        ('\u{900}', '\u{950}'), ('\u{953}', '\u{963}'), ('\u{966}', '\u{97f}'),
-        ('\u{a8e0}', '\u{a8fd}')
-    ];
-
-    pub const Duployan_table: &'static [(char, char)] = &[
-        ('\u{1bc00}', '\u{1bc6a}'), ('\u{1bc70}', '\u{1bc7c}'), ('\u{1bc80}',
-        '\u{1bc88}'), ('\u{1bc90}', '\u{1bc99}'), ('\u{1bc9c}', '\u{1bc9f}')
-    ];
-
-    pub const Egyptian_Hieroglyphs_table: &'static [(char, char)] = &[
-        ('\u{13000}', '\u{1342e}')
-    ];
-
-    pub const Elbasan_table: &'static [(char, char)] = &[
-        ('\u{10500}', '\u{10527}')
-    ];
-
-    pub const Ethiopic_table: &'static [(char, char)] = &[
-        ('\u{1200}', '\u{1248}'), ('\u{124a}', '\u{124d}'), ('\u{1250}',
-        '\u{1256}'), ('\u{1258}', '\u{1258}'), ('\u{125a}', '\u{125d}'),
-        ('\u{1260}', '\u{1288}'), ('\u{128a}', '\u{128d}'), ('\u{1290}',
-        '\u{12b0}'), ('\u{12b2}', '\u{12b5}'), ('\u{12b8}', '\u{12be}'),
-        ('\u{12c0}', '\u{12c0}'), ('\u{12c2}', '\u{12c5}'), ('\u{12c8}',
-        '\u{12d6}'), ('\u{12d8}', '\u{1310}'), ('\u{1312}', '\u{1315}'),
-        ('\u{1318}', '\u{135a}'), ('\u{135d}', '\u{137c}'), ('\u{1380}',
-        '\u{1399}'), ('\u{2d80}', '\u{2d96}'), ('\u{2da0}', '\u{2da6}'),
-        ('\u{2da8}', '\u{2dae}'), ('\u{2db0}', '\u{2db6}'), ('\u{2db8}',
-        '\u{2dbe}'), ('\u{2dc0}', '\u{2dc6}'), ('\u{2dc8}', '\u{2dce}'),
-        ('\u{2dd0}', '\u{2dd6}'), ('\u{2dd8}', '\u{2dde}'), ('\u{ab01}',
-        '\u{ab06}'), ('\u{ab09}', '\u{ab0e}'), ('\u{ab11}', '\u{ab16}'),
-        ('\u{ab20}', '\u{ab26}'), ('\u{ab28}', '\u{ab2e}')
-    ];
-
-    pub const Georgian_table: &'static [(char, char)] = &[
-        ('\u{10a0}', '\u{10c5}'), ('\u{10c7}', '\u{10c7}'), ('\u{10cd}',
-        '\u{10cd}'), ('\u{10d0}', '\u{10fa}'), ('\u{10fc}', '\u{10ff}'),
-        ('\u{2d00}', '\u{2d25}'), ('\u{2d27}', '\u{2d27}'), ('\u{2d2d}',
-        '\u{2d2d}')
-    ];
-
-    pub const Glagolitic_table: &'static [(char, char)] = &[
-        ('\u{2c00}', '\u{2c2e}'), ('\u{2c30}', '\u{2c5e}')
-    ];
-
-    pub const Gothic_table: &'static [(char, char)] = &[
-        ('\u{10330}', '\u{1034a}')
-    ];
-
-    pub const Grantha_table: &'static [(char, char)] = &[
-        ('\u{11300}', '\u{11303}'), ('\u{11305}', '\u{1130c}'), ('\u{1130f}',
-        '\u{11310}'), ('\u{11313}', '\u{11328}'), ('\u{1132a}', '\u{11330}'),
-        ('\u{11332}', '\u{11333}'), ('\u{11335}', '\u{11339}'), ('\u{1133c}',
-        '\u{11344}'), ('\u{11347}', '\u{11348}'), ('\u{1134b}', '\u{1134d}'),
-        ('\u{11350}', '\u{11350}'), ('\u{11357}', '\u{11357}'), ('\u{1135d}',
-        '\u{11363}'), ('\u{11366}', '\u{1136c}'), ('\u{11370}', '\u{11374}')
-    ];
-
-    pub const Greek_table: &'static [(char, char)] = &[
-        ('\u{370}', '\u{373}'), ('\u{375}', '\u{377}'), ('\u{37a}', '\u{37d}'),
-        ('\u{37f}', '\u{37f}'), ('\u{384}', '\u{384}'), ('\u{386}', '\u{386}'),
-        ('\u{388}', '\u{38a}'), ('\u{38c}', '\u{38c}'), ('\u{38e}', '\u{3a1}'),
-        ('\u{3a3}', '\u{3e1}'), ('\u{3f0}', '\u{3ff}'), ('\u{1d26}',
-        '\u{1d2a}'), ('\u{1d5d}', '\u{1d61}'), ('\u{1d66}', '\u{1d6a}'),
-        ('\u{1dbf}', '\u{1dbf}'), ('\u{1f00}', '\u{1f15}'), ('\u{1f18}',
-        '\u{1f1d}'), ('\u{1f20}', '\u{1f45}'), ('\u{1f48}', '\u{1f4d}'),
-        ('\u{1f50}', '\u{1f57}'), ('\u{1f59}', '\u{1f59}'), ('\u{1f5b}',
-        '\u{1f5b}'), ('\u{1f5d}', '\u{1f5d}'), ('\u{1f5f}', '\u{1f7d}'),
-        ('\u{1f80}', '\u{1fb4}'), ('\u{1fb6}', '\u{1fc4}'), ('\u{1fc6}',
-        '\u{1fd3}'), ('\u{1fd6}', '\u{1fdb}'), ('\u{1fdd}', '\u{1fef}'),
-        ('\u{1ff2}', '\u{1ff4}'), ('\u{1ff6}', '\u{1ffe}'), ('\u{2126}',
-        '\u{2126}'), ('\u{ab65}', '\u{ab65}'), ('\u{10140}', '\u{1018c}'),
-        ('\u{101a0}', '\u{101a0}'), ('\u{1d200}', '\u{1d245}')
-    ];
-
-    pub const Gujarati_table: &'static [(char, char)] = &[
-        ('\u{a81}', '\u{a83}'), ('\u{a85}', '\u{a8d}'), ('\u{a8f}', '\u{a91}'),
-        ('\u{a93}', '\u{aa8}'), ('\u{aaa}', '\u{ab0}'), ('\u{ab2}', '\u{ab3}'),
-        ('\u{ab5}', '\u{ab9}'), ('\u{abc}', '\u{ac5}'), ('\u{ac7}', '\u{ac9}'),
-        ('\u{acb}', '\u{acd}'), ('\u{ad0}', '\u{ad0}'), ('\u{ae0}', '\u{ae3}'),
-        ('\u{ae6}', '\u{af1}'), ('\u{af9}', '\u{af9}')
-    ];
-
-    pub const Gurmukhi_table: &'static [(char, char)] = &[
-        ('\u{a01}', '\u{a03}'), ('\u{a05}', '\u{a0a}'), ('\u{a0f}', '\u{a10}'),
-        ('\u{a13}', '\u{a28}'), ('\u{a2a}', '\u{a30}'), ('\u{a32}', '\u{a33}'),
-        ('\u{a35}', '\u{a36}'), ('\u{a38}', '\u{a39}'), ('\u{a3c}', '\u{a3c}'),
-        ('\u{a3e}', '\u{a42}'), ('\u{a47}', '\u{a48}'), ('\u{a4b}', '\u{a4d}'),
-        ('\u{a51}', '\u{a51}'), ('\u{a59}', '\u{a5c}'), ('\u{a5e}', '\u{a5e}'),
-        ('\u{a66}', '\u{a75}')
-    ];
-
-    pub const Han_table: &'static [(char, char)] = &[
-        ('\u{2e80}', '\u{2e99}'), ('\u{2e9b}', '\u{2ef3}'), ('\u{2f00}',
-        '\u{2fd5}'), ('\u{3005}', '\u{3005}'), ('\u{3007}', '\u{3007}'),
-        ('\u{3021}', '\u{3029}'), ('\u{3038}', '\u{303b}'), ('\u{3400}',
-        '\u{4db5}'), ('\u{4e00}', '\u{9fd5}'), ('\u{f900}', '\u{fa6d}'),
-        ('\u{fa70}', '\u{fad9}'), ('\u{20000}', '\u{2a6d6}'), ('\u{2a700}',
-        '\u{2b734}'), ('\u{2b740}', '\u{2b81d}'), ('\u{2b820}', '\u{2cea1}'),
-        ('\u{2f800}', '\u{2fa1d}')
-    ];
-
-    pub const Hangul_table: &'static [(char, char)] = &[
-        ('\u{1100}', '\u{11ff}'), ('\u{302e}', '\u{302f}'), ('\u{3131}',
-        '\u{318e}'), ('\u{3200}', '\u{321e}'), ('\u{3260}', '\u{327e}'),
-        ('\u{a960}', '\u{a97c}'), ('\u{ac00}', '\u{d7a3}'), ('\u{d7b0}',
-        '\u{d7c6}'), ('\u{d7cb}', '\u{d7fb}'), ('\u{ffa0}', '\u{ffbe}'),
-        ('\u{ffc2}', '\u{ffc7}'), ('\u{ffca}', '\u{ffcf}'), ('\u{ffd2}',
-        '\u{ffd7}'), ('\u{ffda}', '\u{ffdc}')
-    ];
-
-    pub const Hanunoo_table: &'static [(char, char)] = &[
-        ('\u{1720}', '\u{1734}')
-    ];
-
-    pub const Hatran_table: &'static [(char, char)] = &[
-        ('\u{108e0}', '\u{108f2}'), ('\u{108f4}', '\u{108f5}'), ('\u{108fb}',
-        '\u{108ff}')
-    ];
-
-    pub const Hebrew_table: &'static [(char, char)] = &[
-        ('\u{591}', '\u{5c7}'), ('\u{5d0}', '\u{5ea}'), ('\u{5f0}', '\u{5f4}'),
-        ('\u{fb1d}', '\u{fb36}'), ('\u{fb38}', '\u{fb3c}'), ('\u{fb3e}',
-        '\u{fb3e}'), ('\u{fb40}', '\u{fb41}'), ('\u{fb43}', '\u{fb44}'),
-        ('\u{fb46}', '\u{fb4f}')
-    ];
-
-    pub const Hiragana_table: &'static [(char, char)] = &[
-        ('\u{3041}', '\u{3096}'), ('\u{309d}', '\u{309f}'), ('\u{1b001}',
-        '\u{1b001}'), ('\u{1f200}', '\u{1f200}')
-    ];
-
-    pub const Imperial_Aramaic_table: &'static [(char, char)] = &[
-        ('\u{10840}', '\u{10855}'), ('\u{10857}', '\u{1085f}')
-    ];
-
-    pub const Inherited_table: &'static [(char, char)] = &[
-        ('\u{300}', '\u{36f}'), ('\u{485}', '\u{486}'), ('\u{64b}', '\u{655}'),
-        ('\u{670}', '\u{670}'), ('\u{951}', '\u{952}'), ('\u{1ab0}',
-        '\u{1abe}'), ('\u{1cd0}', '\u{1cd2}'), ('\u{1cd4}', '\u{1ce0}'),
-        ('\u{1ce2}', '\u{1ce8}'), ('\u{1ced}', '\u{1ced}'), ('\u{1cf4}',
-        '\u{1cf4}'), ('\u{1cf8}', '\u{1cf9}'), ('\u{1dc0}', '\u{1df5}'),
-        ('\u{1dfc}', '\u{1dff}'), ('\u{200c}', '\u{200d}'), ('\u{20d0}',
-        '\u{20f0}'), ('\u{302a}', '\u{302d}'), ('\u{3099}', '\u{309a}'),
-        ('\u{fe00}', '\u{fe0f}'), ('\u{fe20}', '\u{fe2d}'), ('\u{101fd}',
-        '\u{101fd}'), ('\u{102e0}', '\u{102e0}'), ('\u{1d167}', '\u{1d169}'),
-        ('\u{1d17b}', '\u{1d182}'), ('\u{1d185}', '\u{1d18b}'), ('\u{1d1aa}',
-        '\u{1d1ad}'), ('\u{e0100}', '\u{e01ef}')
-    ];
-
-    pub const Inscriptional_Pahlavi_table: &'static [(char, char)] = &[
-        ('\u{10b60}', '\u{10b72}'), ('\u{10b78}', '\u{10b7f}')
-    ];
-
-    pub const Inscriptional_Parthian_table: &'static [(char, char)] = &[
-        ('\u{10b40}', '\u{10b55}'), ('\u{10b58}', '\u{10b5f}')
-    ];
-
-    pub const Javanese_table: &'static [(char, char)] = &[
-        ('\u{a980}', '\u{a9cd}'), ('\u{a9d0}', '\u{a9d9}'), ('\u{a9de}',
-        '\u{a9df}')
-    ];
-
-    pub const Kaithi_table: &'static [(char, char)] = &[
-        ('\u{11080}', '\u{110c1}')
-    ];
-
-    pub const Kannada_table: &'static [(char, char)] = &[
-        ('\u{c81}', '\u{c83}'), ('\u{c85}', '\u{c8c}'), ('\u{c8e}', '\u{c90}'),
-        ('\u{c92}', '\u{ca8}'), ('\u{caa}', '\u{cb3}'), ('\u{cb5}', '\u{cb9}'),
-        ('\u{cbc}', '\u{cc4}'), ('\u{cc6}', '\u{cc8}'), ('\u{cca}', '\u{ccd}'),
-        ('\u{cd5}', '\u{cd6}'), ('\u{cde}', '\u{cde}'), ('\u{ce0}', '\u{ce3}'),
-        ('\u{ce6}', '\u{cef}'), ('\u{cf1}', '\u{cf2}')
-    ];
-
-    pub const Katakana_table: &'static [(char, char)] = &[
-        ('\u{30a1}', '\u{30fa}'), ('\u{30fd}', '\u{30ff}'), ('\u{31f0}',
-        '\u{31ff}'), ('\u{32d0}', '\u{32fe}'), ('\u{3300}', '\u{3357}'),
-        ('\u{ff66}', '\u{ff6f}'), ('\u{ff71}', '\u{ff9d}'), ('\u{1b000}',
-        '\u{1b000}')
-    ];
-
-    pub const Kayah_Li_table: &'static [(char, char)] = &[
-        ('\u{a900}', '\u{a92d}'), ('\u{a92f}', '\u{a92f}')
-    ];
-
-    pub const Kharoshthi_table: &'static [(char, char)] = &[
-        ('\u{10a00}', '\u{10a03}'), ('\u{10a05}', '\u{10a06}'), ('\u{10a0c}',
-        '\u{10a13}'), ('\u{10a15}', '\u{10a17}'), ('\u{10a19}', '\u{10a33}'),
-        ('\u{10a38}', '\u{10a3a}'), ('\u{10a3f}', '\u{10a47}'), ('\u{10a50}',
-        '\u{10a58}')
-    ];
-
-    pub const Khmer_table: &'static [(char, char)] = &[
-        ('\u{1780}', '\u{17dd}'), ('\u{17e0}', '\u{17e9}'), ('\u{17f0}',
-        '\u{17f9}'), ('\u{19e0}', '\u{19ff}')
-    ];
-
-    pub const Khojki_table: &'static [(char, char)] = &[
-        ('\u{11200}', '\u{11211}'), ('\u{11213}', '\u{1123d}')
-    ];
-
-    pub const Khudawadi_table: &'static [(char, char)] = &[
-        ('\u{112b0}', '\u{112ea}'), ('\u{112f0}', '\u{112f9}')
-    ];
-
-    pub const Lao_table: &'static [(char, char)] = &[
-        ('\u{e81}', '\u{e82}'), ('\u{e84}', '\u{e84}'), ('\u{e87}', '\u{e88}'),
-        ('\u{e8a}', '\u{e8a}'), ('\u{e8d}', '\u{e8d}'), ('\u{e94}', '\u{e97}'),
-        ('\u{e99}', '\u{e9f}'), ('\u{ea1}', '\u{ea3}'), ('\u{ea5}', '\u{ea5}'),
-        ('\u{ea7}', '\u{ea7}'), ('\u{eaa}', '\u{eab}'), ('\u{ead}', '\u{eb9}'),
-        ('\u{ebb}', '\u{ebd}'), ('\u{ec0}', '\u{ec4}'), ('\u{ec6}', '\u{ec6}'),
-        ('\u{ec8}', '\u{ecd}'), ('\u{ed0}', '\u{ed9}'), ('\u{edc}', '\u{edf}')
-    ];
-
-    pub const Latin_table: &'static [(char, char)] = &[
-        ('\u{41}', '\u{5a}'), ('\u{61}', '\u{7a}'), ('\u{aa}', '\u{aa}'),
-        ('\u{ba}', '\u{ba}'), ('\u{c0}', '\u{d6}'), ('\u{d8}', '\u{f6}'),
-        ('\u{f8}', '\u{2b8}'), ('\u{2e0}', '\u{2e4}'), ('\u{1d00}', '\u{1d25}'),
-        ('\u{1d2c}', '\u{1d5c}'), ('\u{1d62}', '\u{1d65}'), ('\u{1d6b}',
-        '\u{1d77}'), ('\u{1d79}', '\u{1dbe}'), ('\u{1e00}', '\u{1eff}'),
-        ('\u{2071}', '\u{2071}'), ('\u{207f}', '\u{207f}'), ('\u{2090}',
-        '\u{209c}'), ('\u{212a}', '\u{212b}'), ('\u{2132}', '\u{2132}'),
-        ('\u{214e}', '\u{214e}'), ('\u{2160}', '\u{2188}'), ('\u{2c60}',
-        '\u{2c7f}'), ('\u{a722}', '\u{a787}'), ('\u{a78b}', '\u{a7ad}'),
-        ('\u{a7b0}', '\u{a7b7}'), ('\u{a7f7}', '\u{a7ff}'), ('\u{ab30}',
-        '\u{ab5a}'), ('\u{ab5c}', '\u{ab64}'), ('\u{fb00}', '\u{fb06}'),
-        ('\u{ff21}', '\u{ff3a}'), ('\u{ff41}', '\u{ff5a}')
-    ];
-
-    pub const Lepcha_table: &'static [(char, char)] = &[
-        ('\u{1c00}', '\u{1c37}'), ('\u{1c3b}', '\u{1c49}'), ('\u{1c4d}',
-        '\u{1c4f}')
-    ];
-
-    pub const Limbu_table: &'static [(char, char)] = &[
-        ('\u{1900}', '\u{191e}'), ('\u{1920}', '\u{192b}'), ('\u{1930}',
-        '\u{193b}'), ('\u{1940}', '\u{1940}'), ('\u{1944}', '\u{194f}')
-    ];
-
-    pub const Linear_A_table: &'static [(char, char)] = &[
-        ('\u{10600}', '\u{10736}'), ('\u{10740}', '\u{10755}'), ('\u{10760}',
-        '\u{10767}')
-    ];
-
-    pub const Linear_B_table: &'static [(char, char)] = &[
-        ('\u{10000}', '\u{1000b}'), ('\u{1000d}', '\u{10026}'), ('\u{10028}',
-        '\u{1003a}'), ('\u{1003c}', '\u{1003d}'), ('\u{1003f}', '\u{1004d}'),
-        ('\u{10050}', '\u{1005d}'), ('\u{10080}', '\u{100fa}')
-    ];
-
-    pub const Lisu_table: &'static [(char, char)] = &[
-        ('\u{a4d0}', '\u{a4ff}')
-    ];
-
-    pub const Lycian_table: &'static [(char, char)] = &[
-        ('\u{10280}', '\u{1029c}')
-    ];
-
-    pub const Lydian_table: &'static [(char, char)] = &[
-        ('\u{10920}', '\u{10939}'), ('\u{1093f}', '\u{1093f}')
-    ];
-
-    pub const Mahajani_table: &'static [(char, char)] = &[
-        ('\u{11150}', '\u{11176}')
-    ];
-
-    pub const Malayalam_table: &'static [(char, char)] = &[
-        ('\u{d01}', '\u{d03}'), ('\u{d05}', '\u{d0c}'), ('\u{d0e}', '\u{d10}'),
-        ('\u{d12}', '\u{d3a}'), ('\u{d3d}', '\u{d44}'), ('\u{d46}', '\u{d48}'),
-        ('\u{d4a}', '\u{d4e}'), ('\u{d57}', '\u{d57}'), ('\u{d5f}', '\u{d63}'),
-        ('\u{d66}', '\u{d75}'), ('\u{d79}', '\u{d7f}')
-    ];
-
-    pub const Mandaic_table: &'static [(char, char)] = &[
-        ('\u{840}', '\u{85b}'), ('\u{85e}', '\u{85e}')
-    ];
-
-    pub const Manichaean_table: &'static [(char, char)] = &[
-        ('\u{10ac0}', '\u{10ae6}'), ('\u{10aeb}', '\u{10af6}')
-    ];
-
-    pub const Meetei_Mayek_table: &'static [(char, char)] = &[
-        ('\u{aae0}', '\u{aaf6}'), ('\u{abc0}', '\u{abed}'), ('\u{abf0}',
-        '\u{abf9}')
-    ];
-
-    pub const Mende_Kikakui_table: &'static [(char, char)] = &[
-        ('\u{1e800}', '\u{1e8c4}'), ('\u{1e8c7}', '\u{1e8d6}')
-    ];
-
-    pub const Meroitic_Cursive_table: &'static [(char, char)] = &[
-        ('\u{109a0}', '\u{109b7}'), ('\u{109bc}', '\u{109cf}'), ('\u{109d2}',
-        '\u{109ff}')
-    ];
-
-    pub const Meroitic_Hieroglyphs_table: &'static [(char, char)] = &[
-        ('\u{10980}', '\u{1099f}')
-    ];
-
-    pub const Miao_table: &'static [(char, char)] = &[
-        ('\u{16f00}', '\u{16f44}'), ('\u{16f50}', '\u{16f7e}'), ('\u{16f8f}',
-        '\u{16f9f}')
-    ];
-
-    pub const Modi_table: &'static [(char, char)] = &[
-        ('\u{11600}', '\u{11644}'), ('\u{11650}', '\u{11659}')
-    ];
-
-    pub const Mongolian_table: &'static [(char, char)] = &[
-        ('\u{1800}', '\u{1801}'), ('\u{1804}', '\u{1804}'), ('\u{1806}',
-        '\u{180e}'), ('\u{1810}', '\u{1819}'), ('\u{1820}', '\u{1877}'),
-        ('\u{1880}', '\u{18aa}')
-    ];
-
-    pub const Mro_table: &'static [(char, char)] = &[
-        ('\u{16a40}', '\u{16a5e}'), ('\u{16a60}', '\u{16a69}'), ('\u{16a6e}',
-        '\u{16a6f}')
-    ];
-
-    pub const Multani_table: &'static [(char, char)] = &[
-        ('\u{11280}', '\u{11286}'), ('\u{11288}', '\u{11288}'), ('\u{1128a}',
-        '\u{1128d}'), ('\u{1128f}', '\u{1129d}'), ('\u{1129f}', '\u{112a9}')
-    ];
-
-    pub const Myanmar_table: &'static [(char, char)] = &[
-        ('\u{1000}', '\u{109f}'), ('\u{a9e0}', '\u{a9fe}'), ('\u{aa60}',
-        '\u{aa7f}')
-    ];
-
-    pub const Nabataean_table: &'static [(char, char)] = &[
-        ('\u{10880}', '\u{1089e}'), ('\u{108a7}', '\u{108af}')
-    ];
-
-    pub const New_Tai_Lue_table: &'static [(char, char)] = &[
-        ('\u{1980}', '\u{19ab}'), ('\u{19b0}', '\u{19c9}'), ('\u{19d0}',
-        '\u{19da}'), ('\u{19de}', '\u{19df}')
-    ];
-
-    pub const Nko_table: &'static [(char, char)] = &[
-        ('\u{7c0}', '\u{7fa}')
-    ];
-
-    pub const Ogham_table: &'static [(char, char)] = &[
-        ('\u{1680}', '\u{169c}')
-    ];
-
-    pub const Ol_Chiki_table: &'static [(char, char)] = &[
-        ('\u{1c50}', '\u{1c7f}')
-    ];
-
-    pub const Old_Hungarian_table: &'static [(char, char)] = &[
-        ('\u{10c80}', '\u{10cb2}'), ('\u{10cc0}', '\u{10cf2}'), ('\u{10cfa}',
-        '\u{10cff}')
-    ];
-
-    pub const Old_Italic_table: &'static [(char, char)] = &[
-        ('\u{10300}', '\u{10323}')
-    ];
-
-    pub const Old_North_Arabian_table: &'static [(char, char)] = &[
-        ('\u{10a80}', '\u{10a9f}')
-    ];
-
-    pub const Old_Permic_table: &'static [(char, char)] = &[
-        ('\u{10350}', '\u{1037a}')
-    ];
-
-    pub const Old_Persian_table: &'static [(char, char)] = &[
-        ('\u{103a0}', '\u{103c3}'), ('\u{103c8}', '\u{103d5}')
-    ];
-
-    pub const Old_South_Arabian_table: &'static [(char, char)] = &[
-        ('\u{10a60}', '\u{10a7f}')
-    ];
-
-    pub const Old_Turkic_table: &'static [(char, char)] = &[
-        ('\u{10c00}', '\u{10c48}')
-    ];
-
-    pub const Oriya_table: &'static [(char, char)] = &[
-        ('\u{b01}', '\u{b03}'), ('\u{b05}', '\u{b0c}'), ('\u{b0f}', '\u{b10}'),
-        ('\u{b13}', '\u{b28}'), ('\u{b2a}', '\u{b30}'), ('\u{b32}', '\u{b33}'),
-        ('\u{b35}', '\u{b39}'), ('\u{b3c}', '\u{b44}'), ('\u{b47}', '\u{b48}'),
-        ('\u{b4b}', '\u{b4d}'), ('\u{b56}', '\u{b57}'), ('\u{b5c}', '\u{b5d}'),
-        ('\u{b5f}', '\u{b63}'), ('\u{b66}', '\u{b77}')
-    ];
-
-    pub const Osmanya_table: &'static [(char, char)] = &[
-        ('\u{10480}', '\u{1049d}'), ('\u{104a0}', '\u{104a9}')
-    ];
-
-    pub const Pahawh_Hmong_table: &'static [(char, char)] = &[
-        ('\u{16b00}', '\u{16b45}'), ('\u{16b50}', '\u{16b59}'), ('\u{16b5b}',
-        '\u{16b61}'), ('\u{16b63}', '\u{16b77}'), ('\u{16b7d}', '\u{16b8f}')
-    ];
-
-    pub const Palmyrene_table: &'static [(char, char)] = &[
-        ('\u{10860}', '\u{1087f}')
-    ];
-
-    pub const Pau_Cin_Hau_table: &'static [(char, char)] = &[
-        ('\u{11ac0}', '\u{11af8}')
-    ];
-
-    pub const Phags_Pa_table: &'static [(char, char)] = &[
-        ('\u{a840}', '\u{a877}')
-    ];
-
-    pub const Phoenician_table: &'static [(char, char)] = &[
-        ('\u{10900}', '\u{1091b}'), ('\u{1091f}', '\u{1091f}')
-    ];
-
-    pub const Psalter_Pahlavi_table: &'static [(char, char)] = &[
-        ('\u{10b80}', '\u{10b91}'), ('\u{10b99}', '\u{10b9c}'), ('\u{10ba9}',
-        '\u{10baf}')
-    ];
-
-    pub const Rejang_table: &'static [(char, char)] = &[
-        ('\u{a930}', '\u{a953}'), ('\u{a95f}', '\u{a95f}')
-    ];
-
-    pub const Runic_table: &'static [(char, char)] = &[
-        ('\u{16a0}', '\u{16ea}'), ('\u{16ee}', '\u{16f8}')
-    ];
-
-    pub const Samaritan_table: &'static [(char, char)] = &[
-        ('\u{800}', '\u{82d}'), ('\u{830}', '\u{83e}')
-    ];
-
-    pub const Saurashtra_table: &'static [(char, char)] = &[
-        ('\u{a880}', '\u{a8c4}'), ('\u{a8ce}', '\u{a8d9}')
-    ];
-
-    pub const Sharada_table: &'static [(char, char)] = &[
-        ('\u{11180}', '\u{111cd}'), ('\u{111d0}', '\u{111df}')
-    ];
-
-    pub const Shavian_table: &'static [(char, char)] = &[
-        ('\u{10450}', '\u{1047f}')
-    ];
-
-    pub const Siddham_table: &'static [(char, char)] = &[
-        ('\u{11580}', '\u{115b5}'), ('\u{115b8}', '\u{115dd}')
-    ];
-
-    pub const SignWriting_table: &'static [(char, char)] = &[
-        ('\u{1d800}', '\u{1da8b}'), ('\u{1da9b}', '\u{1da9f}'), ('\u{1daa1}',
-        '\u{1daaf}')
-    ];
-
-    pub const Sinhala_table: &'static [(char, char)] = &[
-        ('\u{d82}', '\u{d83}'), ('\u{d85}', '\u{d96}'), ('\u{d9a}', '\u{db1}'),
-        ('\u{db3}', '\u{dbb}'), ('\u{dbd}', '\u{dbd}'), ('\u{dc0}', '\u{dc6}'),
-        ('\u{dca}', '\u{dca}'), ('\u{dcf}', '\u{dd4}'), ('\u{dd6}', '\u{dd6}'),
-        ('\u{dd8}', '\u{ddf}'), ('\u{de6}', '\u{def}'), ('\u{df2}', '\u{df4}'),
-        ('\u{111e1}', '\u{111f4}')
-    ];
-
-    pub const Sora_Sompeng_table: &'static [(char, char)] = &[
-        ('\u{110d0}', '\u{110e8}'), ('\u{110f0}', '\u{110f9}')
-    ];
-
-    pub const Sundanese_table: &'static [(char, char)] = &[
-        ('\u{1b80}', '\u{1bbf}'), ('\u{1cc0}', '\u{1cc7}')
-    ];
-
-    pub const Syloti_Nagri_table: &'static [(char, char)] = &[
-        ('\u{a800}', '\u{a82b}')
-    ];
-
-    pub const Syriac_table: &'static [(char, char)] = &[
-        ('\u{700}', '\u{70d}'), ('\u{70f}', '\u{74a}'), ('\u{74d}', '\u{74f}')
-    ];
-
-    pub const Tagalog_table: &'static [(char, char)] = &[
-        ('\u{1700}', '\u{170c}'), ('\u{170e}', '\u{1714}')
-    ];
-
-    pub const Tagbanwa_table: &'static [(char, char)] = &[
-        ('\u{1760}', '\u{176c}'), ('\u{176e}', '\u{1770}'), ('\u{1772}',
-        '\u{1773}')
-    ];
-
-    pub const Tai_Le_table: &'static [(char, char)] = &[
-        ('\u{1950}', '\u{196d}'), ('\u{1970}', '\u{1974}')
-    ];
-
-    pub const Tai_Tham_table: &'static [(char, char)] = &[
-        ('\u{1a20}', '\u{1a5e}'), ('\u{1a60}', '\u{1a7c}'), ('\u{1a7f}',
-        '\u{1a89}'), ('\u{1a90}', '\u{1a99}'), ('\u{1aa0}', '\u{1aad}')
-    ];
-
-    pub const Tai_Viet_table: &'static [(char, char)] = &[
-        ('\u{aa80}', '\u{aac2}'), ('\u{aadb}', '\u{aadf}')
-    ];
-
-    pub const Takri_table: &'static [(char, char)] = &[
-        ('\u{11680}', '\u{116b7}'), ('\u{116c0}', '\u{116c9}')
-    ];
-
-    pub const Tamil_table: &'static [(char, char)] = &[
-        ('\u{b82}', '\u{b83}'), ('\u{b85}', '\u{b8a}'), ('\u{b8e}', '\u{b90}'),
-        ('\u{b92}', '\u{b95}'), ('\u{b99}', '\u{b9a}'), ('\u{b9c}', '\u{b9c}'),
-        ('\u{b9e}', '\u{b9f}'), ('\u{ba3}', '\u{ba4}'), ('\u{ba8}', '\u{baa}'),
-        ('\u{bae}', '\u{bb9}'), ('\u{bbe}', '\u{bc2}'), ('\u{bc6}', '\u{bc8}'),
-        ('\u{bca}', '\u{bcd}'), ('\u{bd0}', '\u{bd0}'), ('\u{bd7}', '\u{bd7}'),
-        ('\u{be6}', '\u{bfa}')
-    ];
-
-    pub const Telugu_table: &'static [(char, char)] = &[
-        ('\u{c00}', '\u{c03}'), ('\u{c05}', '\u{c0c}'), ('\u{c0e}', '\u{c10}'),
-        ('\u{c12}', '\u{c28}'), ('\u{c2a}', '\u{c39}'), ('\u{c3d}', '\u{c44}'),
-        ('\u{c46}', '\u{c48}'), ('\u{c4a}', '\u{c4d}'), ('\u{c55}', '\u{c56}'),
-        ('\u{c58}', '\u{c5a}'), ('\u{c60}', '\u{c63}'), ('\u{c66}', '\u{c6f}'),
-        ('\u{c78}', '\u{c7f}')
-    ];
-
-    pub const Thaana_table: &'static [(char, char)] = &[
-        ('\u{780}', '\u{7b1}')
-    ];
-
-    pub const Thai_table: &'static [(char, char)] = &[
-        ('\u{e01}', '\u{e3a}'), ('\u{e40}', '\u{e5b}')
-    ];
-
-    pub const Tibetan_table: &'static [(char, char)] = &[
-        ('\u{f00}', '\u{f47}'), ('\u{f49}', '\u{f6c}'), ('\u{f71}', '\u{f97}'),
-        ('\u{f99}', '\u{fbc}'), ('\u{fbe}', '\u{fcc}'), ('\u{fce}', '\u{fd4}'),
-        ('\u{fd9}', '\u{fda}')
-    ];
-
-    pub const Tifinagh_table: &'static [(char, char)] = &[
-        ('\u{2d30}', '\u{2d67}'), ('\u{2d6f}', '\u{2d70}'), ('\u{2d7f}',
-        '\u{2d7f}')
-    ];
-
-    pub const Tirhuta_table: &'static [(char, char)] = &[
-        ('\u{11480}', '\u{114c7}'), ('\u{114d0}', '\u{114d9}')
-    ];
-
-    pub const Ugaritic_table: &'static [(char, char)] = &[
-        ('\u{10380}', '\u{1039d}'), ('\u{1039f}', '\u{1039f}')
-    ];
-
-    pub const Vai_table: &'static [(char, char)] = &[
-        ('\u{a500}', '\u{a62b}')
-    ];
-
-    pub const Warang_Citi_table: &'static [(char, char)] = &[
-        ('\u{118a0}', '\u{118f2}'), ('\u{118ff}', '\u{118ff}')
-    ];
-
-    pub const Yi_table: &'static [(char, char)] = &[
-        ('\u{a000}', '\u{a48c}'), ('\u{a490}', '\u{a4c6}')
-    ];
-
+/// An iterator over Unicode Age sets. Each item corresponds to a set of
+/// codepoints that were added in a particular revision of Unicode. The
+/// iterator yields items in chronological order.
+#[derive(Debug)]
+struct AgeIter {
+    ages: &'static [(&'static str, &'static [(char, char)])],
 }
 
-pub mod property {
-    pub const Join_Control_table: &'static [(char, char)] = &[
-        ('\u{200c}', '\u{200d}')
+fn ages(canonical_age: &str) -> Result<AgeIter> {
+    const AGES: &'static [(&'static str, &'static [(char, char)])] = &[
+        ("V1_1", age::V1_1),
+        ("V2_0", age::V2_0),
+        ("V2_1", age::V2_1),
+        ("V3_0", age::V3_0),
+        ("V3_1", age::V3_1),
+        ("V3_2", age::V3_2),
+        ("V4_0", age::V4_0),
+        ("V4_1", age::V4_1),
+        ("V5_0", age::V5_0),
+        ("V5_1", age::V5_1),
+        ("V5_2", age::V5_2),
+        ("V6_0", age::V6_0),
+        ("V6_1", age::V6_1),
+        ("V6_2", age::V6_2),
+        ("V6_3", age::V6_3),
+        ("V7_0", age::V7_0),
+        ("V8_0", age::V8_0),
+        ("V9_0", age::V9_0),
+        ("V10_0", age::V10_0),
     ];
+    assert_eq!(AGES.len(), age::BY_NAME.len(), "ages are out of sync");
 
-    pub const Noncharacter_Code_Point_table: &'static [(char, char)] = &[
-        ('\u{fdd0}', '\u{fdef}'), ('\u{fffe}', '\u{ffff}'), ('\u{1fffe}',
-        '\u{1ffff}'), ('\u{2fffe}', '\u{2ffff}'), ('\u{3fffe}', '\u{3ffff}'),
-        ('\u{4fffe}', '\u{4ffff}'), ('\u{5fffe}', '\u{5ffff}'), ('\u{6fffe}',
-        '\u{6ffff}'), ('\u{7fffe}', '\u{7ffff}'), ('\u{8fffe}', '\u{8ffff}'),
-        ('\u{9fffe}', '\u{9ffff}'), ('\u{afffe}', '\u{affff}'), ('\u{bfffe}',
-        '\u{bffff}'), ('\u{cfffe}', '\u{cffff}'), ('\u{dfffe}', '\u{dffff}'),
-        ('\u{efffe}', '\u{effff}'), ('\u{ffffe}', '\u{fffff}'), ('\u{10fffe}',
-        '\u{10ffff}')
-    ];
+    let pos = AGES.iter().position(|&(age, _)| canonical_age == age);
+    match pos {
+        None => Err(Error::PropertyValueNotFound),
+        Some(i) => Ok(AgeIter { ages: &AGES[..i+1] }),
+    }
+}
+
+impl Iterator for AgeIter {
+    type Item = &'static [(char, char)];
 
-    pub const White_Space_table: &'static [(char, char)] = &[
-        ('\u{9}', '\u{d}'), ('\u{20}', '\u{20}'), ('\u{85}', '\u{85}'),
-        ('\u{a0}', '\u{a0}'), ('\u{1680}', '\u{1680}'), ('\u{2000}',
-        '\u{200a}'), ('\u{2028}', '\u{2029}'), ('\u{202f}', '\u{202f}'),
-        ('\u{205f}', '\u{205f}'), ('\u{3000}', '\u{3000}')
-    ];
-
+    fn next(&mut self) -> Option<&'static [(char, char)]> {
+        if self.ages.is_empty() {
+            None
+        } else {
+            let set = self.ages[0];
+            self.ages = &self.ages[1..];
+            Some(set.1)
+        }
+    }
 }
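For orientation, a hedged sketch of how the new `ages`/`AgeIter` pair is meant to be consumed. This is not part of the vendored sources: the helper name `codepoints_up_to` is invented for illustration, and it assumes the crate-private `age::*` tables, the module's `Result` alias and the `Error::PropertyValueNotFound` variant exactly as they appear in this patch.

    // Sketch only (hypothetical helper, not in the patch): collect every
    // codepoint range assigned up to and including the requested Unicode
    // revision, e.g. codepoints_up_to("V5_2").
    fn codepoints_up_to(version: &str) -> Result<Vec<(char, char)>> {
        let mut ranges = Vec::new();
        // `ages(version)?` yields one table per revision, oldest first,
        // ending with the requested version; an unknown version string
        // surfaces as Error::PropertyValueNotFound instead of panicking.
        for table in ages(version)? {
            ranges.extend_from_slice(table);
        }
        Ok(ranges)
    }

Keeping AGES ordered oldest-to-newest and slicing at the matched index is what lets AgeIter remain a plain slice walk with no allocation.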
 
-pub mod regex {
-    pub const UNICODE_CLASSES: &'static [(&'static str, &'static [(char, char)])] = &[
-        ("Ahom", super::script::Ahom_table), ("Alphabetic",
-        super::derived_property::Alphabetic_table), ("Anatolian_Hieroglyphs",
-        super::script::Anatolian_Hieroglyphs_table), ("Arabic",
-        super::script::Arabic_table), ("Armenian",
-        super::script::Armenian_table), ("Avestan",
-        super::script::Avestan_table), ("Balinese",
-        super::script::Balinese_table), ("Bamum", super::script::Bamum_table),
-        ("Bassa_Vah", super::script::Bassa_Vah_table), ("Batak",
-        super::script::Batak_table), ("Bengali", super::script::Bengali_table),
-        ("Bopomofo", super::script::Bopomofo_table), ("Brahmi",
-        super::script::Brahmi_table), ("Braille", super::script::Braille_table),
-        ("Buginese", super::script::Buginese_table), ("Buhid",
-        super::script::Buhid_table), ("C", super::general_category::C_table),
-        ("Canadian_Aboriginal", super::script::Canadian_Aboriginal_table),
-        ("Carian", super::script::Carian_table), ("Caucasian_Albanian",
-        super::script::Caucasian_Albanian_table), ("Cc",
-        super::general_category::Cc_table), ("Cf",
-        super::general_category::Cf_table), ("Chakma",
-        super::script::Chakma_table), ("Cham", super::script::Cham_table),
-        ("Cherokee", super::script::Cherokee_table), ("Cn",
-        super::general_category::Cn_table), ("Co",
-        super::general_category::Co_table), ("Common",
-        super::script::Common_table), ("Coptic", super::script::Coptic_table),
-        ("Cuneiform", super::script::Cuneiform_table), ("Cypriot",
-        super::script::Cypriot_table), ("Cyrillic",
-        super::script::Cyrillic_table), ("Default_Ignorable_Code_Point",
-        super::derived_property::Default_Ignorable_Code_Point_table),
-        ("Deseret", super::script::Deseret_table), ("Devanagari",
-        super::script::Devanagari_table), ("Duployan",
-        super::script::Duployan_table), ("Egyptian_Hieroglyphs",
-        super::script::Egyptian_Hieroglyphs_table), ("Elbasan",
-        super::script::Elbasan_table), ("Ethiopic",
-        super::script::Ethiopic_table), ("Georgian",
-        super::script::Georgian_table), ("Glagolitic",
-        super::script::Glagolitic_table), ("Gothic",
-        super::script::Gothic_table), ("Grantha", super::script::Grantha_table),
-        ("Grapheme_Extend", super::derived_property::Grapheme_Extend_table),
-        ("Greek", super::script::Greek_table), ("Gujarati",
-        super::script::Gujarati_table), ("Gurmukhi",
-        super::script::Gurmukhi_table), ("Han", super::script::Han_table),
-        ("Hangul", super::script::Hangul_table), ("Hanunoo",
-        super::script::Hanunoo_table), ("Hatran", super::script::Hatran_table),
-        ("Hebrew", super::script::Hebrew_table), ("Hiragana",
-        super::script::Hiragana_table), ("Imperial_Aramaic",
-        super::script::Imperial_Aramaic_table), ("Inherited",
-        super::script::Inherited_table), ("Inscriptional_Pahlavi",
-        super::script::Inscriptional_Pahlavi_table), ("Inscriptional_Parthian",
-        super::script::Inscriptional_Parthian_table), ("Javanese",
-        super::script::Javanese_table), ("Join_Control",
-        super::property::Join_Control_table), ("Kaithi",
-        super::script::Kaithi_table), ("Kannada", super::script::Kannada_table),
-        ("Katakana", super::script::Katakana_table), ("Kayah_Li",
-        super::script::Kayah_Li_table), ("Kharoshthi",
-        super::script::Kharoshthi_table), ("Khmer", super::script::Khmer_table),
-        ("Khojki", super::script::Khojki_table), ("Khudawadi",
-        super::script::Khudawadi_table), ("L",
-        super::general_category::L_table), ("LC",
-        super::general_category::LC_table), ("Lao", super::script::Lao_table),
-        ("Latin", super::script::Latin_table), ("Lepcha",
-        super::script::Lepcha_table), ("Limbu", super::script::Limbu_table),
-        ("Linear_A", super::script::Linear_A_table), ("Linear_B",
-        super::script::Linear_B_table), ("Lisu", super::script::Lisu_table),
-        ("Ll", super::general_category::Ll_table), ("Lm",
-        super::general_category::Lm_table), ("Lo",
-        super::general_category::Lo_table), ("Lowercase",
-        super::derived_property::Lowercase_table), ("Lt",
-        super::general_category::Lt_table), ("Lu",
-        super::general_category::Lu_table), ("Lycian",
-        super::script::Lycian_table), ("Lydian", super::script::Lydian_table),
-        ("M", super::general_category::M_table), ("Mahajani",
-        super::script::Mahajani_table), ("Malayalam",
-        super::script::Malayalam_table), ("Mandaic",
-        super::script::Mandaic_table), ("Manichaean",
-        super::script::Manichaean_table), ("Mc",
-        super::general_category::Mc_table), ("Me",
-        super::general_category::Me_table), ("Meetei_Mayek",
-        super::script::Meetei_Mayek_table), ("Mende_Kikakui",
-        super::script::Mende_Kikakui_table), ("Meroitic_Cursive",
-        super::script::Meroitic_Cursive_table), ("Meroitic_Hieroglyphs",
-        super::script::Meroitic_Hieroglyphs_table), ("Miao",
-        super::script::Miao_table), ("Mn", super::general_category::Mn_table),
-        ("Modi", super::script::Modi_table), ("Mongolian",
-        super::script::Mongolian_table), ("Mro", super::script::Mro_table),
-        ("Multani", super::script::Multani_table), ("Myanmar",
-        super::script::Myanmar_table), ("N", super::general_category::N_table),
-        ("Nabataean", super::script::Nabataean_table), ("Nd",
-        super::general_category::Nd_table), ("New_Tai_Lue",
-        super::script::New_Tai_Lue_table), ("Nko", super::script::Nko_table),
-        ("Nl", super::general_category::Nl_table), ("No",
-        super::general_category::No_table), ("Noncharacter_Code_Point",
-        super::property::Noncharacter_Code_Point_table), ("Ogham",
-        super::script::Ogham_table), ("Ol_Chiki",
-        super::script::Ol_Chiki_table), ("Old_Hungarian",
-        super::script::Old_Hungarian_table), ("Old_Italic",
-        super::script::Old_Italic_table), ("Old_North_Arabian",
-        super::script::Old_North_Arabian_table), ("Old_Permic",
-        super::script::Old_Permic_table), ("Old_Persian",
-        super::script::Old_Persian_table), ("Old_South_Arabian",
-        super::script::Old_South_Arabian_table), ("Old_Turkic",
-        super::script::Old_Turkic_table), ("Oriya", super::script::Oriya_table),
-        ("Osmanya", super::script::Osmanya_table), ("P",
-        super::general_category::P_table), ("Pahawh_Hmong",
-        super::script::Pahawh_Hmong_table), ("Palmyrene",
-        super::script::Palmyrene_table), ("Pau_Cin_Hau",
-        super::script::Pau_Cin_Hau_table), ("Pc",
-        super::general_category::Pc_table), ("Pd",
-        super::general_category::Pd_table), ("Pe",
-        super::general_category::Pe_table), ("Pf",
-        super::general_category::Pf_table), ("Phags_Pa",
-        super::script::Phags_Pa_table), ("Phoenician",
-        super::script::Phoenician_table), ("Pi",
-        super::general_category::Pi_table), ("Po",
-        super::general_category::Po_table), ("Ps",
-        super::general_category::Ps_table), ("Psalter_Pahlavi",
-        super::script::Psalter_Pahlavi_table), ("Rejang",
-        super::script::Rejang_table), ("Runic", super::script::Runic_table),
-        ("S", super::general_category::S_table), ("Samaritan",
-        super::script::Samaritan_table), ("Saurashtra",
-        super::script::Saurashtra_table), ("Sc",
-        super::general_category::Sc_table), ("Sharada",
-        super::script::Sharada_table), ("Shavian",
-        super::script::Shavian_table), ("Siddham",
-        super::script::Siddham_table), ("SignWriting",
-        super::script::SignWriting_table), ("Sinhala",
-        super::script::Sinhala_table), ("Sk",
-        super::general_category::Sk_table), ("Sm",
-        super::general_category::Sm_table), ("So",
-        super::general_category::So_table), ("Sora_Sompeng",
-        super::script::Sora_Sompeng_table), ("Sundanese",
-        super::script::Sundanese_table), ("Syloti_Nagri",
-        super::script::Syloti_Nagri_table), ("Syriac",
-        super::script::Syriac_table), ("Tagalog", super::script::Tagalog_table),
-        ("Tagbanwa", super::script::Tagbanwa_table), ("Tai_Le",
-        super::script::Tai_Le_table), ("Tai_Tham",
-        super::script::Tai_Tham_table), ("Tai_Viet",
-        super::script::Tai_Viet_table), ("Takri", super::script::Takri_table),
-        ("Tamil", super::script::Tamil_table), ("Telugu",
-        super::script::Telugu_table), ("Thaana", super::script::Thaana_table),
-        ("Thai", super::script::Thai_table), ("Tibetan",
-        super::script::Tibetan_table), ("Tifinagh",
-        super::script::Tifinagh_table), ("Tirhuta",
-        super::script::Tirhuta_table), ("Ugaritic",
-        super::script::Ugaritic_table), ("Uppercase",
-        super::derived_property::Uppercase_table), ("Vai",
-        super::script::Vai_table), ("Warang_Citi",
-        super::script::Warang_Citi_table), ("White_Space",
-        super::property::White_Space_table), ("XID_Continue",
-        super::derived_property::XID_Continue_table), ("XID_Start",
-        super::derived_property::XID_Start_table), ("Yi",
-        super::script::Yi_table), ("Z", super::general_category::Z_table),
-        ("Zl", super::general_category::Zl_table), ("Zp",
-        super::general_category::Zp_table), ("Zs",
-        super::general_category::Zs_table)
-    ];
+#[cfg(test)]
+mod tests {
+    use super::{contains_simple_case_mapping, simple_fold};
 
-    pub const PERLD: &'static [(char, char)] = super::general_category::Nd_table;
+    #[test]
+    fn simple_fold_k() {
+        let xs: Vec<char> = simple_fold('k').unwrap().collect();
+        assert_eq!(xs, vec!['K', 'K']);
 
-    pub const PERLS: &'static [(char, char)] = super::property::White_Space_table;
+        let xs: Vec<char> = simple_fold('K').unwrap().collect();
+        assert_eq!(xs, vec!['k', 'K']);
 
-    pub const PERLW: &'static [(char, char)] = &[
-        ('\u{30}', '\u{39}'), ('\u{41}', '\u{5a}'), ('\u{5f}', '\u{5f}'),
-        ('\u{61}', '\u{7a}'), ('\u{aa}', '\u{aa}'), ('\u{b5}', '\u{b5}'),
-        ('\u{ba}', '\u{ba}'), ('\u{c0}', '\u{d6}'), ('\u{d8}', '\u{f6}'),
-        ('\u{f8}', '\u{2c1}'), ('\u{2c6}', '\u{2d1}'), ('\u{2e0}', '\u{2e4}'),
-        ('\u{2ec}', '\u{2ec}'), ('\u{2ee}', '\u{2ee}'), ('\u{300}', '\u{374}'),
-        ('\u{376}', '\u{377}'), ('\u{37a}', '\u{37d}'), ('\u{37f}', '\u{37f}'),
-        ('\u{386}', '\u{386}'), ('\u{388}', '\u{38a}'), ('\u{38c}', '\u{38c}'),
-        ('\u{38e}', '\u{3a1}'), ('\u{3a3}', '\u{3f5}'), ('\u{3f7}', '\u{481}'),
-        ('\u{483}', '\u{52f}'), ('\u{531}', '\u{556}'), ('\u{559}', '\u{559}'),
-        ('\u{561}', '\u{587}'), ('\u{591}', '\u{5bd}'), ('\u{5bf}', '\u{5bf}'),
-        ('\u{5c1}', '\u{5c2}'), ('\u{5c4}', '\u{5c5}'), ('\u{5c7}', '\u{5c7}'),
-        ('\u{5d0}', '\u{5ea}'), ('\u{5f0}', '\u{5f2}'), ('\u{610}', '\u{61a}'),
-        ('\u{620}', '\u{669}'), ('\u{66e}', '\u{6d3}'), ('\u{6d5}', '\u{6dc}'),
-        ('\u{6df}', '\u{6e8}'), ('\u{6ea}', '\u{6fc}'), ('\u{6ff}', '\u{6ff}'),
-        ('\u{710}', '\u{74a}'), ('\u{74d}', '\u{7b1}'), ('\u{7c0}', '\u{7f5}'),
-        ('\u{7fa}', '\u{7fa}'), ('\u{800}', '\u{82d}'), ('\u{840}', '\u{85b}'),
-        ('\u{8a0}', '\u{8b4}'), ('\u{8e3}', '\u{963}'), ('\u{966}', '\u{96f}'),
-        ('\u{971}', '\u{983}'), ('\u{985}', '\u{98c}'), ('\u{98f}', '\u{990}'),
-        ('\u{993}', '\u{9a8}'), ('\u{9aa}', '\u{9b0}'), ('\u{9b2}', '\u{9b2}'),
-        ('\u{9b6}', '\u{9b9}'), ('\u{9bc}', '\u{9c4}'), ('\u{9c7}', '\u{9c8}'),
-        ('\u{9cb}', '\u{9ce}'), ('\u{9d7}', '\u{9d7}'), ('\u{9dc}', '\u{9dd}'),
-        ('\u{9df}', '\u{9e3}'), ('\u{9e6}', '\u{9f1}'), ('\u{a01}', '\u{a03}'),
-        ('\u{a05}', '\u{a0a}'), ('\u{a0f}', '\u{a10}'), ('\u{a13}', '\u{a28}'),
-        ('\u{a2a}', '\u{a30}'), ('\u{a32}', '\u{a33}'), ('\u{a35}', '\u{a36}'),
-        ('\u{a38}', '\u{a39}'), ('\u{a3c}', '\u{a3c}'), ('\u{a3e}', '\u{a42}'),
-        ('\u{a47}', '\u{a48}'), ('\u{a4b}', '\u{a4d}'), ('\u{a51}', '\u{a51}'),
-        ('\u{a59}', '\u{a5c}'), ('\u{a5e}', '\u{a5e}'), ('\u{a66}', '\u{a75}'),
-        ('\u{a81}', '\u{a83}'), ('\u{a85}', '\u{a8d}'), ('\u{a8f}', '\u{a91}'),
-        ('\u{a93}', '\u{aa8}'), ('\u{aaa}', '\u{ab0}'), ('\u{ab2}', '\u{ab3}'),
-        ('\u{ab5}', '\u{ab9}'), ('\u{abc}', '\u{ac5}'), ('\u{ac7}', '\u{ac9}'),
-        ('\u{acb}', '\u{acd}'), ('\u{ad0}', '\u{ad0}'), ('\u{ae0}', '\u{ae3}'),
-        ('\u{ae6}', '\u{aef}'), ('\u{af9}', '\u{af9}'), ('\u{b01}', '\u{b03}'),
-        ('\u{b05}', '\u{b0c}'), ('\u{b0f}', '\u{b10}'), ('\u{b13}', '\u{b28}'),
-        ('\u{b2a}', '\u{b30}'), ('\u{b32}', '\u{b33}'), ('\u{b35}', '\u{b39}'),
-        ('\u{b3c}', '\u{b44}'), ('\u{b47}', '\u{b48}'), ('\u{b4b}', '\u{b4d}'),
-        ('\u{b56}', '\u{b57}'), ('\u{b5c}', '\u{b5d}'), ('\u{b5f}', '\u{b63}'),
-        ('\u{b66}', '\u{b6f}'), ('\u{b71}', '\u{b71}'), ('\u{b82}', '\u{b83}'),
-        ('\u{b85}', '\u{b8a}'), ('\u{b8e}', '\u{b90}'), ('\u{b92}', '\u{b95}'),
-        ('\u{b99}', '\u{b9a}'), ('\u{b9c}', '\u{b9c}'), ('\u{b9e}', '\u{b9f}'),
-        ('\u{ba3}', '\u{ba4}'), ('\u{ba8}', '\u{baa}'), ('\u{bae}', '\u{bb9}'),
-        ('\u{bbe}', '\u{bc2}'), ('\u{bc6}', '\u{bc8}'), ('\u{bca}', '\u{bcd}'),
-        ('\u{bd0}', '\u{bd0}'), ('\u{bd7}', '\u{bd7}'), ('\u{be6}', '\u{bef}'),
-        ('\u{c00}', '\u{c03}'), ('\u{c05}', '\u{c0c}'), ('\u{c0e}', '\u{c10}'),
-        ('\u{c12}', '\u{c28}'), ('\u{c2a}', '\u{c39}'), ('\u{c3d}', '\u{c44}'),
-        ('\u{c46}', '\u{c48}'), ('\u{c4a}', '\u{c4d}'), ('\u{c55}', '\u{c56}'),
-        ('\u{c58}', '\u{c5a}'), ('\u{c60}', '\u{c63}'), ('\u{c66}', '\u{c6f}'),
-        ('\u{c81}', '\u{c83}'), ('\u{c85}', '\u{c8c}'), ('\u{c8e}', '\u{c90}'),
-        ('\u{c92}', '\u{ca8}'), ('\u{caa}', '\u{cb3}'), ('\u{cb5}', '\u{cb9}'),
-        ('\u{cbc}', '\u{cc4}'), ('\u{cc6}', '\u{cc8}'), ('\u{cca}', '\u{ccd}'),
-        ('\u{cd5}', '\u{cd6}'), ('\u{cde}', '\u{cde}'), ('\u{ce0}', '\u{ce3}'),
-        ('\u{ce6}', '\u{cef}'), ('\u{cf1}', '\u{cf2}'), ('\u{d01}', '\u{d03}'),
-        ('\u{d05}', '\u{d0c}'), ('\u{d0e}', '\u{d10}'), ('\u{d12}', '\u{d3a}'),
-        ('\u{d3d}', '\u{d44}'), ('\u{d46}', '\u{d48}'), ('\u{d4a}', '\u{d4e}'),
-        ('\u{d57}', '\u{d57}'), ('\u{d5f}', '\u{d63}'), ('\u{d66}', '\u{d6f}'),
-        ('\u{d7a}', '\u{d7f}'), ('\u{d82}', '\u{d83}'), ('\u{d85}', '\u{d96}'),
-        ('\u{d9a}', '\u{db1}'), ('\u{db3}', '\u{dbb}'), ('\u{dbd}', '\u{dbd}'),
-        ('\u{dc0}', '\u{dc6}'), ('\u{dca}', '\u{dca}'), ('\u{dcf}', '\u{dd4}'),
-        ('\u{dd6}', '\u{dd6}'), ('\u{dd8}', '\u{ddf}'), ('\u{de6}', '\u{def}'),
-        ('\u{df2}', '\u{df3}'), ('\u{e01}', '\u{e3a}'), ('\u{e40}', '\u{e4e}'),
-        ('\u{e50}', '\u{e59}'), ('\u{e81}', '\u{e82}'), ('\u{e84}', '\u{e84}'),
-        ('\u{e87}', '\u{e88}'), ('\u{e8a}', '\u{e8a}'), ('\u{e8d}', '\u{e8d}'),
-        ('\u{e94}', '\u{e97}'), ('\u{e99}', '\u{e9f}'), ('\u{ea1}', '\u{ea3}'),
-        ('\u{ea5}', '\u{ea5}'), ('\u{ea7}', '\u{ea7}'), ('\u{eaa}', '\u{eab}'),
-        ('\u{ead}', '\u{eb9}'), ('\u{ebb}', '\u{ebd}'), ('\u{ec0}', '\u{ec4}'),
-        ('\u{ec6}', '\u{ec6}'), ('\u{ec8}', '\u{ecd}'), ('\u{ed0}', '\u{ed9}'),
-        ('\u{edc}', '\u{edf}'), ('\u{f00}', '\u{f00}'), ('\u{f18}', '\u{f19}'),
-        ('\u{f20}', '\u{f29}'), ('\u{f35}', '\u{f35}'), ('\u{f37}', '\u{f37}'),
-        ('\u{f39}', '\u{f39}'), ('\u{f3e}', '\u{f47}'), ('\u{f49}', '\u{f6c}'),
-        ('\u{f71}', '\u{f84}'), ('\u{f86}', '\u{f97}'), ('\u{f99}', '\u{fbc}'),
-        ('\u{fc6}', '\u{fc6}'), ('\u{1000}', '\u{1049}'), ('\u{1050}',
-        '\u{109d}'), ('\u{10a0}', '\u{10c5}'), ('\u{10c7}', '\u{10c7}'),
-        ('\u{10cd}', '\u{10cd}'), ('\u{10d0}', '\u{10fa}'), ('\u{10fc}',
-        '\u{1248}'), ('\u{124a}', '\u{124d}'), ('\u{1250}', '\u{1256}'),
-        ('\u{1258}', '\u{1258}'), ('\u{125a}', '\u{125d}'), ('\u{1260}',
-        '\u{1288}'), ('\u{128a}', '\u{128d}'), ('\u{1290}', '\u{12b0}'),
-        ('\u{12b2}', '\u{12b5}'), ('\u{12b8}', '\u{12be}'), ('\u{12c0}',
-        '\u{12c0}'), ('\u{12c2}', '\u{12c5}'), ('\u{12c8}', '\u{12d6}'),
-        ('\u{12d8}', '\u{1310}'), ('\u{1312}', '\u{1315}'), ('\u{1318}',
-        '\u{135a}'), ('\u{135d}', '\u{135f}'), ('\u{1380}', '\u{138f}'),
-        ('\u{13a0}', '\u{13f5}'), ('\u{13f8}', '\u{13fd}'), ('\u{1401}',
-        '\u{166c}'), ('\u{166f}', '\u{167f}'), ('\u{1681}', '\u{169a}'),
-        ('\u{16a0}', '\u{16ea}'), ('\u{16ee}', '\u{16f8}'), ('\u{1700}',
-        '\u{170c}'), ('\u{170e}', '\u{1714}'), ('\u{1720}', '\u{1734}'),
-        ('\u{1740}', '\u{1753}'), ('\u{1760}', '\u{176c}'), ('\u{176e}',
-        '\u{1770}'), ('\u{1772}', '\u{1773}'), ('\u{1780}', '\u{17d3}'),
-        ('\u{17d7}', '\u{17d7}'), ('\u{17dc}', '\u{17dd}'), ('\u{17e0}',
-        '\u{17e9}'), ('\u{180b}', '\u{180d}'), ('\u{1810}', '\u{1819}'),
-        ('\u{1820}', '\u{1877}'), ('\u{1880}', '\u{18aa}'), ('\u{18b0}',
-        '\u{18f5}'), ('\u{1900}', '\u{191e}'), ('\u{1920}', '\u{192b}'),
-        ('\u{1930}', '\u{193b}'), ('\u{1946}', '\u{196d}'), ('\u{1970}',
-        '\u{1974}'), ('\u{1980}', '\u{19ab}'), ('\u{19b0}', '\u{19c9}'),
-        ('\u{19d0}', '\u{19d9}'), ('\u{1a00}', '\u{1a1b}'), ('\u{1a20}',
-        '\u{1a5e}'), ('\u{1a60}', '\u{1a7c}'), ('\u{1a7f}', '\u{1a89}'),
-        ('\u{1a90}', '\u{1a99}'), ('\u{1aa7}', '\u{1aa7}'), ('\u{1ab0}',
-        '\u{1abe}'), ('\u{1b00}', '\u{1b4b}'), ('\u{1b50}', '\u{1b59}'),
-        ('\u{1b6b}', '\u{1b73}'), ('\u{1b80}', '\u{1bf3}'), ('\u{1c00}',
-        '\u{1c37}'), ('\u{1c40}', '\u{1c49}'), ('\u{1c4d}', '\u{1c7d}'),
-        ('\u{1cd0}', '\u{1cd2}'), ('\u{1cd4}', '\u{1cf6}'), ('\u{1cf8}',
-        '\u{1cf9}'), ('\u{1d00}', '\u{1df5}'), ('\u{1dfc}', '\u{1f15}'),
-        ('\u{1f18}', '\u{1f1d}'), ('\u{1f20}', '\u{1f45}'), ('\u{1f48}',
-        '\u{1f4d}'), ('\u{1f50}', '\u{1f57}'), ('\u{1f59}', '\u{1f59}'),
-        ('\u{1f5b}', '\u{1f5b}'), ('\u{1f5d}', '\u{1f5d}'), ('\u{1f5f}',
-        '\u{1f7d}'), ('\u{1f80}', '\u{1fb4}'), ('\u{1fb6}', '\u{1fbc}'),
-        ('\u{1fbe}', '\u{1fbe}'), ('\u{1fc2}', '\u{1fc4}'), ('\u{1fc6}',
-        '\u{1fcc}'), ('\u{1fd0}', '\u{1fd3}'), ('\u{1fd6}', '\u{1fdb}'),
-        ('\u{1fe0}', '\u{1fec}'), ('\u{1ff2}', '\u{1ff4}'), ('\u{1ff6}',
-        '\u{1ffc}'), ('\u{200c}', '\u{200d}'), ('\u{203f}', '\u{2040}'),
-        ('\u{2054}', '\u{2054}'), ('\u{2071}', '\u{2071}'), ('\u{207f}',
-        '\u{207f}'), ('\u{2090}', '\u{209c}'), ('\u{20d0}', '\u{20f0}'),
-        ('\u{2102}', '\u{2102}'), ('\u{2107}', '\u{2107}'), ('\u{210a}',
-        '\u{2113}'), ('\u{2115}', '\u{2115}'), ('\u{2119}', '\u{211d}'),
-        ('\u{2124}', '\u{2124}'), ('\u{2126}', '\u{2126}'), ('\u{2128}',
-        '\u{2128}'), ('\u{212a}', '\u{212d}'), ('\u{212f}', '\u{2139}'),
-        ('\u{213c}', '\u{213f}'), ('\u{2145}', '\u{2149}'), ('\u{214e}',
-        '\u{214e}'), ('\u{2160}', '\u{2188}'), ('\u{24b6}', '\u{24e9}'),
-        ('\u{2c00}', '\u{2c2e}'), ('\u{2c30}', '\u{2c5e}'), ('\u{2c60}',
-        '\u{2ce4}'), ('\u{2ceb}', '\u{2cf3}'), ('\u{2d00}', '\u{2d25}'),
-        ('\u{2d27}', '\u{2d27}'), ('\u{2d2d}', '\u{2d2d}'), ('\u{2d30}',
-        '\u{2d67}'), ('\u{2d6f}', '\u{2d6f}'), ('\u{2d7f}', '\u{2d96}'),
-        ('\u{2da0}', '\u{2da6}'), ('\u{2da8}', '\u{2dae}'), ('\u{2db0}',
-        '\u{2db6}'), ('\u{2db8}', '\u{2dbe}'), ('\u{2dc0}', '\u{2dc6}'),
-        ('\u{2dc8}', '\u{2dce}'), ('\u{2dd0}', '\u{2dd6}'), ('\u{2dd8}',
-        '\u{2dde}'), ('\u{2de0}', '\u{2dff}'), ('\u{2e2f}', '\u{2e2f}'),
-        ('\u{3005}', '\u{3007}'), ('\u{3021}', '\u{302f}'), ('\u{3031}',
-        '\u{3035}'), ('\u{3038}', '\u{303c}'), ('\u{3041}', '\u{3096}'),
-        ('\u{3099}', '\u{309a}'), ('\u{309d}', '\u{309f}'), ('\u{30a1}',
-        '\u{30fa}'), ('\u{30fc}', '\u{30ff}'), ('\u{3105}', '\u{312d}'),
-        ('\u{3131}', '\u{318e}'), ('\u{31a0}', '\u{31ba}'), ('\u{31f0}',
-        '\u{31ff}'), ('\u{3400}', '\u{4db5}'), ('\u{4e00}', '\u{9fd5}'),
-        ('\u{a000}', '\u{a48c}'), ('\u{a4d0}', '\u{a4fd}'), ('\u{a500}',
-        '\u{a60c}'), ('\u{a610}', '\u{a62b}'), ('\u{a640}', '\u{a672}'),
-        ('\u{a674}', '\u{a67d}'), ('\u{a67f}', '\u{a6f1}'), ('\u{a717}',
-        '\u{a71f}'), ('\u{a722}', '\u{a788}'), ('\u{a78b}', '\u{a7ad}'),
-        ('\u{a7b0}', '\u{a7b7}'), ('\u{a7f7}', '\u{a827}'), ('\u{a840}',
-        '\u{a873}'), ('\u{a880}', '\u{a8c4}'), ('\u{a8d0}', '\u{a8d9}'),
-        ('\u{a8e0}', '\u{a8f7}'), ('\u{a8fb}', '\u{a8fb}'), ('\u{a8fd}',
-        '\u{a8fd}'), ('\u{a900}', '\u{a92d}'), ('\u{a930}', '\u{a953}'),
-        ('\u{a960}', '\u{a97c}'), ('\u{a980}', '\u{a9c0}'), ('\u{a9cf}',
-        '\u{a9d9}'), ('\u{a9e0}', '\u{a9fe}'), ('\u{aa00}', '\u{aa36}'),
-        ('\u{aa40}', '\u{aa4d}'), ('\u{aa50}', '\u{aa59}'), ('\u{aa60}',
-        '\u{aa76}'), ('\u{aa7a}', '\u{aac2}'), ('\u{aadb}', '\u{aadd}'),
-        ('\u{aae0}', '\u{aaef}'), ('\u{aaf2}', '\u{aaf6}'), ('\u{ab01}',
-        '\u{ab06}'), ('\u{ab09}', '\u{ab0e}'), ('\u{ab11}', '\u{ab16}'),
-        ('\u{ab20}', '\u{ab26}'), ('\u{ab28}', '\u{ab2e}'), ('\u{ab30}',
-        '\u{ab5a}'), ('\u{ab5c}', '\u{ab65}'), ('\u{ab70}', '\u{abea}'),
-        ('\u{abec}', '\u{abed}'), ('\u{abf0}', '\u{abf9}'), ('\u{ac00}',
-        '\u{d7a3}'), ('\u{d7b0}', '\u{d7c6}'), ('\u{d7cb}', '\u{d7fb}'),
-        ('\u{f900}', '\u{fa6d}'), ('\u{fa70}', '\u{fad9}'), ('\u{fb00}',
-        '\u{fb06}'), ('\u{fb13}', '\u{fb17}'), ('\u{fb1d}', '\u{fb28}'),
-        ('\u{fb2a}', '\u{fb36}'), ('\u{fb38}', '\u{fb3c}'), ('\u{fb3e}',
-        '\u{fb3e}'), ('\u{fb40}', '\u{fb41}'), ('\u{fb43}', '\u{fb44}'),
-        ('\u{fb46}', '\u{fbb1}'), ('\u{fbd3}', '\u{fd3d}'), ('\u{fd50}',
-        '\u{fd8f}'), ('\u{fd92}', '\u{fdc7}'), ('\u{fdf0}', '\u{fdfb}'),
-        ('\u{fe00}', '\u{fe0f}'), ('\u{fe20}', '\u{fe2f}'), ('\u{fe33}',
-        '\u{fe34}'), ('\u{fe4d}', '\u{fe4f}'), ('\u{fe70}', '\u{fe74}'),
-        ('\u{fe76}', '\u{fefc}'), ('\u{ff10}', '\u{ff19}'), ('\u{ff21}',
-        '\u{ff3a}'), ('\u{ff3f}', '\u{ff3f}'), ('\u{ff41}', '\u{ff5a}'),
-        ('\u{ff66}', '\u{ffbe}'), ('\u{ffc2}', '\u{ffc7}'), ('\u{ffca}',
-        '\u{ffcf}'), ('\u{ffd2}', '\u{ffd7}'), ('\u{ffda}', '\u{ffdc}'),
-        ('\u{10000}', '\u{1000b}'), ('\u{1000d}', '\u{10026}'), ('\u{10028}',
-        '\u{1003a}'), ('\u{1003c}', '\u{1003d}'), ('\u{1003f}', '\u{1004d}'),
-        ('\u{10050}', '\u{1005d}'), ('\u{10080}', '\u{100fa}'), ('\u{10140}',
-        '\u{10174}'), ('\u{101fd}', '\u{101fd}'), ('\u{10280}', '\u{1029c}'),
-        ('\u{102a0}', '\u{102d0}'), ('\u{102e0}', '\u{102e0}'), ('\u{10300}',
-        '\u{1031f}'), ('\u{10330}', '\u{1034a}'), ('\u{10350}', '\u{1037a}'),
-        ('\u{10380}', '\u{1039d}'), ('\u{103a0}', '\u{103c3}'), ('\u{103c8}',
-        '\u{103cf}'), ('\u{103d1}', '\u{103d5}'), ('\u{10400}', '\u{1049d}'),
-        ('\u{104a0}', '\u{104a9}'), ('\u{10500}', '\u{10527}'), ('\u{10530}',
-        '\u{10563}'), ('\u{10600}', '\u{10736}'), ('\u{10740}', '\u{10755}'),
-        ('\u{10760}', '\u{10767}'), ('\u{10800}', '\u{10805}'), ('\u{10808}',
-        '\u{10808}'), ('\u{1080a}', '\u{10835}'), ('\u{10837}', '\u{10838}'),
-        ('\u{1083c}', '\u{1083c}'), ('\u{1083f}', '\u{10855}'), ('\u{10860}',
-        '\u{10876}'), ('\u{10880}', '\u{1089e}'), ('\u{108e0}', '\u{108f2}'),
-        ('\u{108f4}', '\u{108f5}'), ('\u{10900}', '\u{10915}'), ('\u{10920}',
-        '\u{10939}'), ('\u{10980}', '\u{109b7}'), ('\u{109be}', '\u{109bf}'),
-        ('\u{10a00}', '\u{10a03}'), ('\u{10a05}', '\u{10a06}'), ('\u{10a0c}',
-        '\u{10a13}'), ('\u{10a15}', '\u{10a17}'), ('\u{10a19}', '\u{10a33}'),
-        ('\u{10a38}', '\u{10a3a}'), ('\u{10a3f}', '\u{10a3f}'), ('\u{10a60}',
-        '\u{10a7c}'), ('\u{10a80}', '\u{10a9c}'), ('\u{10ac0}', '\u{10ac7}'),
-        ('\u{10ac9}', '\u{10ae6}'), ('\u{10b00}', '\u{10b35}'), ('\u{10b40}',
-        '\u{10b55}'), ('\u{10b60}', '\u{10b72}'), ('\u{10b80}', '\u{10b91}'),
-        ('\u{10c00}', '\u{10c48}'), ('\u{10c80}', '\u{10cb2}'), ('\u{10cc0}',
-        '\u{10cf2}'), ('\u{11000}', '\u{11046}'), ('\u{11066}', '\u{1106f}'),
-        ('\u{1107f}', '\u{110ba}'), ('\u{110d0}', '\u{110e8}'), ('\u{110f0}',
-        '\u{110f9}'), ('\u{11100}', '\u{11134}'), ('\u{11136}', '\u{1113f}'),
-        ('\u{11150}', '\u{11173}'), ('\u{11176}', '\u{11176}'), ('\u{11180}',
-        '\u{111c4}'), ('\u{111ca}', '\u{111cc}'), ('\u{111d0}', '\u{111da}'),
-        ('\u{111dc}', '\u{111dc}'), ('\u{11200}', '\u{11211}'), ('\u{11213}',
-        '\u{11237}'), ('\u{11280}', '\u{11286}'), ('\u{11288}', '\u{11288}'),
-        ('\u{1128a}', '\u{1128d}'), ('\u{1128f}', '\u{1129d}'), ('\u{1129f}',
-        '\u{112a8}'), ('\u{112b0}', '\u{112ea}'), ('\u{112f0}', '\u{112f9}'),
-        ('\u{11300}', '\u{11303}'), ('\u{11305}', '\u{1130c}'), ('\u{1130f}',
-        '\u{11310}'), ('\u{11313}', '\u{11328}'), ('\u{1132a}', '\u{11330}'),
-        ('\u{11332}', '\u{11333}'), ('\u{11335}', '\u{11339}'), ('\u{1133c}',
-        '\u{11344}'), ('\u{11347}', '\u{11348}'), ('\u{1134b}', '\u{1134d}'),
-        ('\u{11350}', '\u{11350}'), ('\u{11357}', '\u{11357}'), ('\u{1135d}',
-        '\u{11363}'), ('\u{11366}', '\u{1136c}'), ('\u{11370}', '\u{11374}'),
-        ('\u{11480}', '\u{114c5}'), ('\u{114c7}', '\u{114c7}'), ('\u{114d0}',
-        '\u{114d9}'), ('\u{11580}', '\u{115b5}'), ('\u{115b8}', '\u{115c0}'),
-        ('\u{115d8}', '\u{115dd}'), ('\u{11600}', '\u{11640}'), ('\u{11644}',
-        '\u{11644}'), ('\u{11650}', '\u{11659}'), ('\u{11680}', '\u{116b7}'),
-        ('\u{116c0}', '\u{116c9}'), ('\u{11700}', '\u{11719}'), ('\u{1171d}',
-        '\u{1172b}'), ('\u{11730}', '\u{11739}'), ('\u{118a0}', '\u{118e9}'),
-        ('\u{118ff}', '\u{118ff}'), ('\u{11ac0}', '\u{11af8}'), ('\u{12000}',
-        '\u{12399}'), ('\u{12400}', '\u{1246e}'), ('\u{12480}', '\u{12543}'),
-        ('\u{13000}', '\u{1342e}'), ('\u{14400}', '\u{14646}'), ('\u{16800}',
-        '\u{16a38}'), ('\u{16a40}', '\u{16a5e}'), ('\u{16a60}', '\u{16a69}'),
-        ('\u{16ad0}', '\u{16aed}'), ('\u{16af0}', '\u{16af4}'), ('\u{16b00}',
-        '\u{16b36}'), ('\u{16b40}', '\u{16b43}'), ('\u{16b50}', '\u{16b59}'),
-        ('\u{16b63}', '\u{16b77}'), ('\u{16b7d}', '\u{16b8f}'), ('\u{16f00}',
-        '\u{16f44}'), ('\u{16f50}', '\u{16f7e}'), ('\u{16f8f}', '\u{16f9f}'),
-        ('\u{1b000}', '\u{1b001}'), ('\u{1bc00}', '\u{1bc6a}'), ('\u{1bc70}',
-        '\u{1bc7c}'), ('\u{1bc80}', '\u{1bc88}'), ('\u{1bc90}', '\u{1bc99}'),
-        ('\u{1bc9d}', '\u{1bc9e}'), ('\u{1d165}', '\u{1d169}'), ('\u{1d16d}',
-        '\u{1d172}'), ('\u{1d17b}', '\u{1d182}'), ('\u{1d185}', '\u{1d18b}'),
-        ('\u{1d1aa}', '\u{1d1ad}'), ('\u{1d242}', '\u{1d244}'), ('\u{1d400}',
-        '\u{1d454}'), ('\u{1d456}', '\u{1d49c}'), ('\u{1d49e}', '\u{1d49f}'),
-        ('\u{1d4a2}', '\u{1d4a2}'), ('\u{1d4a5}', '\u{1d4a6}'), ('\u{1d4a9}',
-        '\u{1d4ac}'), ('\u{1d4ae}', '\u{1d4b9}'), ('\u{1d4bb}', '\u{1d4bb}'),
-        ('\u{1d4bd}', '\u{1d4c3}'), ('\u{1d4c5}', '\u{1d505}'), ('\u{1d507}',
-        '\u{1d50a}'), ('\u{1d50d}', '\u{1d514}'), ('\u{1d516}', '\u{1d51c}'),
-        ('\u{1d51e}', '\u{1d539}'), ('\u{1d53b}', '\u{1d53e}'), ('\u{1d540}',
-        '\u{1d544}'), ('\u{1d546}', '\u{1d546}'), ('\u{1d54a}', '\u{1d550}'),
-        ('\u{1d552}', '\u{1d6a5}'), ('\u{1d6a8}', '\u{1d6c0}'), ('\u{1d6c2}',
-        '\u{1d6da}'), ('\u{1d6dc}', '\u{1d6fa}'), ('\u{1d6fc}', '\u{1d714}'),
-        ('\u{1d716}', '\u{1d734}'), ('\u{1d736}', '\u{1d74e}'), ('\u{1d750}',
-        '\u{1d76e}'), ('\u{1d770}', '\u{1d788}'), ('\u{1d78a}', '\u{1d7a8}'),
-        ('\u{1d7aa}', '\u{1d7c2}'), ('\u{1d7c4}', '\u{1d7cb}'), ('\u{1d7ce}',
-        '\u{1d7ff}'), ('\u{1da00}', '\u{1da36}'), ('\u{1da3b}', '\u{1da6c}'),
-        ('\u{1da75}', '\u{1da75}'), ('\u{1da84}', '\u{1da84}'), ('\u{1da9b}',
-        '\u{1da9f}'), ('\u{1daa1}', '\u{1daaf}'), ('\u{1e800}', '\u{1e8c4}'),
-        ('\u{1e8d0}', '\u{1e8d6}'), ('\u{1ee00}', '\u{1ee03}'), ('\u{1ee05}',
-        '\u{1ee1f}'), ('\u{1ee21}', '\u{1ee22}'), ('\u{1ee24}', '\u{1ee24}'),
-        ('\u{1ee27}', '\u{1ee27}'), ('\u{1ee29}', '\u{1ee32}'), ('\u{1ee34}',
-        '\u{1ee37}'), ('\u{1ee39}', '\u{1ee39}'), ('\u{1ee3b}', '\u{1ee3b}'),
-        ('\u{1ee42}', '\u{1ee42}'), ('\u{1ee47}', '\u{1ee47}'), ('\u{1ee49}',
-        '\u{1ee49}'), ('\u{1ee4b}', '\u{1ee4b}'), ('\u{1ee4d}', '\u{1ee4f}'),
-        ('\u{1ee51}', '\u{1ee52}'), ('\u{1ee54}', '\u{1ee54}'), ('\u{1ee57}',
-        '\u{1ee57}'), ('\u{1ee59}', '\u{1ee59}'), ('\u{1ee5b}', '\u{1ee5b}'),
-        ('\u{1ee5d}', '\u{1ee5d}'), ('\u{1ee5f}', '\u{1ee5f}'), ('\u{1ee61}',
-        '\u{1ee62}'), ('\u{1ee64}', '\u{1ee64}'), ('\u{1ee67}', '\u{1ee6a}'),
-        ('\u{1ee6c}', '\u{1ee72}'), ('\u{1ee74}', '\u{1ee77}'), ('\u{1ee79}',
-        '\u{1ee7c}'), ('\u{1ee7e}', '\u{1ee7e}'), ('\u{1ee80}', '\u{1ee89}'),
-        ('\u{1ee8b}', '\u{1ee9b}'), ('\u{1eea1}', '\u{1eea3}'), ('\u{1eea5}',
-        '\u{1eea9}'), ('\u{1eeab}', '\u{1eebb}'), ('\u{1f130}', '\u{1f149}'),
-        ('\u{1f150}', '\u{1f169}'), ('\u{1f170}', '\u{1f189}'), ('\u{20000}',
-        '\u{2a6d6}'), ('\u{2a700}', '\u{2b734}'), ('\u{2b740}', '\u{2b81d}'),
-        ('\u{2b820}', '\u{2cea1}'), ('\u{2f800}', '\u{2fa1d}'), ('\u{e0100}',
-        '\u{e01ef}')
-    ];
+        let xs: Vec<char> = simple_fold('K').unwrap().collect();
+        assert_eq!(xs, vec!['K', 'k']);
+    }
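A note on the seemingly duplicated 'K' in simple_fold_k above: in the upstream sources these are two distinct codepoints that merely render identically, ASCII U+004B and U+212A KELVIN SIGN, i.e. the remaining members of 'k''s simple case-folding orbit. A hedged restatement with escaped codepoints, assuming simple_fold behaves exactly as the tests here exercise it (an Ok value iterating the other orbit members in codepoint order):

        // Sketch only: the same assertion as in simple_fold_k, written with
        // escapes so the two K-like codepoints are distinguishable.
        let xs: Vec<char> = simple_fold('k').unwrap().collect();
        assert_eq!(xs, vec!['\u{4b}', '\u{212a}']); // ASCII 'K', then KELVIN SIGN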
 
-}
+    #[test]
+    fn simple_fold_a() {
+        let xs: Vec<char> = simple_fold('a').unwrap().collect();
+        assert_eq!(xs, vec!['A']);
+
+        let xs: Vec<char> = simple_fold('A').unwrap().collect();
+        assert_eq!(xs, vec!['a']);
+    }
 
-pub mod case_folding {
-    pub const C_plus_S_both_table: &'static [(char, char)] = &[
-        ('\u{41}', '\u{61}'), ('\u{42}', '\u{62}'), ('\u{43}', '\u{63}'),
-        ('\u{44}', '\u{64}'), ('\u{45}', '\u{65}'), ('\u{46}', '\u{66}'),
-        ('\u{47}', '\u{67}'), ('\u{48}', '\u{68}'), ('\u{49}', '\u{69}'),
-        ('\u{4a}', '\u{6a}'), ('\u{4b}', '\u{6b}'), ('\u{4b}', '\u{212a}'),
-        ('\u{4c}', '\u{6c}'), ('\u{4d}', '\u{6d}'), ('\u{4e}', '\u{6e}'),
-        ('\u{4f}', '\u{6f}'), ('\u{50}', '\u{70}'), ('\u{51}', '\u{71}'),
-        ('\u{52}', '\u{72}'), ('\u{53}', '\u{73}'), ('\u{53}', '\u{17f}'),
-        ('\u{54}', '\u{74}'), ('\u{55}', '\u{75}'), ('\u{56}', '\u{76}'),
-        ('\u{57}', '\u{77}'), ('\u{58}', '\u{78}'), ('\u{59}', '\u{79}'),
-        ('\u{5a}', '\u{7a}'), ('\u{61}', '\u{41}'), ('\u{62}', '\u{42}'),
-        ('\u{63}', '\u{43}'), ('\u{64}', '\u{44}'), ('\u{65}', '\u{45}'),
-        ('\u{66}', '\u{46}'), ('\u{67}', '\u{47}'), ('\u{68}', '\u{48}'),
-        ('\u{69}', '\u{49}'), ('\u{6a}', '\u{4a}'), ('\u{6b}', '\u{4b}'),
-        ('\u{6b}', '\u{212a}'), ('\u{6c}', '\u{4c}'), ('\u{6d}', '\u{4d}'),
-        ('\u{6e}', '\u{4e}'), ('\u{6f}', '\u{4f}'), ('\u{70}', '\u{50}'),
-        ('\u{71}', '\u{51}'), ('\u{72}', '\u{52}'), ('\u{73}', '\u{53}'),
-        ('\u{73}', '\u{17f}'), ('\u{74}', '\u{54}'), ('\u{75}', '\u{55}'),
-        ('\u{76}', '\u{56}'), ('\u{77}', '\u{57}'), ('\u{78}', '\u{58}'),
-        ('\u{79}', '\u{59}'), ('\u{7a}', '\u{5a}'), ('\u{b5}', '\u{39c}'),
-        ('\u{b5}', '\u{3bc}'), ('\u{c0}', '\u{e0}'), ('\u{c1}', '\u{e1}'),
-        ('\u{c2}', '\u{e2}'), ('\u{c3}', '\u{e3}'), ('\u{c4}', '\u{e4}'),
-        ('\u{c5}', '\u{e5}'), ('\u{c5}', '\u{212b}'), ('\u{c6}', '\u{e6}'),
-        ('\u{c7}', '\u{e7}'), ('\u{c8}', '\u{e8}'), ('\u{c9}', '\u{e9}'),
-        ('\u{ca}', '\u{ea}'), ('\u{cb}', '\u{eb}'), ('\u{cc}', '\u{ec}'),
-        ('\u{cd}', '\u{ed}'), ('\u{ce}', '\u{ee}'), ('\u{cf}', '\u{ef}'),
-        ('\u{d0}', '\u{f0}'), ('\u{d1}', '\u{f1}'), ('\u{d2}', '\u{f2}'),
-        ('\u{d3}', '\u{f3}'), ('\u{d4}', '\u{f4}'), ('\u{d5}', '\u{f5}'),
-        ('\u{d6}', '\u{f6}'), ('\u{d8}', '\u{f8}'), ('\u{d9}', '\u{f9}'),
-        ('\u{da}', '\u{fa}'), ('\u{db}', '\u{fb}'), ('\u{dc}', '\u{fc}'),
-        ('\u{dd}', '\u{fd}'), ('\u{de}', '\u{fe}'), ('\u{df}', '\u{1e9e}'),
-        ('\u{e0}', '\u{c0}'), ('\u{e1}', '\u{c1}'), ('\u{e2}', '\u{c2}'),
-        ('\u{e3}', '\u{c3}'), ('\u{e4}', '\u{c4}'), ('\u{e5}', '\u{c5}'),
-        ('\u{e5}', '\u{212b}'), ('\u{e6}', '\u{c6}'), ('\u{e7}', '\u{c7}'),
-        ('\u{e8}', '\u{c8}'), ('\u{e9}', '\u{c9}'), ('\u{ea}', '\u{ca}'),
-        ('\u{eb}', '\u{cb}'), ('\u{ec}', '\u{cc}'), ('\u{ed}', '\u{cd}'),
-        ('\u{ee}', '\u{ce}'), ('\u{ef}', '\u{cf}'), ('\u{f0}', '\u{d0}'),
-        ('\u{f1}', '\u{d1}'), ('\u{f2}', '\u{d2}'), ('\u{f3}', '\u{d3}'),
-        ('\u{f4}', '\u{d4}'), ('\u{f5}', '\u{d5}'), ('\u{f6}', '\u{d6}'),
-        ('\u{f8}', '\u{d8}'), ('\u{f9}', '\u{d9}'), ('\u{fa}', '\u{da}'),
-        ('\u{fb}', '\u{db}'), ('\u{fc}', '\u{dc}'), ('\u{fd}', '\u{dd}'),
-        ('\u{fe}', '\u{de}'), ('\u{ff}', '\u{178}'), ('\u{100}', '\u{101}'),
-        ('\u{101}', '\u{100}'), ('\u{102}', '\u{103}'), ('\u{103}', '\u{102}'),
-        ('\u{104}', '\u{105}'), ('\u{105}', '\u{104}'), ('\u{106}', '\u{107}'),
-        ('\u{107}', '\u{106}'), ('\u{108}', '\u{109}'), ('\u{109}', '\u{108}'),
-        ('\u{10a}', '\u{10b}'), ('\u{10b}', '\u{10a}'), ('\u{10c}', '\u{10d}'),
-        ('\u{10d}', '\u{10c}'), ('\u{10e}', '\u{10f}'), ('\u{10f}', '\u{10e}'),
-        ('\u{110}', '\u{111}'), ('\u{111}', '\u{110}'), ('\u{112}', '\u{113}'),
-        ('\u{113}', '\u{112}'), ('\u{114}', '\u{115}'), ('\u{115}', '\u{114}'),
-        ('\u{116}', '\u{117}'), ('\u{117}', '\u{116}'), ('\u{118}', '\u{119}'),
-        ('\u{119}', '\u{118}'), ('\u{11a}', '\u{11b}'), ('\u{11b}', '\u{11a}'),
-        ('\u{11c}', '\u{11d}'), ('\u{11d}', '\u{11c}'), ('\u{11e}', '\u{11f}'),
-        ('\u{11f}', '\u{11e}'), ('\u{120}', '\u{121}'), ('\u{121}', '\u{120}'),
-        ('\u{122}', '\u{123}'), ('\u{123}', '\u{122}'), ('\u{124}', '\u{125}'),
-        ('\u{125}', '\u{124}'), ('\u{126}', '\u{127}'), ('\u{127}', '\u{126}'),
-        ('\u{128}', '\u{129}'), ('\u{129}', '\u{128}'), ('\u{12a}', '\u{12b}'),
-        ('\u{12b}', '\u{12a}'), ('\u{12c}', '\u{12d}'), ('\u{12d}', '\u{12c}'),
-        ('\u{12e}', '\u{12f}'), ('\u{12f}', '\u{12e}'), ('\u{132}', '\u{133}'),
-        ('\u{133}', '\u{132}'), ('\u{134}', '\u{135}'), ('\u{135}', '\u{134}'),
-        ('\u{136}', '\u{137}'), ('\u{137}', '\u{136}'), ('\u{139}', '\u{13a}'),
-        ('\u{13a}', '\u{139}'), ('\u{13b}', '\u{13c}'), ('\u{13c}', '\u{13b}'),
-        ('\u{13d}', '\u{13e}'), ('\u{13e}', '\u{13d}'), ('\u{13f}', '\u{140}'),
-        ('\u{140}', '\u{13f}'), ('\u{141}', '\u{142}'), ('\u{142}', '\u{141}'),
-        ('\u{143}', '\u{144}'), ('\u{144}', '\u{143}'), ('\u{145}', '\u{146}'),
-        ('\u{146}', '\u{145}'), ('\u{147}', '\u{148}'), ('\u{148}', '\u{147}'),
-        ('\u{14a}', '\u{14b}'), ('\u{14b}', '\u{14a}'), ('\u{14c}', '\u{14d}'),
-        ('\u{14d}', '\u{14c}'), ('\u{14e}', '\u{14f}'), ('\u{14f}', '\u{14e}'),
-        ('\u{150}', '\u{151}'), ('\u{151}', '\u{150}'), ('\u{152}', '\u{153}'),
-        ('\u{153}', '\u{152}'), ('\u{154}', '\u{155}'), ('\u{155}', '\u{154}'),
-        ('\u{156}', '\u{157}'), ('\u{157}', '\u{156}'), ('\u{158}', '\u{159}'),
-        ('\u{159}', '\u{158}'), ('\u{15a}', '\u{15b}'), ('\u{15b}', '\u{15a}'),
-        ('\u{15c}', '\u{15d}'), ('\u{15d}', '\u{15c}'), ('\u{15e}', '\u{15f}'),
-        ('\u{15f}', '\u{15e}'), ('\u{160}', '\u{161}'), ('\u{161}', '\u{160}'),
-        ('\u{162}', '\u{163}'), ('\u{163}', '\u{162}'), ('\u{164}', '\u{165}'),
-        ('\u{165}', '\u{164}'), ('\u{166}', '\u{167}'), ('\u{167}', '\u{166}'),
-        ('\u{168}', '\u{169}'), ('\u{169}', '\u{168}'), ('\u{16a}', '\u{16b}'),
-        ('\u{16b}', '\u{16a}'), ('\u{16c}', '\u{16d}'), ('\u{16d}', '\u{16c}'),
-        ('\u{16e}', '\u{16f}'), ('\u{16f}', '\u{16e}'), ('\u{170}', '\u{171}'),
-        ('\u{171}', '\u{170}'), ('\u{172}', '\u{173}'), ('\u{173}', '\u{172}'),
-        ('\u{174}', '\u{175}'), ('\u{175}', '\u{174}'), ('\u{176}', '\u{177}'),
-        ('\u{177}', '\u{176}'), ('\u{178}', '\u{ff}'), ('\u{179}', '\u{17a}'),
-        ('\u{17a}', '\u{179}'), ('\u{17b}', '\u{17c}'), ('\u{17c}', '\u{17b}'),
-        ('\u{17d}', '\u{17e}'), ('\u{17e}', '\u{17d}'), ('\u{17f}', '\u{53}'),
-        ('\u{17f}', '\u{73}'), ('\u{180}', '\u{243}'), ('\u{181}', '\u{253}'),
-        ('\u{182}', '\u{183}'), ('\u{183}', '\u{182}'), ('\u{184}', '\u{185}'),
-        ('\u{185}', '\u{184}'), ('\u{186}', '\u{254}'), ('\u{187}', '\u{188}'),
-        ('\u{188}', '\u{187}'), ('\u{189}', '\u{256}'), ('\u{18a}', '\u{257}'),
-        ('\u{18b}', '\u{18c}'), ('\u{18c}', '\u{18b}'), ('\u{18e}', '\u{1dd}'),
-        ('\u{18f}', '\u{259}'), ('\u{190}', '\u{25b}'), ('\u{191}', '\u{192}'),
-        ('\u{192}', '\u{191}'), ('\u{193}', '\u{260}'), ('\u{194}', '\u{263}'),
-        ('\u{195}', '\u{1f6}'), ('\u{196}', '\u{269}'), ('\u{197}', '\u{268}'),
-        ('\u{198}', '\u{199}'), ('\u{199}', '\u{198}'), ('\u{19a}', '\u{23d}'),
-        ('\u{19c}', '\u{26f}'), ('\u{19d}', '\u{272}'), ('\u{19e}', '\u{220}'),
-        ('\u{19f}', '\u{275}'), ('\u{1a0}', '\u{1a1}'), ('\u{1a1}', '\u{1a0}'),
-        ('\u{1a2}', '\u{1a3}'), ('\u{1a3}', '\u{1a2}'), ('\u{1a4}', '\u{1a5}'),
-        ('\u{1a5}', '\u{1a4}'), ('\u{1a6}', '\u{280}'), ('\u{1a7}', '\u{1a8}'),
-        ('\u{1a8}', '\u{1a7}'), ('\u{1a9}', '\u{283}'), ('\u{1ac}', '\u{1ad}'),
-        ('\u{1ad}', '\u{1ac}'), ('\u{1ae}', '\u{288}'), ('\u{1af}', '\u{1b0}'),
-        ('\u{1b0}', '\u{1af}'), ('\u{1b1}', '\u{28a}'), ('\u{1b2}', '\u{28b}'),
-        ('\u{1b3}', '\u{1b4}'), ('\u{1b4}', '\u{1b3}'), ('\u{1b5}', '\u{1b6}'),
-        ('\u{1b6}', '\u{1b5}'), ('\u{1b7}', '\u{292}'), ('\u{1b8}', '\u{1b9}'),
-        ('\u{1b9}', '\u{1b8}'), ('\u{1bc}', '\u{1bd}'), ('\u{1bd}', '\u{1bc}'),
-        ('\u{1bf}', '\u{1f7}'), ('\u{1c4}', '\u{1c5}'), ('\u{1c4}', '\u{1c6}'),
-        ('\u{1c5}', '\u{1c4}'), ('\u{1c5}', '\u{1c6}'), ('\u{1c6}', '\u{1c4}'),
-        ('\u{1c6}', '\u{1c5}'), ('\u{1c7}', '\u{1c8}'), ('\u{1c7}', '\u{1c9}'),
-        ('\u{1c8}', '\u{1c7}'), ('\u{1c8}', '\u{1c9}'), ('\u{1c9}', '\u{1c7}'),
-        ('\u{1c9}', '\u{1c8}'), ('\u{1ca}', '\u{1cb}'), ('\u{1ca}', '\u{1cc}'),
-        ('\u{1cb}', '\u{1ca}'), ('\u{1cb}', '\u{1cc}'), ('\u{1cc}', '\u{1ca}'),
-        ('\u{1cc}', '\u{1cb}'), ('\u{1cd}', '\u{1ce}'), ('\u{1ce}', '\u{1cd}'),
-        ('\u{1cf}', '\u{1d0}'), ('\u{1d0}', '\u{1cf}'), ('\u{1d1}', '\u{1d2}'),
-        ('\u{1d2}', '\u{1d1}'), ('\u{1d3}', '\u{1d4}'), ('\u{1d4}', '\u{1d3}'),
-        ('\u{1d5}', '\u{1d6}'), ('\u{1d6}', '\u{1d5}'), ('\u{1d7}', '\u{1d8}'),
-        ('\u{1d8}', '\u{1d7}'), ('\u{1d9}', '\u{1da}'), ('\u{1da}', '\u{1d9}'),
-        ('\u{1db}', '\u{1dc}'), ('\u{1dc}', '\u{1db}'), ('\u{1dd}', '\u{18e}'),
-        ('\u{1de}', '\u{1df}'), ('\u{1df}', '\u{1de}'), ('\u{1e0}', '\u{1e1}'),
-        ('\u{1e1}', '\u{1e0}'), ('\u{1e2}', '\u{1e3}'), ('\u{1e3}', '\u{1e2}'),
-        ('\u{1e4}', '\u{1e5}'), ('\u{1e5}', '\u{1e4}'), ('\u{1e6}', '\u{1e7}'),
-        ('\u{1e7}', '\u{1e6}'), ('\u{1e8}', '\u{1e9}'), ('\u{1e9}', '\u{1e8}'),
-        ('\u{1ea}', '\u{1eb}'), ('\u{1eb}', '\u{1ea}'), ('\u{1ec}', '\u{1ed}'),
-        ('\u{1ed}', '\u{1ec}'), ('\u{1ee}', '\u{1ef}'), ('\u{1ef}', '\u{1ee}'),
-        ('\u{1f1}', '\u{1f2}'), ('\u{1f1}', '\u{1f3}'), ('\u{1f2}', '\u{1f1}'),
-        ('\u{1f2}', '\u{1f3}'), ('\u{1f3}', '\u{1f1}'), ('\u{1f3}', '\u{1f2}'),
-        ('\u{1f4}', '\u{1f5}'), ('\u{1f5}', '\u{1f4}'), ('\u{1f6}', '\u{195}'),
-        ('\u{1f7}', '\u{1bf}'), ('\u{1f8}', '\u{1f9}'), ('\u{1f9}', '\u{1f8}'),
-        ('\u{1fa}', '\u{1fb}'), ('\u{1fb}', '\u{1fa}'), ('\u{1fc}', '\u{1fd}'),
-        ('\u{1fd}', '\u{1fc}'), ('\u{1fe}', '\u{1ff}'), ('\u{1ff}', '\u{1fe}'),
-        ('\u{200}', '\u{201}'), ('\u{201}', '\u{200}'), ('\u{202}', '\u{203}'),
-        ('\u{203}', '\u{202}'), ('\u{204}', '\u{205}'), ('\u{205}', '\u{204}'),
-        ('\u{206}', '\u{207}'), ('\u{207}', '\u{206}'), ('\u{208}', '\u{209}'),
-        ('\u{209}', '\u{208}'), ('\u{20a}', '\u{20b}'), ('\u{20b}', '\u{20a}'),
-        ('\u{20c}', '\u{20d}'), ('\u{20d}', '\u{20c}'), ('\u{20e}', '\u{20f}'),
-        ('\u{20f}', '\u{20e}'), ('\u{210}', '\u{211}'), ('\u{211}', '\u{210}'),
-        ('\u{212}', '\u{213}'), ('\u{213}', '\u{212}'), ('\u{214}', '\u{215}'),
-        ('\u{215}', '\u{214}'), ('\u{216}', '\u{217}'), ('\u{217}', '\u{216}'),
-        ('\u{218}', '\u{219}'), ('\u{219}', '\u{218}'), ('\u{21a}', '\u{21b}'),
-        ('\u{21b}', '\u{21a}'), ('\u{21c}', '\u{21d}'), ('\u{21d}', '\u{21c}'),
-        ('\u{21e}', '\u{21f}'), ('\u{21f}', '\u{21e}'), ('\u{220}', '\u{19e}'),
-        ('\u{222}', '\u{223}'), ('\u{223}', '\u{222}'), ('\u{224}', '\u{225}'),
-        ('\u{225}', '\u{224}'), ('\u{226}', '\u{227}'), ('\u{227}', '\u{226}'),
-        ('\u{228}', '\u{229}'), ('\u{229}', '\u{228}'), ('\u{22a}', '\u{22b}'),
-        ('\u{22b}', '\u{22a}'), ('\u{22c}', '\u{22d}'), ('\u{22d}', '\u{22c}'),
-        ('\u{22e}', '\u{22f}'), ('\u{22f}', '\u{22e}'), ('\u{230}', '\u{231}'),
-        ('\u{231}', '\u{230}'), ('\u{232}', '\u{233}'), ('\u{233}', '\u{232}'),
-        ('\u{23a}', '\u{2c65}'), ('\u{23b}', '\u{23c}'), ('\u{23c}', '\u{23b}'),
-        ('\u{23d}', '\u{19a}'), ('\u{23e}', '\u{2c66}'), ('\u{23f}',
-        '\u{2c7e}'), ('\u{240}', '\u{2c7f}'), ('\u{241}', '\u{242}'),
-        ('\u{242}', '\u{241}'), ('\u{243}', '\u{180}'), ('\u{244}', '\u{289}'),
-        ('\u{245}', '\u{28c}'), ('\u{246}', '\u{247}'), ('\u{247}', '\u{246}'),
-        ('\u{248}', '\u{249}'), ('\u{249}', '\u{248}'), ('\u{24a}', '\u{24b}'),
-        ('\u{24b}', '\u{24a}'), ('\u{24c}', '\u{24d}'), ('\u{24d}', '\u{24c}'),
-        ('\u{24e}', '\u{24f}'), ('\u{24f}', '\u{24e}'), ('\u{250}', '\u{2c6f}'),
-        ('\u{251}', '\u{2c6d}'), ('\u{252}', '\u{2c70}'), ('\u{253}',
-        '\u{181}'), ('\u{254}', '\u{186}'), ('\u{256}', '\u{189}'), ('\u{257}',
-        '\u{18a}'), ('\u{259}', '\u{18f}'), ('\u{25b}', '\u{190}'), ('\u{25c}',
-        '\u{a7ab}'), ('\u{260}', '\u{193}'), ('\u{261}', '\u{a7ac}'),
-        ('\u{263}', '\u{194}'), ('\u{265}', '\u{a78d}'), ('\u{266}',
-        '\u{a7aa}'), ('\u{268}', '\u{197}'), ('\u{269}', '\u{196}'), ('\u{26b}',
-        '\u{2c62}'), ('\u{26c}', '\u{a7ad}'), ('\u{26f}', '\u{19c}'),
-        ('\u{271}', '\u{2c6e}'), ('\u{272}', '\u{19d}'), ('\u{275}', '\u{19f}'),
-        ('\u{27d}', '\u{2c64}'), ('\u{280}', '\u{1a6}'), ('\u{283}', '\u{1a9}'),
-        ('\u{287}', '\u{a7b1}'), ('\u{288}', '\u{1ae}'), ('\u{289}', '\u{244}'),
-        ('\u{28a}', '\u{1b1}'), ('\u{28b}', '\u{1b2}'), ('\u{28c}', '\u{245}'),
-        ('\u{292}', '\u{1b7}'), ('\u{29d}', '\u{a7b2}'), ('\u{29e}',
-        '\u{a7b0}'), ('\u{345}', '\u{399}'), ('\u{345}', '\u{3b9}'), ('\u{345}',
-        '\u{1fbe}'), ('\u{370}', '\u{371}'), ('\u{371}', '\u{370}'), ('\u{372}',
-        '\u{373}'), ('\u{373}', '\u{372}'), ('\u{376}', '\u{377}'), ('\u{377}',
-        '\u{376}'), ('\u{37b}', '\u{3fd}'), ('\u{37c}', '\u{3fe}'), ('\u{37d}',
-        '\u{3ff}'), ('\u{37f}', '\u{3f3}'), ('\u{386}', '\u{3ac}'), ('\u{388}',
-        '\u{3ad}'), ('\u{389}', '\u{3ae}'), ('\u{38a}', '\u{3af}'), ('\u{38c}',
-        '\u{3cc}'), ('\u{38e}', '\u{3cd}'), ('\u{38f}', '\u{3ce}'), ('\u{391}',
-        '\u{3b1}'), ('\u{392}', '\u{3b2}'), ('\u{392}', '\u{3d0}'), ('\u{393}',
-        '\u{3b3}'), ('\u{394}', '\u{3b4}'), ('\u{395}', '\u{3b5}'), ('\u{395}',
-        '\u{3f5}'), ('\u{396}', '\u{3b6}'), ('\u{397}', '\u{3b7}'), ('\u{398}',
-        '\u{3b8}'), ('\u{398}', '\u{3d1}'), ('\u{398}', '\u{3f4}'), ('\u{399}',
-        '\u{345}'), ('\u{399}', '\u{3b9}'), ('\u{399}', '\u{1fbe}'), ('\u{39a}',
-        '\u{3ba}'), ('\u{39a}', '\u{3f0}'), ('\u{39b}', '\u{3bb}'), ('\u{39c}',
-        '\u{b5}'), ('\u{39c}', '\u{3bc}'), ('\u{39d}', '\u{3bd}'), ('\u{39e}',
-        '\u{3be}'), ('\u{39f}', '\u{3bf}'), ('\u{3a0}', '\u{3c0}'), ('\u{3a0}',
-        '\u{3d6}'), ('\u{3a1}', '\u{3c1}'), ('\u{3a1}', '\u{3f1}'), ('\u{3a3}',
-        '\u{3c2}'), ('\u{3a3}', '\u{3c3}'), ('\u{3a4}', '\u{3c4}'), ('\u{3a5}',
-        '\u{3c5}'), ('\u{3a6}', '\u{3c6}'), ('\u{3a6}', '\u{3d5}'), ('\u{3a7}',
-        '\u{3c7}'), ('\u{3a8}', '\u{3c8}'), ('\u{3a9}', '\u{3c9}'), ('\u{3a9}',
-        '\u{2126}'), ('\u{3aa}', '\u{3ca}'), ('\u{3ab}', '\u{3cb}'), ('\u{3ac}',
-        '\u{386}'), ('\u{3ad}', '\u{388}'), ('\u{3ae}', '\u{389}'), ('\u{3af}',
-        '\u{38a}'), ('\u{3b1}', '\u{391}'), ('\u{3b2}', '\u{392}'), ('\u{3b2}',
-        '\u{3d0}'), ('\u{3b3}', '\u{393}'), ('\u{3b4}', '\u{394}'), ('\u{3b5}',
-        '\u{395}'), ('\u{3b5}', '\u{3f5}'), ('\u{3b6}', '\u{396}'), ('\u{3b7}',
-        '\u{397}'), ('\u{3b8}', '\u{398}'), ('\u{3b8}', '\u{3d1}'), ('\u{3b8}',
-        '\u{3f4}'), ('\u{3b9}', '\u{345}'), ('\u{3b9}', '\u{399}'), ('\u{3b9}',
-        '\u{1fbe}'), ('\u{3ba}', '\u{39a}'), ('\u{3ba}', '\u{3f0}'), ('\u{3bb}',
-        '\u{39b}'), ('\u{3bc}', '\u{b5}'), ('\u{3bc}', '\u{39c}'), ('\u{3bd}',
-        '\u{39d}'), ('\u{3be}', '\u{39e}'), ('\u{3bf}', '\u{39f}'), ('\u{3c0}',
-        '\u{3a0}'), ('\u{3c0}', '\u{3d6}'), ('\u{3c1}', '\u{3a1}'), ('\u{3c1}',
-        '\u{3f1}'), ('\u{3c2}', '\u{3a3}'), ('\u{3c2}', '\u{3c3}'), ('\u{3c3}',
-        '\u{3a3}'), ('\u{3c3}', '\u{3c2}'), ('\u{3c4}', '\u{3a4}'), ('\u{3c5}',
-        '\u{3a5}'), ('\u{3c6}', '\u{3a6}'), ('\u{3c6}', '\u{3d5}'), ('\u{3c7}',
-        '\u{3a7}'), ('\u{3c8}', '\u{3a8}'), ('\u{3c9}', '\u{3a9}'), ('\u{3c9}',
-        '\u{2126}'), ('\u{3ca}', '\u{3aa}'), ('\u{3cb}', '\u{3ab}'), ('\u{3cc}',
-        '\u{38c}'), ('\u{3cd}', '\u{38e}'), ('\u{3ce}', '\u{38f}'), ('\u{3cf}',
-        '\u{3d7}'), ('\u{3d0}', '\u{392}'), ('\u{3d0}', '\u{3b2}'), ('\u{3d1}',
-        '\u{398}'), ('\u{3d1}', '\u{3b8}'), ('\u{3d1}', '\u{3f4}'), ('\u{3d5}',
-        '\u{3a6}'), ('\u{3d5}', '\u{3c6}'), ('\u{3d6}', '\u{3a0}'), ('\u{3d6}',
-        '\u{3c0}'), ('\u{3d7}', '\u{3cf}'), ('\u{3d8}', '\u{3d9}'), ('\u{3d9}',
-        '\u{3d8}'), ('\u{3da}', '\u{3db}'), ('\u{3db}', '\u{3da}'), ('\u{3dc}',
-        '\u{3dd}'), ('\u{3dd}', '\u{3dc}'), ('\u{3de}', '\u{3df}'), ('\u{3df}',
-        '\u{3de}'), ('\u{3e0}', '\u{3e1}'), ('\u{3e1}', '\u{3e0}'), ('\u{3e2}',
-        '\u{3e3}'), ('\u{3e3}', '\u{3e2}'), ('\u{3e4}', '\u{3e5}'), ('\u{3e5}',
-        '\u{3e4}'), ('\u{3e6}', '\u{3e7}'), ('\u{3e7}', '\u{3e6}'), ('\u{3e8}',
-        '\u{3e9}'), ('\u{3e9}', '\u{3e8}'), ('\u{3ea}', '\u{3eb}'), ('\u{3eb}',
-        '\u{3ea}'), ('\u{3ec}', '\u{3ed}'), ('\u{3ed}', '\u{3ec}'), ('\u{3ee}',
-        '\u{3ef}'), ('\u{3ef}', '\u{3ee}'), ('\u{3f0}', '\u{39a}'), ('\u{3f0}',
-        '\u{3ba}'), ('\u{3f1}', '\u{3a1}'), ('\u{3f1}', '\u{3c1}'), ('\u{3f2}',
-        '\u{3f9}'), ('\u{3f3}', '\u{37f}'), ('\u{3f4}', '\u{398}'), ('\u{3f4}',
-        '\u{3b8}'), ('\u{3f4}', '\u{3d1}'), ('\u{3f5}', '\u{395}'), ('\u{3f5}',
-        '\u{3b5}'), ('\u{3f7}', '\u{3f8}'), ('\u{3f8}', '\u{3f7}'), ('\u{3f9}',
-        '\u{3f2}'), ('\u{3fa}', '\u{3fb}'), ('\u{3fb}', '\u{3fa}'), ('\u{3fd}',
-        '\u{37b}'), ('\u{3fe}', '\u{37c}'), ('\u{3ff}', '\u{37d}'), ('\u{400}',
-        '\u{450}'), ('\u{401}', '\u{451}'), ('\u{402}', '\u{452}'), ('\u{403}',
-        '\u{453}'), ('\u{404}', '\u{454}'), ('\u{405}', '\u{455}'), ('\u{406}',
-        '\u{456}'), ('\u{407}', '\u{457}'), ('\u{408}', '\u{458}'), ('\u{409}',
-        '\u{459}'), ('\u{40a}', '\u{45a}'), ('\u{40b}', '\u{45b}'), ('\u{40c}',
-        '\u{45c}'), ('\u{40d}', '\u{45d}'), ('\u{40e}', '\u{45e}'), ('\u{40f}',
-        '\u{45f}'), ('\u{410}', '\u{430}'), ('\u{411}', '\u{431}'), ('\u{412}',
-        '\u{432}'), ('\u{413}', '\u{433}'), ('\u{414}', '\u{434}'), ('\u{415}',
-        '\u{435}'), ('\u{416}', '\u{436}'), ('\u{417}', '\u{437}'), ('\u{418}',
-        '\u{438}'), ('\u{419}', '\u{439}'), ('\u{41a}', '\u{43a}'), ('\u{41b}',
-        '\u{43b}'), ('\u{41c}', '\u{43c}'), ('\u{41d}', '\u{43d}'), ('\u{41e}',
-        '\u{43e}'), ('\u{41f}', '\u{43f}'), ('\u{420}', '\u{440}'), ('\u{421}',
-        '\u{441}'), ('\u{422}', '\u{442}'), ('\u{423}', '\u{443}'), ('\u{424}',
-        '\u{444}'), ('\u{425}', '\u{445}'), ('\u{426}', '\u{446}'), ('\u{427}',
-        '\u{447}'), ('\u{428}', '\u{448}'), ('\u{429}', '\u{449}'), ('\u{42a}',
-        '\u{44a}'), ('\u{42b}', '\u{44b}'), ('\u{42c}', '\u{44c}'), ('\u{42d}',
-        '\u{44d}'), ('\u{42e}', '\u{44e}'), ('\u{42f}', '\u{44f}'), ('\u{430}',
-        '\u{410}'), ('\u{431}', '\u{411}'), ('\u{432}', '\u{412}'), ('\u{433}',
-        '\u{413}'), ('\u{434}', '\u{414}'), ('\u{435}', '\u{415}'), ('\u{436}',
-        '\u{416}'), ('\u{437}', '\u{417}'), ('\u{438}', '\u{418}'), ('\u{439}',
-        '\u{419}'), ('\u{43a}', '\u{41a}'), ('\u{43b}', '\u{41b}'), ('\u{43c}',
-        '\u{41c}'), ('\u{43d}', '\u{41d}'), ('\u{43e}', '\u{41e}'), ('\u{43f}',
-        '\u{41f}'), ('\u{440}', '\u{420}'), ('\u{441}', '\u{421}'), ('\u{442}',
-        '\u{422}'), ('\u{443}', '\u{423}'), ('\u{444}', '\u{424}'), ('\u{445}',
-        '\u{425}'), ('\u{446}', '\u{426}'), ('\u{447}', '\u{427}'), ('\u{448}',
-        '\u{428}'), ('\u{449}', '\u{429}'), ('\u{44a}', '\u{42a}'), ('\u{44b}',
-        '\u{42b}'), ('\u{44c}', '\u{42c}'), ('\u{44d}', '\u{42d}'), ('\u{44e}',
-        '\u{42e}'), ('\u{44f}', '\u{42f}'), ('\u{450}', '\u{400}'), ('\u{451}',
-        '\u{401}'), ('\u{452}', '\u{402}'), ('\u{453}', '\u{403}'), ('\u{454}',
-        '\u{404}'), ('\u{455}', '\u{405}'), ('\u{456}', '\u{406}'), ('\u{457}',
-        '\u{407}'), ('\u{458}', '\u{408}'), ('\u{459}', '\u{409}'), ('\u{45a}',
-        '\u{40a}'), ('\u{45b}', '\u{40b}'), ('\u{45c}', '\u{40c}'), ('\u{45d}',
-        '\u{40d}'), ('\u{45e}', '\u{40e}'), ('\u{45f}', '\u{40f}'), ('\u{460}',
-        '\u{461}'), ('\u{461}', '\u{460}'), ('\u{462}', '\u{463}'), ('\u{463}',
-        '\u{462}'), ('\u{464}', '\u{465}'), ('\u{465}', '\u{464}'), ('\u{466}',
-        '\u{467}'), ('\u{467}', '\u{466}'), ('\u{468}', '\u{469}'), ('\u{469}',
-        '\u{468}'), ('\u{46a}', '\u{46b}'), ('\u{46b}', '\u{46a}'), ('\u{46c}',
-        '\u{46d}'), ('\u{46d}', '\u{46c}'), ('\u{46e}', '\u{46f}'), ('\u{46f}',
-        '\u{46e}'), ('\u{470}', '\u{471}'), ('\u{471}', '\u{470}'), ('\u{472}',
-        '\u{473}'), ('\u{473}', '\u{472}'), ('\u{474}', '\u{475}'), ('\u{475}',
-        '\u{474}'), ('\u{476}', '\u{477}'), ('\u{477}', '\u{476}'), ('\u{478}',
-        '\u{479}'), ('\u{479}', '\u{478}'), ('\u{47a}', '\u{47b}'), ('\u{47b}',
-        '\u{47a}'), ('\u{47c}', '\u{47d}'), ('\u{47d}', '\u{47c}'), ('\u{47e}',
-        '\u{47f}'), ('\u{47f}', '\u{47e}'), ('\u{480}', '\u{481}'), ('\u{481}',
-        '\u{480}'), ('\u{48a}', '\u{48b}'), ('\u{48b}', '\u{48a}'), ('\u{48c}',
-        '\u{48d}'), ('\u{48d}', '\u{48c}'), ('\u{48e}', '\u{48f}'), ('\u{48f}',
-        '\u{48e}'), ('\u{490}', '\u{491}'), ('\u{491}', '\u{490}'), ('\u{492}',
-        '\u{493}'), ('\u{493}', '\u{492}'), ('\u{494}', '\u{495}'), ('\u{495}',
-        '\u{494}'), ('\u{496}', '\u{497}'), ('\u{497}', '\u{496}'), ('\u{498}',
-        '\u{499}'), ('\u{499}', '\u{498}'), ('\u{49a}', '\u{49b}'), ('\u{49b}',
-        '\u{49a}'), ('\u{49c}', '\u{49d}'), ('\u{49d}', '\u{49c}'), ('\u{49e}',
-        '\u{49f}'), ('\u{49f}', '\u{49e}'), ('\u{4a0}', '\u{4a1}'), ('\u{4a1}',
-        '\u{4a0}'), ('\u{4a2}', '\u{4a3}'), ('\u{4a3}', '\u{4a2}'), ('\u{4a4}',
-        '\u{4a5}'), ('\u{4a5}', '\u{4a4}'), ('\u{4a6}', '\u{4a7}'), ('\u{4a7}',
-        '\u{4a6}'), ('\u{4a8}', '\u{4a9}'), ('\u{4a9}', '\u{4a8}'), ('\u{4aa}',
-        '\u{4ab}'), ('\u{4ab}', '\u{4aa}'), ('\u{4ac}', '\u{4ad}'), ('\u{4ad}',
-        '\u{4ac}'), ('\u{4ae}', '\u{4af}'), ('\u{4af}', '\u{4ae}'), ('\u{4b0}',
-        '\u{4b1}'), ('\u{4b1}', '\u{4b0}'), ('\u{4b2}', '\u{4b3}'), ('\u{4b3}',
-        '\u{4b2}'), ('\u{4b4}', '\u{4b5}'), ('\u{4b5}', '\u{4b4}'), ('\u{4b6}',
-        '\u{4b7}'), ('\u{4b7}', '\u{4b6}'), ('\u{4b8}', '\u{4b9}'), ('\u{4b9}',
-        '\u{4b8}'), ('\u{4ba}', '\u{4bb}'), ('\u{4bb}', '\u{4ba}'), ('\u{4bc}',
-        '\u{4bd}'), ('\u{4bd}', '\u{4bc}'), ('\u{4be}', '\u{4bf}'), ('\u{4bf}',
-        '\u{4be}'), ('\u{4c0}', '\u{4cf}'), ('\u{4c1}', '\u{4c2}'), ('\u{4c2}',
-        '\u{4c1}'), ('\u{4c3}', '\u{4c4}'), ('\u{4c4}', '\u{4c3}'), ('\u{4c5}',
-        '\u{4c6}'), ('\u{4c6}', '\u{4c5}'), ('\u{4c7}', '\u{4c8}'), ('\u{4c8}',
-        '\u{4c7}'), ('\u{4c9}', '\u{4ca}'), ('\u{4ca}', '\u{4c9}'), ('\u{4cb}',
-        '\u{4cc}'), ('\u{4cc}', '\u{4cb}'), ('\u{4cd}', '\u{4ce}'), ('\u{4ce}',
-        '\u{4cd}'), ('\u{4cf}', '\u{4c0}'), ('\u{4d0}', '\u{4d1}'), ('\u{4d1}',
-        '\u{4d0}'), ('\u{4d2}', '\u{4d3}'), ('\u{4d3}', '\u{4d2}'), ('\u{4d4}',
-        '\u{4d5}'), ('\u{4d5}', '\u{4d4}'), ('\u{4d6}', '\u{4d7}'), ('\u{4d7}',
-        '\u{4d6}'), ('\u{4d8}', '\u{4d9}'), ('\u{4d9}', '\u{4d8}'), ('\u{4da}',
-        '\u{4db}'), ('\u{4db}', '\u{4da}'), ('\u{4dc}', '\u{4dd}'), ('\u{4dd}',
-        '\u{4dc}'), ('\u{4de}', '\u{4df}'), ('\u{4df}', '\u{4de}'), ('\u{4e0}',
-        '\u{4e1}'), ('\u{4e1}', '\u{4e0}'), ('\u{4e2}', '\u{4e3}'), ('\u{4e3}',
-        '\u{4e2}'), ('\u{4e4}', '\u{4e5}'), ('\u{4e5}', '\u{4e4}'), ('\u{4e6}',
-        '\u{4e7}'), ('\u{4e7}', '\u{4e6}'), ('\u{4e8}', '\u{4e9}'), ('\u{4e9}',
-        '\u{4e8}'), ('\u{4ea}', '\u{4eb}'), ('\u{4eb}', '\u{4ea}'), ('\u{4ec}',
-        '\u{4ed}'), ('\u{4ed}', '\u{4ec}'), ('\u{4ee}', '\u{4ef}'), ('\u{4ef}',
-        '\u{4ee}'), ('\u{4f0}', '\u{4f1}'), ('\u{4f1}', '\u{4f0}'), ('\u{4f2}',
-        '\u{4f3}'), ('\u{4f3}', '\u{4f2}'), ('\u{4f4}', '\u{4f5}'), ('\u{4f5}',
-        '\u{4f4}'), ('\u{4f6}', '\u{4f7}'), ('\u{4f7}', '\u{4f6}'), ('\u{4f8}',
-        '\u{4f9}'), ('\u{4f9}', '\u{4f8}'), ('\u{4fa}', '\u{4fb}'), ('\u{4fb}',
-        '\u{4fa}'), ('\u{4fc}', '\u{4fd}'), ('\u{4fd}', '\u{4fc}'), ('\u{4fe}',
-        '\u{4ff}'), ('\u{4ff}', '\u{4fe}'), ('\u{500}', '\u{501}'), ('\u{501}',
-        '\u{500}'), ('\u{502}', '\u{503}'), ('\u{503}', '\u{502}'), ('\u{504}',
-        '\u{505}'), ('\u{505}', '\u{504}'), ('\u{506}', '\u{507}'), ('\u{507}',
-        '\u{506}'), ('\u{508}', '\u{509}'), ('\u{509}', '\u{508}'), ('\u{50a}',
-        '\u{50b}'), ('\u{50b}', '\u{50a}'), ('\u{50c}', '\u{50d}'), ('\u{50d}',
-        '\u{50c}'), ('\u{50e}', '\u{50f}'), ('\u{50f}', '\u{50e}'), ('\u{510}',
-        '\u{511}'), ('\u{511}', '\u{510}'), ('\u{512}', '\u{513}'), ('\u{513}',
-        '\u{512}'), ('\u{514}', '\u{515}'), ('\u{515}', '\u{514}'), ('\u{516}',
-        '\u{517}'), ('\u{517}', '\u{516}'), ('\u{518}', '\u{519}'), ('\u{519}',
-        '\u{518}'), ('\u{51a}', '\u{51b}'), ('\u{51b}', '\u{51a}'), ('\u{51c}',
-        '\u{51d}'), ('\u{51d}', '\u{51c}'), ('\u{51e}', '\u{51f}'), ('\u{51f}',
-        '\u{51e}'), ('\u{520}', '\u{521}'), ('\u{521}', '\u{520}'), ('\u{522}',
-        '\u{523}'), ('\u{523}', '\u{522}'), ('\u{524}', '\u{525}'), ('\u{525}',
-        '\u{524}'), ('\u{526}', '\u{527}'), ('\u{527}', '\u{526}'), ('\u{528}',
-        '\u{529}'), ('\u{529}', '\u{528}'), ('\u{52a}', '\u{52b}'), ('\u{52b}',
-        '\u{52a}'), ('\u{52c}', '\u{52d}'), ('\u{52d}', '\u{52c}'), ('\u{52e}',
-        '\u{52f}'), ('\u{52f}', '\u{52e}'), ('\u{531}', '\u{561}'), ('\u{532}',
-        '\u{562}'), ('\u{533}', '\u{563}'), ('\u{534}', '\u{564}'), ('\u{535}',
-        '\u{565}'), ('\u{536}', '\u{566}'), ('\u{537}', '\u{567}'), ('\u{538}',
-        '\u{568}'), ('\u{539}', '\u{569}'), ('\u{53a}', '\u{56a}'), ('\u{53b}',
-        '\u{56b}'), ('\u{53c}', '\u{56c}'), ('\u{53d}', '\u{56d}'), ('\u{53e}',
-        '\u{56e}'), ('\u{53f}', '\u{56f}'), ('\u{540}', '\u{570}'), ('\u{541}',
-        '\u{571}'), ('\u{542}', '\u{572}'), ('\u{543}', '\u{573}'), ('\u{544}',
-        '\u{574}'), ('\u{545}', '\u{575}'), ('\u{546}', '\u{576}'), ('\u{547}',
-        '\u{577}'), ('\u{548}', '\u{578}'), ('\u{549}', '\u{579}'), ('\u{54a}',
-        '\u{57a}'), ('\u{54b}', '\u{57b}'), ('\u{54c}', '\u{57c}'), ('\u{54d}',
-        '\u{57d}'), ('\u{54e}', '\u{57e}'), ('\u{54f}', '\u{57f}'), ('\u{550}',
-        '\u{580}'), ('\u{551}', '\u{581}'), ('\u{552}', '\u{582}'), ('\u{553}',
-        '\u{583}'), ('\u{554}', '\u{584}'), ('\u{555}', '\u{585}'), ('\u{556}',
-        '\u{586}'), ('\u{561}', '\u{531}'), ('\u{562}', '\u{532}'), ('\u{563}',
-        '\u{533}'), ('\u{564}', '\u{534}'), ('\u{565}', '\u{535}'), ('\u{566}',
-        '\u{536}'), ('\u{567}', '\u{537}'), ('\u{568}', '\u{538}'), ('\u{569}',
-        '\u{539}'), ('\u{56a}', '\u{53a}'), ('\u{56b}', '\u{53b}'), ('\u{56c}',
-        '\u{53c}'), ('\u{56d}', '\u{53d}'), ('\u{56e}', '\u{53e}'), ('\u{56f}',
-        '\u{53f}'), ('\u{570}', '\u{540}'), ('\u{571}', '\u{541}'), ('\u{572}',
-        '\u{542}'), ('\u{573}', '\u{543}'), ('\u{574}', '\u{544}'), ('\u{575}',
-        '\u{545}'), ('\u{576}', '\u{546}'), ('\u{577}', '\u{547}'), ('\u{578}',
-        '\u{548}'), ('\u{579}', '\u{549}'), ('\u{57a}', '\u{54a}'), ('\u{57b}',
-        '\u{54b}'), ('\u{57c}', '\u{54c}'), ('\u{57d}', '\u{54d}'), ('\u{57e}',
-        '\u{54e}'), ('\u{57f}', '\u{54f}'), ('\u{580}', '\u{550}'), ('\u{581}',
-        '\u{551}'), ('\u{582}', '\u{552}'), ('\u{583}', '\u{553}'), ('\u{584}',
-        '\u{554}'), ('\u{585}', '\u{555}'), ('\u{586}', '\u{556}'), ('\u{10a0}',
-        '\u{2d00}'), ('\u{10a1}', '\u{2d01}'), ('\u{10a2}', '\u{2d02}'),
-        ('\u{10a3}', '\u{2d03}'), ('\u{10a4}', '\u{2d04}'), ('\u{10a5}',
-        '\u{2d05}'), ('\u{10a6}', '\u{2d06}'), ('\u{10a7}', '\u{2d07}'),
-        ('\u{10a8}', '\u{2d08}'), ('\u{10a9}', '\u{2d09}'), ('\u{10aa}',
-        '\u{2d0a}'), ('\u{10ab}', '\u{2d0b}'), ('\u{10ac}', '\u{2d0c}'),
-        ('\u{10ad}', '\u{2d0d}'), ('\u{10ae}', '\u{2d0e}'), ('\u{10af}',
-        '\u{2d0f}'), ('\u{10b0}', '\u{2d10}'), ('\u{10b1}', '\u{2d11}'),
-        ('\u{10b2}', '\u{2d12}'), ('\u{10b3}', '\u{2d13}'), ('\u{10b4}',
-        '\u{2d14}'), ('\u{10b5}', '\u{2d15}'), ('\u{10b6}', '\u{2d16}'),
-        ('\u{10b7}', '\u{2d17}'), ('\u{10b8}', '\u{2d18}'), ('\u{10b9}',
-        '\u{2d19}'), ('\u{10ba}', '\u{2d1a}'), ('\u{10bb}', '\u{2d1b}'),
-        ('\u{10bc}', '\u{2d1c}'), ('\u{10bd}', '\u{2d1d}'), ('\u{10be}',
-        '\u{2d1e}'), ('\u{10bf}', '\u{2d1f}'), ('\u{10c0}', '\u{2d20}'),
-        ('\u{10c1}', '\u{2d21}'), ('\u{10c2}', '\u{2d22}'), ('\u{10c3}',
-        '\u{2d23}'), ('\u{10c4}', '\u{2d24}'), ('\u{10c5}', '\u{2d25}'),
-        ('\u{10c7}', '\u{2d27}'), ('\u{10cd}', '\u{2d2d}'), ('\u{13a0}',
-        '\u{ab70}'), ('\u{13a1}', '\u{ab71}'), ('\u{13a2}', '\u{ab72}'),
-        ('\u{13a3}', '\u{ab73}'), ('\u{13a4}', '\u{ab74}'), ('\u{13a5}',
-        '\u{ab75}'), ('\u{13a6}', '\u{ab76}'), ('\u{13a7}', '\u{ab77}'),
-        ('\u{13a8}', '\u{ab78}'), ('\u{13a9}', '\u{ab79}'), ('\u{13aa}',
-        '\u{ab7a}'), ('\u{13ab}', '\u{ab7b}'), ('\u{13ac}', '\u{ab7c}'),
-        ('\u{13ad}', '\u{ab7d}'), ('\u{13ae}', '\u{ab7e}'), ('\u{13af}',
-        '\u{ab7f}'), ('\u{13b0}', '\u{ab80}'), ('\u{13b1}', '\u{ab81}'),
-        ('\u{13b2}', '\u{ab82}'), ('\u{13b3}', '\u{ab83}'), ('\u{13b4}',
-        '\u{ab84}'), ('\u{13b5}', '\u{ab85}'), ('\u{13b6}', '\u{ab86}'),
-        ('\u{13b7}', '\u{ab87}'), ('\u{13b8}', '\u{ab88}'), ('\u{13b9}',
-        '\u{ab89}'), ('\u{13ba}', '\u{ab8a}'), ('\u{13bb}', '\u{ab8b}'),
-        ('\u{13bc}', '\u{ab8c}'), ('\u{13bd}', '\u{ab8d}'), ('\u{13be}',
-        '\u{ab8e}'), ('\u{13bf}', '\u{ab8f}'), ('\u{13c0}', '\u{ab90}'),
-        ('\u{13c1}', '\u{ab91}'), ('\u{13c2}', '\u{ab92}'), ('\u{13c3}',
-        '\u{ab93}'), ('\u{13c4}', '\u{ab94}'), ('\u{13c5}', '\u{ab95}'),
-        ('\u{13c6}', '\u{ab96}'), ('\u{13c7}', '\u{ab97}'), ('\u{13c8}',
-        '\u{ab98}'), ('\u{13c9}', '\u{ab99}'), ('\u{13ca}', '\u{ab9a}'),
-        ('\u{13cb}', '\u{ab9b}'), ('\u{13cc}', '\u{ab9c}'), ('\u{13cd}',
-        '\u{ab9d}'), ('\u{13ce}', '\u{ab9e}'), ('\u{13cf}', '\u{ab9f}'),
-        ('\u{13d0}', '\u{aba0}'), ('\u{13d1}', '\u{aba1}'), ('\u{13d2}',
-        '\u{aba2}'), ('\u{13d3}', '\u{aba3}'), ('\u{13d4}', '\u{aba4}'),
-        ('\u{13d5}', '\u{aba5}'), ('\u{13d6}', '\u{aba6}'), ('\u{13d7}',
-        '\u{aba7}'), ('\u{13d8}', '\u{aba8}'), ('\u{13d9}', '\u{aba9}'),
-        ('\u{13da}', '\u{abaa}'), ('\u{13db}', '\u{abab}'), ('\u{13dc}',
-        '\u{abac}'), ('\u{13dd}', '\u{abad}'), ('\u{13de}', '\u{abae}'),
-        ('\u{13df}', '\u{abaf}'), ('\u{13e0}', '\u{abb0}'), ('\u{13e1}',
-        '\u{abb1}'), ('\u{13e2}', '\u{abb2}'), ('\u{13e3}', '\u{abb3}'),
-        ('\u{13e4}', '\u{abb4}'), ('\u{13e5}', '\u{abb5}'), ('\u{13e6}',
-        '\u{abb6}'), ('\u{13e7}', '\u{abb7}'), ('\u{13e8}', '\u{abb8}'),
-        ('\u{13e9}', '\u{abb9}'), ('\u{13ea}', '\u{abba}'), ('\u{13eb}',
-        '\u{abbb}'), ('\u{13ec}', '\u{abbc}'), ('\u{13ed}', '\u{abbd}'),
-        ('\u{13ee}', '\u{abbe}'), ('\u{13ef}', '\u{abbf}'), ('\u{13f0}',
-        '\u{13f8}'), ('\u{13f1}', '\u{13f9}'), ('\u{13f2}', '\u{13fa}'),
-        ('\u{13f3}', '\u{13fb}'), ('\u{13f4}', '\u{13fc}'), ('\u{13f5}',
-        '\u{13fd}'), ('\u{13f8}', '\u{13f0}'), ('\u{13f9}', '\u{13f1}'),
-        ('\u{13fa}', '\u{13f2}'), ('\u{13fb}', '\u{13f3}'), ('\u{13fc}',
-        '\u{13f4}'), ('\u{13fd}', '\u{13f5}'), ('\u{1d79}', '\u{a77d}'),
-        ('\u{1d7d}', '\u{2c63}'), ('\u{1e00}', '\u{1e01}'), ('\u{1e01}',
-        '\u{1e00}'), ('\u{1e02}', '\u{1e03}'), ('\u{1e03}', '\u{1e02}'),
-        ('\u{1e04}', '\u{1e05}'), ('\u{1e05}', '\u{1e04}'), ('\u{1e06}',
-        '\u{1e07}'), ('\u{1e07}', '\u{1e06}'), ('\u{1e08}', '\u{1e09}'),
-        ('\u{1e09}', '\u{1e08}'), ('\u{1e0a}', '\u{1e0b}'), ('\u{1e0b}',
-        '\u{1e0a}'), ('\u{1e0c}', '\u{1e0d}'), ('\u{1e0d}', '\u{1e0c}'),
-        ('\u{1e0e}', '\u{1e0f}'), ('\u{1e0f}', '\u{1e0e}'), ('\u{1e10}',
-        '\u{1e11}'), ('\u{1e11}', '\u{1e10}'), ('\u{1e12}', '\u{1e13}'),
-        ('\u{1e13}', '\u{1e12}'), ('\u{1e14}', '\u{1e15}'), ('\u{1e15}',
-        '\u{1e14}'), ('\u{1e16}', '\u{1e17}'), ('\u{1e17}', '\u{1e16}'),
-        ('\u{1e18}', '\u{1e19}'), ('\u{1e19}', '\u{1e18}'), ('\u{1e1a}',
-        '\u{1e1b}'), ('\u{1e1b}', '\u{1e1a}'), ('\u{1e1c}', '\u{1e1d}'),
-        ('\u{1e1d}', '\u{1e1c}'), ('\u{1e1e}', '\u{1e1f}'), ('\u{1e1f}',
-        '\u{1e1e}'), ('\u{1e20}', '\u{1e21}'), ('\u{1e21}', '\u{1e20}'),
-        ('\u{1e22}', '\u{1e23}'), ('\u{1e23}', '\u{1e22}'), ('\u{1e24}',
-        '\u{1e25}'), ('\u{1e25}', '\u{1e24}'), ('\u{1e26}', '\u{1e27}'),
-        ('\u{1e27}', '\u{1e26}'), ('\u{1e28}', '\u{1e29}'), ('\u{1e29}',
-        '\u{1e28}'), ('\u{1e2a}', '\u{1e2b}'), ('\u{1e2b}', '\u{1e2a}'),
-        ('\u{1e2c}', '\u{1e2d}'), ('\u{1e2d}', '\u{1e2c}'), ('\u{1e2e}',
-        '\u{1e2f}'), ('\u{1e2f}', '\u{1e2e}'), ('\u{1e30}', '\u{1e31}'),
-        ('\u{1e31}', '\u{1e30}'), ('\u{1e32}', '\u{1e33}'), ('\u{1e33}',
-        '\u{1e32}'), ('\u{1e34}', '\u{1e35}'), ('\u{1e35}', '\u{1e34}'),
-        ('\u{1e36}', '\u{1e37}'), ('\u{1e37}', '\u{1e36}'), ('\u{1e38}',
-        '\u{1e39}'), ('\u{1e39}', '\u{1e38}'), ('\u{1e3a}', '\u{1e3b}'),
-        ('\u{1e3b}', '\u{1e3a}'), ('\u{1e3c}', '\u{1e3d}'), ('\u{1e3d}',
-        '\u{1e3c}'), ('\u{1e3e}', '\u{1e3f}'), ('\u{1e3f}', '\u{1e3e}'),
-        ('\u{1e40}', '\u{1e41}'), ('\u{1e41}', '\u{1e40}'), ('\u{1e42}',
-        '\u{1e43}'), ('\u{1e43}', '\u{1e42}'), ('\u{1e44}', '\u{1e45}'),
-        ('\u{1e45}', '\u{1e44}'), ('\u{1e46}', '\u{1e47}'), ('\u{1e47}',
-        '\u{1e46}'), ('\u{1e48}', '\u{1e49}'), ('\u{1e49}', '\u{1e48}'),
-        ('\u{1e4a}', '\u{1e4b}'), ('\u{1e4b}', '\u{1e4a}'), ('\u{1e4c}',
-        '\u{1e4d}'), ('\u{1e4d}', '\u{1e4c}'), ('\u{1e4e}', '\u{1e4f}'),
-        ('\u{1e4f}', '\u{1e4e}'), ('\u{1e50}', '\u{1e51}'), ('\u{1e51}',
-        '\u{1e50}'), ('\u{1e52}', '\u{1e53}'), ('\u{1e53}', '\u{1e52}'),
-        ('\u{1e54}', '\u{1e55}'), ('\u{1e55}', '\u{1e54}'), ('\u{1e56}',
-        '\u{1e57}'), ('\u{1e57}', '\u{1e56}'), ('\u{1e58}', '\u{1e59}'),
-        ('\u{1e59}', '\u{1e58}'), ('\u{1e5a}', '\u{1e5b}'), ('\u{1e5b}',
-        '\u{1e5a}'), ('\u{1e5c}', '\u{1e5d}'), ('\u{1e5d}', '\u{1e5c}'),
-        ('\u{1e5e}', '\u{1e5f}'), ('\u{1e5f}', '\u{1e5e}'), ('\u{1e60}',
-        '\u{1e61}'), ('\u{1e60}', '\u{1e9b}'), ('\u{1e61}', '\u{1e60}'),
-        ('\u{1e61}', '\u{1e9b}'), ('\u{1e62}', '\u{1e63}'), ('\u{1e63}',
-        '\u{1e62}'), ('\u{1e64}', '\u{1e65}'), ('\u{1e65}', '\u{1e64}'),
-        ('\u{1e66}', '\u{1e67}'), ('\u{1e67}', '\u{1e66}'), ('\u{1e68}',
-        '\u{1e69}'), ('\u{1e69}', '\u{1e68}'), ('\u{1e6a}', '\u{1e6b}'),
-        ('\u{1e6b}', '\u{1e6a}'), ('\u{1e6c}', '\u{1e6d}'), ('\u{1e6d}',
-        '\u{1e6c}'), ('\u{1e6e}', '\u{1e6f}'), ('\u{1e6f}', '\u{1e6e}'),
-        ('\u{1e70}', '\u{1e71}'), ('\u{1e71}', '\u{1e70}'), ('\u{1e72}',
-        '\u{1e73}'), ('\u{1e73}', '\u{1e72}'), ('\u{1e74}', '\u{1e75}'),
-        ('\u{1e75}', '\u{1e74}'), ('\u{1e76}', '\u{1e77}'), ('\u{1e77}',
-        '\u{1e76}'), ('\u{1e78}', '\u{1e79}'), ('\u{1e79}', '\u{1e78}'),
-        ('\u{1e7a}', '\u{1e7b}'), ('\u{1e7b}', '\u{1e7a}'), ('\u{1e7c}',
-        '\u{1e7d}'), ('\u{1e7d}', '\u{1e7c}'), ('\u{1e7e}', '\u{1e7f}'),
-        ('\u{1e7f}', '\u{1e7e}'), ('\u{1e80}', '\u{1e81}'), ('\u{1e81}',
-        '\u{1e80}'), ('\u{1e82}', '\u{1e83}'), ('\u{1e83}', '\u{1e82}'),
-        ('\u{1e84}', '\u{1e85}'), ('\u{1e85}', '\u{1e84}'), ('\u{1e86}',
-        '\u{1e87}'), ('\u{1e87}', '\u{1e86}'), ('\u{1e88}', '\u{1e89}'),
-        ('\u{1e89}', '\u{1e88}'), ('\u{1e8a}', '\u{1e8b}'), ('\u{1e8b}',
-        '\u{1e8a}'), ('\u{1e8c}', '\u{1e8d}'), ('\u{1e8d}', '\u{1e8c}'),
-        ('\u{1e8e}', '\u{1e8f}'), ('\u{1e8f}', '\u{1e8e}'), ('\u{1e90}',
-        '\u{1e91}'), ('\u{1e91}', '\u{1e90}'), ('\u{1e92}', '\u{1e93}'),
-        ('\u{1e93}', '\u{1e92}'), ('\u{1e94}', '\u{1e95}'), ('\u{1e95}',
-        '\u{1e94}'), ('\u{1e9b}', '\u{1e60}'), ('\u{1e9b}', '\u{1e61}'),
-        ('\u{1e9e}', '\u{df}'), ('\u{1ea0}', '\u{1ea1}'), ('\u{1ea1}',
-        '\u{1ea0}'), ('\u{1ea2}', '\u{1ea3}'), ('\u{1ea3}', '\u{1ea2}'),
-        ('\u{1ea4}', '\u{1ea5}'), ('\u{1ea5}', '\u{1ea4}'), ('\u{1ea6}',
-        '\u{1ea7}'), ('\u{1ea7}', '\u{1ea6}'), ('\u{1ea8}', '\u{1ea9}'),
-        ('\u{1ea9}', '\u{1ea8}'), ('\u{1eaa}', '\u{1eab}'), ('\u{1eab}',
-        '\u{1eaa}'), ('\u{1eac}', '\u{1ead}'), ('\u{1ead}', '\u{1eac}'),
-        ('\u{1eae}', '\u{1eaf}'), ('\u{1eaf}', '\u{1eae}'), ('\u{1eb0}',
-        '\u{1eb1}'), ('\u{1eb1}', '\u{1eb0}'), ('\u{1eb2}', '\u{1eb3}'),
-        ('\u{1eb3}', '\u{1eb2}'), ('\u{1eb4}', '\u{1eb5}'), ('\u{1eb5}',
-        '\u{1eb4}'), ('\u{1eb6}', '\u{1eb7}'), ('\u{1eb7}', '\u{1eb6}'),
-        ('\u{1eb8}', '\u{1eb9}'), ('\u{1eb9}', '\u{1eb8}'), ('\u{1eba}',
-        '\u{1ebb}'), ('\u{1ebb}', '\u{1eba}'), ('\u{1ebc}', '\u{1ebd}'),
-        ('\u{1ebd}', '\u{1ebc}'), ('\u{1ebe}', '\u{1ebf}'), ('\u{1ebf}',
-        '\u{1ebe}'), ('\u{1ec0}', '\u{1ec1}'), ('\u{1ec1}', '\u{1ec0}'),
-        ('\u{1ec2}', '\u{1ec3}'), ('\u{1ec3}', '\u{1ec2}'), ('\u{1ec4}',
-        '\u{1ec5}'), ('\u{1ec5}', '\u{1ec4}'), ('\u{1ec6}', '\u{1ec7}'),
-        ('\u{1ec7}', '\u{1ec6}'), ('\u{1ec8}', '\u{1ec9}'), ('\u{1ec9}',
-        '\u{1ec8}'), ('\u{1eca}', '\u{1ecb}'), ('\u{1ecb}', '\u{1eca}'),
-        ('\u{1ecc}', '\u{1ecd}'), ('\u{1ecd}', '\u{1ecc}'), ('\u{1ece}',
-        '\u{1ecf}'), ('\u{1ecf}', '\u{1ece}'), ('\u{1ed0}', '\u{1ed1}'),
-        ('\u{1ed1}', '\u{1ed0}'), ('\u{1ed2}', '\u{1ed3}'), ('\u{1ed3}',
-        '\u{1ed2}'), ('\u{1ed4}', '\u{1ed5}'), ('\u{1ed5}', '\u{1ed4}'),
-        ('\u{1ed6}', '\u{1ed7}'), ('\u{1ed7}', '\u{1ed6}'), ('\u{1ed8}',
-        '\u{1ed9}'), ('\u{1ed9}', '\u{1ed8}'), ('\u{1eda}', '\u{1edb}'),
-        ('\u{1edb}', '\u{1eda}'), ('\u{1edc}', '\u{1edd}'), ('\u{1edd}',
-        '\u{1edc}'), ('\u{1ede}', '\u{1edf}'), ('\u{1edf}', '\u{1ede}'),
-        ('\u{1ee0}', '\u{1ee1}'), ('\u{1ee1}', '\u{1ee0}'), ('\u{1ee2}',
-        '\u{1ee3}'), ('\u{1ee3}', '\u{1ee2}'), ('\u{1ee4}', '\u{1ee5}'),
-        ('\u{1ee5}', '\u{1ee4}'), ('\u{1ee6}', '\u{1ee7}'), ('\u{1ee7}',
-        '\u{1ee6}'), ('\u{1ee8}', '\u{1ee9}'), ('\u{1ee9}', '\u{1ee8}'),
-        ('\u{1eea}', '\u{1eeb}'), ('\u{1eeb}', '\u{1eea}'), ('\u{1eec}',
-        '\u{1eed}'), ('\u{1eed}', '\u{1eec}'), ('\u{1eee}', '\u{1eef}'),
-        ('\u{1eef}', '\u{1eee}'), ('\u{1ef0}', '\u{1ef1}'), ('\u{1ef1}',
-        '\u{1ef0}'), ('\u{1ef2}', '\u{1ef3}'), ('\u{1ef3}', '\u{1ef2}'),
-        ('\u{1ef4}', '\u{1ef5}'), ('\u{1ef5}', '\u{1ef4}'), ('\u{1ef6}',
-        '\u{1ef7}'), ('\u{1ef7}', '\u{1ef6}'), ('\u{1ef8}', '\u{1ef9}'),
-        ('\u{1ef9}', '\u{1ef8}'), ('\u{1efa}', '\u{1efb}'), ('\u{1efb}',
-        '\u{1efa}'), ('\u{1efc}', '\u{1efd}'), ('\u{1efd}', '\u{1efc}'),
-        ('\u{1efe}', '\u{1eff}'), ('\u{1eff}', '\u{1efe}'), ('\u{1f00}',
-        '\u{1f08}'), ('\u{1f01}', '\u{1f09}'), ('\u{1f02}', '\u{1f0a}'),
-        ('\u{1f03}', '\u{1f0b}'), ('\u{1f04}', '\u{1f0c}'), ('\u{1f05}',
-        '\u{1f0d}'), ('\u{1f06}', '\u{1f0e}'), ('\u{1f07}', '\u{1f0f}'),
-        ('\u{1f08}', '\u{1f00}'), ('\u{1f09}', '\u{1f01}'), ('\u{1f0a}',
-        '\u{1f02}'), ('\u{1f0b}', '\u{1f03}'), ('\u{1f0c}', '\u{1f04}'),
-        ('\u{1f0d}', '\u{1f05}'), ('\u{1f0e}', '\u{1f06}'), ('\u{1f0f}',
-        '\u{1f07}'), ('\u{1f10}', '\u{1f18}'), ('\u{1f11}', '\u{1f19}'),
-        ('\u{1f12}', '\u{1f1a}'), ('\u{1f13}', '\u{1f1b}'), ('\u{1f14}',
-        '\u{1f1c}'), ('\u{1f15}', '\u{1f1d}'), ('\u{1f18}', '\u{1f10}'),
-        ('\u{1f19}', '\u{1f11}'), ('\u{1f1a}', '\u{1f12}'), ('\u{1f1b}',
-        '\u{1f13}'), ('\u{1f1c}', '\u{1f14}'), ('\u{1f1d}', '\u{1f15}'),
-        ('\u{1f20}', '\u{1f28}'), ('\u{1f21}', '\u{1f29}'), ('\u{1f22}',
-        '\u{1f2a}'), ('\u{1f23}', '\u{1f2b}'), ('\u{1f24}', '\u{1f2c}'),
-        ('\u{1f25}', '\u{1f2d}'), ('\u{1f26}', '\u{1f2e}'), ('\u{1f27}',
-        '\u{1f2f}'), ('\u{1f28}', '\u{1f20}'), ('\u{1f29}', '\u{1f21}'),
-        ('\u{1f2a}', '\u{1f22}'), ('\u{1f2b}', '\u{1f23}'), ('\u{1f2c}',
-        '\u{1f24}'), ('\u{1f2d}', '\u{1f25}'), ('\u{1f2e}', '\u{1f26}'),
-        ('\u{1f2f}', '\u{1f27}'), ('\u{1f30}', '\u{1f38}'), ('\u{1f31}',
-        '\u{1f39}'), ('\u{1f32}', '\u{1f3a}'), ('\u{1f33}', '\u{1f3b}'),
-        ('\u{1f34}', '\u{1f3c}'), ('\u{1f35}', '\u{1f3d}'), ('\u{1f36}',
-        '\u{1f3e}'), ('\u{1f37}', '\u{1f3f}'), ('\u{1f38}', '\u{1f30}'),
-        ('\u{1f39}', '\u{1f31}'), ('\u{1f3a}', '\u{1f32}'), ('\u{1f3b}',
-        '\u{1f33}'), ('\u{1f3c}', '\u{1f34}'), ('\u{1f3d}', '\u{1f35}'),
-        ('\u{1f3e}', '\u{1f36}'), ('\u{1f3f}', '\u{1f37}'), ('\u{1f40}',
-        '\u{1f48}'), ('\u{1f41}', '\u{1f49}'), ('\u{1f42}', '\u{1f4a}'),
-        ('\u{1f43}', '\u{1f4b}'), ('\u{1f44}', '\u{1f4c}'), ('\u{1f45}',
-        '\u{1f4d}'), ('\u{1f48}', '\u{1f40}'), ('\u{1f49}', '\u{1f41}'),
-        ('\u{1f4a}', '\u{1f42}'), ('\u{1f4b}', '\u{1f43}'), ('\u{1f4c}',
-        '\u{1f44}'), ('\u{1f4d}', '\u{1f45}'), ('\u{1f51}', '\u{1f59}'),
-        ('\u{1f53}', '\u{1f5b}'), ('\u{1f55}', '\u{1f5d}'), ('\u{1f57}',
-        '\u{1f5f}'), ('\u{1f59}', '\u{1f51}'), ('\u{1f5b}', '\u{1f53}'),
-        ('\u{1f5d}', '\u{1f55}'), ('\u{1f5f}', '\u{1f57}'), ('\u{1f60}',
-        '\u{1f68}'), ('\u{1f61}', '\u{1f69}'), ('\u{1f62}', '\u{1f6a}'),
-        ('\u{1f63}', '\u{1f6b}'), ('\u{1f64}', '\u{1f6c}'), ('\u{1f65}',
-        '\u{1f6d}'), ('\u{1f66}', '\u{1f6e}'), ('\u{1f67}', '\u{1f6f}'),
-        ('\u{1f68}', '\u{1f60}'), ('\u{1f69}', '\u{1f61}'), ('\u{1f6a}',
-        '\u{1f62}'), ('\u{1f6b}', '\u{1f63}'), ('\u{1f6c}', '\u{1f64}'),
-        ('\u{1f6d}', '\u{1f65}'), ('\u{1f6e}', '\u{1f66}'), ('\u{1f6f}',
-        '\u{1f67}'), ('\u{1f70}', '\u{1fba}'), ('\u{1f71}', '\u{1fbb}'),
-        ('\u{1f72}', '\u{1fc8}'), ('\u{1f73}', '\u{1fc9}'), ('\u{1f74}',
-        '\u{1fca}'), ('\u{1f75}', '\u{1fcb}'), ('\u{1f76}', '\u{1fda}'),
-        ('\u{1f77}', '\u{1fdb}'), ('\u{1f78}', '\u{1ff8}'), ('\u{1f79}',
-        '\u{1ff9}'), ('\u{1f7a}', '\u{1fea}'), ('\u{1f7b}', '\u{1feb}'),
-        ('\u{1f7c}', '\u{1ffa}'), ('\u{1f7d}', '\u{1ffb}'), ('\u{1f80}',
-        '\u{1f88}'), ('\u{1f81}', '\u{1f89}'), ('\u{1f82}', '\u{1f8a}'),
-        ('\u{1f83}', '\u{1f8b}'), ('\u{1f84}', '\u{1f8c}'), ('\u{1f85}',
-        '\u{1f8d}'), ('\u{1f86}', '\u{1f8e}'), ('\u{1f87}', '\u{1f8f}'),
-        ('\u{1f88}', '\u{1f80}'), ('\u{1f89}', '\u{1f81}'), ('\u{1f8a}',
-        '\u{1f82}'), ('\u{1f8b}', '\u{1f83}'), ('\u{1f8c}', '\u{1f84}'),
-        ('\u{1f8d}', '\u{1f85}'), ('\u{1f8e}', '\u{1f86}'), ('\u{1f8f}',
-        '\u{1f87}'), ('\u{1f90}', '\u{1f98}'), ('\u{1f91}', '\u{1f99}'),
-        ('\u{1f92}', '\u{1f9a}'), ('\u{1f93}', '\u{1f9b}'), ('\u{1f94}',
-        '\u{1f9c}'), ('\u{1f95}', '\u{1f9d}'), ('\u{1f96}', '\u{1f9e}'),
-        ('\u{1f97}', '\u{1f9f}'), ('\u{1f98}', '\u{1f90}'), ('\u{1f99}',
-        '\u{1f91}'), ('\u{1f9a}', '\u{1f92}'), ('\u{1f9b}', '\u{1f93}'),
-        ('\u{1f9c}', '\u{1f94}'), ('\u{1f9d}', '\u{1f95}'), ('\u{1f9e}',
-        '\u{1f96}'), ('\u{1f9f}', '\u{1f97}'), ('\u{1fa0}', '\u{1fa8}'),
-        ('\u{1fa1}', '\u{1fa9}'), ('\u{1fa2}', '\u{1faa}'), ('\u{1fa3}',
-        '\u{1fab}'), ('\u{1fa4}', '\u{1fac}'), ('\u{1fa5}', '\u{1fad}'),
-        ('\u{1fa6}', '\u{1fae}'), ('\u{1fa7}', '\u{1faf}'), ('\u{1fa8}',
-        '\u{1fa0}'), ('\u{1fa9}', '\u{1fa1}'), ('\u{1faa}', '\u{1fa2}'),
-        ('\u{1fab}', '\u{1fa3}'), ('\u{1fac}', '\u{1fa4}'), ('\u{1fad}',
-        '\u{1fa5}'), ('\u{1fae}', '\u{1fa6}'), ('\u{1faf}', '\u{1fa7}'),
-        ('\u{1fb0}', '\u{1fb8}'), ('\u{1fb1}', '\u{1fb9}'), ('\u{1fb3}',
-        '\u{1fbc}'), ('\u{1fb8}', '\u{1fb0}'), ('\u{1fb9}', '\u{1fb1}'),
-        ('\u{1fba}', '\u{1f70}'), ('\u{1fbb}', '\u{1f71}'), ('\u{1fbc}',
-        '\u{1fb3}'), ('\u{1fbe}', '\u{345}'), ('\u{1fbe}', '\u{399}'),
-        ('\u{1fbe}', '\u{3b9}'), ('\u{1fc3}', '\u{1fcc}'), ('\u{1fc8}',
-        '\u{1f72}'), ('\u{1fc9}', '\u{1f73}'), ('\u{1fca}', '\u{1f74}'),
-        ('\u{1fcb}', '\u{1f75}'), ('\u{1fcc}', '\u{1fc3}'), ('\u{1fd0}',
-        '\u{1fd8}'), ('\u{1fd1}', '\u{1fd9}'), ('\u{1fd8}', '\u{1fd0}'),
-        ('\u{1fd9}', '\u{1fd1}'), ('\u{1fda}', '\u{1f76}'), ('\u{1fdb}',
-        '\u{1f77}'), ('\u{1fe0}', '\u{1fe8}'), ('\u{1fe1}', '\u{1fe9}'),
-        ('\u{1fe5}', '\u{1fec}'), ('\u{1fe8}', '\u{1fe0}'), ('\u{1fe9}',
-        '\u{1fe1}'), ('\u{1fea}', '\u{1f7a}'), ('\u{1feb}', '\u{1f7b}'),
-        ('\u{1fec}', '\u{1fe5}'), ('\u{1ff3}', '\u{1ffc}'), ('\u{1ff8}',
-        '\u{1f78}'), ('\u{1ff9}', '\u{1f79}'), ('\u{1ffa}', '\u{1f7c}'),
-        ('\u{1ffb}', '\u{1f7d}'), ('\u{1ffc}', '\u{1ff3}'), ('\u{2126}',
-        '\u{3a9}'), ('\u{2126}', '\u{3c9}'), ('\u{212a}', '\u{4b}'),
-        ('\u{212a}', '\u{6b}'), ('\u{212b}', '\u{c5}'), ('\u{212b}', '\u{e5}'),
-        ('\u{2132}', '\u{214e}'), ('\u{214e}', '\u{2132}'), ('\u{2160}',
-        '\u{2170}'), ('\u{2161}', '\u{2171}'), ('\u{2162}', '\u{2172}'),
-        ('\u{2163}', '\u{2173}'), ('\u{2164}', '\u{2174}'), ('\u{2165}',
-        '\u{2175}'), ('\u{2166}', '\u{2176}'), ('\u{2167}', '\u{2177}'),
-        ('\u{2168}', '\u{2178}'), ('\u{2169}', '\u{2179}'), ('\u{216a}',
-        '\u{217a}'), ('\u{216b}', '\u{217b}'), ('\u{216c}', '\u{217c}'),
-        ('\u{216d}', '\u{217d}'), ('\u{216e}', '\u{217e}'), ('\u{216f}',
-        '\u{217f}'), ('\u{2170}', '\u{2160}'), ('\u{2171}', '\u{2161}'),
-        ('\u{2172}', '\u{2162}'), ('\u{2173}', '\u{2163}'), ('\u{2174}',
-        '\u{2164}'), ('\u{2175}', '\u{2165}'), ('\u{2176}', '\u{2166}'),
-        ('\u{2177}', '\u{2167}'), ('\u{2178}', '\u{2168}'), ('\u{2179}',
-        '\u{2169}'), ('\u{217a}', '\u{216a}'), ('\u{217b}', '\u{216b}'),
-        ('\u{217c}', '\u{216c}'), ('\u{217d}', '\u{216d}'), ('\u{217e}',
-        '\u{216e}'), ('\u{217f}', '\u{216f}'), ('\u{2183}', '\u{2184}'),
-        ('\u{2184}', '\u{2183}'), ('\u{24b6}', '\u{24d0}'), ('\u{24b7}',
-        '\u{24d1}'), ('\u{24b8}', '\u{24d2}'), ('\u{24b9}', '\u{24d3}'),
-        ('\u{24ba}', '\u{24d4}'), ('\u{24bb}', '\u{24d5}'), ('\u{24bc}',
-        '\u{24d6}'), ('\u{24bd}', '\u{24d7}'), ('\u{24be}', '\u{24d8}'),
-        ('\u{24bf}', '\u{24d9}'), ('\u{24c0}', '\u{24da}'), ('\u{24c1}',
-        '\u{24db}'), ('\u{24c2}', '\u{24dc}'), ('\u{24c3}', '\u{24dd}'),
-        ('\u{24c4}', '\u{24de}'), ('\u{24c5}', '\u{24df}'), ('\u{24c6}',
-        '\u{24e0}'), ('\u{24c7}', '\u{24e1}'), ('\u{24c8}', '\u{24e2}'),
-        ('\u{24c9}', '\u{24e3}'), ('\u{24ca}', '\u{24e4}'), ('\u{24cb}',
-        '\u{24e5}'), ('\u{24cc}', '\u{24e6}'), ('\u{24cd}', '\u{24e7}'),
-        ('\u{24ce}', '\u{24e8}'), ('\u{24cf}', '\u{24e9}'), ('\u{24d0}',
-        '\u{24b6}'), ('\u{24d1}', '\u{24b7}'), ('\u{24d2}', '\u{24b8}'),
-        ('\u{24d3}', '\u{24b9}'), ('\u{24d4}', '\u{24ba}'), ('\u{24d5}',
-        '\u{24bb}'), ('\u{24d6}', '\u{24bc}'), ('\u{24d7}', '\u{24bd}'),
-        ('\u{24d8}', '\u{24be}'), ('\u{24d9}', '\u{24bf}'), ('\u{24da}',
-        '\u{24c0}'), ('\u{24db}', '\u{24c1}'), ('\u{24dc}', '\u{24c2}'),
-        ('\u{24dd}', '\u{24c3}'), ('\u{24de}', '\u{24c4}'), ('\u{24df}',
-        '\u{24c5}'), ('\u{24e0}', '\u{24c6}'), ('\u{24e1}', '\u{24c7}'),
-        ('\u{24e2}', '\u{24c8}'), ('\u{24e3}', '\u{24c9}'), ('\u{24e4}',
-        '\u{24ca}'), ('\u{24e5}', '\u{24cb}'), ('\u{24e6}', '\u{24cc}'),
-        ('\u{24e7}', '\u{24cd}'), ('\u{24e8}', '\u{24ce}'), ('\u{24e9}',
-        '\u{24cf}'), ('\u{2c00}', '\u{2c30}'), ('\u{2c01}', '\u{2c31}'),
-        ('\u{2c02}', '\u{2c32}'), ('\u{2c03}', '\u{2c33}'), ('\u{2c04}',
-        '\u{2c34}'), ('\u{2c05}', '\u{2c35}'), ('\u{2c06}', '\u{2c36}'),
-        ('\u{2c07}', '\u{2c37}'), ('\u{2c08}', '\u{2c38}'), ('\u{2c09}',
-        '\u{2c39}'), ('\u{2c0a}', '\u{2c3a}'), ('\u{2c0b}', '\u{2c3b}'),
-        ('\u{2c0c}', '\u{2c3c}'), ('\u{2c0d}', '\u{2c3d}'), ('\u{2c0e}',
-        '\u{2c3e}'), ('\u{2c0f}', '\u{2c3f}'), ('\u{2c10}', '\u{2c40}'),
-        ('\u{2c11}', '\u{2c41}'), ('\u{2c12}', '\u{2c42}'), ('\u{2c13}',
-        '\u{2c43}'), ('\u{2c14}', '\u{2c44}'), ('\u{2c15}', '\u{2c45}'),
-        ('\u{2c16}', '\u{2c46}'), ('\u{2c17}', '\u{2c47}'), ('\u{2c18}',
-        '\u{2c48}'), ('\u{2c19}', '\u{2c49}'), ('\u{2c1a}', '\u{2c4a}'),
-        ('\u{2c1b}', '\u{2c4b}'), ('\u{2c1c}', '\u{2c4c}'), ('\u{2c1d}',
-        '\u{2c4d}'), ('\u{2c1e}', '\u{2c4e}'), ('\u{2c1f}', '\u{2c4f}'),
-        ('\u{2c20}', '\u{2c50}'), ('\u{2c21}', '\u{2c51}'), ('\u{2c22}',
-        '\u{2c52}'), ('\u{2c23}', '\u{2c53}'), ('\u{2c24}', '\u{2c54}'),
-        ('\u{2c25}', '\u{2c55}'), ('\u{2c26}', '\u{2c56}'), ('\u{2c27}',
-        '\u{2c57}'), ('\u{2c28}', '\u{2c58}'), ('\u{2c29}', '\u{2c59}'),
-        ('\u{2c2a}', '\u{2c5a}'), ('\u{2c2b}', '\u{2c5b}'), ('\u{2c2c}',
-        '\u{2c5c}'), ('\u{2c2d}', '\u{2c5d}'), ('\u{2c2e}', '\u{2c5e}'),
-        ('\u{2c30}', '\u{2c00}'), ('\u{2c31}', '\u{2c01}'), ('\u{2c32}',
-        '\u{2c02}'), ('\u{2c33}', '\u{2c03}'), ('\u{2c34}', '\u{2c04}'),
-        ('\u{2c35}', '\u{2c05}'), ('\u{2c36}', '\u{2c06}'), ('\u{2c37}',
-        '\u{2c07}'), ('\u{2c38}', '\u{2c08}'), ('\u{2c39}', '\u{2c09}'),
-        ('\u{2c3a}', '\u{2c0a}'), ('\u{2c3b}', '\u{2c0b}'), ('\u{2c3c}',
-        '\u{2c0c}'), ('\u{2c3d}', '\u{2c0d}'), ('\u{2c3e}', '\u{2c0e}'),
-        ('\u{2c3f}', '\u{2c0f}'), ('\u{2c40}', '\u{2c10}'), ('\u{2c41}',
-        '\u{2c11}'), ('\u{2c42}', '\u{2c12}'), ('\u{2c43}', '\u{2c13}'),
-        ('\u{2c44}', '\u{2c14}'), ('\u{2c45}', '\u{2c15}'), ('\u{2c46}',
-        '\u{2c16}'), ('\u{2c47}', '\u{2c17}'), ('\u{2c48}', '\u{2c18}'),
-        ('\u{2c49}', '\u{2c19}'), ('\u{2c4a}', '\u{2c1a}'), ('\u{2c4b}',
-        '\u{2c1b}'), ('\u{2c4c}', '\u{2c1c}'), ('\u{2c4d}', '\u{2c1d}'),
-        ('\u{2c4e}', '\u{2c1e}'), ('\u{2c4f}', '\u{2c1f}'), ('\u{2c50}',
-        '\u{2c20}'), ('\u{2c51}', '\u{2c21}'), ('\u{2c52}', '\u{2c22}'),
-        ('\u{2c53}', '\u{2c23}'), ('\u{2c54}', '\u{2c24}'), ('\u{2c55}',
-        '\u{2c25}'), ('\u{2c56}', '\u{2c26}'), ('\u{2c57}', '\u{2c27}'),
-        ('\u{2c58}', '\u{2c28}'), ('\u{2c59}', '\u{2c29}'), ('\u{2c5a}',
-        '\u{2c2a}'), ('\u{2c5b}', '\u{2c2b}'), ('\u{2c5c}', '\u{2c2c}'),
-        ('\u{2c5d}', '\u{2c2d}'), ('\u{2c5e}', '\u{2c2e}'), ('\u{2c60}',
-        '\u{2c61}'), ('\u{2c61}', '\u{2c60}'), ('\u{2c62}', '\u{26b}'),
-        ('\u{2c63}', '\u{1d7d}'), ('\u{2c64}', '\u{27d}'), ('\u{2c65}',
-        '\u{23a}'), ('\u{2c66}', '\u{23e}'), ('\u{2c67}', '\u{2c68}'),
-        ('\u{2c68}', '\u{2c67}'), ('\u{2c69}', '\u{2c6a}'), ('\u{2c6a}',
-        '\u{2c69}'), ('\u{2c6b}', '\u{2c6c}'), ('\u{2c6c}', '\u{2c6b}'),
-        ('\u{2c6d}', '\u{251}'), ('\u{2c6e}', '\u{271}'), ('\u{2c6f}',
-        '\u{250}'), ('\u{2c70}', '\u{252}'), ('\u{2c72}', '\u{2c73}'),
-        ('\u{2c73}', '\u{2c72}'), ('\u{2c75}', '\u{2c76}'), ('\u{2c76}',
-        '\u{2c75}'), ('\u{2c7e}', '\u{23f}'), ('\u{2c7f}', '\u{240}'),
-        ('\u{2c80}', '\u{2c81}'), ('\u{2c81}', '\u{2c80}'), ('\u{2c82}',
-        '\u{2c83}'), ('\u{2c83}', '\u{2c82}'), ('\u{2c84}', '\u{2c85}'),
-        ('\u{2c85}', '\u{2c84}'), ('\u{2c86}', '\u{2c87}'), ('\u{2c87}',
-        '\u{2c86}'), ('\u{2c88}', '\u{2c89}'), ('\u{2c89}', '\u{2c88}'),
-        ('\u{2c8a}', '\u{2c8b}'), ('\u{2c8b}', '\u{2c8a}'), ('\u{2c8c}',
-        '\u{2c8d}'), ('\u{2c8d}', '\u{2c8c}'), ('\u{2c8e}', '\u{2c8f}'),
-        ('\u{2c8f}', '\u{2c8e}'), ('\u{2c90}', '\u{2c91}'), ('\u{2c91}',
-        '\u{2c90}'), ('\u{2c92}', '\u{2c93}'), ('\u{2c93}', '\u{2c92}'),
-        ('\u{2c94}', '\u{2c95}'), ('\u{2c95}', '\u{2c94}'), ('\u{2c96}',
-        '\u{2c97}'), ('\u{2c97}', '\u{2c96}'), ('\u{2c98}', '\u{2c99}'),
-        ('\u{2c99}', '\u{2c98}'), ('\u{2c9a}', '\u{2c9b}'), ('\u{2c9b}',
-        '\u{2c9a}'), ('\u{2c9c}', '\u{2c9d}'), ('\u{2c9d}', '\u{2c9c}'),
-        ('\u{2c9e}', '\u{2c9f}'), ('\u{2c9f}', '\u{2c9e}'), ('\u{2ca0}',
-        '\u{2ca1}'), ('\u{2ca1}', '\u{2ca0}'), ('\u{2ca2}', '\u{2ca3}'),
-        ('\u{2ca3}', '\u{2ca2}'), ('\u{2ca4}', '\u{2ca5}'), ('\u{2ca5}',
-        '\u{2ca4}'), ('\u{2ca6}', '\u{2ca7}'), ('\u{2ca7}', '\u{2ca6}'),
-        ('\u{2ca8}', '\u{2ca9}'), ('\u{2ca9}', '\u{2ca8}'), ('\u{2caa}',
-        '\u{2cab}'), ('\u{2cab}', '\u{2caa}'), ('\u{2cac}', '\u{2cad}'),
-        ('\u{2cad}', '\u{2cac}'), ('\u{2cae}', '\u{2caf}'), ('\u{2caf}',
-        '\u{2cae}'), ('\u{2cb0}', '\u{2cb1}'), ('\u{2cb1}', '\u{2cb0}'),
-        ('\u{2cb2}', '\u{2cb3}'), ('\u{2cb3}', '\u{2cb2}'), ('\u{2cb4}',
-        '\u{2cb5}'), ('\u{2cb5}', '\u{2cb4}'), ('\u{2cb6}', '\u{2cb7}'),
-        ('\u{2cb7}', '\u{2cb6}'), ('\u{2cb8}', '\u{2cb9}'), ('\u{2cb9}',
-        '\u{2cb8}'), ('\u{2cba}', '\u{2cbb}'), ('\u{2cbb}', '\u{2cba}'),
-        ('\u{2cbc}', '\u{2cbd}'), ('\u{2cbd}', '\u{2cbc}'), ('\u{2cbe}',
-        '\u{2cbf}'), ('\u{2cbf}', '\u{2cbe}'), ('\u{2cc0}', '\u{2cc1}'),
-        ('\u{2cc1}', '\u{2cc0}'), ('\u{2cc2}', '\u{2cc3}'), ('\u{2cc3}',
-        '\u{2cc2}'), ('\u{2cc4}', '\u{2cc5}'), ('\u{2cc5}', '\u{2cc4}'),
-        ('\u{2cc6}', '\u{2cc7}'), ('\u{2cc7}', '\u{2cc6}'), ('\u{2cc8}',
-        '\u{2cc9}'), ('\u{2cc9}', '\u{2cc8}'), ('\u{2cca}', '\u{2ccb}'),
-        ('\u{2ccb}', '\u{2cca}'), ('\u{2ccc}', '\u{2ccd}'), ('\u{2ccd}',
-        '\u{2ccc}'), ('\u{2cce}', '\u{2ccf}'), ('\u{2ccf}', '\u{2cce}'),
-        ('\u{2cd0}', '\u{2cd1}'), ('\u{2cd1}', '\u{2cd0}'), ('\u{2cd2}',
-        '\u{2cd3}'), ('\u{2cd3}', '\u{2cd2}'), ('\u{2cd4}', '\u{2cd5}'),
-        ('\u{2cd5}', '\u{2cd4}'), ('\u{2cd6}', '\u{2cd7}'), ('\u{2cd7}',
-        '\u{2cd6}'), ('\u{2cd8}', '\u{2cd9}'), ('\u{2cd9}', '\u{2cd8}'),
-        ('\u{2cda}', '\u{2cdb}'), ('\u{2cdb}', '\u{2cda}'), ('\u{2cdc}',
-        '\u{2cdd}'), ('\u{2cdd}', '\u{2cdc}'), ('\u{2cde}', '\u{2cdf}'),
-        ('\u{2cdf}', '\u{2cde}'), ('\u{2ce0}', '\u{2ce1}'), ('\u{2ce1}',
-        '\u{2ce0}'), ('\u{2ce2}', '\u{2ce3}'), ('\u{2ce3}', '\u{2ce2}'),
-        ('\u{2ceb}', '\u{2cec}'), ('\u{2cec}', '\u{2ceb}'), ('\u{2ced}',
-        '\u{2cee}'), ('\u{2cee}', '\u{2ced}'), ('\u{2cf2}', '\u{2cf3}'),
-        ('\u{2cf3}', '\u{2cf2}'), ('\u{2d00}', '\u{10a0}'), ('\u{2d01}',
-        '\u{10a1}'), ('\u{2d02}', '\u{10a2}'), ('\u{2d03}', '\u{10a3}'),
-        ('\u{2d04}', '\u{10a4}'), ('\u{2d05}', '\u{10a5}'), ('\u{2d06}',
-        '\u{10a6}'), ('\u{2d07}', '\u{10a7}'), ('\u{2d08}', '\u{10a8}'),
-        ('\u{2d09}', '\u{10a9}'), ('\u{2d0a}', '\u{10aa}'), ('\u{2d0b}',
-        '\u{10ab}'), ('\u{2d0c}', '\u{10ac}'), ('\u{2d0d}', '\u{10ad}'),
-        ('\u{2d0e}', '\u{10ae}'), ('\u{2d0f}', '\u{10af}'), ('\u{2d10}',
-        '\u{10b0}'), ('\u{2d11}', '\u{10b1}'), ('\u{2d12}', '\u{10b2}'),
-        ('\u{2d13}', '\u{10b3}'), ('\u{2d14}', '\u{10b4}'), ('\u{2d15}',
-        '\u{10b5}'), ('\u{2d16}', '\u{10b6}'), ('\u{2d17}', '\u{10b7}'),
-        ('\u{2d18}', '\u{10b8}'), ('\u{2d19}', '\u{10b9}'), ('\u{2d1a}',
-        '\u{10ba}'), ('\u{2d1b}', '\u{10bb}'), ('\u{2d1c}', '\u{10bc}'),
-        ('\u{2d1d}', '\u{10bd}'), ('\u{2d1e}', '\u{10be}'), ('\u{2d1f}',
-        '\u{10bf}'), ('\u{2d20}', '\u{10c0}'), ('\u{2d21}', '\u{10c1}'),
-        ('\u{2d22}', '\u{10c2}'), ('\u{2d23}', '\u{10c3}'), ('\u{2d24}',
-        '\u{10c4}'), ('\u{2d25}', '\u{10c5}'), ('\u{2d27}', '\u{10c7}'),
-        ('\u{2d2d}', '\u{10cd}'), ('\u{a640}', '\u{a641}'), ('\u{a641}',
-        '\u{a640}'), ('\u{a642}', '\u{a643}'), ('\u{a643}', '\u{a642}'),
-        ('\u{a644}', '\u{a645}'), ('\u{a645}', '\u{a644}'), ('\u{a646}',
-        '\u{a647}'), ('\u{a647}', '\u{a646}'), ('\u{a648}', '\u{a649}'),
-        ('\u{a649}', '\u{a648}'), ('\u{a64a}', '\u{a64b}'), ('\u{a64b}',
-        '\u{a64a}'), ('\u{a64c}', '\u{a64d}'), ('\u{a64d}', '\u{a64c}'),
-        ('\u{a64e}', '\u{a64f}'), ('\u{a64f}', '\u{a64e}'), ('\u{a650}',
-        '\u{a651}'), ('\u{a651}', '\u{a650}'), ('\u{a652}', '\u{a653}'),
-        ('\u{a653}', '\u{a652}'), ('\u{a654}', '\u{a655}'), ('\u{a655}',
-        '\u{a654}'), ('\u{a656}', '\u{a657}'), ('\u{a657}', '\u{a656}'),
-        ('\u{a658}', '\u{a659}'), ('\u{a659}', '\u{a658}'), ('\u{a65a}',
-        '\u{a65b}'), ('\u{a65b}', '\u{a65a}'), ('\u{a65c}', '\u{a65d}'),
-        ('\u{a65d}', '\u{a65c}'), ('\u{a65e}', '\u{a65f}'), ('\u{a65f}',
-        '\u{a65e}'), ('\u{a660}', '\u{a661}'), ('\u{a661}', '\u{a660}'),
-        ('\u{a662}', '\u{a663}'), ('\u{a663}', '\u{a662}'), ('\u{a664}',
-        '\u{a665}'), ('\u{a665}', '\u{a664}'), ('\u{a666}', '\u{a667}'),
-        ('\u{a667}', '\u{a666}'), ('\u{a668}', '\u{a669}'), ('\u{a669}',
-        '\u{a668}'), ('\u{a66a}', '\u{a66b}'), ('\u{a66b}', '\u{a66a}'),
-        ('\u{a66c}', '\u{a66d}'), ('\u{a66d}', '\u{a66c}'), ('\u{a680}',
-        '\u{a681}'), ('\u{a681}', '\u{a680}'), ('\u{a682}', '\u{a683}'),
-        ('\u{a683}', '\u{a682}'), ('\u{a684}', '\u{a685}'), ('\u{a685}',
-        '\u{a684}'), ('\u{a686}', '\u{a687}'), ('\u{a687}', '\u{a686}'),
-        ('\u{a688}', '\u{a689}'), ('\u{a689}', '\u{a688}'), ('\u{a68a}',
-        '\u{a68b}'), ('\u{a68b}', '\u{a68a}'), ('\u{a68c}', '\u{a68d}'),
-        ('\u{a68d}', '\u{a68c}'), ('\u{a68e}', '\u{a68f}'), ('\u{a68f}',
-        '\u{a68e}'), ('\u{a690}', '\u{a691}'), ('\u{a691}', '\u{a690}'),
-        ('\u{a692}', '\u{a693}'), ('\u{a693}', '\u{a692}'), ('\u{a694}',
-        '\u{a695}'), ('\u{a695}', '\u{a694}'), ('\u{a696}', '\u{a697}'),
-        ('\u{a697}', '\u{a696}'), ('\u{a698}', '\u{a699}'), ('\u{a699}',
-        '\u{a698}'), ('\u{a69a}', '\u{a69b}'), ('\u{a69b}', '\u{a69a}'),
-        ('\u{a722}', '\u{a723}'), ('\u{a723}', '\u{a722}'), ('\u{a724}',
-        '\u{a725}'), ('\u{a725}', '\u{a724}'), ('\u{a726}', '\u{a727}'),
-        ('\u{a727}', '\u{a726}'), ('\u{a728}', '\u{a729}'), ('\u{a729}',
-        '\u{a728}'), ('\u{a72a}', '\u{a72b}'), ('\u{a72b}', '\u{a72a}'),
-        ('\u{a72c}', '\u{a72d}'), ('\u{a72d}', '\u{a72c}'), ('\u{a72e}',
-        '\u{a72f}'), ('\u{a72f}', '\u{a72e}'), ('\u{a732}', '\u{a733}'),
-        ('\u{a733}', '\u{a732}'), ('\u{a734}', '\u{a735}'), ('\u{a735}',
-        '\u{a734}'), ('\u{a736}', '\u{a737}'), ('\u{a737}', '\u{a736}'),
-        ('\u{a738}', '\u{a739}'), ('\u{a739}', '\u{a738}'), ('\u{a73a}',
-        '\u{a73b}'), ('\u{a73b}', '\u{a73a}'), ('\u{a73c}', '\u{a73d}'),
-        ('\u{a73d}', '\u{a73c}'), ('\u{a73e}', '\u{a73f}'), ('\u{a73f}',
-        '\u{a73e}'), ('\u{a740}', '\u{a741}'), ('\u{a741}', '\u{a740}'),
-        ('\u{a742}', '\u{a743}'), ('\u{a743}', '\u{a742}'), ('\u{a744}',
-        '\u{a745}'), ('\u{a745}', '\u{a744}'), ('\u{a746}', '\u{a747}'),
-        ('\u{a747}', '\u{a746}'), ('\u{a748}', '\u{a749}'), ('\u{a749}',
-        '\u{a748}'), ('\u{a74a}', '\u{a74b}'), ('\u{a74b}', '\u{a74a}'),
-        ('\u{a74c}', '\u{a74d}'), ('\u{a74d}', '\u{a74c}'), ('\u{a74e}',
-        '\u{a74f}'), ('\u{a74f}', '\u{a74e}'), ('\u{a750}', '\u{a751}'),
-        ('\u{a751}', '\u{a750}'), ('\u{a752}', '\u{a753}'), ('\u{a753}',
-        '\u{a752}'), ('\u{a754}', '\u{a755}'), ('\u{a755}', '\u{a754}'),
-        ('\u{a756}', '\u{a757}'), ('\u{a757}', '\u{a756}'), ('\u{a758}',
-        '\u{a759}'), ('\u{a759}', '\u{a758}'), ('\u{a75a}', '\u{a75b}'),
-        ('\u{a75b}', '\u{a75a}'), ('\u{a75c}', '\u{a75d}'), ('\u{a75d}',
-        '\u{a75c}'), ('\u{a75e}', '\u{a75f}'), ('\u{a75f}', '\u{a75e}'),
-        ('\u{a760}', '\u{a761}'), ('\u{a761}', '\u{a760}'), ('\u{a762}',
-        '\u{a763}'), ('\u{a763}', '\u{a762}'), ('\u{a764}', '\u{a765}'),
-        ('\u{a765}', '\u{a764}'), ('\u{a766}', '\u{a767}'), ('\u{a767}',
-        '\u{a766}'), ('\u{a768}', '\u{a769}'), ('\u{a769}', '\u{a768}'),
-        ('\u{a76a}', '\u{a76b}'), ('\u{a76b}', '\u{a76a}'), ('\u{a76c}',
-        '\u{a76d}'), ('\u{a76d}', '\u{a76c}'), ('\u{a76e}', '\u{a76f}'),
-        ('\u{a76f}', '\u{a76e}'), ('\u{a779}', '\u{a77a}'), ('\u{a77a}',
-        '\u{a779}'), ('\u{a77b}', '\u{a77c}'), ('\u{a77c}', '\u{a77b}'),
-        ('\u{a77d}', '\u{1d79}'), ('\u{a77e}', '\u{a77f}'), ('\u{a77f}',
-        '\u{a77e}'), ('\u{a780}', '\u{a781}'), ('\u{a781}', '\u{a780}'),
-        ('\u{a782}', '\u{a783}'), ('\u{a783}', '\u{a782}'), ('\u{a784}',
-        '\u{a785}'), ('\u{a785}', '\u{a784}'), ('\u{a786}', '\u{a787}'),
-        ('\u{a787}', '\u{a786}'), ('\u{a78b}', '\u{a78c}'), ('\u{a78c}',
-        '\u{a78b}'), ('\u{a78d}', '\u{265}'), ('\u{a790}', '\u{a791}'),
-        ('\u{a791}', '\u{a790}'), ('\u{a792}', '\u{a793}'), ('\u{a793}',
-        '\u{a792}'), ('\u{a796}', '\u{a797}'), ('\u{a797}', '\u{a796}'),
-        ('\u{a798}', '\u{a799}'), ('\u{a799}', '\u{a798}'), ('\u{a79a}',
-        '\u{a79b}'), ('\u{a79b}', '\u{a79a}'), ('\u{a79c}', '\u{a79d}'),
-        ('\u{a79d}', '\u{a79c}'), ('\u{a79e}', '\u{a79f}'), ('\u{a79f}',
-        '\u{a79e}'), ('\u{a7a0}', '\u{a7a1}'), ('\u{a7a1}', '\u{a7a0}'),
-        ('\u{a7a2}', '\u{a7a3}'), ('\u{a7a3}', '\u{a7a2}'), ('\u{a7a4}',
-        '\u{a7a5}'), ('\u{a7a5}', '\u{a7a4}'), ('\u{a7a6}', '\u{a7a7}'),
-        ('\u{a7a7}', '\u{a7a6}'), ('\u{a7a8}', '\u{a7a9}'), ('\u{a7a9}',
-        '\u{a7a8}'), ('\u{a7aa}', '\u{266}'), ('\u{a7ab}', '\u{25c}'),
-        ('\u{a7ac}', '\u{261}'), ('\u{a7ad}', '\u{26c}'), ('\u{a7b0}',
-        '\u{29e}'), ('\u{a7b1}', '\u{287}'), ('\u{a7b2}', '\u{29d}'),
-        ('\u{a7b3}', '\u{ab53}'), ('\u{a7b4}', '\u{a7b5}'), ('\u{a7b5}',
-        '\u{a7b4}'), ('\u{a7b6}', '\u{a7b7}'), ('\u{a7b7}', '\u{a7b6}'),
-        ('\u{ab53}', '\u{a7b3}'), ('\u{ab70}', '\u{13a0}'), ('\u{ab71}',
-        '\u{13a1}'), ('\u{ab72}', '\u{13a2}'), ('\u{ab73}', '\u{13a3}'),
-        ('\u{ab74}', '\u{13a4}'), ('\u{ab75}', '\u{13a5}'), ('\u{ab76}',
-        '\u{13a6}'), ('\u{ab77}', '\u{13a7}'), ('\u{ab78}', '\u{13a8}'),
-        ('\u{ab79}', '\u{13a9}'), ('\u{ab7a}', '\u{13aa}'), ('\u{ab7b}',
-        '\u{13ab}'), ('\u{ab7c}', '\u{13ac}'), ('\u{ab7d}', '\u{13ad}'),
-        ('\u{ab7e}', '\u{13ae}'), ('\u{ab7f}', '\u{13af}'), ('\u{ab80}',
-        '\u{13b0}'), ('\u{ab81}', '\u{13b1}'), ('\u{ab82}', '\u{13b2}'),
-        ('\u{ab83}', '\u{13b3}'), ('\u{ab84}', '\u{13b4}'), ('\u{ab85}',
-        '\u{13b5}'), ('\u{ab86}', '\u{13b6}'), ('\u{ab87}', '\u{13b7}'),
-        ('\u{ab88}', '\u{13b8}'), ('\u{ab89}', '\u{13b9}'), ('\u{ab8a}',
-        '\u{13ba}'), ('\u{ab8b}', '\u{13bb}'), ('\u{ab8c}', '\u{13bc}'),
-        ('\u{ab8d}', '\u{13bd}'), ('\u{ab8e}', '\u{13be}'), ('\u{ab8f}',
-        '\u{13bf}'), ('\u{ab90}', '\u{13c0}'), ('\u{ab91}', '\u{13c1}'),
-        ('\u{ab92}', '\u{13c2}'), ('\u{ab93}', '\u{13c3}'), ('\u{ab94}',
-        '\u{13c4}'), ('\u{ab95}', '\u{13c5}'), ('\u{ab96}', '\u{13c6}'),
-        ('\u{ab97}', '\u{13c7}'), ('\u{ab98}', '\u{13c8}'), ('\u{ab99}',
-        '\u{13c9}'), ('\u{ab9a}', '\u{13ca}'), ('\u{ab9b}', '\u{13cb}'),
-        ('\u{ab9c}', '\u{13cc}'), ('\u{ab9d}', '\u{13cd}'), ('\u{ab9e}',
-        '\u{13ce}'), ('\u{ab9f}', '\u{13cf}'), ('\u{aba0}', '\u{13d0}'),
-        ('\u{aba1}', '\u{13d1}'), ('\u{aba2}', '\u{13d2}'), ('\u{aba3}',
-        '\u{13d3}'), ('\u{aba4}', '\u{13d4}'), ('\u{aba5}', '\u{13d5}'),
-        ('\u{aba6}', '\u{13d6}'), ('\u{aba7}', '\u{13d7}'), ('\u{aba8}',
-        '\u{13d8}'), ('\u{aba9}', '\u{13d9}'), ('\u{abaa}', '\u{13da}'),
-        ('\u{abab}', '\u{13db}'), ('\u{abac}', '\u{13dc}'), ('\u{abad}',
-        '\u{13dd}'), ('\u{abae}', '\u{13de}'), ('\u{abaf}', '\u{13df}'),
-        ('\u{abb0}', '\u{13e0}'), ('\u{abb1}', '\u{13e1}'), ('\u{abb2}',
-        '\u{13e2}'), ('\u{abb3}', '\u{13e3}'), ('\u{abb4}', '\u{13e4}'),
-        ('\u{abb5}', '\u{13e5}'), ('\u{abb6}', '\u{13e6}'), ('\u{abb7}',
-        '\u{13e7}'), ('\u{abb8}', '\u{13e8}'), ('\u{abb9}', '\u{13e9}'),
-        ('\u{abba}', '\u{13ea}'), ('\u{abbb}', '\u{13eb}'), ('\u{abbc}',
-        '\u{13ec}'), ('\u{abbd}', '\u{13ed}'), ('\u{abbe}', '\u{13ee}'),
-        ('\u{abbf}', '\u{13ef}'), ('\u{ff21}', '\u{ff41}'), ('\u{ff22}',
-        '\u{ff42}'), ('\u{ff23}', '\u{ff43}'), ('\u{ff24}', '\u{ff44}'),
-        ('\u{ff25}', '\u{ff45}'), ('\u{ff26}', '\u{ff46}'), ('\u{ff27}',
-        '\u{ff47}'), ('\u{ff28}', '\u{ff48}'), ('\u{ff29}', '\u{ff49}'),
-        ('\u{ff2a}', '\u{ff4a}'), ('\u{ff2b}', '\u{ff4b}'), ('\u{ff2c}',
-        '\u{ff4c}'), ('\u{ff2d}', '\u{ff4d}'), ('\u{ff2e}', '\u{ff4e}'),
-        ('\u{ff2f}', '\u{ff4f}'), ('\u{ff30}', '\u{ff50}'), ('\u{ff31}',
-        '\u{ff51}'), ('\u{ff32}', '\u{ff52}'), ('\u{ff33}', '\u{ff53}'),
-        ('\u{ff34}', '\u{ff54}'), ('\u{ff35}', '\u{ff55}'), ('\u{ff36}',
-        '\u{ff56}'), ('\u{ff37}', '\u{ff57}'), ('\u{ff38}', '\u{ff58}'),
-        ('\u{ff39}', '\u{ff59}'), ('\u{ff3a}', '\u{ff5a}'), ('\u{ff41}',
-        '\u{ff21}'), ('\u{ff42}', '\u{ff22}'), ('\u{ff43}', '\u{ff23}'),
-        ('\u{ff44}', '\u{ff24}'), ('\u{ff45}', '\u{ff25}'), ('\u{ff46}',
-        '\u{ff26}'), ('\u{ff47}', '\u{ff27}'), ('\u{ff48}', '\u{ff28}'),
-        ('\u{ff49}', '\u{ff29}'), ('\u{ff4a}', '\u{ff2a}'), ('\u{ff4b}',
-        '\u{ff2b}'), ('\u{ff4c}', '\u{ff2c}'), ('\u{ff4d}', '\u{ff2d}'),
-        ('\u{ff4e}', '\u{ff2e}'), ('\u{ff4f}', '\u{ff2f}'), ('\u{ff50}',
-        '\u{ff30}'), ('\u{ff51}', '\u{ff31}'), ('\u{ff52}', '\u{ff32}'),
-        ('\u{ff53}', '\u{ff33}'), ('\u{ff54}', '\u{ff34}'), ('\u{ff55}',
-        '\u{ff35}'), ('\u{ff56}', '\u{ff36}'), ('\u{ff57}', '\u{ff37}'),
-        ('\u{ff58}', '\u{ff38}'), ('\u{ff59}', '\u{ff39}'), ('\u{ff5a}',
-        '\u{ff3a}'), ('\u{10400}', '\u{10428}'), ('\u{10401}', '\u{10429}'),
-        ('\u{10402}', '\u{1042a}'), ('\u{10403}', '\u{1042b}'), ('\u{10404}',
-        '\u{1042c}'), ('\u{10405}', '\u{1042d}'), ('\u{10406}', '\u{1042e}'),
-        ('\u{10407}', '\u{1042f}'), ('\u{10408}', '\u{10430}'), ('\u{10409}',
-        '\u{10431}'), ('\u{1040a}', '\u{10432}'), ('\u{1040b}', '\u{10433}'),
-        ('\u{1040c}', '\u{10434}'), ('\u{1040d}', '\u{10435}'), ('\u{1040e}',
-        '\u{10436}'), ('\u{1040f}', '\u{10437}'), ('\u{10410}', '\u{10438}'),
-        ('\u{10411}', '\u{10439}'), ('\u{10412}', '\u{1043a}'), ('\u{10413}',
-        '\u{1043b}'), ('\u{10414}', '\u{1043c}'), ('\u{10415}', '\u{1043d}'),
-        ('\u{10416}', '\u{1043e}'), ('\u{10417}', '\u{1043f}'), ('\u{10418}',
-        '\u{10440}'), ('\u{10419}', '\u{10441}'), ('\u{1041a}', '\u{10442}'),
-        ('\u{1041b}', '\u{10443}'), ('\u{1041c}', '\u{10444}'), ('\u{1041d}',
-        '\u{10445}'), ('\u{1041e}', '\u{10446}'), ('\u{1041f}', '\u{10447}'),
-        ('\u{10420}', '\u{10448}'), ('\u{10421}', '\u{10449}'), ('\u{10422}',
-        '\u{1044a}'), ('\u{10423}', '\u{1044b}'), ('\u{10424}', '\u{1044c}'),
-        ('\u{10425}', '\u{1044d}'), ('\u{10426}', '\u{1044e}'), ('\u{10427}',
-        '\u{1044f}'), ('\u{10428}', '\u{10400}'), ('\u{10429}', '\u{10401}'),
-        ('\u{1042a}', '\u{10402}'), ('\u{1042b}', '\u{10403}'), ('\u{1042c}',
-        '\u{10404}'), ('\u{1042d}', '\u{10405}'), ('\u{1042e}', '\u{10406}'),
-        ('\u{1042f}', '\u{10407}'), ('\u{10430}', '\u{10408}'), ('\u{10431}',
-        '\u{10409}'), ('\u{10432}', '\u{1040a}'), ('\u{10433}', '\u{1040b}'),
-        ('\u{10434}', '\u{1040c}'), ('\u{10435}', '\u{1040d}'), ('\u{10436}',
-        '\u{1040e}'), ('\u{10437}', '\u{1040f}'), ('\u{10438}', '\u{10410}'),
-        ('\u{10439}', '\u{10411}'), ('\u{1043a}', '\u{10412}'), ('\u{1043b}',
-        '\u{10413}'), ('\u{1043c}', '\u{10414}'), ('\u{1043d}', '\u{10415}'),
-        ('\u{1043e}', '\u{10416}'), ('\u{1043f}', '\u{10417}'), ('\u{10440}',
-        '\u{10418}'), ('\u{10441}', '\u{10419}'), ('\u{10442}', '\u{1041a}'),
-        ('\u{10443}', '\u{1041b}'), ('\u{10444}', '\u{1041c}'), ('\u{10445}',
-        '\u{1041d}'), ('\u{10446}', '\u{1041e}'), ('\u{10447}', '\u{1041f}'),
-        ('\u{10448}', '\u{10420}'), ('\u{10449}', '\u{10421}'), ('\u{1044a}',
-        '\u{10422}'), ('\u{1044b}', '\u{10423}'), ('\u{1044c}', '\u{10424}'),
-        ('\u{1044d}', '\u{10425}'), ('\u{1044e}', '\u{10426}'), ('\u{1044f}',
-        '\u{10427}'), ('\u{10c80}', '\u{10cc0}'), ('\u{10c81}', '\u{10cc1}'),
-        ('\u{10c82}', '\u{10cc2}'), ('\u{10c83}', '\u{10cc3}'), ('\u{10c84}',
-        '\u{10cc4}'), ('\u{10c85}', '\u{10cc5}'), ('\u{10c86}', '\u{10cc6}'),
-        ('\u{10c87}', '\u{10cc7}'), ('\u{10c88}', '\u{10cc8}'), ('\u{10c89}',
-        '\u{10cc9}'), ('\u{10c8a}', '\u{10cca}'), ('\u{10c8b}', '\u{10ccb}'),
-        ('\u{10c8c}', '\u{10ccc}'), ('\u{10c8d}', '\u{10ccd}'), ('\u{10c8e}',
-        '\u{10cce}'), ('\u{10c8f}', '\u{10ccf}'), ('\u{10c90}', '\u{10cd0}'),
-        ('\u{10c91}', '\u{10cd1}'), ('\u{10c92}', '\u{10cd2}'), ('\u{10c93}',
-        '\u{10cd3}'), ('\u{10c94}', '\u{10cd4}'), ('\u{10c95}', '\u{10cd5}'),
-        ('\u{10c96}', '\u{10cd6}'), ('\u{10c97}', '\u{10cd7}'), ('\u{10c98}',
-        '\u{10cd8}'), ('\u{10c99}', '\u{10cd9}'), ('\u{10c9a}', '\u{10cda}'),
-        ('\u{10c9b}', '\u{10cdb}'), ('\u{10c9c}', '\u{10cdc}'), ('\u{10c9d}',
-        '\u{10cdd}'), ('\u{10c9e}', '\u{10cde}'), ('\u{10c9f}', '\u{10cdf}'),
-        ('\u{10ca0}', '\u{10ce0}'), ('\u{10ca1}', '\u{10ce1}'), ('\u{10ca2}',
-        '\u{10ce2}'), ('\u{10ca3}', '\u{10ce3}'), ('\u{10ca4}', '\u{10ce4}'),
-        ('\u{10ca5}', '\u{10ce5}'), ('\u{10ca6}', '\u{10ce6}'), ('\u{10ca7}',
-        '\u{10ce7}'), ('\u{10ca8}', '\u{10ce8}'), ('\u{10ca9}', '\u{10ce9}'),
-        ('\u{10caa}', '\u{10cea}'), ('\u{10cab}', '\u{10ceb}'), ('\u{10cac}',
-        '\u{10cec}'), ('\u{10cad}', '\u{10ced}'), ('\u{10cae}', '\u{10cee}'),
-        ('\u{10caf}', '\u{10cef}'), ('\u{10cb0}', '\u{10cf0}'), ('\u{10cb1}',
-        '\u{10cf1}'), ('\u{10cb2}', '\u{10cf2}'), ('\u{10cc0}', '\u{10c80}'),
-        ('\u{10cc1}', '\u{10c81}'), ('\u{10cc2}', '\u{10c82}'), ('\u{10cc3}',
-        '\u{10c83}'), ('\u{10cc4}', '\u{10c84}'), ('\u{10cc5}', '\u{10c85}'),
-        ('\u{10cc6}', '\u{10c86}'), ('\u{10cc7}', '\u{10c87}'), ('\u{10cc8}',
-        '\u{10c88}'), ('\u{10cc9}', '\u{10c89}'), ('\u{10cca}', '\u{10c8a}'),
-        ('\u{10ccb}', '\u{10c8b}'), ('\u{10ccc}', '\u{10c8c}'), ('\u{10ccd}',
-        '\u{10c8d}'), ('\u{10cce}', '\u{10c8e}'), ('\u{10ccf}', '\u{10c8f}'),
-        ('\u{10cd0}', '\u{10c90}'), ('\u{10cd1}', '\u{10c91}'), ('\u{10cd2}',
-        '\u{10c92}'), ('\u{10cd3}', '\u{10c93}'), ('\u{10cd4}', '\u{10c94}'),
-        ('\u{10cd5}', '\u{10c95}'), ('\u{10cd6}', '\u{10c96}'), ('\u{10cd7}',
-        '\u{10c97}'), ('\u{10cd8}', '\u{10c98}'), ('\u{10cd9}', '\u{10c99}'),
-        ('\u{10cda}', '\u{10c9a}'), ('\u{10cdb}', '\u{10c9b}'), ('\u{10cdc}',
-        '\u{10c9c}'), ('\u{10cdd}', '\u{10c9d}'), ('\u{10cde}', '\u{10c9e}'),
-        ('\u{10cdf}', '\u{10c9f}'), ('\u{10ce0}', '\u{10ca0}'), ('\u{10ce1}',
-        '\u{10ca1}'), ('\u{10ce2}', '\u{10ca2}'), ('\u{10ce3}', '\u{10ca3}'),
-        ('\u{10ce4}', '\u{10ca4}'), ('\u{10ce5}', '\u{10ca5}'), ('\u{10ce6}',
-        '\u{10ca6}'), ('\u{10ce7}', '\u{10ca7}'), ('\u{10ce8}', '\u{10ca8}'),
-        ('\u{10ce9}', '\u{10ca9}'), ('\u{10cea}', '\u{10caa}'), ('\u{10ceb}',
-        '\u{10cab}'), ('\u{10cec}', '\u{10cac}'), ('\u{10ced}', '\u{10cad}'),
-        ('\u{10cee}', '\u{10cae}'), ('\u{10cef}', '\u{10caf}'), ('\u{10cf0}',
-        '\u{10cb0}'), ('\u{10cf1}', '\u{10cb1}'), ('\u{10cf2}', '\u{10cb2}'),
-        ('\u{118a0}', '\u{118c0}'), ('\u{118a1}', '\u{118c1}'), ('\u{118a2}',
-        '\u{118c2}'), ('\u{118a3}', '\u{118c3}'), ('\u{118a4}', '\u{118c4}'),
-        ('\u{118a5}', '\u{118c5}'), ('\u{118a6}', '\u{118c6}'), ('\u{118a7}',
-        '\u{118c7}'), ('\u{118a8}', '\u{118c8}'), ('\u{118a9}', '\u{118c9}'),
-        ('\u{118aa}', '\u{118ca}'), ('\u{118ab}', '\u{118cb}'), ('\u{118ac}',
-        '\u{118cc}'), ('\u{118ad}', '\u{118cd}'), ('\u{118ae}', '\u{118ce}'),
-        ('\u{118af}', '\u{118cf}'), ('\u{118b0}', '\u{118d0}'), ('\u{118b1}',
-        '\u{118d1}'), ('\u{118b2}', '\u{118d2}'), ('\u{118b3}', '\u{118d3}'),
-        ('\u{118b4}', '\u{118d4}'), ('\u{118b5}', '\u{118d5}'), ('\u{118b6}',
-        '\u{118d6}'), ('\u{118b7}', '\u{118d7}'), ('\u{118b8}', '\u{118d8}'),
-        ('\u{118b9}', '\u{118d9}'), ('\u{118ba}', '\u{118da}'), ('\u{118bb}',
-        '\u{118db}'), ('\u{118bc}', '\u{118dc}'), ('\u{118bd}', '\u{118dd}'),
-        ('\u{118be}', '\u{118de}'), ('\u{118bf}', '\u{118df}'), ('\u{118c0}',
-        '\u{118a0}'), ('\u{118c1}', '\u{118a1}'), ('\u{118c2}', '\u{118a2}'),
-        ('\u{118c3}', '\u{118a3}'), ('\u{118c4}', '\u{118a4}'), ('\u{118c5}',
-        '\u{118a5}'), ('\u{118c6}', '\u{118a6}'), ('\u{118c7}', '\u{118a7}'),
-        ('\u{118c8}', '\u{118a8}'), ('\u{118c9}', '\u{118a9}'), ('\u{118ca}',
-        '\u{118aa}'), ('\u{118cb}', '\u{118ab}'), ('\u{118cc}', '\u{118ac}'),
-        ('\u{118cd}', '\u{118ad}'), ('\u{118ce}', '\u{118ae}'), ('\u{118cf}',
-        '\u{118af}'), ('\u{118d0}', '\u{118b0}'), ('\u{118d1}', '\u{118b1}'),
-        ('\u{118d2}', '\u{118b2}'), ('\u{118d3}', '\u{118b3}'), ('\u{118d4}',
-        '\u{118b4}'), ('\u{118d5}', '\u{118b5}'), ('\u{118d6}', '\u{118b6}'),
-        ('\u{118d7}', '\u{118b7}'), ('\u{118d8}', '\u{118b8}'), ('\u{118d9}',
-        '\u{118b9}'), ('\u{118da}', '\u{118ba}'), ('\u{118db}', '\u{118bb}'),
-        ('\u{118dc}', '\u{118bc}'), ('\u{118dd}', '\u{118bd}'), ('\u{118de}',
-        '\u{118be}'), ('\u{118df}', '\u{118bf}')
-    ];
+    #[test]
+    fn simple_fold_empty() {
+        assert_eq!(Some('A'), simple_fold('?').unwrap_err());
+        assert_eq!(Some('A'), simple_fold('@').unwrap_err());
+        assert_eq!(Some('a'), simple_fold('[').unwrap_err());
+        assert_eq!(Some('Ⰰ'), simple_fold('☃').unwrap_err());
+    }
+
+    #[test]
+    fn simple_fold_max() {
+        assert_eq!(None, simple_fold('\u{10FFFE}').unwrap_err());
+        assert_eq!(None, simple_fold('\u{10FFFF}').unwrap_err());
+    }
 
+    #[test]
+    fn range_contains() {
+        assert!(contains_simple_case_mapping('A', 'A'));
+        assert!(contains_simple_case_mapping('Z', 'Z'));
+        assert!(contains_simple_case_mapping('A', 'Z'));
+        assert!(contains_simple_case_mapping('@', 'A'));
+        assert!(contains_simple_case_mapping('Z', '['));
+        assert!(contains_simple_case_mapping('☃', 'Ⰰ'));
+
+        assert!(!contains_simple_case_mapping('[', '['));
+        assert!(!contains_simple_case_mapping('[', '`'));
+
+        assert!(!contains_simple_case_mapping('☃', '☃'));
+    }
+
+    #[test]
+    fn regression_466() {
+        use super::{CanonicalClassQuery, ClassQuery};
+
+        let q = ClassQuery::OneLetter('C');
+        assert_eq!(
+            q.canonicalize().unwrap(),
+            CanonicalClassQuery::GeneralCategory("Other"));
+    }
 }
-
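Reviewer note on the contract the new tests above pin down: on failure, simple_fold reports the next character at or after the query that does have a simple case-folding mapping, or None once there is no such character left, and contains_simple_case_mapping asks whether any character in an inclusive range has a mapping. The following is a minimal, self-contained sketch of that behaviour over a hypothetical TOY_TABLE; it is illustrative only and is not the regex-syntax implementation or its vendored tables.

// Illustrative sketch only: TOY_TABLE is a tiny, hypothetical stand-in for
// the generated case-folding table, sorted by its first component so that it
// can be binary searched.
const TOY_TABLE: &[(char, &[char])] = &[
    ('A', &['a']),
    ('B', &['b']),
    ('a', &['A']),
    ('b', &['B']),
];

// On success: the folded equivalents of `c`. On failure: the next character
// at or after `c` that has a mapping, or `None` if no such character exists.
fn simple_fold(
    c: char,
) -> Result<impl Iterator<Item = char> + std::fmt::Debug, Option<char>> {
    match TOY_TABLE.binary_search_by_key(&c, |&(k, _)| k) {
        Ok(i) => Ok(TOY_TABLE[i].1.iter().copied()),
        Err(i) => Err(TOY_TABLE.get(i).map(|&(k, _)| k)),
    }
}

fn main() {
    // '?' has no fold; the next mapped character at or after it is 'A'.
    assert_eq!(Some('A'), simple_fold('?').unwrap_err());
    // 'A' folds to 'a'.
    assert_eq!(vec!['a'], simple_fold('A').unwrap().collect::<Vec<_>>());
    // Past the last mapped character there is nothing left to report.
    assert_eq!(None, simple_fold('z').unwrap_err());
}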
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-syntax/src/unicode_tables/age.rs
@@ -0,0 +1,424 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+//  ucd-generate age tmp/ucd-10.0.0/ --chars
+//
+// ucd-generate is available on crates.io.
+
+pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
+  ("V10_0", V10_0), ("V1_1", V1_1), ("V2_0", V2_0), ("V2_1", V2_1),
+  ("V3_0", V3_0), ("V3_1", V3_1), ("V3_2", V3_2), ("V4_0", V4_0),
+  ("V4_1", V4_1), ("V5_0", V5_0), ("V5_1", V5_1), ("V5_2", V5_2),
+  ("V6_0", V6_0), ("V6_1", V6_1), ("V6_2", V6_2), ("V6_3", V6_3),
+  ("V7_0", V7_0), ("V8_0", V8_0), ("V9_0", V9_0),
+];
+
+pub const V10_0: &'static [(char, char)] = &[
+  ('ࡠ', 'ࡪ'), ('ৼ', '৽'), ('ૺ', '૿'), ('ഀ', 'ഀ'),
+  ('഻', '഼'), ('᳷', '᳷'), ('᷶', '᷹'), ('₿', '₿'),
+  ('⏿', '⏿'), ('⯒', '⯒'), ('⹅', '⹉'), ('ㄮ', 'ㄮ'),
+  ('鿖', '鿪'), ('𐌭', '𐌯'), ('𑨀', '𑩇'), ('𑩐', '𑪃'),
+  ('𑪆', '𑪜'), ('𑪞', '𑪢'), ('𑴀', '𑴆'), ('𑴈', '𑴉'),
+  ('𑴋', '𑴶'), ('𑴺', '𑴺'), ('𑴼', '𑴽'), ('𑴿', '𑵇'),
+  ('𑵐', '𑵙'), ('𖿡', '𖿡'), ('𛀂', '𛄞'), ('𛅰', '𛋻'),
+  ('🉠', '🉥'), ('🛓', '🛔'), ('🛷', '🛸'), ('🤀', '🤋'),
+  ('🤟', '🤟'), ('🤨', '🤯'), ('🤱', '🤲'), ('🥌', '🥌'),
+  ('🥟', '🥫'), ('🦒', '🦗'), ('🧐', '🧦'), ('𬺰', '𮯠'),
+];
+
+pub const V1_1: &'static [(char, char)] = &[
+  ('\u{0}', 'ǵ'), ('Ǻ', 'ȗ'), ('ɐ', 'ʨ'), ('ʰ', '˞'), ('ˠ', '˩'),
+  ('̀', 'ͅ'), ('͠', '͡'), ('ʹ', '͵'), ('ͺ', 'ͺ'), (';', ';'),
+  ('΄', 'Ί'), ('Ό', 'Ό'), ('Ύ', 'Ρ'), ('Σ', 'ώ'), ('ϐ', 'ϖ'),
+  ('Ϛ', 'Ϛ'), ('Ϝ', 'Ϝ'), ('Ϟ', 'Ϟ'), ('Ϡ', 'Ϡ'), ('Ϣ', 'ϳ'),
+  ('Ё', 'Ќ'), ('Ў', 'я'), ('ё', 'ќ'), ('ў', '҆'), ('Ґ', 'ӄ'),
+  ('Ӈ', 'ӈ'), ('Ӌ', 'ӌ'), ('Ӑ', 'ӫ'), ('Ӯ', 'ӵ'), ('Ӹ', 'ӹ'),
+  ('Ա', 'Ֆ'), ('ՙ', '՟'), ('ա', 'և'), ('։', '։'), ('ְ', 'ֹ'),
+  ('ֻ', '׃'), ('א', 'ת'), ('װ', '״'), ('،', '،'), ('؛', '؛'),
+  ('؟', '؟'), ('ء', 'غ'), ('ـ', 'ْ'), ('٠', '٭'), ('ٰ', 'ڷ'),
+  ('ں', 'ھ'), ('ۀ', 'ێ'), ('ې', 'ۭ'), ('۰', '۹'), ('ँ', 'ः'),
+  ('अ', 'ह'), ('़', '्'), ('ॐ', '॔'), ('क़', '॰'),
+  ('ঁ', 'ঃ'), ('অ', 'ঌ'), ('এ', 'ঐ'), ('ও', 'ন'),
+  ('প', 'র'), ('ল', 'ল'), ('শ', 'হ'), ('়', '়'),
+  ('া', 'ৄ'), ('ে', 'ৈ'), ('ো', '্'), ('ৗ', 'ৗ'),
+  ('ড়', 'ঢ়'), ('য়', 'ৣ'), ('০', '৺'), ('ਂ', 'ਂ'),
+  ('ਅ', 'ਊ'), ('ਏ', 'ਐ'), ('ਓ', 'ਨ'), ('ਪ', 'ਰ'),
+  ('ਲ', 'ਲ਼'), ('ਵ', 'ਸ਼'), ('ਸ', 'ਹ'), ('਼', '਼'),
+  ('ਾ', 'ੂ'), ('ੇ', 'ੈ'), ('ੋ', '੍'), ('ਖ਼', 'ੜ'),
+  ('ਫ਼', 'ਫ਼'), ('੦', 'ੴ'), ('ઁ', 'ઃ'), ('અ', 'ઋ'),
+  ('ઍ', 'ઍ'), ('એ', 'ઑ'), ('ઓ', 'ન'), ('પ', 'ર'),
+  ('લ', 'ળ'), ('વ', 'હ'), ('઼', 'ૅ'), ('ે', 'ૉ'),
+  ('ો', '્'), ('ૐ', 'ૐ'), ('ૠ', 'ૠ'), ('૦', '૯'),
+  ('ଁ', 'ଃ'), ('ଅ', 'ଌ'), ('ଏ', 'ଐ'), ('ଓ', 'ନ'),
+  ('ପ', 'ର'), ('ଲ', 'ଳ'), ('ଶ', 'ହ'), ('଼', 'ୃ'),
+  ('େ', 'ୈ'), ('ୋ', '୍'), ('ୖ', 'ୗ'), ('ଡ଼', 'ଢ଼'),
+  ('ୟ', 'ୡ'), ('୦', '୰'), ('ஂ', 'ஃ'), ('அ', 'ஊ'),
+  ('எ', 'ஐ'), ('ஒ', 'க'), ('ங', 'ச'), ('ஜ', 'ஜ'),
+  ('ஞ', 'ட'), ('ண', 'த'), ('ந', 'ப'), ('ம', 'வ'),
+  ('ஷ', 'ஹ'), ('ா', 'ூ'), ('ெ', 'ை'), ('ொ', '்'),
+  ('ௗ', 'ௗ'), ('௧', '௲'), ('ఁ', 'ః'), ('అ', 'ఌ'),
+  ('ఎ', 'ఐ'), ('ఒ', 'న'), ('ప', 'ళ'), ('వ', 'హ'),
+  ('ా', 'ౄ'), ('ె', 'ై'), ('ొ', '్'), ('ౕ', 'ౖ'),
+  ('ౠ', 'ౡ'), ('౦', '౯'), ('ಂ', 'ಃ'), ('ಅ', 'ಌ'),
+  ('ಎ', 'ಐ'), ('ಒ', 'ನ'), ('ಪ', 'ಳ'), ('ವ', 'ಹ'),
+  ('ಾ', 'ೄ'), ('ೆ', 'ೈ'), ('ೊ', '್'), ('ೕ', 'ೖ'),
+  ('ೞ', 'ೞ'), ('ೠ', 'ೡ'), ('೦', '೯'), ('ം', 'ഃ'),
+  ('അ', 'ഌ'), ('എ', 'ഐ'), ('ഒ', 'ന'), ('പ', 'ഹ'),
+  ('ാ', 'ൃ'), ('െ', 'ൈ'), ('ൊ', '്'), ('ൗ', 'ൗ'),
+  ('ൠ', 'ൡ'), ('൦', '൯'), ('ก', 'ฺ'), ('฿', '๛'),
+  ('ກ', 'ຂ'), ('ຄ', 'ຄ'), ('ງ', 'ຈ'), ('ຊ', 'ຊ'),
+  ('ຍ', 'ຍ'), ('ດ', 'ທ'), ('ນ', 'ຟ'), ('ມ', 'ຣ'),
+  ('ລ', 'ລ'), ('ວ', 'ວ'), ('ສ', 'ຫ'), ('ອ', 'ູ'),
+  ('ົ', 'ຽ'), ('ເ', 'ໄ'), ('ໆ', 'ໆ'), ('່', 'ໍ'),
+  ('໐', '໙'), ('ໜ', 'ໝ'), ('Ⴀ', 'Ⴥ'), ('ა', 'ჶ'),
+  ('჻', '჻'), ('ᄀ', 'ᅙ'), ('ᅟ', 'ᆢ'), ('ᆨ', 'ᇹ'),
+  ('Ḁ', 'ẚ'), ('Ạ', 'ỹ'), ('ἀ', 'ἕ'), ('Ἐ', 'Ἕ'),
+  ('ἠ', 'ὅ'), ('Ὀ', 'Ὅ'), ('ὐ', 'ὗ'), ('Ὑ', 'Ὑ'),
+  ('Ὓ', 'Ὓ'), ('Ὕ', 'Ὕ'), ('Ὗ', 'ώ'), ('ᾀ', 'ᾴ'),
+  ('ᾶ', 'ῄ'), ('ῆ', 'ΐ'), ('ῖ', 'Ί'), ('῝', '`'),
+  ('ῲ', 'ῴ'), ('ῶ', '῾'), ('\u{2000}', '\u{202e}'), ('‰', '⁆'),
+  ('\u{206a}', '⁰'), ('⁴', '₎'), ('₠', '₪'), ('⃐', '⃡'),
+  ('℀', 'ℸ'), ('⅓', 'ↂ'), ('←', '⇪'), ('∀', '⋱'),
+  ('⌀', '⌀'), ('⌂', '⍺'), ('␀', '␤'), ('⑀', '⑊'),
+  ('①', '⓪'), ('─', '▕'), ('■', '◯'), ('☀', '☓'),
+  ('☚', '♯'), ('✁', '✄'), ('✆', '✉'), ('✌', '✧'),
+  ('✩', '❋'), ('❍', '❍'), ('❏', '❒'), ('❖', '❖'),
+  ('❘', '❞'), ('❡', '❧'), ('❶', '➔'), ('➘', '➯'),
+  ('➱', '➾'), ('\u{3000}', '〷'), ('〿', '〿'), ('ぁ', 'ゔ'),
+  ('゙', 'ゞ'), ('ァ', 'ヾ'), ('ㄅ', 'ㄬ'), ('ㄱ', 'ㆎ'),
+  ('㆐', '㆟'), ('㈀', '㈜'), ('㈠', '㉃'), ('㉠', '㉻'),
+  ('㉿', '㊰'), ('㋀', '㋋'), ('㋐', '㋾'), ('㌀', '㍶'),
+  ('㍻', '㏝'), ('㏠', '㏾'), ('一', '龥'), ('\u{e000}', '鶴'),
+  ('ff', 'st'), ('ﬓ', 'ﬗ'), ('ﬞ', 'זּ'), ('טּ', 'לּ'),
+  ('מּ', 'מּ'), ('נּ', 'סּ'), ('ףּ', 'פּ'), ('צּ', 'ﮱ'),
+  ('ﯓ', '﴿'), ('ﵐ', 'ﶏ'), ('ﶒ', 'ﷇ'), ('ﷰ', 'ﷻ'),
+  ('︠', '︣'), ('︰', '﹄'), ('﹉', '﹒'), ('﹔', '﹦'),
+  ('﹨', '﹫'), ('ﹰ', 'ﹲ'), ('ﹴ', 'ﹴ'), ('ﹶ', 'ﻼ'),
+  ('\u{feff}', '\u{feff}'), ('!', '~'), ('。', 'ᄒ'), ('ᅡ', 'ᅦ'),
+  ('ᅧ', 'ᅬ'), ('ᅭ', 'ᅲ'), ('ᅳ', 'ᅵ'), ('¢', '₩'),
+  ('│', '○'), ('�', '\u{ffff}'),
+];
+
+pub const V2_0: &'static [(char, char)] = &[
+  ('֑', '֡'), ('֣', '֯'), ('ׄ', 'ׄ'), ('ༀ', 'ཇ'), ('ཉ', 'ཀྵ'),
+  ('ཱ', 'ྋ'), ('ྐ', 'ྕ'), ('ྗ', 'ྗ'), ('ྙ', 'ྭ'),
+  ('ྱ', 'ྷ'), ('ྐྵ', 'ྐྵ'), ('ẛ', 'ẛ'), ('₫', '₫'),
+  ('가', '힣'), ('\u{1fffe}', '\u{1ffff}'), ('\u{2fffe}', '\u{2ffff}'),
+  ('\u{3fffe}', '\u{3ffff}'), ('\u{4fffe}', '\u{4ffff}'),
+  ('\u{5fffe}', '\u{5ffff}'), ('\u{6fffe}', '\u{6ffff}'),
+  ('\u{7fffe}', '\u{7ffff}'), ('\u{8fffe}', '\u{8ffff}'),
+  ('\u{9fffe}', '\u{9ffff}'), ('\u{afffe}', '\u{affff}'),
+  ('\u{bfffe}', '\u{bffff}'), ('\u{cfffe}', '\u{cffff}'),
+  ('\u{dfffe}', '\u{dffff}'), ('\u{efffe}', '\u{10ffff}'),
+];
+
+pub const V2_1: &'static [(char, char)] = &[
+  ('€', '€'), ('\u{fffc}', '\u{fffc}'),
+];
+
+pub const V3_0: &'static [(char, char)] = &[
+  ('Ƕ', 'ǹ'), ('Ș', 'ȟ'), ('Ȣ', 'ȳ'), ('ʩ', 'ʭ'), ('˟', '˟'),
+  ('˪', 'ˮ'), ('͆', '͎'), ('͢', '͢'), ('ϗ', 'ϗ'), ('ϛ', 'ϛ'),
+  ('ϝ', 'ϝ'), ('ϟ', 'ϟ'), ('ϡ', 'ϡ'), ('Ѐ', 'Ѐ'), ('Ѝ', 'Ѝ'),
+  ('ѐ', 'ѐ'), ('ѝ', 'ѝ'), ('҈', '҉'), ('Ҍ', 'ҏ'), ('Ӭ', 'ӭ'),
+  ('֊', '֊'), ('ٓ', 'ٕ'), ('ڸ', 'ڹ'), ('ڿ', 'ڿ'), ('ۏ', 'ۏ'),
+  ('ۺ', '۾'), ('܀', '܍'), ('\u{70f}', 'ܬ'), ('ܰ', '݊'), ('ހ', 'ް'),
+  ('ං', 'ඃ'), ('අ', 'ඖ'), ('ක', 'න'), ('ඳ', 'ර'),
+  ('ල', 'ල'), ('ව', 'ෆ'), ('්', '්'), ('ා', 'ු'),
+  ('ූ', 'ූ'), ('ෘ', 'ෟ'), ('ෲ', '෴'), ('ཪ', 'ཪ'),
+  ('ྖ', 'ྖ'), ('ྮ', 'ྰ'), ('ྸ', 'ྸ'), ('ྺ', 'ྼ'),
+  ('྾', '࿌'), ('࿏', '࿏'), ('က', 'အ'), ('ဣ', 'ဧ'),
+  ('ဩ', 'ဪ'), ('ာ', 'ဲ'), ('ံ', '္'), ('၀', 'ၙ'),
+  ('ሀ', 'ሆ'), ('ለ', 'ቆ'), ('ቈ', 'ቈ'), ('ቊ', 'ቍ'),
+  ('ቐ', 'ቖ'), ('ቘ', 'ቘ'), ('ቚ', 'ቝ'), ('በ', 'ኆ'),
+  ('ኈ', 'ኈ'), ('ኊ', 'ኍ'), ('ነ', 'ኮ'), ('ኰ', 'ኰ'),
+  ('ኲ', 'ኵ'), ('ኸ', 'ኾ'), ('ዀ', 'ዀ'), ('ዂ', 'ዅ'),
+  ('ወ', 'ዎ'), ('ዐ', 'ዖ'), ('ዘ', 'ዮ'), ('ደ', 'ጎ'),
+  ('ጐ', 'ጐ'), ('ጒ', 'ጕ'), ('ጘ', 'ጞ'), ('ጠ', 'ፆ'),
+  ('ፈ', 'ፚ'), ('፡', '፼'), ('Ꭰ', 'Ᏼ'), ('ᐁ', 'ᙶ'),
+  ('\u{1680}', '᚜'), ('ᚠ', 'ᛰ'), ('ក', 'ៜ'), ('០', '៩'),
+  ('᠀', '\u{180e}'), ('᠐', '᠙'), ('ᠠ', 'ᡷ'), ('ᢀ', 'ᢩ'),
+  ('\u{202f}', '\u{202f}'), ('⁈', '⁍'), ('₭', '₯'), ('⃢', '⃣'),
+  ('ℹ', '℺'), ('Ↄ', 'Ↄ'), ('⇫', '⇳'), ('⌁', '⌁'),
+  ('⍻', '⍻'), ('⍽', '⎚'), ('␥', '␦'), ('◰', '◷'),
+  ('☙', '☙'), ('♰', '♱'), ('⠀', '⣿'), ('⺀', '⺙'),
+  ('⺛', '⻳'), ('⼀', '⿕'), ('⿰', '⿻'), ('〸', '〺'),
+  ('〾', '〾'), ('ㆠ', 'ㆷ'), ('㐀', '䶵'), ('ꀀ', 'ꒌ'),
+  ('꒐', '꒡'), ('꒤', '꒳'), ('꒵', '꓀'), ('꓂', '꓄'),
+  ('꓆', '꓆'), ('יִ', 'יִ'), ('\u{fff9}', '\u{fffb}'),
+];
+
+pub const V3_1: &'static [(char, char)] = &[
+  ('ϴ', 'ϵ'), ('\u{fdd0}', '\u{fdef}'), ('𐌀', '𐌞'), ('𐌠', '𐌣'),
+  ('𐌰', '𐍊'), ('𐐀', '𐐥'), ('𐐨', '𐑍'), ('𝀀', '𝃵'),
+  ('𝄀', '𝄦'), ('𝄪', '𝇝'), ('𝐀', '𝑔'), ('𝑖', '𝒜'),
+  ('𝒞', '𝒟'), ('𝒢', '𝒢'), ('𝒥', '𝒦'), ('𝒩', '𝒬'),
+  ('𝒮', '𝒹'), ('𝒻', '𝒻'), ('𝒽', '𝓀'), ('𝓂', '𝓃'),
+  ('𝓅', '𝔅'), ('𝔇', '𝔊'), ('𝔍', '𝔔'), ('𝔖', '𝔜'),
+  ('𝔞', '𝔹'), ('𝔻', '𝔾'), ('𝕀', '𝕄'), ('𝕆', '𝕆'),
+  ('𝕊', '𝕐'), ('𝕒', '𝚣'), ('𝚨', '𝟉'), ('𝟎', '𝟿'),
+  ('𠀀', '𪛖'), ('丽', '𪘀'), ('\u{e0001}', '\u{e0001}'),
+  ('\u{e0020}', '\u{e007f}'),
+];
+
+pub const V3_2: &'static [(char, char)] = &[
+  ('Ƞ', 'Ƞ'), ('͏', '͏'), ('ͣ', 'ͯ'), ('Ϙ', 'ϙ'), ('϶', '϶'),
+  ('Ҋ', 'ҋ'), ('Ӆ', 'ӆ'), ('Ӊ', 'ӊ'), ('Ӎ', 'ӎ'), ('Ԁ', 'ԏ'),
+  ('ٮ', 'ٯ'), ('ޱ', 'ޱ'), ('ჷ', 'ჸ'), ('ᜀ', 'ᜌ'), ('ᜎ', '᜔'),
+  ('ᜠ', '᜶'), ('ᝀ', 'ᝓ'), ('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'),
+  ('ᝲ', 'ᝳ'), ('⁇', '⁇'), ('⁎', '⁒'), ('⁗', '⁗'),
+  ('\u{205f}', '\u{2063}'), ('ⁱ', 'ⁱ'), ('₰', '₱'), ('⃤', '⃪'),
+  ('ℽ', '⅋'), ('⇴', '⇿'), ('⋲', '⋿'), ('⍼', '⍼'),
+  ('⎛', '⏎'), ('⓫', '⓾'), ('▖', '▟'), ('◸', '◿'),
+  ('☖', '☗'), ('♲', '♽'), ('⚀', '⚉'), ('❨', '❵'),
+  ('⟐', '⟫'), ('⟰', '⟿'), ('⤀', '⫿'), ('〻', '〽'),
+  ('ゕ', 'ゖ'), ('ゟ', '゠'), ('ヿ', 'ヿ'), ('ㇰ', 'ㇿ'),
+  ('㉑', '㉟'), ('㊱', '㊿'), ('꒢', '꒣'), ('꒴', '꒴'),
+  ('꓁', '꓁'), ('꓅', '꓅'), ('侮', '頻'), ('﷼', '﷼'),
+  ('︀', '️'), ('﹅', '﹆'), ('ﹳ', 'ﹳ'), ('⦅', '⦆'),
+];
+
+pub const V4_0: &'static [(char, char)] = &[
+  ('ȡ', 'ȡ'), ('ȴ', 'ȶ'), ('ʮ', 'ʯ'), ('˯', '˿'), ('͐', '͗'),
+  ('͝', '͟'), ('Ϸ', 'ϻ'), ('\u{600}', '\u{603}'), ('؍', 'ؕ'),
+  ('ٖ', '٘'), ('ۮ', 'ۯ'), ('ۿ', 'ۿ'), ('ܭ', 'ܯ'), ('ݍ', 'ݏ'),
+  ('ऄ', 'ऄ'), ('ঽ', 'ঽ'), ('ਁ', 'ਁ'), ('ਃ', 'ਃ'),
+  ('ઌ', 'ઌ'), ('ૡ', 'ૣ'), ('૱', '૱'), ('ଵ', 'ଵ'),
+  ('ୱ', 'ୱ'), ('௳', '௺'), ('಼', 'ಽ'), ('៝', '៝'),
+  ('៰', '៹'), ('ᤀ', 'ᤜ'), ('ᤠ', 'ᤫ'), ('ᤰ', '᤻'),
+  ('᥀', '᥀'), ('᥄', 'ᥭ'), ('ᥰ', 'ᥴ'), ('᧠', '᧿'),
+  ('ᴀ', 'ᵫ'), ('⁓', '⁔'), ('℻', '℻'), ('⏏', '⏐'),
+  ('⓿', '⓿'), ('☔', '☕'), ('⚊', '⚑'), ('⚠', '⚡'),
+  ('⬀', '⬍'), ('㈝', '㈞'), ('㉐', '㉐'), ('㉼', '㉽'),
+  ('㋌', '㋏'), ('㍷', '㍺'), ('㏞', '㏟'), ('㏿', '㏿'),
+  ('䷀', '䷿'), ('﷽', '﷽'), ('﹇', '﹈'), ('𐀀', '𐀋'),
+  ('𐀍', '𐀦'), ('𐀨', '𐀺'), ('𐀼', '𐀽'), ('𐀿', '𐁍'),
+  ('𐁐', '𐁝'), ('𐂀', '𐃺'), ('𐄀', '𐄂'), ('𐄇', '𐄳'),
+  ('𐄷', '𐄿'), ('𐎀', '𐎝'), ('𐎟', '𐎟'), ('𐐦', '𐐧'),
+  ('𐑎', '𐒝'), ('𐒠', '𐒩'), ('𐠀', '𐠅'), ('𐠈', '𐠈'),
+  ('𐠊', '𐠵'), ('𐠷', '𐠸'), ('𐠼', '𐠼'), ('𐠿', '𐠿'),
+  ('𝌀', '𝍖'), ('𝓁', '𝓁'), ('󠄀', '󠇯'),
+];
+
+pub const V4_1: &'static [(char, char)] = &[
+  ('ȷ', 'Ɂ'), ('͘', '͜'), ('ϼ', 'Ͽ'), ('Ӷ', 'ӷ'), ('֢', '֢'),
+  ('ׅ', 'ׇ'), ('؋', '؋'), ('؞', '؞'), ('ٙ', 'ٞ'), ('ݐ', 'ݭ'),
+  ('ॽ', 'ॽ'), ('ৎ', 'ৎ'), ('ஶ', 'ஶ'), ('௦', '௦'),
+  ('࿐', '࿑'), ('ჹ', 'ჺ'), ('ჼ', 'ჼ'), ('ሇ', 'ሇ'),
+  ('ቇ', 'ቇ'), ('ኇ', 'ኇ'), ('ኯ', 'ኯ'), ('ዏ', 'ዏ'),
+  ('ዯ', 'ዯ'), ('ጏ', 'ጏ'), ('ጟ', 'ጟ'), ('ፇ', 'ፇ'),
+  ('፟', '፠'), ('ᎀ', '᎙'), ('ᦀ', 'ᦩ'), ('ᦰ', 'ᧉ'),
+  ('᧐', '᧙'), ('᧞', '᧟'), ('ᨀ', 'ᨛ'), ('᨞', '᨟'),
+  ('ᵬ', '᷃'), ('⁕', '⁖'), ('⁘', '⁞'), ('ₐ', 'ₔ'),
+  ('₲', '₵'), ('⃫', '⃫'), ('ℼ', 'ℼ'), ('⅌', '⅌'),
+  ('⏑', '⏛'), ('☘', '☘'), ('♾', '♿'), ('⚒', '⚜'),
+  ('⚢', '⚱'), ('⟀', '⟆'), ('⬎', '⬓'), ('Ⰰ', 'Ⱞ'),
+  ('ⰰ', 'ⱞ'), ('Ⲁ', '⳪'), ('⳹', 'ⴥ'), ('ⴰ', 'ⵥ'),
+  ('ⵯ', 'ⵯ'), ('ⶀ', 'ⶖ'), ('ⶠ', 'ⶦ'), ('ⶨ', 'ⶮ'),
+  ('ⶰ', 'ⶶ'), ('ⶸ', 'ⶾ'), ('ⷀ', 'ⷆ'), ('ⷈ', 'ⷎ'),
+  ('ⷐ', 'ⷖ'), ('ⷘ', 'ⷞ'), ('⸀', '⸗'), ('⸜', '⸝'),
+  ('㇀', '㇏'), ('㉾', '㉾'), ('龦', '龻'), ('꜀', '꜖'),
+  ('ꠀ', '꠫'), ('並', '龎'), ('︐', '︙'), ('𐅀', '𐆊'),
+  ('𐎠', '𐏃'), ('𐏈', '𐏕'), ('𐨀', '𐨃'), ('𐨅', '𐨆'),
+  ('𐨌', '𐨓'), ('𐨕', '𐨗'), ('𐨙', '𐨳'), ('𐨸', '𐨺'),
+  ('𐨿', '𐩇'), ('𐩐', '𐩘'), ('𝈀', '𝉅'), ('𝚤', '𝚥'),
+];
+
+pub const V5_0: &'static [(char, char)] = &[
+  ('ɂ', 'ɏ'), ('ͻ', 'ͽ'), ('ӏ', 'ӏ'), ('Ӻ', 'ӿ'), ('Ԑ', 'ԓ'),
+  ('ֺ', 'ֺ'), ('߀', 'ߺ'), ('ॻ', 'ॼ'), ('ॾ', 'ॿ'), ('ೢ', 'ೣ'),
+  ('ೱ', 'ೲ'), ('ᬀ', 'ᭋ'), ('᭐', '᭼'), ('᷄', '᷊'),
+  ('᷾', '᷿'), ('⃬', '⃯'), ('⅍', 'ⅎ'), ('ↄ', 'ↄ'),
+  ('⏜', '⏧'), ('⚲', '⚲'), ('⟇', '⟊'), ('⬔', '⬚'),
+  ('⬠', '⬣'), ('Ⱡ', 'ⱬ'), ('ⱴ', 'ⱷ'), ('ꜗ', 'ꜚ'),
+  ('꜠', '꜡'), ('ꡀ', '꡷'), ('𐤀', '𐤙'), ('𐤟', '𐤟'),
+  ('𒀀', '𒍮'), ('𒐀', '𒑢'), ('𒑰', '𒑳'), ('𝍠', '𝍱'),
+  ('𝟊', '𝟋'),
+];
+
+pub const V5_1: &'static [(char, char)] = &[
+  ('Ͱ', 'ͳ'), ('Ͷ', 'ͷ'), ('Ϗ', 'Ϗ'), ('҇', '҇'), ('Ԕ', 'ԣ'),
+  ('؆', '؊'), ('ؖ', 'ؚ'), ('ػ', 'ؿ'), ('ݮ', 'ݿ'), ('ॱ', 'ॲ'),
+  ('ੑ', 'ੑ'), ('ੵ', 'ੵ'), ('ୄ', 'ୄ'), ('ୢ', 'ୣ'),
+  ('ௐ', 'ௐ'), ('ఽ', 'ఽ'), ('ౘ', 'ౙ'), ('ౢ', 'ౣ'),
+  ('౸', '౿'), ('ഽ', 'ഽ'), ('ൄ', 'ൄ'), ('ൢ', 'ൣ'),
+  ('൰', '൵'), ('൹', 'ൿ'), ('ཫ', 'ཬ'), ('࿎', '࿎'),
+  ('࿒', '࿔'), ('ဢ', 'ဢ'), ('ဨ', 'ဨ'), ('ါ', 'ါ'),
+  ('ဳ', 'ဵ'), ('်', 'ဿ'), ('ၚ', '႙'), ('႞', '႟'),
+  ('ᢪ', 'ᢪ'), ('ᮀ', '᮪'), ('ᮮ', '᮹'), ('ᰀ', '᰷'),
+  ('᰻', '᱉'), ('ᱍ', '᱿'), ('᷋', 'ᷦ'), ('ẜ', 'ẟ'),
+  ('Ỻ', 'ỿ'), ('\u{2064}', '\u{2064}'), ('⃰', '⃰'), ('⅏', '⅏'),
+  ('ↅ', 'ↈ'), ('⚝', '⚝'), ('⚳', '⚼'), ('⛀', '⛃'),
+  ('⟌', '⟌'), ('⟬', '⟯'), ('⬛', '⬟'), ('⬤', '⭌'),
+  ('⭐', '⭔'), ('Ɑ', 'Ɐ'), ('ⱱ', 'ⱳ'), ('ⱸ', 'ⱽ'),
+  ('ⷠ', 'ⷿ'), ('⸘', '⸛'), ('⸞', '⸰'), ('ㄭ', 'ㄭ'),
+  ('㇐', '㇣'), ('龼', '鿃'), ('ꔀ', 'ꘫ'), ('Ꙁ', 'ꙟ'),
+  ('Ꙣ', '꙳'), ('꙼', 'ꚗ'), ('ꜛ', 'ꜟ'), ('Ꜣ', 'ꞌ'),
+  ('ꟻ', 'ꟿ'), ('ꢀ', '꣄'), ('꣎', '꣙'), ('꤀', '꥓'),
+  ('꥟', '꥟'), ('ꨀ', 'ꨶ'), ('ꩀ', 'ꩍ'), ('꩐', '꩙'),
+  ('꩜', '꩟'), ('︤', '︦'), ('𐆐', '𐆛'), ('𐇐', '𐇽'),
+  ('𐊀', '𐊜'), ('𐊠', '𐋐'), ('𐤠', '𐤹'), ('𐤿', '𐤿'),
+  ('𝄩', '𝄩'), ('🀀', '🀫'), ('🀰', '🂓'),
+];
+
+pub const V5_2: &'static [(char, char)] = &[
+  ('Ԥ', 'ԥ'), ('ࠀ', '࠭'), ('࠰', '࠾'), ('ऀ', 'ऀ'),
+  ('ॎ', 'ॎ'), ('ॕ', 'ॕ'), ('ॹ', 'ॺ'), ('৻', '৻'),
+  ('࿕', '࿘'), ('ႚ', 'ႝ'), ('ᅚ', 'ᅞ'), ('ᆣ', 'ᆧ'),
+  ('ᇺ', 'ᇿ'), ('᐀', '᐀'), ('ᙷ', 'ᙿ'), ('ᢰ', 'ᣵ'),
+  ('ᦪ', 'ᦫ'), ('᧚', '᧚'), ('ᨠ', 'ᩞ'), ('᩠', '᩼'),
+  ('᩿', '᪉'), ('᪐', '᪙'), ('᪠', '᪭'), ('᳐', 'ᳲ'),
+  ('᷽', '᷽'), ('₶', '₸'), ('⅐', '⅒'), ('↉', '↉'),
+  ('⏨', '⏨'), ('⚞', '⚟'), ('⚽', '⚿'), ('⛄', '⛍'),
+  ('⛏', '⛡'), ('⛣', '⛣'), ('⛨', '⛿'), ('❗', '❗'),
+  ('⭕', '⭙'), ('Ɒ', 'Ɒ'), ('Ȿ', 'Ɀ'), ('Ⳬ', '⳱'),
+  ('⸱', '⸱'), ('㉄', '㉏'), ('鿄', '鿋'), ('ꓐ', '꓿'),
+  ('ꚠ', '꛷'), ('꠰', '꠹'), ('꣠', 'ꣻ'), ('ꥠ', 'ꥼ'),
+  ('ꦀ', '꧍'), ('ꧏ', '꧙'), ('꧞', '꧟'), ('ꩠ', 'ꩻ'),
+  ('ꪀ', 'ꫂ'), ('ꫛ', '꫟'), ('ꯀ', '꯭'), ('꯰', '꯹'),
+  ('ힰ', 'ퟆ'), ('ퟋ', 'ퟻ'), ('恵', '舘'), ('𐡀', '𐡕'),
+  ('𐡗', '𐡟'), ('𐤚', '𐤛'), ('𐩠', '𐩿'), ('𐬀', '𐬵'),
+  ('𐬹', '𐭕'), ('𐭘', '𐭲'), ('𐭸', '𐭿'), ('𐰀', '𐱈'),
+  ('𐹠', '𐹾'), ('𑂀', '𑃁'), ('𓀀', '𓐮'), ('🄀', '🄊'),
+  ('🄐', '🄮'), ('🄱', '🄱'), ('🄽', '🄽'), ('🄿', '🄿'),
+  ('🅂', '🅂'), ('🅆', '🅆'), ('🅊', '🅎'), ('🅗', '🅗'),
+  ('🅟', '🅟'), ('🅹', '🅹'), ('🅻', '🅼'), ('🅿', '🅿'),
+  ('🆊', '🆍'), ('🆐', '🆐'), ('🈀', '🈀'), ('🈐', '🈱'),
+  ('🉀', '🉈'), ('𪜀', '𫜴'),
+];
+
+pub const V6_0: &'static [(char, char)] = &[
+  ('Ԧ', 'ԧ'), ('ؠ', 'ؠ'), ('ٟ', 'ٟ'), ('ࡀ', '࡛'), ('࡞', '࡞'),
+  ('ऺ', 'ऻ'), ('ॏ', 'ॏ'), ('ॖ', 'ॗ'), ('ॳ', 'ॷ'),
+  ('୲', '୷'), ('ഩ', 'ഩ'), ('ഺ', 'ഺ'), ('ൎ', 'ൎ'),
+  ('ྌ', 'ྏ'), ('࿙', '࿚'), ('፝', '፞'), ('ᯀ', '᯳'),
+  ('᯼', '᯿'), ('᷼', '᷼'), ('ₕ', 'ₜ'), ('₹', '₹'),
+  ('⏩', '⏳'), ('⛎', '⛎'), ('⛢', '⛢'), ('⛤', '⛧'),
+  ('✅', '✅'), ('✊', '✋'), ('✨', '✨'), ('❌', '❌'),
+  ('❎', '❎'), ('❓', '❕'), ('❟', '❠'), ('➕', '➗'),
+  ('➰', '➰'), ('➿', '➿'), ('⟎', '⟏'), ('⵰', '⵰'),
+  ('⵿', '⵿'), ('ㆸ', 'ㆺ'), ('Ꙡ', 'ꙡ'), ('Ɥ', 'ꞎ'),
+  ('Ꞑ', 'ꞑ'), ('Ꞡ', 'ꞩ'), ('ꟺ', 'ꟺ'), ('ꬁ', 'ꬆ'),
+  ('ꬉ', 'ꬎ'), ('ꬑ', 'ꬖ'), ('ꬠ', 'ꬦ'), ('ꬨ', 'ꬮ'),
+  ('﮲', '﯁'), ('𑀀', '𑁍'), ('𑁒', '𑁯'), ('𖠀', '𖨸'),
+  ('𛀀', '𛀁'), ('🂠', '🂮'), ('🂱', '🂾'), ('🃁', '🃏'),
+  ('🃑', '🃟'), ('🄰', '🄰'), ('🄲', '🄼'), ('🄾', '🄾'),
+  ('🅀', '🅁'), ('🅃', '🅅'), ('🅇', '🅉'), ('🅏', '🅖'),
+  ('🅘', '🅞'), ('🅠', '🅩'), ('🅰', '🅸'), ('🅺', '🅺'),
+  ('🅽', '🅾'), ('🆀', '🆉'), ('🆎', '🆏'), ('🆑', '🆚'),
+  ('🇦', '🇿'), ('🈁', '🈂'), ('🈲', '🈺'), ('🉐', '🉑'),
+  ('🌀', '🌠'), ('🌰', '🌵'), ('🌷', '🍼'), ('🎀', '🎓'),
+  ('🎠', '🏄'), ('🏆', '🏊'), ('🏠', '🏰'), ('🐀', '🐾'),
+  ('👀', '👀'), ('👂', '📷'), ('📹', '📼'), ('🔀', '🔽'),
+  ('🕐', '🕧'), ('🗻', '🗿'), ('😁', '😐'), ('😒', '😔'),
+  ('😖', '😖'), ('😘', '😘'), ('😚', '😚'), ('😜', '😞'),
+  ('😠', '😥'), ('😨', '😫'), ('😭', '😭'), ('😰', '😳'),
+  ('😵', '🙀'), ('🙅', '🙏'), ('🚀', '🛅'), ('🜀', '🝳'),
+  ('𫝀', '𫠝'),
+];
+
+pub const V6_1: &'static [(char, char)] = &[
+  ('֏', '֏'), ('\u{604}', '\u{604}'), ('ࢠ', 'ࢠ'), ('ࢢ', 'ࢬ'),
+  ('ࣤ', 'ࣾ'), ('૰', '૰'), ('ໞ', 'ໟ'), ('Ⴧ', 'Ⴧ'),
+  ('Ⴭ', 'Ⴭ'), ('ჽ', 'ჿ'), ('᮫', 'ᮭ'), ('ᮺ', 'ᮿ'),
+  ('᳀', '᳇'), ('ᳳ', 'ᳶ'), ('⟋', '⟋'), ('⟍', '⟍'),
+  ('Ⳳ', 'ⳳ'), ('ⴧ', 'ⴧ'), ('ⴭ', 'ⴭ'), ('ⵦ', 'ⵧ'),
+  ('⸲', '⸻'), ('鿌', '鿌'), ('ꙴ', 'ꙻ'), ('ꚟ', 'ꚟ'),
+  ('Ꞓ', 'ꞓ'), ('Ɦ', 'Ɦ'), ('ꟸ', 'ꟹ'), ('ꫠ', '꫶'),
+  ('郞', '隷'), ('𐦀', '𐦷'), ('𐦾', '𐦿'), ('𑃐', '𑃨'),
+  ('𑃰', '𑃹'), ('𑄀', '𑄴'), ('𑄶', '𑅃'), ('𑆀', '𑇈'),
+  ('𑇐', '𑇙'), ('𑚀', '𑚷'), ('𑛀', '𑛉'), ('𖼀', '𖽄'),
+  ('𖽐', '𖽾'), ('𖾏', '𖾟'), ('𞸀', '𞸃'), ('𞸅', '𞸟'),
+  ('𞸡', '𞸢'), ('𞸤', '𞸤'), ('𞸧', '𞸧'), ('𞸩', '𞸲'),
+  ('𞸴', '𞸷'), ('𞸹', '𞸹'), ('𞸻', '𞸻'), ('𞹂', '𞹂'),
+  ('𞹇', '𞹇'), ('𞹉', '𞹉'), ('𞹋', '𞹋'), ('𞹍', '𞹏'),
+  ('𞹑', '𞹒'), ('𞹔', '𞹔'), ('𞹗', '𞹗'), ('𞹙', '𞹙'),
+  ('𞹛', '𞹛'), ('𞹝', '𞹝'), ('𞹟', '𞹟'), ('𞹡', '𞹢'),
+  ('𞹤', '𞹤'), ('𞹧', '𞹪'), ('𞹬', '𞹲'), ('𞹴', '𞹷'),
+  ('𞹹', '𞹼'), ('𞹾', '𞹾'), ('𞺀', '𞺉'), ('𞺋', '𞺛'),
+  ('𞺡', '𞺣'), ('𞺥', '𞺩'), ('𞺫', '𞺻'), ('𞻰', '𞻱'),
+  ('🅪', '🅫'), ('🕀', '🕃'), ('😀', '😀'), ('😑', '😑'),
+  ('😕', '😕'), ('😗', '😗'), ('😙', '😙'), ('😛', '😛'),
+  ('😟', '😟'), ('😦', '😧'), ('😬', '😬'), ('😮', '😯'),
+  ('😴', '😴'),
+];
+
+pub const V6_2: &'static [(char, char)] = &[
+  ('₺', '₺'),
+];
+
+pub const V6_3: &'static [(char, char)] = &[
+  ('\u{61c}', '\u{61c}'), ('\u{2066}', '\u{2069}'),
+];
+
+pub const V7_0: &'static [(char, char)] = &[
+  ('Ϳ', 'Ϳ'), ('Ԩ', 'ԯ'), ('֍', '֎'), ('\u{605}', '\u{605}'),
+  ('ࢡ', 'ࢡ'), ('ࢭ', 'ࢲ'), ('ࣿ', 'ࣿ'), ('ॸ', 'ॸ'),
+  ('ঀ', 'ঀ'), ('ఀ', 'ఀ'), ('ఴ', 'ఴ'), ('ಁ', 'ಁ'),
+  ('ഁ', 'ഁ'), ('෦', '෯'), ('ᛱ', 'ᛸ'), ('ᤝ', 'ᤞ'),
+  ('᪰', '᪾'), ('᳸', '᳹'), ('ᷧ', '᷵'), ('₻', '₽'),
+  ('⏴', '⏺'), ('✀', '✀'), ('⭍', '⭏'), ('⭚', '⭳'),
+  ('⭶', '⮕'), ('⮘', '⮹'), ('⮽', '⯈'), ('⯊', '⯑'),
+  ('⸼', '⹂'), ('Ꚙ', 'ꚝ'), ('ꞔ', 'ꞟ'), ('Ɜ', 'Ɬ'),
+  ('Ʞ', 'Ʇ'), ('ꟷ', 'ꟷ'), ('ꧠ', 'ꧾ'), ('ꩼ', 'ꩿ'),
+  ('ꬰ', 'ꭟ'), ('ꭤ', 'ꭥ'), ('︧', '︭'), ('𐆋', '𐆌'),
+  ('𐆠', '𐆠'), ('𐋠', '𐋻'), ('𐌟', '𐌟'), ('𐍐', '𐍺'),
+  ('𐔀', '𐔧'), ('𐔰', '𐕣'), ('𐕯', '𐕯'), ('𐘀', '𐜶'),
+  ('𐝀', '𐝕'), ('𐝠', '𐝧'), ('𐡠', '𐢞'), ('𐢧', '𐢯'),
+  ('𐪀', '𐪟'), ('𐫀', '𐫦'), ('𐫫', '𐫶'), ('𐮀', '𐮑'),
+  ('𐮙', '𐮜'), ('𐮩', '𐮯'), ('𑁿', '𑁿'), ('𑅐', '𑅶'),
+  ('𑇍', '𑇍'), ('𑇚', '𑇚'), ('𑇡', '𑇴'), ('𑈀', '𑈑'),
+  ('𑈓', '𑈽'), ('𑊰', '𑋪'), ('𑋰', '𑋹'), ('𑌁', '𑌃'),
+  ('𑌅', '𑌌'), ('𑌏', '𑌐'), ('𑌓', '𑌨'), ('𑌪', '𑌰'),
+  ('𑌲', '𑌳'), ('𑌵', '𑌹'), ('𑌼', '𑍄'), ('𑍇', '𑍈'),
+  ('𑍋', '𑍍'), ('𑍗', '𑍗'), ('𑍝', '𑍣'), ('𑍦', '𑍬'),
+  ('𑍰', '𑍴'), ('𑒀', '𑓇'), ('𑓐', '𑓙'), ('𑖀', '𑖵'),
+  ('𑖸', '𑗉'), ('𑘀', '𑙄'), ('𑙐', '𑙙'), ('𑢠', '𑣲'),
+  ('𑣿', '𑣿'), ('𑫀', '𑫸'), ('𒍯', '𒎘'), ('𒑣', '𒑮'),
+  ('𒑴', '𒑴'), ('𖩀', '𖩞'), ('𖩠', '𖩩'), ('𖩮', '𖩯'),
+  ('𖫐', '𖫭'), ('𖫰', '𖫵'), ('𖬀', '𖭅'), ('𖭐', '𖭙'),
+  ('𖭛', '𖭡'), ('𖭣', '𖭷'), ('𖭽', '𖮏'), ('𛰀', '𛱪'),
+  ('𛱰', '𛱼'), ('𛲀', '𛲈'), ('𛲐', '𛲙'), ('𛲜', '\u{1bca3}'),
+  ('𞠀', '𞣄'), ('𞣇', '𞣖'), ('🂿', '🂿'), ('🃠', '🃵'),
+  ('🄋', '🄌'), ('🌡', '🌬'), ('🌶', '🌶'), ('🍽', '🍽'),
+  ('🎔', '🎟'), ('🏅', '🏅'), ('🏋', '🏎'), ('🏔', '🏟'),
+  ('🏱', '🏷'), ('🐿', '🐿'), ('👁', '👁'), ('📸', '📸'),
+  ('📽', '📾'), ('🔾', '🔿'), ('🕄', '🕊'), ('🕨', '🕹'),
+  ('🕻', '🖣'), ('🖥', '🗺'), ('🙁', '🙂'), ('🙐', '🙿'),
+  ('🛆', '🛏'), ('🛠', '🛬'), ('🛰', '🛳'), ('🞀', '🟔'),
+  ('🠀', '🠋'), ('🠐', '🡇'), ('🡐', '🡙'), ('🡠', '🢇'),
+  ('🢐', '🢭'),
+];
+
+pub const V8_0: &'static [(char, char)] = &[
+  ('ࢳ', 'ࢴ'), ('ࣣ', 'ࣣ'), ('ૹ', 'ૹ'), ('ౚ', 'ౚ'),
+  ('ൟ', 'ൟ'), ('Ᏽ', 'Ᏽ'), ('ᏸ', 'ᏽ'), ('₾', '₾'),
+  ('↊', '↋'), ('⯬', '⯯'), ('鿍', '鿕'), ('ꚞ', 'ꚞ'),
+  ('ꞏ', 'ꞏ'), ('Ʝ', 'ꞷ'), ('꣼', 'ꣽ'), ('ꭠ', 'ꭣ'),
+  ('ꭰ', 'ꮿ'), ('︮', '︯'), ('𐣠', '𐣲'), ('𐣴', '𐣵'),
+  ('𐣻', '𐣿'), ('𐦼', '𐦽'), ('𐧀', '𐧏'), ('𐧒', '𐧿'),
+  ('𐲀', '𐲲'), ('𐳀', '𐳲'), ('𐳺', '𐳿'), ('𑇉', '𑇌'),
+  ('𑇛', '𑇟'), ('𑊀', '𑊆'), ('𑊈', '𑊈'), ('𑊊', '𑊍'),
+  ('𑊏', '𑊝'), ('𑊟', '𑊩'), ('𑌀', '𑌀'), ('𑍐', '𑍐'),
+  ('𑗊', '𑗝'), ('𑜀', '𑜙'), ('𑜝', '𑜫'), ('𑜰', '𑜿'),
+  ('𒎙', '𒎙'), ('𒒀', '𒕃'), ('𔐀', '𔙆'), ('𝇞', '𝇨'),
+  ('𝠀', '𝪋'), ('𝪛', '𝪟'), ('𝪡', '𝪯'), ('🌭', '🌯'),
+  ('🍾', '🍿'), ('🏏', '🏓'), ('🏸', '🏿'), ('📿', '📿'),
+  ('🕋', '🕏'), ('🙃', '🙄'), ('🛐', '🛐'), ('🤐', '🤘'),
+  ('🦀', '🦄'), ('🧀', '🧀'), ('𫠠', '𬺡'),
+];
+
+pub const V9_0: &'static [(char, char)] = &[
+  ('ࢶ', 'ࢽ'), ('ࣔ', '\u{8e2}'), ('ಀ', 'ಀ'), ('൏', '൏'),
+  ('ൔ', 'ൖ'), ('൘', '൞'), ('൶', '൸'), ('ᲀ', 'ᲈ'),
+  ('᷻', '᷻'), ('⏻', '⏾'), ('⹃', '⹄'), ('Ɪ', 'Ɪ'),
+  ('ꣅ', 'ꣅ'), ('𐆍', '𐆎'), ('𐒰', '𐓓'), ('𐓘', '𐓻'),
+  ('𑈾', '𑈾'), ('𑐀', '𑑙'), ('𑑛', '𑑛'), ('𑑝', '𑑝'),
+  ('𑙠', '𑙬'), ('𑰀', '𑰈'), ('𑰊', '𑰶'), ('𑰸', '𑱅'),
+  ('𑱐', '𑱬'), ('𑱰', '𑲏'), ('𑲒', '𑲧'), ('𑲩', '𑲶'),
+  ('𖿠', '𖿠'), ('𗀀', '𘟬'), ('𘠀', '𘫲'), ('𞀀', '𞀆'),
+  ('𞀈', '𞀘'), ('𞀛', '𞀡'), ('𞀣', '𞀤'), ('𞀦', '𞀪'),
+  ('𞤀', '𞥊'), ('𞥐', '𞥙'), ('𞥞', '𞥟'), ('🆛', '🆬'),
+  ('🈻', '🈻'), ('🕺', '🕺'), ('🖤', '🖤'), ('🛑', '🛒'),
+  ('🛴', '🛶'), ('🤙', '🤞'), ('🤠', '🤧'), ('🤰', '🤰'),
+  ('🤳', '🤾'), ('🥀', '🥋'), ('🥐', '🥞'), ('🦅', '🦑'),
+];
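The age tables above are keyed by version name and hold sorted, non-overlapping inclusive (start, end) ranges, and BY_NAME itself is sorted by name. A minimal sketch of how such a table can be queried follows; TOY_BY_NAME (trimmed to two entries copied from the tables above) and the assigned_in helper are hypothetical illustrations under the assumption that lookups are done by binary search, not code from the vendored crate.

// Illustrative sketch only: look up a version's range table by name, then
// test whether a character falls inside any of its inclusive ranges.
const TOY_BY_NAME: &[(&str, &[(char, char)])] = &[
    ("V1_1", &[('\u{0}', 'ǵ'), ('Ǻ', 'ȗ')]),
    ("V2_0", &[('֑', '֡'), ('가', '힣')]),
];

fn assigned_in(version: &str, c: char) -> bool {
    // Both levels are sorted, so binary search works at each level.
    let ranges = match TOY_BY_NAME.binary_search_by_key(&version, |&(name, _)| name) {
        Ok(i) => TOY_BY_NAME[i].1,
        Err(_) => return false,
    };
    ranges
        .binary_search_by(|&(start, end)| {
            if c < start {
                std::cmp::Ordering::Greater
            } else if c > end {
                std::cmp::Ordering::Less
            } else {
                std::cmp::Ordering::Equal
            }
        })
        .is_ok()
}

fn main() {
    assert!(assigned_in("V1_1", 'A'));   // covered by ('\u{0}', 'ǵ')
    assert!(assigned_in("V2_0", '가'));  // covered by ('가', '힣')
    assert!(!assigned_in("V1_1", '가'));
}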
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-syntax/src/unicode_tables/case_folding_simple.rs
@@ -0,0 +1,662 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+//  ucd-generate case-folding-simple /home/andrew/tmp/ucd-10.0.0/ --chars --all-pairs
+//
+// ucd-generate is available on crates.io.
+
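Because this table is generated with --all-pairs, every member of a simple case-folding equivalence class lists all of the other members directly. The sketch below shows how a character can be expanded to its full class; TOY_FOLDS (three entries mirroring the 'k'/'K'/Kelvin-sign rows of the table that follows) and equivalence_class are hypothetical illustrations, not part of the vendored crate.

// Illustrative sketch only: a character's case-insensitive class is itself
// plus everything the --all-pairs table lists for it.
const TOY_FOLDS: &[(char, &[char])] = &[
    ('K', &['k', '\u{212a}']),        // LATIN CAPITAL LETTER K
    ('k', &['K', '\u{212a}']),        // LATIN SMALL LETTER K
    ('\u{212a}', &['K', 'k']),        // KELVIN SIGN
];

fn equivalence_class(c: char) -> Vec<char> {
    let mut class = vec![c];
    if let Ok(i) = TOY_FOLDS.binary_search_by_key(&c, |&(k, _)| k) {
        class.extend_from_slice(TOY_FOLDS[i].1);
    }
    class.sort();
    class.dedup();
    class
}

fn main() {
    // 'k', 'K' and the Kelvin sign all map to one another.
    assert_eq!(vec!['K', 'k', '\u{212a}'], equivalence_class('k'));
}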
+pub const CASE_FOLDING_SIMPLE: &'static [(char, &'static [char])] = &[
+  ('A', &['a']), ('B', &['b']), ('C', &['c']), ('D', &['d']), ('E', &['e']),
+  ('F', &['f']), ('G', &['g']), ('H', &['h']), ('I', &['i']), ('J', &['j']),
+  ('K', &['k', 'K', ]), ('L', &['l']), ('M', &['m']), ('N', &['n']), ('O', &[
+  'o']), ('P', &['p']), ('Q', &['q']), ('R', &['r']), ('S', &['s', 'ſ', ]),
+  ('T', &['t']), ('U', &['u']), ('V', &['v']), ('W', &['w']), ('X', &['x']),
+  ('Y', &['y']), ('Z', &['z']), ('a', &['A']), ('b', &['B']), ('c', &['C']),
+  ('d', &['D']), ('e', &['E']), ('f', &['F']), ('g', &['G']), ('h', &['H']),
+  ('i', &['I']), ('j', &['J']), ('k', &['K', 'K', ]), ('l', &['L']), ('m', &[
+  'M']), ('n', &['N']), ('o', &['O']), ('p', &['P']), ('q', &['Q']), ('r', &[
+  'R']), ('s', &['S', 'ſ', ]), ('t', &['T']), ('u', &['U']), ('v', &['V']),
+  ('w', &['W']), ('x', &['X']), ('y', &['Y']), ('z', &['Z']), ('µ', &['Μ',
+  'μ', ]), ('À', &['à']), ('Á', &['á']), ('Â', &['â']), ('Ã', &['ã'
+  ]), ('Ä', &['ä']), ('Å', &['å', 'Å', ]), ('Æ', &['æ']), ('Ç', &['ç'
+  ]), ('È', &['è']), ('É', &['é']), ('Ê', &['ê']), ('Ë', &['ë']),
+  ('Ì', &['ì']), ('Í', &['í']), ('Î', &['î']), ('Ï', &['ï']), ('Ð', &[
+  'ð']), ('Ñ', &['ñ']), ('Ò', &['ò']), ('Ó', &['ó']), ('Ô', &['ô']),
+  ('Õ', &['õ']), ('Ö', &['ö']), ('Ø', &['ø']), ('Ù', &['ù']), ('Ú', &[
+  'ú']), ('Û', &['û']), ('Ü', &['ü']), ('Ý', &['ý']), ('Þ', &['þ']),
+  ('ß', &['ẞ']), ('à', &['À']), ('á', &['Á']), ('â', &['Â']),
+  ('ã', &['Ã']), ('ä', &['Ä']), ('å', &['Å', 'Å', ]), ('æ', &['Æ']),
+  ('ç', &['Ç']), ('è', &['È']), ('é', &['É']), ('ê', &['Ê']), ('ë', &[
+  'Ë']), ('ì', &['Ì']), ('í', &['Í']), ('î', &['Î']), ('ï', &['Ï']),
+  ('ð', &['Ð']), ('ñ', &['Ñ']), ('ò', &['Ò']), ('ó', &['Ó']), ('ô', &[
+  'Ô']), ('õ', &['Õ']), ('ö', &['Ö']), ('ø', &['Ø']), ('ù', &['Ù']),
+  ('ú', &['Ú']), ('û', &['Û']), ('ü', &['Ü']), ('ý', &['Ý']), ('þ', &[
+  'Þ']), ('ÿ', &['Ÿ']), ('Ā', &['ā']), ('ā', &['Ā']), ('Ă', &['ă']),
+  ('ă', &['Ă']), ('Ą', &['ą']), ('ą', &['Ą']), ('Ć', &['ć']), ('ć', &[
+  'Ć']), ('Ĉ', &['ĉ']), ('ĉ', &['Ĉ']), ('Ċ', &['ċ']), ('ċ', &['Ċ']),
+  ('Č', &['č']), ('č', &['Č']), ('Ď', &['ď']), ('ď', &['Ď']), ('Đ', &[
+  'đ']), ('đ', &['Đ']), ('Ē', &['ē']), ('ē', &['Ē']), ('Ĕ', &['ĕ']),
+  ('ĕ', &['Ĕ']), ('Ė', &['ė']), ('ė', &['Ė']), ('Ę', &['ę']), ('ę', &[
+  'Ę']), ('Ě', &['ě']), ('ě', &['Ě']), ('Ĝ', &['ĝ']), ('ĝ', &['Ĝ']),
+  ('Ğ', &['ğ']), ('ğ', &['Ğ']), ('Ġ', &['ġ']), ('ġ', &['Ġ']), ('Ģ', &[
+  'ģ']), ('ģ', &['Ģ']), ('Ĥ', &['ĥ']), ('ĥ', &['Ĥ']), ('Ħ', &['ħ']),
+  ('ħ', &['Ħ']), ('Ĩ', &['ĩ']), ('ĩ', &['Ĩ']), ('Ī', &['ī']), ('ī', &[
+  'Ī']), ('Ĭ', &['ĭ']), ('ĭ', &['Ĭ']), ('Į', &['į']), ('į', &['Į']),
+  ('IJ', &['ij']), ('ij', &['IJ']), ('Ĵ', &['ĵ']), ('ĵ', &['Ĵ']), ('Ķ', &[
+  'ķ']), ('ķ', &['Ķ']), ('Ĺ', &['ĺ']), ('ĺ', &['Ĺ']), ('Ļ', &['ļ']),
+  ('ļ', &['Ļ']), ('Ľ', &['ľ']), ('ľ', &['Ľ']), ('Ŀ', &['ŀ']), ('ŀ', &[
+  'Ŀ']), ('Ł', &['ł']), ('ł', &['Ł']), ('Ń', &['ń']), ('ń', &['Ń']),
+  ('Ņ', &['ņ']), ('ņ', &['Ņ']), ('Ň', &['ň']), ('ň', &['Ň']), ('Ŋ', &[
+  'ŋ']), ('ŋ', &['Ŋ']), ('Ō', &['ō']), ('ō', &['Ō']), ('Ŏ', &['ŏ']),
+  ('ŏ', &['Ŏ']), ('Ő', &['ő']), ('ő', &['Ő']), ('Œ', &['œ']), ('œ', &[
+  'Œ']), ('Ŕ', &['ŕ']), ('ŕ', &['Ŕ']), ('Ŗ', &['ŗ']), ('ŗ', &['Ŗ']),
+  ('Ř', &['ř']), ('ř', &['Ř']), ('Ś', &['ś']), ('ś', &['Ś']), ('Ŝ', &[
+  'ŝ']), ('ŝ', &['Ŝ']), ('Ş', &['ş']), ('ş', &['Ş']), ('Š', &['š']),
+  ('š', &['Š']), ('Ţ', &['ţ']), ('ţ', &['Ţ']), ('Ť', &['ť']), ('ť', &[
+  'Ť']), ('Ŧ', &['ŧ']), ('ŧ', &['Ŧ']), ('Ũ', &['ũ']), ('ũ', &['Ũ']),
+  ('Ū', &['ū']), ('ū', &['Ū']), ('Ŭ', &['ŭ']), ('ŭ', &['Ŭ']), ('Ů', &[
+  'ů']), ('ů', &['Ů']), ('Ű', &['ű']), ('ű', &['Ű']), ('Ų', &['ų']),
+  ('ų', &['Ų']), ('Ŵ', &['ŵ']), ('ŵ', &['Ŵ']), ('Ŷ', &['ŷ']), ('ŷ', &[
+  'Ŷ']), ('Ÿ', &['ÿ']), ('Ź', &['ź']), ('ź', &['Ź']), ('Ż', &['ż']),
+  ('ż', &['Ż']), ('Ž', &['ž']), ('ž', &['Ž']), ('ſ', &['S', 's', ]),
+  ('ƀ', &['Ƀ']), ('Ɓ', &['ɓ']), ('Ƃ', &['ƃ']), ('ƃ', &['Ƃ']), ('Ƅ', &[
+  'ƅ']), ('ƅ', &['Ƅ']), ('Ɔ', &['ɔ']), ('Ƈ', &['ƈ']), ('ƈ', &['Ƈ']),
+  ('Ɖ', &['ɖ']), ('Ɗ', &['ɗ']), ('Ƌ', &['ƌ']), ('ƌ', &['Ƌ']), ('Ǝ', &[
+  'ǝ']), ('Ə', &['ə']), ('Ɛ', &['ɛ']), ('Ƒ', &['ƒ']), ('ƒ', &['Ƒ']),
+  ('Ɠ', &['ɠ']), ('Ɣ', &['ɣ']), ('ƕ', &['Ƕ']), ('Ɩ', &['ɩ']), ('Ɨ', &[
+  'ɨ']), ('Ƙ', &['ƙ']), ('ƙ', &['Ƙ']), ('ƚ', &['Ƚ']), ('Ɯ', &['ɯ']),
+  ('Ɲ', &['ɲ']), ('ƞ', &['Ƞ']), ('Ɵ', &['ɵ']), ('Ơ', &['ơ']), ('ơ', &[
+  'Ơ']), ('Ƣ', &['ƣ']), ('ƣ', &['Ƣ']), ('Ƥ', &['ƥ']), ('ƥ', &['Ƥ']),
+  ('Ʀ', &['ʀ']), ('Ƨ', &['ƨ']), ('ƨ', &['Ƨ']), ('Ʃ', &['ʃ']), ('Ƭ', &[
+  'ƭ']), ('ƭ', &['Ƭ']), ('Ʈ', &['ʈ']), ('Ư', &['ư']), ('ư', &['Ư']),
+  ('Ʊ', &['ʊ']), ('Ʋ', &['ʋ']), ('Ƴ', &['ƴ']), ('ƴ', &['Ƴ']), ('Ƶ', &[
+  'ƶ']), ('ƶ', &['Ƶ']), ('Ʒ', &['ʒ']), ('Ƹ', &['ƹ']), ('ƹ', &['Ƹ']),
+  ('Ƽ', &['ƽ']), ('ƽ', &['Ƽ']), ('ƿ', &['Ƿ']), ('DŽ', &['Dž', 'dž', ]),
+  ('Dž', &['DŽ', 'dž', ]), ('dž', &['DŽ', 'Dž', ]), ('LJ', &['Lj', 'lj', ]),
+  ('Lj', &['LJ', 'lj', ]), ('lj', &['LJ', 'Lj', ]), ('NJ', &['Nj', 'nj', ]),
+  ('Nj', &['NJ', 'nj', ]), ('nj', &['NJ', 'Nj', ]), ('Ǎ', &['ǎ']), ('ǎ', &[
+  'Ǎ']), ('Ǐ', &['ǐ']), ('ǐ', &['Ǐ']), ('Ǒ', &['ǒ']), ('ǒ', &['Ǒ']),
+  ('Ǔ', &['ǔ']), ('ǔ', &['Ǔ']), ('Ǖ', &['ǖ']), ('ǖ', &['Ǖ']), ('Ǘ', &[
+  'ǘ']), ('ǘ', &['Ǘ']), ('Ǚ', &['ǚ']), ('ǚ', &['Ǚ']), ('Ǜ', &['ǜ']),
+  ('ǜ', &['Ǜ']), ('ǝ', &['Ǝ']), ('Ǟ', &['ǟ']), ('ǟ', &['Ǟ']), ('Ǡ', &[
+  'ǡ']), ('ǡ', &['Ǡ']), ('Ǣ', &['ǣ']), ('ǣ', &['Ǣ']), ('Ǥ', &['ǥ']),
+  ('ǥ', &['Ǥ']), ('Ǧ', &['ǧ']), ('ǧ', &['Ǧ']), ('Ǩ', &['ǩ']), ('ǩ', &[
+  'Ǩ']), ('Ǫ', &['ǫ']), ('ǫ', &['Ǫ']), ('Ǭ', &['ǭ']), ('ǭ', &['Ǭ']),
+  ('Ǯ', &['ǯ']), ('ǯ', &['Ǯ']), ('DZ', &['Dz', 'dz', ]), ('Dz', &['DZ',
+  'dz', ]), ('dz', &['DZ', 'Dz', ]), ('Ǵ', &['ǵ']), ('ǵ', &['Ǵ']),
+  ('Ƕ', &['ƕ']), ('Ƿ', &['ƿ']), ('Ǹ', &['ǹ']), ('ǹ', &['Ǹ']), ('Ǻ', &[
+  'ǻ']), ('ǻ', &['Ǻ']), ('Ǽ', &['ǽ']), ('ǽ', &['Ǽ']), ('Ǿ', &['ǿ']),
+  ('ǿ', &['Ǿ']), ('Ȁ', &['ȁ']), ('ȁ', &['Ȁ']), ('Ȃ', &['ȃ']), ('ȃ', &[
+  'Ȃ']), ('Ȅ', &['ȅ']), ('ȅ', &['Ȅ']), ('Ȇ', &['ȇ']), ('ȇ', &['Ȇ']),
+  ('Ȉ', &['ȉ']), ('ȉ', &['Ȉ']), ('Ȋ', &['ȋ']), ('ȋ', &['Ȋ']), ('Ȍ', &[
+  'ȍ']), ('ȍ', &['Ȍ']), ('Ȏ', &['ȏ']), ('ȏ', &['Ȏ']), ('Ȑ', &['ȑ']),
+  ('ȑ', &['Ȑ']), ('Ȓ', &['ȓ']), ('ȓ', &['Ȓ']), ('Ȕ', &['ȕ']), ('ȕ', &[
+  'Ȕ']), ('Ȗ', &['ȗ']), ('ȗ', &['Ȗ']), ('Ș', &['ș']), ('ș', &['Ș']),
+  ('Ț', &['ț']), ('ț', &['Ț']), ('Ȝ', &['ȝ']), ('ȝ', &['Ȝ']), ('Ȟ', &[
+  'ȟ']), ('ȟ', &['Ȟ']), ('Ƞ', &['ƞ']), ('Ȣ', &['ȣ']), ('ȣ', &['Ȣ']),
+  ('Ȥ', &['ȥ']), ('ȥ', &['Ȥ']), ('Ȧ', &['ȧ']), ('ȧ', &['Ȧ']), ('Ȩ', &[
+  'ȩ']), ('ȩ', &['Ȩ']), ('Ȫ', &['ȫ']), ('ȫ', &['Ȫ']), ('Ȭ', &['ȭ']),
+  ('ȭ', &['Ȭ']), ('Ȯ', &['ȯ']), ('ȯ', &['Ȯ']), ('Ȱ', &['ȱ']), ('ȱ', &[
+  'Ȱ']), ('Ȳ', &['ȳ']), ('ȳ', &['Ȳ']), ('Ⱥ', &['ⱥ']), ('Ȼ', &['ȼ']),
+  ('ȼ', &['Ȼ']), ('Ƚ', &['ƚ']), ('Ⱦ', &['ⱦ']), ('ȿ', &['Ȿ']),
+  ('ɀ', &['Ɀ']), ('Ɂ', &['ɂ']), ('ɂ', &['Ɂ']), ('Ƀ', &['ƀ']),
+  ('Ʉ', &['ʉ']), ('Ʌ', &['ʌ']), ('Ɇ', &['ɇ']), ('ɇ', &['Ɇ']), ('Ɉ', &[
+  'ɉ']), ('ɉ', &['Ɉ']), ('Ɋ', &['ɋ']), ('ɋ', &['Ɋ']), ('Ɍ', &['ɍ']),
+  ('ɍ', &['Ɍ']), ('Ɏ', &['ɏ']), ('ɏ', &['Ɏ']), ('ɐ', &['Ɐ']),
+  ('ɑ', &['Ɑ']), ('ɒ', &['Ɒ']), ('ɓ', &['Ɓ']), ('ɔ', &['Ɔ']),
+  ('ɖ', &['Ɖ']), ('ɗ', &['Ɗ']), ('ə', &['Ə']), ('ɛ', &['Ɛ']), ('ɜ', &[
+  'Ɜ']), ('ɠ', &['Ɠ']), ('ɡ', &['Ɡ']), ('ɣ', &['Ɣ']), ('ɥ', &['Ɥ'
+  ]), ('ɦ', &['Ɦ']), ('ɨ', &['Ɨ']), ('ɩ', &['Ɩ']), ('ɪ', &['Ɪ']),
+  ('ɫ', &['Ɫ']), ('ɬ', &['Ɬ']), ('ɯ', &['Ɯ']), ('ɱ', &['Ɱ']),
+  ('ɲ', &['Ɲ']), ('ɵ', &['Ɵ']), ('ɽ', &['Ɽ']), ('ʀ', &['Ʀ']),
+  ('ʃ', &['Ʃ']), ('ʇ', &['Ʇ']), ('ʈ', &['Ʈ']), ('ʉ', &['Ʉ']),
+  ('ʊ', &['Ʊ']), ('ʋ', &['Ʋ']), ('ʌ', &['Ʌ']), ('ʒ', &['Ʒ']), ('ʝ', &[
+  'Ʝ']), ('ʞ', &['Ʞ']), ('ͅ', &['Ι', 'ι', 'ι', ]), ('Ͱ', &['ͱ']),
+  ('ͱ', &['Ͱ']), ('Ͳ', &['ͳ']), ('ͳ', &['Ͳ']), ('Ͷ', &['ͷ']), ('ͷ', &[
+  'Ͷ']), ('ͻ', &['Ͻ']), ('ͼ', &['Ͼ']), ('ͽ', &['Ͽ']), ('Ϳ', &['ϳ']),
+  ('Ά', &['ά']), ('Έ', &['έ']), ('Ή', &['ή']), ('Ί', &['ί']), ('Ό', &[
+  'ό']), ('Ύ', &['ύ']), ('Ώ', &['ώ']), ('Α', &['α']), ('Β', &['β',
+  'ϐ', ]), ('Γ', &['γ']), ('Δ', &['δ']), ('Ε', &['ε', 'ϵ', ]),
+  ('Ζ', &['ζ']), ('Η', &['η']), ('Θ', &['θ', 'ϑ', 'ϴ', ]), ('Ι', &[
+  'ͅ', 'ι', 'ι', ]), ('Κ', &['κ', 'ϰ', ]), ('Λ', &['λ']), ('Μ', &[
+  'µ', 'μ', ]), ('Ν', &['ν']), ('Ξ', &['ξ']), ('Ο', &['ο']), ('Π', &[
+  'π', 'ϖ', ]), ('Ρ', &['ρ', 'ϱ', ]), ('Σ', &['ς', 'σ', ]), ('Τ', &[
+  'τ']), ('Υ', &['υ']), ('Φ', &['φ', 'ϕ', ]), ('Χ', &['χ']), ('Ψ', &[
+  'ψ']), ('Ω', &['ω', 'Ω', ]), ('Ϊ', &['ϊ']), ('Ϋ', &['ϋ']), ('ά', &[
+  'Ά']), ('έ', &['Έ']), ('ή', &['Ή']), ('ί', &['Ί']), ('α', &['Α']),
+  ('β', &['Β', 'ϐ', ]), ('γ', &['Γ']), ('δ', &['Δ']), ('ε', &['Ε',
+  'ϵ', ]), ('ζ', &['Ζ']), ('η', &['Η']), ('θ', &['Θ', 'ϑ', 'ϴ', ]),
+  ('ι', &['ͅ', 'Ι', 'ι', ]), ('κ', &['Κ', 'ϰ', ]), ('λ', &['Λ']),
+  ('μ', &['µ', 'Μ', ]), ('ν', &['Ν']), ('ξ', &['Ξ']), ('ο', &['Ο']),
+  ('π', &['Π', 'ϖ', ]), ('ρ', &['Ρ', 'ϱ', ]), ('ς', &['Σ', 'σ', ]),
+  ('σ', &['Σ', 'ς', ]), ('τ', &['Τ']), ('υ', &['Υ']), ('φ', &['Φ',
+  'ϕ', ]), ('χ', &['Χ']), ('ψ', &['Ψ']), ('ω', &['Ω', 'Ω', ]),
+  ('ϊ', &['Ϊ']), ('ϋ', &['Ϋ']), ('ό', &['Ό']), ('ύ', &['Ύ']), ('ώ', &[
+  'Ώ']), ('Ϗ', &['ϗ']), ('ϐ', &['Β', 'β', ]), ('ϑ', &['Θ', 'θ', 'ϴ',
+  ]), ('ϕ', &['Φ', 'φ', ]), ('ϖ', &['Π', 'π', ]), ('ϗ', &['Ϗ']),
+  ('Ϙ', &['ϙ']), ('ϙ', &['Ϙ']), ('Ϛ', &['ϛ']), ('ϛ', &['Ϛ']), ('Ϝ', &[
+  'ϝ']), ('ϝ', &['Ϝ']), ('Ϟ', &['ϟ']), ('ϟ', &['Ϟ']), ('Ϡ', &['ϡ']),
+  ('ϡ', &['Ϡ']), ('Ϣ', &['ϣ']), ('ϣ', &['Ϣ']), ('Ϥ', &['ϥ']), ('ϥ', &[
+  'Ϥ']), ('Ϧ', &['ϧ']), ('ϧ', &['Ϧ']), ('Ϩ', &['ϩ']), ('ϩ', &['Ϩ']),
+  ('Ϫ', &['ϫ']), ('ϫ', &['Ϫ']), ('Ϭ', &['ϭ']), ('ϭ', &['Ϭ']), ('Ϯ', &[
+  'ϯ']), ('ϯ', &['Ϯ']), ('ϰ', &['Κ', 'κ', ]), ('ϱ', &['Ρ', 'ρ', ]),
+  ('ϲ', &['Ϲ']), ('ϳ', &['Ϳ']), ('ϴ', &['Θ', 'θ', 'ϑ', ]), ('ϵ', &[
+  'Ε', 'ε', ]), ('Ϸ', &['ϸ']), ('ϸ', &['Ϸ']), ('Ϲ', &['ϲ']), ('Ϻ', &[
+  'ϻ']), ('ϻ', &['Ϻ']), ('Ͻ', &['ͻ']), ('Ͼ', &['ͼ']), ('Ͽ', &['ͽ']),
+  ('Ѐ', &['ѐ']), ('Ё', &['ё']), ('Ђ', &['ђ']), ('Ѓ', &['ѓ']), ('Є', &[
+  'є']), ('Ѕ', &['ѕ']), ('І', &['і']), ('Ї', &['ї']), ('Ј', &['ј']),
+  ('Љ', &['љ']), ('Њ', &['њ']), ('Ћ', &['ћ']), ('Ќ', &['ќ']), ('Ѝ', &[
+  'ѝ']), ('Ў', &['ў']), ('Џ', &['џ']), ('А', &['а']), ('Б', &['б']),
+  ('В', &['в', 'ᲀ', ]), ('Г', &['г']), ('Д', &['д', 'ᲁ', ]),
+  ('Е', &['е']), ('Ж', &['ж']), ('З', &['з']), ('И', &['и']), ('Й', &[
+  'й']), ('К', &['к']), ('Л', &['л']), ('М', &['м']), ('Н', &['н']),
+  ('О', &['о', 'ᲂ', ]), ('П', &['п']), ('Р', &['р']), ('С', &['с',
+  'ᲃ', ]), ('Т', &['т', 'ᲄ', 'ᲅ', ]), ('У', &['у']), ('Ф', &['ф'
+  ]), ('Х', &['х']), ('Ц', &['ц']), ('Ч', &['ч']), ('Ш', &['ш']),
+  ('Щ', &['щ']), ('Ъ', &['ъ', 'ᲆ', ]), ('Ы', &['ы']), ('Ь', &['ь']),
+  ('Э', &['э']), ('Ю', &['ю']), ('Я', &['я']), ('а', &['А']), ('б', &[
+  'Б']), ('в', &['В', 'ᲀ', ]), ('г', &['Г']), ('д', &['Д', 'ᲁ', ]),
+  ('е', &['Е']), ('ж', &['Ж']), ('з', &['З']), ('и', &['И']), ('й', &[
+  'Й']), ('к', &['К']), ('л', &['Л']), ('м', &['М']), ('н', &['Н']),
+  ('о', &['О', 'ᲂ', ]), ('п', &['П']), ('р', &['Р']), ('с', &['С',
+  'ᲃ', ]), ('т', &['Т', 'ᲄ', 'ᲅ', ]), ('у', &['У']), ('ф', &['Ф'
+  ]), ('х', &['Х']), ('ц', &['Ц']), ('ч', &['Ч']), ('ш', &['Ш']),
+  ('щ', &['Щ']), ('ъ', &['Ъ', 'ᲆ', ]), ('ы', &['Ы']), ('ь', &['Ь']),
+  ('э', &['Э']), ('ю', &['Ю']), ('я', &['Я']), ('ѐ', &['Ѐ']), ('ё', &[
+  'Ё']), ('ђ', &['Ђ']), ('ѓ', &['Ѓ']), ('є', &['Є']), ('ѕ', &['Ѕ']),
+  ('і', &['І']), ('ї', &['Ї']), ('ј', &['Ј']), ('љ', &['Љ']), ('њ', &[
+  'Њ']), ('ћ', &['Ћ']), ('ќ', &['Ќ']), ('ѝ', &['Ѝ']), ('ў', &['Ў']),
+  ('џ', &['Џ']), ('Ѡ', &['ѡ']), ('ѡ', &['Ѡ']), ('Ѣ', &['ѣ', 'ᲇ', ]),
+  ('ѣ', &['Ѣ', 'ᲇ', ]), ('Ѥ', &['ѥ']), ('ѥ', &['Ѥ']), ('Ѧ', &['ѧ']),
+  ('ѧ', &['Ѧ']), ('Ѩ', &['ѩ']), ('ѩ', &['Ѩ']), ('Ѫ', &['ѫ']), ('ѫ', &[
+  'Ѫ']), ('Ѭ', &['ѭ']), ('ѭ', &['Ѭ']), ('Ѯ', &['ѯ']), ('ѯ', &['Ѯ']),
+  ('Ѱ', &['ѱ']), ('ѱ', &['Ѱ']), ('Ѳ', &['ѳ']), ('ѳ', &['Ѳ']), ('Ѵ', &[
+  'ѵ']), ('ѵ', &['Ѵ']), ('Ѷ', &['ѷ']), ('ѷ', &['Ѷ']), ('Ѹ', &['ѹ']),
+  ('ѹ', &['Ѹ']), ('Ѻ', &['ѻ']), ('ѻ', &['Ѻ']), ('Ѽ', &['ѽ']), ('ѽ', &[
+  'Ѽ']), ('Ѿ', &['ѿ']), ('ѿ', &['Ѿ']), ('Ҁ', &['ҁ']), ('ҁ', &['Ҁ']),
+  ('Ҋ', &['ҋ']), ('ҋ', &['Ҋ']), ('Ҍ', &['ҍ']), ('ҍ', &['Ҍ']), ('Ҏ', &[
+  'ҏ']), ('ҏ', &['Ҏ']), ('Ґ', &['ґ']), ('ґ', &['Ґ']), ('Ғ', &['ғ']),
+  ('ғ', &['Ғ']), ('Ҕ', &['ҕ']), ('ҕ', &['Ҕ']), ('Җ', &['җ']), ('җ', &[
+  'Җ']), ('Ҙ', &['ҙ']), ('ҙ', &['Ҙ']), ('Қ', &['қ']), ('қ', &['Қ']),
+  ('Ҝ', &['ҝ']), ('ҝ', &['Ҝ']), ('Ҟ', &['ҟ']), ('ҟ', &['Ҟ']), ('Ҡ', &[
+  'ҡ']), ('ҡ', &['Ҡ']), ('Ң', &['ң']), ('ң', &['Ң']), ('Ҥ', &['ҥ']),
+  ('ҥ', &['Ҥ']), ('Ҧ', &['ҧ']), ('ҧ', &['Ҧ']), ('Ҩ', &['ҩ']), ('ҩ', &[
+  'Ҩ']), ('Ҫ', &['ҫ']), ('ҫ', &['Ҫ']), ('Ҭ', &['ҭ']), ('ҭ', &['Ҭ']),
+  ('Ү', &['ү']), ('ү', &['Ү']), ('Ұ', &['ұ']), ('ұ', &['Ұ']), ('Ҳ', &[
+  'ҳ']), ('ҳ', &['Ҳ']), ('Ҵ', &['ҵ']), ('ҵ', &['Ҵ']), ('Ҷ', &['ҷ']),
+  ('ҷ', &['Ҷ']), ('Ҹ', &['ҹ']), ('ҹ', &['Ҹ']), ('Һ', &['һ']), ('һ', &[
+  'Һ']), ('Ҽ', &['ҽ']), ('ҽ', &['Ҽ']), ('Ҿ', &['ҿ']), ('ҿ', &['Ҿ']),
+  ('Ӏ', &['ӏ']), ('Ӂ', &['ӂ']), ('ӂ', &['Ӂ']), ('Ӄ', &['ӄ']), ('ӄ', &[
+  'Ӄ']), ('Ӆ', &['ӆ']), ('ӆ', &['Ӆ']), ('Ӈ', &['ӈ']), ('ӈ', &['Ӈ']),
+  ('Ӊ', &['ӊ']), ('ӊ', &['Ӊ']), ('Ӌ', &['ӌ']), ('ӌ', &['Ӌ']), ('Ӎ', &[
+  'ӎ']), ('ӎ', &['Ӎ']), ('ӏ', &['Ӏ']), ('Ӑ', &['ӑ']), ('ӑ', &['Ӑ']),
+  ('Ӓ', &['ӓ']), ('ӓ', &['Ӓ']), ('Ӕ', &['ӕ']), ('ӕ', &['Ӕ']), ('Ӗ', &[
+  'ӗ']), ('ӗ', &['Ӗ']), ('Ә', &['ә']), ('ә', &['Ә']), ('Ӛ', &['ӛ']),
+  ('ӛ', &['Ӛ']), ('Ӝ', &['ӝ']), ('ӝ', &['Ӝ']), ('Ӟ', &['ӟ']), ('ӟ', &[
+  'Ӟ']), ('Ӡ', &['ӡ']), ('ӡ', &['Ӡ']), ('Ӣ', &['ӣ']), ('ӣ', &['Ӣ']),
+  ('Ӥ', &['ӥ']), ('ӥ', &['Ӥ']), ('Ӧ', &['ӧ']), ('ӧ', &['Ӧ']), ('Ө', &[
+  'ө']), ('ө', &['Ө']), ('Ӫ', &['ӫ']), ('ӫ', &['Ӫ']), ('Ӭ', &['ӭ']),
+  ('ӭ', &['Ӭ']), ('Ӯ', &['ӯ']), ('ӯ', &['Ӯ']), ('Ӱ', &['ӱ']), ('ӱ', &[
+  'Ӱ']), ('Ӳ', &['ӳ']), ('ӳ', &['Ӳ']), ('Ӵ', &['ӵ']), ('ӵ', &['Ӵ']),
+  ('Ӷ', &['ӷ']), ('ӷ', &['Ӷ']), ('Ӹ', &['ӹ']), ('ӹ', &['Ӹ']), ('Ӻ', &[
+  'ӻ']), ('ӻ', &['Ӻ']), ('Ӽ', &['ӽ']), ('ӽ', &['Ӽ']), ('Ӿ', &['ӿ']),
+  ('ӿ', &['Ӿ']), ('Ԁ', &['ԁ']), ('ԁ', &['Ԁ']), ('Ԃ', &['ԃ']), ('ԃ', &[
+  'Ԃ']), ('Ԅ', &['ԅ']), ('ԅ', &['Ԅ']), ('Ԇ', &['ԇ']), ('ԇ', &['Ԇ']),
+  ('Ԉ', &['ԉ']), ('ԉ', &['Ԉ']), ('Ԋ', &['ԋ']), ('ԋ', &['Ԋ']), ('Ԍ', &[
+  'ԍ']), ('ԍ', &['Ԍ']), ('Ԏ', &['ԏ']), ('ԏ', &['Ԏ']), ('Ԑ', &['ԑ']),
+  ('ԑ', &['Ԑ']), ('Ԓ', &['ԓ']), ('ԓ', &['Ԓ']), ('Ԕ', &['ԕ']), ('ԕ', &[
+  'Ԕ']), ('Ԗ', &['ԗ']), ('ԗ', &['Ԗ']), ('Ԙ', &['ԙ']), ('ԙ', &['Ԙ']),
+  ('Ԛ', &['ԛ']), ('ԛ', &['Ԛ']), ('Ԝ', &['ԝ']), ('ԝ', &['Ԝ']), ('Ԟ', &[
+  'ԟ']), ('ԟ', &['Ԟ']), ('Ԡ', &['ԡ']), ('ԡ', &['Ԡ']), ('Ԣ', &['ԣ']),
+  ('ԣ', &['Ԣ']), ('Ԥ', &['ԥ']), ('ԥ', &['Ԥ']), ('Ԧ', &['ԧ']), ('ԧ', &[
+  'Ԧ']), ('Ԩ', &['ԩ']), ('ԩ', &['Ԩ']), ('Ԫ', &['ԫ']), ('ԫ', &['Ԫ']),
+  ('Ԭ', &['ԭ']), ('ԭ', &['Ԭ']), ('Ԯ', &['ԯ']), ('ԯ', &['Ԯ']), ('Ա', &[
+  'ա']), ('Բ', &['բ']), ('Գ', &['գ']), ('Դ', &['դ']), ('Ե', &['ե']),
+  ('Զ', &['զ']), ('Է', &['է']), ('Ը', &['ը']), ('Թ', &['թ']), ('Ժ', &[
+  'ժ']), ('Ի', &['ի']), ('Լ', &['լ']), ('Խ', &['խ']), ('Ծ', &['ծ']),
+  ('Կ', &['կ']), ('Հ', &['հ']), ('Ձ', &['ձ']), ('Ղ', &['ղ']), ('Ճ', &[
+  'ճ']), ('Մ', &['մ']), ('Յ', &['յ']), ('Ն', &['ն']), ('Շ', &['շ']),
+  ('Ո', &['ո']), ('Չ', &['չ']), ('Պ', &['պ']), ('Ջ', &['ջ']), ('Ռ', &[
+  'ռ']), ('Ս', &['ս']), ('Վ', &['վ']), ('Տ', &['տ']), ('Ր', &['ր']),
+  ('Ց', &['ց']), ('Ւ', &['ւ']), ('Փ', &['փ']), ('Ք', &['ք']), ('Օ', &[
+  'օ']), ('Ֆ', &['ֆ']), ('ա', &['Ա']), ('բ', &['Բ']), ('գ', &['Գ']),
+  ('դ', &['Դ']), ('ե', &['Ե']), ('զ', &['Զ']), ('է', &['Է']), ('ը', &[
+  'Ը']), ('թ', &['Թ']), ('ժ', &['Ժ']), ('ի', &['Ի']), ('լ', &['Լ']),
+  ('խ', &['Խ']), ('ծ', &['Ծ']), ('կ', &['Կ']), ('հ', &['Հ']), ('ձ', &[
+  'Ձ']), ('ղ', &['Ղ']), ('ճ', &['Ճ']), ('մ', &['Մ']), ('յ', &['Յ']),
+  ('ն', &['Ն']), ('շ', &['Շ']), ('ո', &['Ո']), ('չ', &['Չ']), ('պ', &[
+  'Պ']), ('ջ', &['Ջ']), ('ռ', &['Ռ']), ('ս', &['Ս']), ('վ', &['Վ']),
+  ('տ', &['Տ']), ('ր', &['Ր']), ('ց', &['Ց']), ('ւ', &['Ւ']), ('փ', &[
+  'Փ']), ('ք', &['Ք']), ('օ', &['Օ']), ('ֆ', &['Ֆ']), ('Ⴀ', &['ⴀ'
+  ]), ('Ⴁ', &['ⴁ']), ('Ⴂ', &['ⴂ']), ('Ⴃ', &['ⴃ']), ('Ⴄ', &['ⴄ'
+  ]), ('Ⴅ', &['ⴅ']), ('Ⴆ', &['ⴆ']), ('Ⴇ', &['ⴇ']), ('Ⴈ', &['ⴈ'
+  ]), ('Ⴉ', &['ⴉ']), ('Ⴊ', &['ⴊ']), ('Ⴋ', &['ⴋ']), ('Ⴌ', &['ⴌ'
+  ]), ('Ⴍ', &['ⴍ']), ('Ⴎ', &['ⴎ']), ('Ⴏ', &['ⴏ']), ('Ⴐ', &['ⴐ'
+  ]), ('Ⴑ', &['ⴑ']), ('Ⴒ', &['ⴒ']), ('Ⴓ', &['ⴓ']), ('Ⴔ', &['ⴔ'
+  ]), ('Ⴕ', &['ⴕ']), ('Ⴖ', &['ⴖ']), ('Ⴗ', &['ⴗ']), ('Ⴘ', &['ⴘ'
+  ]), ('Ⴙ', &['ⴙ']), ('Ⴚ', &['ⴚ']), ('Ⴛ', &['ⴛ']), ('Ⴜ', &['ⴜ'
+  ]), ('Ⴝ', &['ⴝ']), ('Ⴞ', &['ⴞ']), ('Ⴟ', &['ⴟ']), ('Ⴠ', &['ⴠ'
+  ]), ('Ⴡ', &['ⴡ']), ('Ⴢ', &['ⴢ']), ('Ⴣ', &['ⴣ']), ('Ⴤ', &['ⴤ'
+  ]), ('Ⴥ', &['ⴥ']), ('Ⴧ', &['ⴧ']), ('Ⴭ', &['ⴭ']), ('Ꭰ', &['ꭰ'
+  ]), ('Ꭱ', &['ꭱ']), ('Ꭲ', &['ꭲ']), ('Ꭳ', &['ꭳ']), ('Ꭴ', &['ꭴ'
+  ]), ('Ꭵ', &['ꭵ']), ('Ꭶ', &['ꭶ']), ('Ꭷ', &['ꭷ']), ('Ꭸ', &['ꭸ'
+  ]), ('Ꭹ', &['ꭹ']), ('Ꭺ', &['ꭺ']), ('Ꭻ', &['ꭻ']), ('Ꭼ', &['ꭼ'
+  ]), ('Ꭽ', &['ꭽ']), ('Ꭾ', &['ꭾ']), ('Ꭿ', &['ꭿ']), ('Ꮀ', &['ꮀ'
+  ]), ('Ꮁ', &['ꮁ']), ('Ꮂ', &['ꮂ']), ('Ꮃ', &['ꮃ']), ('Ꮄ', &['ꮄ'
+  ]), ('Ꮅ', &['ꮅ']), ('Ꮆ', &['ꮆ']), ('Ꮇ', &['ꮇ']), ('Ꮈ', &['ꮈ'
+  ]), ('Ꮉ', &['ꮉ']), ('Ꮊ', &['ꮊ']), ('Ꮋ', &['ꮋ']), ('Ꮌ', &['ꮌ'
+  ]), ('Ꮍ', &['ꮍ']), ('Ꮎ', &['ꮎ']), ('Ꮏ', &['ꮏ']), ('Ꮐ', &['ꮐ'
+  ]), ('Ꮑ', &['ꮑ']), ('Ꮒ', &['ꮒ']), ('Ꮓ', &['ꮓ']), ('Ꮔ', &['ꮔ'
+  ]), ('Ꮕ', &['ꮕ']), ('Ꮖ', &['ꮖ']), ('Ꮗ', &['ꮗ']), ('Ꮘ', &['ꮘ'
+  ]), ('Ꮙ', &['ꮙ']), ('Ꮚ', &['ꮚ']), ('Ꮛ', &['ꮛ']), ('Ꮜ', &['ꮜ'
+  ]), ('Ꮝ', &['ꮝ']), ('Ꮞ', &['ꮞ']), ('Ꮟ', &['ꮟ']), ('Ꮠ', &['ꮠ'
+  ]), ('Ꮡ', &['ꮡ']), ('Ꮢ', &['ꮢ']), ('Ꮣ', &['ꮣ']), ('Ꮤ', &['ꮤ'
+  ]), ('Ꮥ', &['ꮥ']), ('Ꮦ', &['ꮦ']), ('Ꮧ', &['ꮧ']), ('Ꮨ', &['ꮨ'
+  ]), ('Ꮩ', &['ꮩ']), ('Ꮪ', &['ꮪ']), ('Ꮫ', &['ꮫ']), ('Ꮬ', &['ꮬ'
+  ]), ('Ꮭ', &['ꮭ']), ('Ꮮ', &['ꮮ']), ('Ꮯ', &['ꮯ']), ('Ꮰ', &['ꮰ'
+  ]), ('Ꮱ', &['ꮱ']), ('Ꮲ', &['ꮲ']), ('Ꮳ', &['ꮳ']), ('Ꮴ', &['ꮴ'
+  ]), ('Ꮵ', &['ꮵ']), ('Ꮶ', &['ꮶ']), ('Ꮷ', &['ꮷ']), ('Ꮸ', &['ꮸ'
+  ]), ('Ꮹ', &['ꮹ']), ('Ꮺ', &['ꮺ']), ('Ꮻ', &['ꮻ']), ('Ꮼ', &['ꮼ'
+  ]), ('Ꮽ', &['ꮽ']), ('Ꮾ', &['ꮾ']), ('Ꮿ', &['ꮿ']), ('Ᏸ', &['ᏸ'
+  ]), ('Ᏹ', &['ᏹ']), ('Ᏺ', &['ᏺ']), ('Ᏻ', &['ᏻ']), ('Ᏼ', &['ᏼ'
+  ]), ('Ᏽ', &['ᏽ']), ('ᏸ', &['Ᏸ']), ('ᏹ', &['Ᏹ']), ('ᏺ', &['Ᏺ'
+  ]), ('ᏻ', &['Ᏻ']), ('ᏼ', &['Ᏼ']), ('ᏽ', &['Ᏽ']), ('ᲀ', &['В',
+  'в', ]), ('ᲁ', &['Д', 'д', ]), ('ᲂ', &['О', 'о', ]), ('ᲃ', &[
+  'С', 'с', ]), ('ᲄ', &['Т', 'т', 'ᲅ', ]), ('ᲅ', &['Т', 'т',
+  'ᲄ', ]), ('ᲆ', &['Ъ', 'ъ', ]), ('ᲇ', &['Ѣ', 'ѣ', ]), ('ᲈ', &[
+  'Ꙋ', 'ꙋ', ]), ('ᵹ', &['Ᵹ']), ('ᵽ', &['Ᵽ']), ('Ḁ', &['ḁ']),
+  ('ḁ', &['Ḁ']), ('Ḃ', &['ḃ']), ('ḃ', &['Ḃ']), ('Ḅ', &['ḅ']),
+  ('ḅ', &['Ḅ']), ('Ḇ', &['ḇ']), ('ḇ', &['Ḇ']), ('Ḉ', &['ḉ']),
+  ('ḉ', &['Ḉ']), ('Ḋ', &['ḋ']), ('ḋ', &['Ḋ']), ('Ḍ', &['ḍ']),
+  ('ḍ', &['Ḍ']), ('Ḏ', &['ḏ']), ('ḏ', &['Ḏ']), ('Ḑ', &['ḑ']),
+  ('ḑ', &['Ḑ']), ('Ḓ', &['ḓ']), ('ḓ', &['Ḓ']), ('Ḕ', &['ḕ']),
+  ('ḕ', &['Ḕ']), ('Ḗ', &['ḗ']), ('ḗ', &['Ḗ']), ('Ḙ', &['ḙ']),
+  ('ḙ', &['Ḙ']), ('Ḛ', &['ḛ']), ('ḛ', &['Ḛ']), ('Ḝ', &['ḝ']),
+  ('ḝ', &['Ḝ']), ('Ḟ', &['ḟ']), ('ḟ', &['Ḟ']), ('Ḡ', &['ḡ']),
+  ('ḡ', &['Ḡ']), ('Ḣ', &['ḣ']), ('ḣ', &['Ḣ']), ('Ḥ', &['ḥ']),
+  ('ḥ', &['Ḥ']), ('Ḧ', &['ḧ']), ('ḧ', &['Ḧ']), ('Ḩ', &['ḩ']),
+  ('ḩ', &['Ḩ']), ('Ḫ', &['ḫ']), ('ḫ', &['Ḫ']), ('Ḭ', &['ḭ']),
+  ('ḭ', &['Ḭ']), ('Ḯ', &['ḯ']), ('ḯ', &['Ḯ']), ('Ḱ', &['ḱ']),
+  ('ḱ', &['Ḱ']), ('Ḳ', &['ḳ']), ('ḳ', &['Ḳ']), ('Ḵ', &['ḵ']),
+  ('ḵ', &['Ḵ']), ('Ḷ', &['ḷ']), ('ḷ', &['Ḷ']), ('Ḹ', &['ḹ']),
+  ('ḹ', &['Ḹ']), ('Ḻ', &['ḻ']), ('ḻ', &['Ḻ']), ('Ḽ', &['ḽ']),
+  ('ḽ', &['Ḽ']), ('Ḿ', &['ḿ']), ('ḿ', &['Ḿ']), ('Ṁ', &['ṁ']),
+  ('ṁ', &['Ṁ']), ('Ṃ', &['ṃ']), ('ṃ', &['Ṃ']), ('Ṅ', &['ṅ']),
+  ('ṅ', &['Ṅ']), ('Ṇ', &['ṇ']), ('ṇ', &['Ṇ']), ('Ṉ', &['ṉ']),
+  ('ṉ', &['Ṉ']), ('Ṋ', &['ṋ']), ('ṋ', &['Ṋ']), ('Ṍ', &['ṍ']),
+  ('ṍ', &['Ṍ']), ('Ṏ', &['ṏ']), ('ṏ', &['Ṏ']), ('Ṑ', &['ṑ']),
+  ('ṑ', &['Ṑ']), ('Ṓ', &['ṓ']), ('ṓ', &['Ṓ']), ('Ṕ', &['ṕ']),
+  ('ṕ', &['Ṕ']), ('Ṗ', &['ṗ']), ('ṗ', &['Ṗ']), ('Ṙ', &['ṙ']),
+  ('ṙ', &['Ṙ']), ('Ṛ', &['ṛ']), ('ṛ', &['Ṛ']), ('Ṝ', &['ṝ']),
+  ('ṝ', &['Ṝ']), ('Ṟ', &['ṟ']), ('ṟ', &['Ṟ']), ('Ṡ', &['ṡ',
+  'ẛ', ]), ('ṡ', &['Ṡ', 'ẛ', ]), ('Ṣ', &['ṣ']), ('ṣ', &['Ṣ']),
+  ('Ṥ', &['ṥ']), ('ṥ', &['Ṥ']), ('Ṧ', &['ṧ']), ('ṧ', &['Ṧ']),
+  ('Ṩ', &['ṩ']), ('ṩ', &['Ṩ']), ('Ṫ', &['ṫ']), ('ṫ', &['Ṫ']),
+  ('Ṭ', &['ṭ']), ('ṭ', &['Ṭ']), ('Ṯ', &['ṯ']), ('ṯ', &['Ṯ']),
+  ('Ṱ', &['ṱ']), ('ṱ', &['Ṱ']), ('Ṳ', &['ṳ']), ('ṳ', &['Ṳ']),
+  ('Ṵ', &['ṵ']), ('ṵ', &['Ṵ']), ('Ṷ', &['ṷ']), ('ṷ', &['Ṷ']),
+  ('Ṹ', &['ṹ']), ('ṹ', &['Ṹ']), ('Ṻ', &['ṻ']), ('ṻ', &['Ṻ']),
+  ('Ṽ', &['ṽ']), ('ṽ', &['Ṽ']), ('Ṿ', &['ṿ']), ('ṿ', &['Ṿ']),
+  ('Ẁ', &['ẁ']), ('ẁ', &['Ẁ']), ('Ẃ', &['ẃ']), ('ẃ', &['Ẃ']),
+  ('Ẅ', &['ẅ']), ('ẅ', &['Ẅ']), ('Ẇ', &['ẇ']), ('ẇ', &['Ẇ']),
+  ('Ẉ', &['ẉ']), ('ẉ', &['Ẉ']), ('Ẋ', &['ẋ']), ('ẋ', &['Ẋ']),
+  ('Ẍ', &['ẍ']), ('ẍ', &['Ẍ']), ('Ẏ', &['ẏ']), ('ẏ', &['Ẏ']),
+  ('Ẑ', &['ẑ']), ('ẑ', &['Ẑ']), ('Ẓ', &['ẓ']), ('ẓ', &['Ẓ']),
+  ('Ẕ', &['ẕ']), ('ẕ', &['Ẕ']), ('ẛ', &['Ṡ', 'ṡ', ]), ('ẞ', &[
+  'ß']), ('Ạ', &['ạ']), ('ạ', &['Ạ']), ('Ả', &['ả']), ('ả', &[
+  'Ả']), ('Ấ', &['ấ']), ('ấ', &['Ấ']), ('Ầ', &['ầ']), ('ầ', &[
+  'Ầ']), ('Ẩ', &['ẩ']), ('ẩ', &['Ẩ']), ('Ẫ', &['ẫ']), ('ẫ', &[
+  'Ẫ']), ('Ậ', &['ậ']), ('ậ', &['Ậ']), ('Ắ', &['ắ']), ('ắ', &[
+  'Ắ']), ('Ằ', &['ằ']), ('ằ', &['Ằ']), ('Ẳ', &['ẳ']), ('ẳ', &[
+  'Ẳ']), ('Ẵ', &['ẵ']), ('ẵ', &['Ẵ']), ('Ặ', &['ặ']), ('ặ', &[
+  'Ặ']), ('Ẹ', &['ẹ']), ('ẹ', &['Ẹ']), ('Ẻ', &['ẻ']), ('ẻ', &[
+  'Ẻ']), ('Ẽ', &['ẽ']), ('ẽ', &['Ẽ']), ('Ế', &['ế']), ('ế', &[
+  'Ế']), ('Ề', &['ề']), ('ề', &['Ề']), ('Ể', &['ể']), ('ể', &[
+  'Ể']), ('Ễ', &['ễ']), ('ễ', &['Ễ']), ('Ệ', &['ệ']), ('ệ', &[
+  'Ệ']), ('Ỉ', &['ỉ']), ('ỉ', &['Ỉ']), ('Ị', &['ị']), ('ị', &[
+  'Ị']), ('Ọ', &['ọ']), ('ọ', &['Ọ']), ('Ỏ', &['ỏ']), ('ỏ', &[
+  'Ỏ']), ('Ố', &['ố']), ('ố', &['Ố']), ('Ồ', &['ồ']), ('ồ', &[
+  'Ồ']), ('Ổ', &['ổ']), ('ổ', &['Ổ']), ('Ỗ', &['ỗ']), ('ỗ', &[
+  'Ỗ']), ('Ộ', &['ộ']), ('ộ', &['Ộ']), ('Ớ', &['ớ']), ('ớ', &[
+  'Ớ']), ('Ờ', &['ờ']), ('ờ', &['Ờ']), ('Ở', &['ở']), ('ở', &[
+  'Ở']), ('Ỡ', &['ỡ']), ('ỡ', &['Ỡ']), ('Ợ', &['ợ']), ('ợ', &[
+  'Ợ']), ('Ụ', &['ụ']), ('ụ', &['Ụ']), ('Ủ', &['ủ']), ('ủ', &[
+  'Ủ']), ('Ứ', &['ứ']), ('ứ', &['Ứ']), ('Ừ', &['ừ']), ('ừ', &[
+  'Ừ']), ('Ử', &['ử']), ('ử', &['Ử']), ('Ữ', &['ữ']), ('ữ', &[
+  'Ữ']), ('Ự', &['ự']), ('ự', &['Ự']), ('Ỳ', &['ỳ']), ('ỳ', &[
+  'Ỳ']), ('Ỵ', &['ỵ']), ('ỵ', &['Ỵ']), ('Ỷ', &['ỷ']), ('ỷ', &[
+  'Ỷ']), ('Ỹ', &['ỹ']), ('ỹ', &['Ỹ']), ('Ỻ', &['ỻ']), ('ỻ', &[
+  'Ỻ']), ('Ỽ', &['ỽ']), ('ỽ', &['Ỽ']), ('Ỿ', &['ỿ']), ('ỿ', &[
+  'Ỿ']), ('ἀ', &['Ἀ']), ('ἁ', &['Ἁ']), ('ἂ', &['Ἂ']), ('ἃ', &[
+  'Ἃ']), ('ἄ', &['Ἄ']), ('ἅ', &['Ἅ']), ('ἆ', &['Ἆ']), ('ἇ', &[
+  'Ἇ']), ('Ἀ', &['ἀ']), ('Ἁ', &['ἁ']), ('Ἂ', &['ἂ']), ('Ἃ', &[
+  'ἃ']), ('Ἄ', &['ἄ']), ('Ἅ', &['ἅ']), ('Ἆ', &['ἆ']), ('Ἇ', &[
+  'ἇ']), ('ἐ', &['Ἐ']), ('ἑ', &['Ἑ']), ('ἒ', &['Ἒ']), ('ἓ', &[
+  'Ἓ']), ('ἔ', &['Ἔ']), ('ἕ', &['Ἕ']), ('Ἐ', &['ἐ']), ('Ἑ', &[
+  'ἑ']), ('Ἒ', &['ἒ']), ('Ἓ', &['ἓ']), ('Ἔ', &['ἔ']), ('Ἕ', &[
+  'ἕ']), ('ἠ', &['Ἠ']), ('ἡ', &['Ἡ']), ('ἢ', &['Ἢ']), ('ἣ', &[
+  'Ἣ']), ('ἤ', &['Ἤ']), ('ἥ', &['Ἥ']), ('ἦ', &['Ἦ']), ('ἧ', &[
+  'Ἧ']), ('Ἠ', &['ἠ']), ('Ἡ', &['ἡ']), ('Ἢ', &['ἢ']), ('Ἣ', &[
+  'ἣ']), ('Ἤ', &['ἤ']), ('Ἥ', &['ἥ']), ('Ἦ', &['ἦ']), ('Ἧ', &[
+  'ἧ']), ('ἰ', &['Ἰ']), ('ἱ', &['Ἱ']), ('ἲ', &['Ἲ']), ('ἳ', &[
+  'Ἳ']), ('ἴ', &['Ἴ']), ('ἵ', &['Ἵ']), ('ἶ', &['Ἶ']), ('ἷ', &[
+  'Ἷ']), ('Ἰ', &['ἰ']), ('Ἱ', &['ἱ']), ('Ἲ', &['ἲ']), ('Ἳ', &[
+  'ἳ']), ('Ἴ', &['ἴ']), ('Ἵ', &['ἵ']), ('Ἶ', &['ἶ']), ('Ἷ', &[
+  'ἷ']), ('ὀ', &['Ὀ']), ('ὁ', &['Ὁ']), ('ὂ', &['Ὂ']), ('ὃ', &[
+  'Ὃ']), ('ὄ', &['Ὄ']), ('ὅ', &['Ὅ']), ('Ὀ', &['ὀ']), ('Ὁ', &[
+  'ὁ']), ('Ὂ', &['ὂ']), ('Ὃ', &['ὃ']), ('Ὄ', &['ὄ']), ('Ὅ', &[
+  'ὅ']), ('ὑ', &['Ὑ']), ('ὓ', &['Ὓ']), ('ὕ', &['Ὕ']), ('ὗ', &[
+  'Ὗ']), ('Ὑ', &['ὑ']), ('Ὓ', &['ὓ']), ('Ὕ', &['ὕ']), ('Ὗ', &[
+  'ὗ']), ('ὠ', &['Ὠ']), ('ὡ', &['Ὡ']), ('ὢ', &['Ὢ']), ('ὣ', &[
+  'Ὣ']), ('ὤ', &['Ὤ']), ('ὥ', &['Ὥ']), ('ὦ', &['Ὦ']), ('ὧ', &[
+  'Ὧ']), ('Ὠ', &['ὠ']), ('Ὡ', &['ὡ']), ('Ὢ', &['ὢ']), ('Ὣ', &[
+  'ὣ']), ('Ὤ', &['ὤ']), ('Ὥ', &['ὥ']), ('Ὦ', &['ὦ']), ('Ὧ', &[
+  'ὧ']), ('ὰ', &['Ὰ']), ('ά', &['Ά']), ('ὲ', &['Ὲ']), ('έ', &[
+  'Έ']), ('ὴ', &['Ὴ']), ('ή', &['Ή']), ('ὶ', &['Ὶ']), ('ί', &[
+  'Ί']), ('ὸ', &['Ὸ']), ('ό', &['Ό']), ('ὺ', &['Ὺ']), ('ύ', &[
+  'Ύ']), ('ὼ', &['Ὼ']), ('ώ', &['Ώ']), ('ᾀ', &['ᾈ']), ('ᾁ', &[
+  'ᾉ']), ('ᾂ', &['ᾊ']), ('ᾃ', &['ᾋ']), ('ᾄ', &['ᾌ']), ('ᾅ', &[
+  'ᾍ']), ('ᾆ', &['ᾎ']), ('ᾇ', &['ᾏ']), ('ᾈ', &['ᾀ']), ('ᾉ', &[
+  'ᾁ']), ('ᾊ', &['ᾂ']), ('ᾋ', &['ᾃ']), ('ᾌ', &['ᾄ']), ('ᾍ', &[
+  'ᾅ']), ('ᾎ', &['ᾆ']), ('ᾏ', &['ᾇ']), ('ᾐ', &['ᾘ']), ('ᾑ', &[
+  'ᾙ']), ('ᾒ', &['ᾚ']), ('ᾓ', &['ᾛ']), ('ᾔ', &['ᾜ']), ('ᾕ', &[
+  'ᾝ']), ('ᾖ', &['ᾞ']), ('ᾗ', &['ᾟ']), ('ᾘ', &['ᾐ']), ('ᾙ', &[
+  'ᾑ']), ('ᾚ', &['ᾒ']), ('ᾛ', &['ᾓ']), ('ᾜ', &['ᾔ']), ('ᾝ', &[
+  'ᾕ']), ('ᾞ', &['ᾖ']), ('ᾟ', &['ᾗ']), ('ᾠ', &['ᾨ']), ('ᾡ', &[
+  'ᾩ']), ('ᾢ', &['ᾪ']), ('ᾣ', &['ᾫ']), ('ᾤ', &['ᾬ']), ('ᾥ', &[
+  'ᾭ']), ('ᾦ', &['ᾮ']), ('ᾧ', &['ᾯ']), ('ᾨ', &['ᾠ']), ('ᾩ', &[
+  'ᾡ']), ('ᾪ', &['ᾢ']), ('ᾫ', &['ᾣ']), ('ᾬ', &['ᾤ']), ('ᾭ', &[
+  'ᾥ']), ('ᾮ', &['ᾦ']), ('ᾯ', &['ᾧ']), ('ᾰ', &['Ᾰ']), ('ᾱ', &[
+  'Ᾱ']), ('ᾳ', &['ᾼ']), ('Ᾰ', &['ᾰ']), ('Ᾱ', &['ᾱ']), ('Ὰ', &[
+  'ὰ']), ('Ά', &['ά']), ('ᾼ', &['ᾳ']), ('ι', &['ͅ', 'Ι', 'ι',
+  ]), ('ῃ', &['ῌ']), ('Ὲ', &['ὲ']), ('Έ', &['έ']), ('Ὴ', &['ὴ'
+  ]), ('Ή', &['ή']), ('ῌ', &['ῃ']), ('ῐ', &['Ῐ']), ('ῑ', &['Ῑ'
+  ]), ('Ῐ', &['ῐ']), ('Ῑ', &['ῑ']), ('Ὶ', &['ὶ']), ('Ί', &['ί'
+  ]), ('ῠ', &['Ῠ']), ('ῡ', &['Ῡ']), ('ῥ', &['Ῥ']), ('Ῠ', &['ῠ'
+  ]), ('Ῡ', &['ῡ']), ('Ὺ', &['ὺ']), ('Ύ', &['ύ']), ('Ῥ', &['ῥ'
+  ]), ('ῳ', &['ῼ']), ('Ὸ', &['ὸ']), ('Ό', &['ό']), ('Ὼ', &['ὼ'
+  ]), ('Ώ', &['ώ']), ('ῼ', &['ῳ']), ('Ω', &['Ω', 'ω', ]),
+  ('K', &['K', 'k', ]), ('Å', &['Å', 'å', ]), ('Ⅎ', &['ⅎ']),
+  ('ⅎ', &['Ⅎ']), ('Ⅰ', &['ⅰ']), ('Ⅱ', &['ⅱ']), ('Ⅲ', &['ⅲ']),
+  ('Ⅳ', &['ⅳ']), ('Ⅴ', &['ⅴ']), ('Ⅵ', &['ⅵ']), ('Ⅶ', &['ⅶ']),
+  ('Ⅷ', &['ⅷ']), ('Ⅸ', &['ⅸ']), ('Ⅹ', &['ⅹ']), ('Ⅺ', &['ⅺ']),
+  ('Ⅻ', &['ⅻ']), ('Ⅼ', &['ⅼ']), ('Ⅽ', &['ⅽ']), ('Ⅾ', &['ⅾ']),
+  ('Ⅿ', &['ⅿ']), ('ⅰ', &['Ⅰ']), ('ⅱ', &['Ⅱ']), ('ⅲ', &['Ⅲ']),
+  ('ⅳ', &['Ⅳ']), ('ⅴ', &['Ⅴ']), ('ⅵ', &['Ⅵ']), ('ⅶ', &['Ⅶ']),
+  ('ⅷ', &['Ⅷ']), ('ⅸ', &['Ⅸ']), ('ⅹ', &['Ⅹ']), ('ⅺ', &['Ⅺ']),
+  ('ⅻ', &['Ⅻ']), ('ⅼ', &['Ⅼ']), ('ⅽ', &['Ⅽ']), ('ⅾ', &['Ⅾ']),
+  ('ⅿ', &['Ⅿ']), ('Ↄ', &['ↄ']), ('ↄ', &['Ↄ']), ('Ⓐ', &['ⓐ']),
+  ('Ⓑ', &['ⓑ']), ('Ⓒ', &['ⓒ']), ('Ⓓ', &['ⓓ']), ('Ⓔ', &['ⓔ']),
+  ('Ⓕ', &['ⓕ']), ('Ⓖ', &['ⓖ']), ('Ⓗ', &['ⓗ']), ('Ⓘ', &['ⓘ']),
+  ('Ⓙ', &['ⓙ']), ('Ⓚ', &['ⓚ']), ('Ⓛ', &['ⓛ']), ('Ⓜ', &['ⓜ']),
+  ('Ⓝ', &['ⓝ']), ('Ⓞ', &['ⓞ']), ('Ⓟ', &['ⓟ']), ('Ⓠ', &['ⓠ']),
+  ('Ⓡ', &['ⓡ']), ('Ⓢ', &['ⓢ']), ('Ⓣ', &['ⓣ']), ('Ⓤ', &['ⓤ']),
+  ('Ⓥ', &['ⓥ']), ('Ⓦ', &['ⓦ']), ('Ⓧ', &['ⓧ']), ('Ⓨ', &['ⓨ']),
+  ('Ⓩ', &['ⓩ']), ('ⓐ', &['Ⓐ']), ('ⓑ', &['Ⓑ']), ('ⓒ', &['Ⓒ']),
+  ('ⓓ', &['Ⓓ']), ('ⓔ', &['Ⓔ']), ('ⓕ', &['Ⓕ']), ('ⓖ', &['Ⓖ']),
+  ('ⓗ', &['Ⓗ']), ('ⓘ', &['Ⓘ']), ('ⓙ', &['Ⓙ']), ('ⓚ', &['Ⓚ']),
+  ('ⓛ', &['Ⓛ']), ('ⓜ', &['Ⓜ']), ('ⓝ', &['Ⓝ']), ('ⓞ', &['Ⓞ']),
+  ('ⓟ', &['Ⓟ']), ('ⓠ', &['Ⓠ']), ('ⓡ', &['Ⓡ']), ('ⓢ', &['Ⓢ']),
+  ('ⓣ', &['Ⓣ']), ('ⓤ', &['Ⓤ']), ('ⓥ', &['Ⓥ']), ('ⓦ', &['Ⓦ']),
+  ('ⓧ', &['Ⓧ']), ('ⓨ', &['Ⓨ']), ('ⓩ', &['Ⓩ']), ('Ⰰ', &['ⰰ']),
+  ('Ⰱ', &['ⰱ']), ('Ⰲ', &['ⰲ']), ('Ⰳ', &['ⰳ']), ('Ⰴ', &['ⰴ']),
+  ('Ⰵ', &['ⰵ']), ('Ⰶ', &['ⰶ']), ('Ⰷ', &['ⰷ']), ('Ⰸ', &['ⰸ']),
+  ('Ⰹ', &['ⰹ']), ('Ⰺ', &['ⰺ']), ('Ⰻ', &['ⰻ']), ('Ⰼ', &['ⰼ']),
+  ('Ⰽ', &['ⰽ']), ('Ⰾ', &['ⰾ']), ('Ⰿ', &['ⰿ']), ('Ⱀ', &['ⱀ']),
+  ('Ⱁ', &['ⱁ']), ('Ⱂ', &['ⱂ']), ('Ⱃ', &['ⱃ']), ('Ⱄ', &['ⱄ']),
+  ('Ⱅ', &['ⱅ']), ('Ⱆ', &['ⱆ']), ('Ⱇ', &['ⱇ']), ('Ⱈ', &['ⱈ']),
+  ('Ⱉ', &['ⱉ']), ('Ⱊ', &['ⱊ']), ('Ⱋ', &['ⱋ']), ('Ⱌ', &['ⱌ']),
+  ('Ⱍ', &['ⱍ']), ('Ⱎ', &['ⱎ']), ('Ⱏ', &['ⱏ']), ('Ⱐ', &['ⱐ']),
+  ('Ⱑ', &['ⱑ']), ('Ⱒ', &['ⱒ']), ('Ⱓ', &['ⱓ']), ('Ⱔ', &['ⱔ']),
+  ('Ⱕ', &['ⱕ']), ('Ⱖ', &['ⱖ']), ('Ⱗ', &['ⱗ']), ('Ⱘ', &['ⱘ']),
+  ('Ⱙ', &['ⱙ']), ('Ⱚ', &['ⱚ']), ('Ⱛ', &['ⱛ']), ('Ⱜ', &['ⱜ']),
+  ('Ⱝ', &['ⱝ']), ('Ⱞ', &['ⱞ']), ('ⰰ', &['Ⰰ']), ('ⰱ', &['Ⰱ']),
+  ('ⰲ', &['Ⰲ']), ('ⰳ', &['Ⰳ']), ('ⰴ', &['Ⰴ']), ('ⰵ', &['Ⰵ']),
+  ('ⰶ', &['Ⰶ']), ('ⰷ', &['Ⰷ']), ('ⰸ', &['Ⰸ']), ('ⰹ', &['Ⰹ']),
+  ('ⰺ', &['Ⰺ']), ('ⰻ', &['Ⰻ']), ('ⰼ', &['Ⰼ']), ('ⰽ', &['Ⰽ']),
+  ('ⰾ', &['Ⰾ']), ('ⰿ', &['Ⰿ']), ('ⱀ', &['Ⱀ']), ('ⱁ', &['Ⱁ']),
+  ('ⱂ', &['Ⱂ']), ('ⱃ', &['Ⱃ']), ('ⱄ', &['Ⱄ']), ('ⱅ', &['Ⱅ']),
+  ('ⱆ', &['Ⱆ']), ('ⱇ', &['Ⱇ']), ('ⱈ', &['Ⱈ']), ('ⱉ', &['Ⱉ']),
+  ('ⱊ', &['Ⱊ']), ('ⱋ', &['Ⱋ']), ('ⱌ', &['Ⱌ']), ('ⱍ', &['Ⱍ']),
+  ('ⱎ', &['Ⱎ']), ('ⱏ', &['Ⱏ']), ('ⱐ', &['Ⱐ']), ('ⱑ', &['Ⱑ']),
+  ('ⱒ', &['Ⱒ']), ('ⱓ', &['Ⱓ']), ('ⱔ', &['Ⱔ']), ('ⱕ', &['Ⱕ']),
+  ('ⱖ', &['Ⱖ']), ('ⱗ', &['Ⱗ']), ('ⱘ', &['Ⱘ']), ('ⱙ', &['Ⱙ']),
+  ('ⱚ', &['Ⱚ']), ('ⱛ', &['Ⱛ']), ('ⱜ', &['Ⱜ']), ('ⱝ', &['Ⱝ']),
+  ('ⱞ', &['Ⱞ']), ('Ⱡ', &['ⱡ']), ('ⱡ', &['Ⱡ']), ('Ɫ', &['ɫ']),
+  ('Ᵽ', &['ᵽ']), ('Ɽ', &['ɽ']), ('ⱥ', &['Ⱥ']), ('ⱦ', &['Ⱦ']),
+  ('Ⱨ', &['ⱨ']), ('ⱨ', &['Ⱨ']), ('Ⱪ', &['ⱪ']), ('ⱪ', &['Ⱪ']),
+  ('Ⱬ', &['ⱬ']), ('ⱬ', &['Ⱬ']), ('Ɑ', &['ɑ']), ('Ɱ', &['ɱ']),
+  ('Ɐ', &['ɐ']), ('Ɒ', &['ɒ']), ('Ⱳ', &['ⱳ']), ('ⱳ', &['Ⱳ']),
+  ('Ⱶ', &['ⱶ']), ('ⱶ', &['Ⱶ']), ('Ȿ', &['ȿ']), ('Ɀ', &['ɀ']),
+  ('Ⲁ', &['ⲁ']), ('ⲁ', &['Ⲁ']), ('Ⲃ', &['ⲃ']), ('ⲃ', &['Ⲃ']),
+  ('Ⲅ', &['ⲅ']), ('ⲅ', &['Ⲅ']), ('Ⲇ', &['ⲇ']), ('ⲇ', &['Ⲇ']),
+  ('Ⲉ', &['ⲉ']), ('ⲉ', &['Ⲉ']), ('Ⲋ', &['ⲋ']), ('ⲋ', &['Ⲋ']),
+  ('Ⲍ', &['ⲍ']), ('ⲍ', &['Ⲍ']), ('Ⲏ', &['ⲏ']), ('ⲏ', &['Ⲏ']),
+  ('Ⲑ', &['ⲑ']), ('ⲑ', &['Ⲑ']), ('Ⲓ', &['ⲓ']), ('ⲓ', &['Ⲓ']),
+  ('Ⲕ', &['ⲕ']), ('ⲕ', &['Ⲕ']), ('Ⲗ', &['ⲗ']), ('ⲗ', &['Ⲗ']),
+  ('Ⲙ', &['ⲙ']), ('ⲙ', &['Ⲙ']), ('Ⲛ', &['ⲛ']), ('ⲛ', &['Ⲛ']),
+  ('Ⲝ', &['ⲝ']), ('ⲝ', &['Ⲝ']), ('Ⲟ', &['ⲟ']), ('ⲟ', &['Ⲟ']),
+  ('Ⲡ', &['ⲡ']), ('ⲡ', &['Ⲡ']), ('Ⲣ', &['ⲣ']), ('ⲣ', &['Ⲣ']),
+  ('Ⲥ', &['ⲥ']), ('ⲥ', &['Ⲥ']), ('Ⲧ', &['ⲧ']), ('ⲧ', &['Ⲧ']),
+  ('Ⲩ', &['ⲩ']), ('ⲩ', &['Ⲩ']), ('Ⲫ', &['ⲫ']), ('ⲫ', &['Ⲫ']),
+  ('Ⲭ', &['ⲭ']), ('ⲭ', &['Ⲭ']), ('Ⲯ', &['ⲯ']), ('ⲯ', &['Ⲯ']),
+  ('Ⲱ', &['ⲱ']), ('ⲱ', &['Ⲱ']), ('Ⲳ', &['ⲳ']), ('ⲳ', &['Ⲳ']),
+  ('Ⲵ', &['ⲵ']), ('ⲵ', &['Ⲵ']), ('Ⲷ', &['ⲷ']), ('ⲷ', &['Ⲷ']),
+  ('Ⲹ', &['ⲹ']), ('ⲹ', &['Ⲹ']), ('Ⲻ', &['ⲻ']), ('ⲻ', &['Ⲻ']),
+  ('Ⲽ', &['ⲽ']), ('ⲽ', &['Ⲽ']), ('Ⲿ', &['ⲿ']), ('ⲿ', &['Ⲿ']),
+  ('Ⳁ', &['ⳁ']), ('ⳁ', &['Ⳁ']), ('Ⳃ', &['ⳃ']), ('ⳃ', &['Ⳃ']),
+  ('Ⳅ', &['ⳅ']), ('ⳅ', &['Ⳅ']), ('Ⳇ', &['ⳇ']), ('ⳇ', &['Ⳇ']),
+  ('Ⳉ', &['ⳉ']), ('ⳉ', &['Ⳉ']), ('Ⳋ', &['ⳋ']), ('ⳋ', &['Ⳋ']),
+  ('Ⳍ', &['ⳍ']), ('ⳍ', &['Ⳍ']), ('Ⳏ', &['ⳏ']), ('ⳏ', &['Ⳏ']),
+  ('Ⳑ', &['ⳑ']), ('ⳑ', &['Ⳑ']), ('Ⳓ', &['ⳓ']), ('ⳓ', &['Ⳓ']),
+  ('Ⳕ', &['ⳕ']), ('ⳕ', &['Ⳕ']), ('Ⳗ', &['ⳗ']), ('ⳗ', &['Ⳗ']),
+  ('Ⳙ', &['ⳙ']), ('ⳙ', &['Ⳙ']), ('Ⳛ', &['ⳛ']), ('ⳛ', &['Ⳛ']),
+  ('Ⳝ', &['ⳝ']), ('ⳝ', &['Ⳝ']), ('Ⳟ', &['ⳟ']), ('ⳟ', &['Ⳟ']),
+  ('Ⳡ', &['ⳡ']), ('ⳡ', &['Ⳡ']), ('Ⳣ', &['ⳣ']), ('ⳣ', &['Ⳣ']),
+  ('Ⳬ', &['ⳬ']), ('ⳬ', &['Ⳬ']), ('Ⳮ', &['ⳮ']), ('ⳮ', &['Ⳮ']),
+  ('Ⳳ', &['ⳳ']), ('ⳳ', &['Ⳳ']), ('ⴀ', &['Ⴀ']), ('ⴁ', &['Ⴁ']),
+  ('ⴂ', &['Ⴂ']), ('ⴃ', &['Ⴃ']), ('ⴄ', &['Ⴄ']), ('ⴅ', &['Ⴅ']),
+  ('ⴆ', &['Ⴆ']), ('ⴇ', &['Ⴇ']), ('ⴈ', &['Ⴈ']), ('ⴉ', &['Ⴉ']),
+  ('ⴊ', &['Ⴊ']), ('ⴋ', &['Ⴋ']), ('ⴌ', &['Ⴌ']), ('ⴍ', &['Ⴍ']),
+  ('ⴎ', &['Ⴎ']), ('ⴏ', &['Ⴏ']), ('ⴐ', &['Ⴐ']), ('ⴑ', &['Ⴑ']),
+  ('ⴒ', &['Ⴒ']), ('ⴓ', &['Ⴓ']), ('ⴔ', &['Ⴔ']), ('ⴕ', &['Ⴕ']),
+  ('ⴖ', &['Ⴖ']), ('ⴗ', &['Ⴗ']), ('ⴘ', &['Ⴘ']), ('ⴙ', &['Ⴙ']),
+  ('ⴚ', &['Ⴚ']), ('ⴛ', &['Ⴛ']), ('ⴜ', &['Ⴜ']), ('ⴝ', &['Ⴝ']),
+  ('ⴞ', &['Ⴞ']), ('ⴟ', &['Ⴟ']), ('ⴠ', &['Ⴠ']), ('ⴡ', &['Ⴡ']),
+  ('ⴢ', &['Ⴢ']), ('ⴣ', &['Ⴣ']), ('ⴤ', &['Ⴤ']), ('ⴥ', &['Ⴥ']),
+  ('ⴧ', &['Ⴧ']), ('ⴭ', &['Ⴭ']), ('Ꙁ', &['ꙁ']), ('ꙁ', &['Ꙁ']),
+  ('Ꙃ', &['ꙃ']), ('ꙃ', &['Ꙃ']), ('Ꙅ', &['ꙅ']), ('ꙅ', &['Ꙅ']),
+  ('Ꙇ', &['ꙇ']), ('ꙇ', &['Ꙇ']), ('Ꙉ', &['ꙉ']), ('ꙉ', &['Ꙉ']),
+  ('Ꙋ', &['ᲈ', 'ꙋ', ]), ('ꙋ', &['ᲈ', 'Ꙋ', ]), ('Ꙍ', &['ꙍ']),
+  ('ꙍ', &['Ꙍ']), ('Ꙏ', &['ꙏ']), ('ꙏ', &['Ꙏ']), ('Ꙑ', &['ꙑ']),
+  ('ꙑ', &['Ꙑ']), ('Ꙓ', &['ꙓ']), ('ꙓ', &['Ꙓ']), ('Ꙕ', &['ꙕ']),
+  ('ꙕ', &['Ꙕ']), ('Ꙗ', &['ꙗ']), ('ꙗ', &['Ꙗ']), ('Ꙙ', &['ꙙ']),
+  ('ꙙ', &['Ꙙ']), ('Ꙛ', &['ꙛ']), ('ꙛ', &['Ꙛ']), ('Ꙝ', &['ꙝ']),
+  ('ꙝ', &['Ꙝ']), ('Ꙟ', &['ꙟ']), ('ꙟ', &['Ꙟ']), ('Ꙡ', &['ꙡ']),
+  ('ꙡ', &['Ꙡ']), ('Ꙣ', &['ꙣ']), ('ꙣ', &['Ꙣ']), ('Ꙥ', &['ꙥ']),
+  ('ꙥ', &['Ꙥ']), ('Ꙧ', &['ꙧ']), ('ꙧ', &['Ꙧ']), ('Ꙩ', &['ꙩ']),
+  ('ꙩ', &['Ꙩ']), ('Ꙫ', &['ꙫ']), ('ꙫ', &['Ꙫ']), ('Ꙭ', &['ꙭ']),
+  ('ꙭ', &['Ꙭ']), ('Ꚁ', &['ꚁ']), ('ꚁ', &['Ꚁ']), ('Ꚃ', &['ꚃ']),
+  ('ꚃ', &['Ꚃ']), ('Ꚅ', &['ꚅ']), ('ꚅ', &['Ꚅ']), ('Ꚇ', &['ꚇ']),
+  ('ꚇ', &['Ꚇ']), ('Ꚉ', &['ꚉ']), ('ꚉ', &['Ꚉ']), ('Ꚋ', &['ꚋ']),
+  ('ꚋ', &['Ꚋ']), ('Ꚍ', &['ꚍ']), ('ꚍ', &['Ꚍ']), ('Ꚏ', &['ꚏ']),
+  ('ꚏ', &['Ꚏ']), ('Ꚑ', &['ꚑ']), ('ꚑ', &['Ꚑ']), ('Ꚓ', &['ꚓ']),
+  ('ꚓ', &['Ꚓ']), ('Ꚕ', &['ꚕ']), ('ꚕ', &['Ꚕ']), ('Ꚗ', &['ꚗ']),
+  ('ꚗ', &['Ꚗ']), ('Ꚙ', &['ꚙ']), ('ꚙ', &['Ꚙ']), ('Ꚛ', &['ꚛ']),
+  ('ꚛ', &['Ꚛ']), ('Ꜣ', &['ꜣ']), ('ꜣ', &['Ꜣ']), ('Ꜥ', &['ꜥ']),
+  ('ꜥ', &['Ꜥ']), ('Ꜧ', &['ꜧ']), ('ꜧ', &['Ꜧ']), ('Ꜩ', &['ꜩ']),
+  ('ꜩ', &['Ꜩ']), ('Ꜫ', &['ꜫ']), ('ꜫ', &['Ꜫ']), ('Ꜭ', &['ꜭ']),
+  ('ꜭ', &['Ꜭ']), ('Ꜯ', &['ꜯ']), ('ꜯ', &['Ꜯ']), ('Ꜳ', &['ꜳ']),
+  ('ꜳ', &['Ꜳ']), ('Ꜵ', &['ꜵ']), ('ꜵ', &['Ꜵ']), ('Ꜷ', &['ꜷ']),
+  ('ꜷ', &['Ꜷ']), ('Ꜹ', &['ꜹ']), ('ꜹ', &['Ꜹ']), ('Ꜻ', &['ꜻ']),
+  ('ꜻ', &['Ꜻ']), ('Ꜽ', &['ꜽ']), ('ꜽ', &['Ꜽ']), ('Ꜿ', &['ꜿ']),
+  ('ꜿ', &['Ꜿ']), ('Ꝁ', &['ꝁ']), ('ꝁ', &['Ꝁ']), ('Ꝃ', &['ꝃ']),
+  ('ꝃ', &['Ꝃ']), ('Ꝅ', &['ꝅ']), ('ꝅ', &['Ꝅ']), ('Ꝇ', &['ꝇ']),
+  ('ꝇ', &['Ꝇ']), ('Ꝉ', &['ꝉ']), ('ꝉ', &['Ꝉ']), ('Ꝋ', &['ꝋ']),
+  ('ꝋ', &['Ꝋ']), ('Ꝍ', &['ꝍ']), ('ꝍ', &['Ꝍ']), ('Ꝏ', &['ꝏ']),
+  ('ꝏ', &['Ꝏ']), ('Ꝑ', &['ꝑ']), ('ꝑ', &['Ꝑ']), ('Ꝓ', &['ꝓ']),
+  ('ꝓ', &['Ꝓ']), ('Ꝕ', &['ꝕ']), ('ꝕ', &['Ꝕ']), ('Ꝗ', &['ꝗ']),
+  ('ꝗ', &['Ꝗ']), ('Ꝙ', &['ꝙ']), ('ꝙ', &['Ꝙ']), ('Ꝛ', &['ꝛ']),
+  ('ꝛ', &['Ꝛ']), ('Ꝝ', &['ꝝ']), ('ꝝ', &['Ꝝ']), ('Ꝟ', &['ꝟ']),
+  ('ꝟ', &['Ꝟ']), ('Ꝡ', &['ꝡ']), ('ꝡ', &['Ꝡ']), ('Ꝣ', &['ꝣ']),
+  ('ꝣ', &['Ꝣ']), ('Ꝥ', &['ꝥ']), ('ꝥ', &['Ꝥ']), ('Ꝧ', &['ꝧ']),
+  ('ꝧ', &['Ꝧ']), ('Ꝩ', &['ꝩ']), ('ꝩ', &['Ꝩ']), ('Ꝫ', &['ꝫ']),
+  ('ꝫ', &['Ꝫ']), ('Ꝭ', &['ꝭ']), ('ꝭ', &['Ꝭ']), ('Ꝯ', &['ꝯ']),
+  ('ꝯ', &['Ꝯ']), ('Ꝺ', &['ꝺ']), ('ꝺ', &['Ꝺ']), ('Ꝼ', &['ꝼ']),
+  ('ꝼ', &['Ꝼ']), ('Ᵹ', &['ᵹ']), ('Ꝿ', &['ꝿ']), ('ꝿ', &['Ꝿ']),
+  ('Ꞁ', &['ꞁ']), ('ꞁ', &['Ꞁ']), ('Ꞃ', &['ꞃ']), ('ꞃ', &['Ꞃ']),
+  ('Ꞅ', &['ꞅ']), ('ꞅ', &['Ꞅ']), ('Ꞇ', &['ꞇ']), ('ꞇ', &['Ꞇ']),
+  ('Ꞌ', &['ꞌ']), ('ꞌ', &['Ꞌ']), ('Ɥ', &['ɥ']), ('Ꞑ', &['ꞑ']),
+  ('ꞑ', &['Ꞑ']), ('Ꞓ', &['ꞓ']), ('ꞓ', &['Ꞓ']), ('Ꞗ', &['ꞗ']),
+  ('ꞗ', &['Ꞗ']), ('Ꞙ', &['ꞙ']), ('ꞙ', &['Ꞙ']), ('Ꞛ', &['ꞛ']),
+  ('ꞛ', &['Ꞛ']), ('Ꞝ', &['ꞝ']), ('ꞝ', &['Ꞝ']), ('Ꞟ', &['ꞟ']),
+  ('ꞟ', &['Ꞟ']), ('Ꞡ', &['ꞡ']), ('ꞡ', &['Ꞡ']), ('Ꞣ', &['ꞣ']),
+  ('ꞣ', &['Ꞣ']), ('Ꞥ', &['ꞥ']), ('ꞥ', &['Ꞥ']), ('Ꞧ', &['ꞧ']),
+  ('ꞧ', &['Ꞧ']), ('Ꞩ', &['ꞩ']), ('ꞩ', &['Ꞩ']), ('Ɦ', &['ɦ']),
+  ('Ɜ', &['ɜ']), ('Ɡ', &['ɡ']), ('Ɬ', &['ɬ']), ('Ɪ', &['ɪ']),
+  ('Ʞ', &['ʞ']), ('Ʇ', &['ʇ']), ('Ʝ', &['ʝ']), ('Ꭓ', &['ꭓ']),
+  ('Ꞵ', &['ꞵ']), ('ꞵ', &['Ꞵ']), ('Ꞷ', &['ꞷ']), ('ꞷ', &['Ꞷ']),
+  ('ꭓ', &['Ꭓ']), ('ꭰ', &['Ꭰ']), ('ꭱ', &['Ꭱ']), ('ꭲ', &['Ꭲ']),
+  ('ꭳ', &['Ꭳ']), ('ꭴ', &['Ꭴ']), ('ꭵ', &['Ꭵ']), ('ꭶ', &['Ꭶ']),
+  ('ꭷ', &['Ꭷ']), ('ꭸ', &['Ꭸ']), ('ꭹ', &['Ꭹ']), ('ꭺ', &['Ꭺ']),
+  ('ꭻ', &['Ꭻ']), ('ꭼ', &['Ꭼ']), ('ꭽ', &['Ꭽ']), ('ꭾ', &['Ꭾ']),
+  ('ꭿ', &['Ꭿ']), ('ꮀ', &['Ꮀ']), ('ꮁ', &['Ꮁ']), ('ꮂ', &['Ꮂ']),
+  ('ꮃ', &['Ꮃ']), ('ꮄ', &['Ꮄ']), ('ꮅ', &['Ꮅ']), ('ꮆ', &['Ꮆ']),
+  ('ꮇ', &['Ꮇ']), ('ꮈ', &['Ꮈ']), ('ꮉ', &['Ꮉ']), ('ꮊ', &['Ꮊ']),
+  ('ꮋ', &['Ꮋ']), ('ꮌ', &['Ꮌ']), ('ꮍ', &['Ꮍ']), ('ꮎ', &['Ꮎ']),
+  ('ꮏ', &['Ꮏ']), ('ꮐ', &['Ꮐ']), ('ꮑ', &['Ꮑ']), ('ꮒ', &['Ꮒ']),
+  ('ꮓ', &['Ꮓ']), ('ꮔ', &['Ꮔ']), ('ꮕ', &['Ꮕ']), ('ꮖ', &['Ꮖ']),
+  ('ꮗ', &['Ꮗ']), ('ꮘ', &['Ꮘ']), ('ꮙ', &['Ꮙ']), ('ꮚ', &['Ꮚ']),
+  ('ꮛ', &['Ꮛ']), ('ꮜ', &['Ꮜ']), ('ꮝ', &['Ꮝ']), ('ꮞ', &['Ꮞ']),
+  ('ꮟ', &['Ꮟ']), ('ꮠ', &['Ꮠ']), ('ꮡ', &['Ꮡ']), ('ꮢ', &['Ꮢ']),
+  ('ꮣ', &['Ꮣ']), ('ꮤ', &['Ꮤ']), ('ꮥ', &['Ꮥ']), ('ꮦ', &['Ꮦ']),
+  ('ꮧ', &['Ꮧ']), ('ꮨ', &['Ꮨ']), ('ꮩ', &['Ꮩ']), ('ꮪ', &['Ꮪ']),
+  ('ꮫ', &['Ꮫ']), ('ꮬ', &['Ꮬ']), ('ꮭ', &['Ꮭ']), ('ꮮ', &['Ꮮ']),
+  ('ꮯ', &['Ꮯ']), ('ꮰ', &['Ꮰ']), ('ꮱ', &['Ꮱ']), ('ꮲ', &['Ꮲ']),
+  ('ꮳ', &['Ꮳ']), ('ꮴ', &['Ꮴ']), ('ꮵ', &['Ꮵ']), ('ꮶ', &['Ꮶ']),
+  ('ꮷ', &['Ꮷ']), ('ꮸ', &['Ꮸ']), ('ꮹ', &['Ꮹ']), ('ꮺ', &['Ꮺ']),
+  ('ꮻ', &['Ꮻ']), ('ꮼ', &['Ꮼ']), ('ꮽ', &['Ꮽ']), ('ꮾ', &['Ꮾ']),
+  ('ꮿ', &['Ꮿ']), ('A', &['a']), ('B', &['b']), ('C', &['c']),
+  ('D', &['d']), ('E', &['e']), ('F', &['f']), ('G', &['g']),
+  ('H', &['h']), ('I', &['i']), ('J', &['j']), ('K', &['k']),
+  ('L', &['l']), ('M', &['m']), ('N', &['n']), ('O', &['o']),
+  ('P', &['p']), ('Q', &['q']), ('R', &['r']), ('S', &['s']),
+  ('T', &['t']), ('U', &['u']), ('V', &['v']), ('W', &['w']),
+  ('X', &['x']), ('Y', &['y']), ('Z', &['z']), ('a', &['A']),
+  ('b', &['B']), ('c', &['C']), ('d', &['D']), ('e', &['E']),
+  ('f', &['F']), ('g', &['G']), ('h', &['H']), ('i', &['I']),
+  ('j', &['J']), ('k', &['K']), ('l', &['L']), ('m', &['M']),
+  ('n', &['N']), ('o', &['O']), ('p', &['P']), ('q', &['Q']),
+  ('r', &['R']), ('s', &['S']), ('t', &['T']), ('u', &['U']),
+  ('v', &['V']), ('w', &['W']), ('x', &['X']), ('y', &['Y']),
+  ('z', &['Z']), ('𐐀', &['𐐨']), ('𐐁', &['𐐩']), ('𐐂', &[
+  '𐐪']), ('𐐃', &['𐐫']), ('𐐄', &['𐐬']), ('𐐅', &['𐐭']),
+  ('𐐆', &['𐐮']), ('𐐇', &['𐐯']), ('𐐈', &['𐐰']), ('𐐉', &[
+  '𐐱']), ('𐐊', &['𐐲']), ('𐐋', &['𐐳']), ('𐐌', &['𐐴']),
+  ('𐐍', &['𐐵']), ('𐐎', &['𐐶']), ('𐐏', &['𐐷']), ('𐐐', &[
+  '𐐸']), ('𐐑', &['𐐹']), ('𐐒', &['𐐺']), ('𐐓', &['𐐻']),
+  ('𐐔', &['𐐼']), ('𐐕', &['𐐽']), ('𐐖', &['𐐾']), ('𐐗', &[
+  '𐐿']), ('𐐘', &['𐑀']), ('𐐙', &['𐑁']), ('𐐚', &['𐑂']),
+  ('𐐛', &['𐑃']), ('𐐜', &['𐑄']), ('𐐝', &['𐑅']), ('𐐞', &[
+  '𐑆']), ('𐐟', &['𐑇']), ('𐐠', &['𐑈']), ('𐐡', &['𐑉']),
+  ('𐐢', &['𐑊']), ('𐐣', &['𐑋']), ('𐐤', &['𐑌']), ('𐐥', &[
+  '𐑍']), ('𐐦', &['𐑎']), ('𐐧', &['𐑏']), ('𐐨', &['𐐀']),
+  ('𐐩', &['𐐁']), ('𐐪', &['𐐂']), ('𐐫', &['𐐃']), ('𐐬', &[
+  '𐐄']), ('𐐭', &['𐐅']), ('𐐮', &['𐐆']), ('𐐯', &['𐐇']),
+  ('𐐰', &['𐐈']), ('𐐱', &['𐐉']), ('𐐲', &['𐐊']), ('𐐳', &[
+  '𐐋']), ('𐐴', &['𐐌']), ('𐐵', &['𐐍']), ('𐐶', &['𐐎']),
+  ('𐐷', &['𐐏']), ('𐐸', &['𐐐']), ('𐐹', &['𐐑']), ('𐐺', &[
+  '𐐒']), ('𐐻', &['𐐓']), ('𐐼', &['𐐔']), ('𐐽', &['𐐕']),
+  ('𐐾', &['𐐖']), ('𐐿', &['𐐗']), ('𐑀', &['𐐘']), ('𐑁', &[
+  '𐐙']), ('𐑂', &['𐐚']), ('𐑃', &['𐐛']), ('𐑄', &['𐐜']),
+  ('𐑅', &['𐐝']), ('𐑆', &['𐐞']), ('𐑇', &['𐐟']), ('𐑈', &[
+  '𐐠']), ('𐑉', &['𐐡']), ('𐑊', &['𐐢']), ('𐑋', &['𐐣']),
+  ('𐑌', &['𐐤']), ('𐑍', &['𐐥']), ('𐑎', &['𐐦']), ('𐑏', &[
+  '𐐧']), ('𐒰', &['𐓘']), ('𐒱', &['𐓙']), ('𐒲', &['𐓚']),
+  ('𐒳', &['𐓛']), ('𐒴', &['𐓜']), ('𐒵', &['𐓝']), ('𐒶', &[
+  '𐓞']), ('𐒷', &['𐓟']), ('𐒸', &['𐓠']), ('𐒹', &['𐓡']),
+  ('𐒺', &['𐓢']), ('𐒻', &['𐓣']), ('𐒼', &['𐓤']), ('𐒽', &[
+  '𐓥']), ('𐒾', &['𐓦']), ('𐒿', &['𐓧']), ('𐓀', &['𐓨']),
+  ('𐓁', &['𐓩']), ('𐓂', &['𐓪']), ('𐓃', &['𐓫']), ('𐓄', &[
+  '𐓬']), ('𐓅', &['𐓭']), ('𐓆', &['𐓮']), ('𐓇', &['𐓯']),
+  ('𐓈', &['𐓰']), ('𐓉', &['𐓱']), ('𐓊', &['𐓲']), ('𐓋', &[
+  '𐓳']), ('𐓌', &['𐓴']), ('𐓍', &['𐓵']), ('𐓎', &['𐓶']),
+  ('𐓏', &['𐓷']), ('𐓐', &['𐓸']), ('𐓑', &['𐓹']), ('𐓒', &[
+  '𐓺']), ('𐓓', &['𐓻']), ('𐓘', &['𐒰']), ('𐓙', &['𐒱']),
+  ('𐓚', &['𐒲']), ('𐓛', &['𐒳']), ('𐓜', &['𐒴']), ('𐓝', &[
+  '𐒵']), ('𐓞', &['𐒶']), ('𐓟', &['𐒷']), ('𐓠', &['𐒸']),
+  ('𐓡', &['𐒹']), ('𐓢', &['𐒺']), ('𐓣', &['𐒻']), ('𐓤', &[
+  '𐒼']), ('𐓥', &['𐒽']), ('𐓦', &['𐒾']), ('𐓧', &['𐒿']),
+  ('𐓨', &['𐓀']), ('𐓩', &['𐓁']), ('𐓪', &['𐓂']), ('𐓫', &[
+  '𐓃']), ('𐓬', &['𐓄']), ('𐓭', &['𐓅']), ('𐓮', &['𐓆']),
+  ('𐓯', &['𐓇']), ('𐓰', &['𐓈']), ('𐓱', &['𐓉']), ('𐓲', &[
+  '𐓊']), ('𐓳', &['𐓋']), ('𐓴', &['𐓌']), ('𐓵', &['𐓍']),
+  ('𐓶', &['𐓎']), ('𐓷', &['𐓏']), ('𐓸', &['𐓐']), ('𐓹', &[
+  '𐓑']), ('𐓺', &['𐓒']), ('𐓻', &['𐓓']), ('𐲀', &['𐳀']),
+  ('𐲁', &['𐳁']), ('𐲂', &['𐳂']), ('𐲃', &['𐳃']), ('𐲄', &[
+  '𐳄']), ('𐲅', &['𐳅']), ('𐲆', &['𐳆']), ('𐲇', &['𐳇']),
+  ('𐲈', &['𐳈']), ('𐲉', &['𐳉']), ('𐲊', &['𐳊']), ('𐲋', &[
+  '𐳋']), ('𐲌', &['𐳌']), ('𐲍', &['𐳍']), ('𐲎', &['𐳎']),
+  ('𐲏', &['𐳏']), ('𐲐', &['𐳐']), ('𐲑', &['𐳑']), ('𐲒', &[
+  '𐳒']), ('𐲓', &['𐳓']), ('𐲔', &['𐳔']), ('𐲕', &['𐳕']),
+  ('𐲖', &['𐳖']), ('𐲗', &['𐳗']), ('𐲘', &['𐳘']), ('𐲙', &[
+  '𐳙']), ('𐲚', &['𐳚']), ('𐲛', &['𐳛']), ('𐲜', &['𐳜']),
+  ('𐲝', &['𐳝']), ('𐲞', &['𐳞']), ('𐲟', &['𐳟']), ('𐲠', &[
+  '𐳠']), ('𐲡', &['𐳡']), ('𐲢', &['𐳢']), ('𐲣', &['𐳣']),
+  ('𐲤', &['𐳤']), ('𐲥', &['𐳥']), ('𐲦', &['𐳦']), ('𐲧', &[
+  '𐳧']), ('𐲨', &['𐳨']), ('𐲩', &['𐳩']), ('𐲪', &['𐳪']),
+  ('𐲫', &['𐳫']), ('𐲬', &['𐳬']), ('𐲭', &['𐳭']), ('𐲮', &[
+  '𐳮']), ('𐲯', &['𐳯']), ('𐲰', &['𐳰']), ('𐲱', &['𐳱']),
+  ('𐲲', &['𐳲']), ('𐳀', &['𐲀']), ('𐳁', &['𐲁']), ('𐳂', &[
+  '𐲂']), ('𐳃', &['𐲃']), ('𐳄', &['𐲄']), ('𐳅', &['𐲅']),
+  ('𐳆', &['𐲆']), ('𐳇', &['𐲇']), ('𐳈', &['𐲈']), ('𐳉', &[
+  '𐲉']), ('𐳊', &['𐲊']), ('𐳋', &['𐲋']), ('𐳌', &['𐲌']),
+  ('𐳍', &['𐲍']), ('𐳎', &['𐲎']), ('𐳏', &['𐲏']), ('𐳐', &[
+  '𐲐']), ('𐳑', &['𐲑']), ('𐳒', &['𐲒']), ('𐳓', &['𐲓']),
+  ('𐳔', &['𐲔']), ('𐳕', &['𐲕']), ('𐳖', &['𐲖']), ('𐳗', &[
+  '𐲗']), ('𐳘', &['𐲘']), ('𐳙', &['𐲙']), ('𐳚', &['𐲚']),
+  ('𐳛', &['𐲛']), ('𐳜', &['𐲜']), ('𐳝', &['𐲝']), ('𐳞', &[
+  '𐲞']), ('𐳟', &['𐲟']), ('𐳠', &['𐲠']), ('𐳡', &['𐲡']),
+  ('𐳢', &['𐲢']), ('𐳣', &['𐲣']), ('𐳤', &['𐲤']), ('𐳥', &[
+  '𐲥']), ('𐳦', &['𐲦']), ('𐳧', &['𐲧']), ('𐳨', &['𐲨']),
+  ('𐳩', &['𐲩']), ('𐳪', &['𐲪']), ('𐳫', &['𐲫']), ('𐳬', &[
+  '𐲬']), ('𐳭', &['𐲭']), ('𐳮', &['𐲮']), ('𐳯', &['𐲯']),
+  ('𐳰', &['𐲰']), ('𐳱', &['𐲱']), ('𐳲', &['𐲲']), ('𑢠', &[
+  '𑣀']), ('𑢡', &['𑣁']), ('𑢢', &['𑣂']), ('𑢣', &['𑣃']),
+  ('𑢤', &['𑣄']), ('𑢥', &['𑣅']), ('𑢦', &['𑣆']), ('𑢧', &[
+  '𑣇']), ('𑢨', &['𑣈']), ('𑢩', &['𑣉']), ('𑢪', &['𑣊']),
+  ('𑢫', &['𑣋']), ('𑢬', &['𑣌']), ('𑢭', &['𑣍']), ('𑢮', &[
+  '𑣎']), ('𑢯', &['𑣏']), ('𑢰', &['𑣐']), ('𑢱', &['𑣑']),
+  ('𑢲', &['𑣒']), ('𑢳', &['𑣓']), ('𑢴', &['𑣔']), ('𑢵', &[
+  '𑣕']), ('𑢶', &['𑣖']), ('𑢷', &['𑣗']), ('𑢸', &['𑣘']),
+  ('𑢹', &['𑣙']), ('𑢺', &['𑣚']), ('𑢻', &['𑣛']), ('𑢼', &[
+  '𑣜']), ('𑢽', &['𑣝']), ('𑢾', &['𑣞']), ('𑢿', &['𑣟']),
+  ('𑣀', &['𑢠']), ('𑣁', &['𑢡']), ('𑣂', &['𑢢']), ('𑣃', &[
+  '𑢣']), ('𑣄', &['𑢤']), ('𑣅', &['𑢥']), ('𑣆', &['𑢦']),
+  ('𑣇', &['𑢧']), ('𑣈', &['𑢨']), ('𑣉', &['𑢩']), ('𑣊', &[
+  '𑢪']), ('𑣋', &['𑢫']), ('𑣌', &['𑢬']), ('𑣍', &['𑢭']),
+  ('𑣎', &['𑢮']), ('𑣏', &['𑢯']), ('𑣐', &['𑢰']), ('𑣑', &[
+  '𑢱']), ('𑣒', &['𑢲']), ('𑣓', &['𑢳']), ('𑣔', &['𑢴']),
+  ('𑣕', &['𑢵']), ('𑣖', &['𑢶']), ('𑣗', &['𑢷']), ('𑣘', &[
+  '𑢸']), ('𑣙', &['𑢹']), ('𑣚', &['𑢺']), ('𑣛', &['𑢻']),
+  ('𑣜', &['𑢼']), ('𑣝', &['𑢽']), ('𑣞', &['𑢾']), ('𑣟', &[
+  '𑢿']), ('𞤀', &['𞤢']), ('𞤁', &['𞤣']), ('𞤂', &['𞤤']),
+  ('𞤃', &['𞤥']), ('𞤄', &['𞤦']), ('𞤅', &['𞤧']), ('𞤆', &[
+  '𞤨']), ('𞤇', &['𞤩']), ('𞤈', &['𞤪']), ('𞤉', &['𞤫']),
+  ('𞤊', &['𞤬']), ('𞤋', &['𞤭']), ('𞤌', &['𞤮']), ('𞤍', &[
+  '𞤯']), ('𞤎', &['𞤰']), ('𞤏', &['𞤱']), ('𞤐', &['𞤲']),
+  ('𞤑', &['𞤳']), ('𞤒', &['𞤴']), ('𞤓', &['𞤵']), ('𞤔', &[
+  '𞤶']), ('𞤕', &['𞤷']), ('𞤖', &['𞤸']), ('𞤗', &['𞤹']),
+  ('𞤘', &['𞤺']), ('𞤙', &['𞤻']), ('𞤚', &['𞤼']), ('𞤛', &[
+  '𞤽']), ('𞤜', &['𞤾']), ('𞤝', &['𞤿']), ('𞤞', &['𞥀']),
+  ('𞤟', &['𞥁']), ('𞤠', &['𞥂']), ('𞤡', &['𞥃']), ('𞤢', &[
+  '𞤀']), ('𞤣', &['𞤁']), ('𞤤', &['𞤂']), ('𞤥', &['𞤃']),
+  ('𞤦', &['𞤄']), ('𞤧', &['𞤅']), ('𞤨', &['𞤆']), ('𞤩', &[
+  '𞤇']), ('𞤪', &['𞤈']), ('𞤫', &['𞤉']), ('𞤬', &['𞤊']),
+  ('𞤭', &['𞤋']), ('𞤮', &['𞤌']), ('𞤯', &['𞤍']), ('𞤰', &[
+  '𞤎']), ('𞤱', &['𞤏']), ('𞤲', &['𞤐']), ('𞤳', &['𞤑']),
+  ('𞤴', &['𞤒']), ('𞤵', &['𞤓']), ('𞤶', &['𞤔']), ('𞤷', &[
+  '𞤕']), ('𞤸', &['𞤖']), ('𞤹', &['𞤗']), ('𞤺', &['𞤘']),
+  ('𞤻', &['𞤙']), ('𞤼', &['𞤚']), ('𞤽', &['𞤛']), ('𞤾', &[
+  '𞤜']), ('𞤿', &['𞤝']), ('𞥀', &['𞤞']), ('𞥁', &['𞤟']),
+  ('𞥂', &['𞤠']), ('𞥃', &['𞤡']),
+];
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-syntax/src/unicode_tables/general_category.rs
@@ -0,0 +1,1844 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+//  ucd-generate general-category tmp/ucd-10.0.0/ --chars --exclude surrogate
+//
+// ucd-generate is available on crates.io.
+
+pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
+  ("Cased_Letter", CASED_LETTER), ("Close_Punctuation", CLOSE_PUNCTUATION),
+  ("Connector_Punctuation", CONNECTOR_PUNCTUATION), ("Control", CONTROL),
+  ("Currency_Symbol", CURRENCY_SYMBOL),
+  ("Dash_Punctuation", DASH_PUNCTUATION), ("Decimal_Number", DECIMAL_NUMBER),
+  ("Enclosing_Mark", ENCLOSING_MARK),
+  ("Final_Punctuation", FINAL_PUNCTUATION), ("Format", FORMAT),
+  ("Initial_Punctuation", INITIAL_PUNCTUATION), ("Letter", LETTER),
+  ("Letter_Number", LETTER_NUMBER), ("Line_Separator", LINE_SEPARATOR),
+  ("Lowercase_Letter", LOWERCASE_LETTER), ("Mark", MARK),
+  ("Math_Symbol", MATH_SYMBOL), ("Modifier_Letter", MODIFIER_LETTER),
+  ("Modifier_Symbol", MODIFIER_SYMBOL), ("Nonspacing_Mark", NONSPACING_MARK),
+  ("Number", NUMBER), ("Open_Punctuation", OPEN_PUNCTUATION),
+  ("Other", OTHER), ("Other_Letter", OTHER_LETTER),
+  ("Other_Number", OTHER_NUMBER), ("Other_Punctuation", OTHER_PUNCTUATION),
+  ("Other_Symbol", OTHER_SYMBOL),
+  ("Paragraph_Separator", PARAGRAPH_SEPARATOR), ("Private_Use", PRIVATE_USE),
+  ("Punctuation", PUNCTUATION), ("Separator", SEPARATOR),
+  ("Space_Separator", SPACE_SEPARATOR), ("Spacing_Mark", SPACING_MARK),
+  ("Symbol", SYMBOL), ("Titlecase_Letter", TITLECASE_LETTER),
+  ("Unassigned", UNASSIGNED), ("Uppercase_Letter", UPPERCASE_LETTER),
+];
+
+pub const CASED_LETTER: &'static [(char, char)] = &[
+  ('A', 'Z'), ('a', 'z'), ('µ', 'µ'), ('À', 'Ö'), ('Ø', 'ö'),
+  ('ø', 'ƺ'), ('Ƽ', 'ƿ'), ('DŽ', 'ʓ'), ('ʕ', 'ʯ'), ('Ͱ', 'ͳ'),
+  ('Ͷ', 'ͷ'), ('ͻ', 'ͽ'), ('Ϳ', 'Ϳ'), ('Ά', 'Ά'), ('Έ', 'Ί'),
+  ('Ό', 'Ό'), ('Ύ', 'Ρ'), ('Σ', 'ϵ'), ('Ϸ', 'ҁ'), ('Ҋ', 'ԯ'),
+  ('Ա', 'Ֆ'), ('ա', 'և'), ('Ⴀ', 'Ⴥ'), ('Ⴧ', 'Ⴧ'), ('Ⴭ', 'Ⴭ'),
+  ('Ꭰ', 'Ᏽ'), ('ᏸ', 'ᏽ'), ('ᲀ', 'ᲈ'), ('ᴀ', 'ᴫ'),
+  ('ᵫ', 'ᵷ'), ('ᵹ', 'ᶚ'), ('Ḁ', 'ἕ'), ('Ἐ', 'Ἕ'),
+  ('ἠ', 'ὅ'), ('Ὀ', 'Ὅ'), ('ὐ', 'ὗ'), ('Ὑ', 'Ὑ'),
+  ('Ὓ', 'Ὓ'), ('Ὕ', 'Ὕ'), ('Ὗ', 'ώ'), ('ᾀ', 'ᾴ'),
+  ('ᾶ', 'ᾼ'), ('ι', 'ι'), ('ῂ', 'ῄ'), ('ῆ', 'ῌ'),
+  ('ῐ', 'ΐ'), ('ῖ', 'Ί'), ('ῠ', 'Ῥ'), ('ῲ', 'ῴ'),
+  ('ῶ', 'ῼ'), ('ℂ', 'ℂ'), ('ℇ', 'ℇ'), ('ℊ', 'ℓ'),
+  ('ℕ', 'ℕ'), ('ℙ', 'ℝ'), ('ℤ', 'ℤ'), ('Ω', 'Ω'),
+  ('ℨ', 'ℨ'), ('K', 'ℭ'), ('ℯ', 'ℴ'), ('ℹ', 'ℹ'),
+  ('ℼ', 'ℿ'), ('ⅅ', 'ⅉ'), ('ⅎ', 'ⅎ'), ('Ↄ', 'ↄ'),
+  ('Ⰰ', 'Ⱞ'), ('ⰰ', 'ⱞ'), ('Ⱡ', 'ⱻ'), ('Ȿ', 'ⳤ'),
+  ('Ⳬ', 'ⳮ'), ('Ⳳ', 'ⳳ'), ('ⴀ', 'ⴥ'), ('ⴧ', 'ⴧ'),
+  ('ⴭ', 'ⴭ'), ('Ꙁ', 'ꙭ'), ('Ꚁ', 'ꚛ'), ('Ꜣ', 'ꝯ'),
+  ('ꝱ', 'ꞇ'), ('Ꞌ', 'ꞎ'), ('Ꞑ', 'Ɪ'), ('Ʞ', 'ꞷ'),
+  ('ꟺ', 'ꟺ'), ('ꬰ', 'ꭚ'), ('ꭠ', 'ꭥ'), ('ꭰ', 'ꮿ'),
+  ('ff', 'st'), ('ﬓ', 'ﬗ'), ('A', 'Z'), ('a', 'z'),
+  ('𐐀', '𐑏'), ('𐒰', '𐓓'), ('𐓘', '𐓻'), ('𐲀', '𐲲'),
+  ('𐳀', '𐳲'), ('𑢠', '𑣟'), ('𝐀', '𝑔'), ('𝑖', '𝒜'),
+  ('𝒞', '𝒟'), ('𝒢', '𝒢'), ('𝒥', '𝒦'), ('𝒩', '𝒬'),
+  ('𝒮', '𝒹'), ('𝒻', '𝒻'), ('𝒽', '𝓃'), ('𝓅', '𝔅'),
+  ('𝔇', '𝔊'), ('𝔍', '𝔔'), ('𝔖', '𝔜'), ('𝔞', '𝔹'),
+  ('𝔻', '𝔾'), ('𝕀', '𝕄'), ('𝕆', '𝕆'), ('𝕊', '𝕐'),
+  ('𝕒', '𝚥'), ('𝚨', '𝛀'), ('𝛂', '𝛚'), ('𝛜', '𝛺'),
+  ('𝛼', '𝜔'), ('𝜖', '𝜴'), ('𝜶', '𝝎'), ('𝝐', '𝝮'),
+  ('𝝰', '𝞈'), ('𝞊', '𝞨'), ('𝞪', '𝟂'), ('𝟄', '𝟋'),
+  ('𞤀', '𞥃'),
+];
+
+pub const CLOSE_PUNCTUATION: &'static [(char, char)] = &[
+  (')', ')'), (']', ']'), ('}', '}'), ('༻', '༻'), ('༽', '༽'),
+  ('᚜', '᚜'), ('⁆', '⁆'), ('⁾', '⁾'), ('₎', '₎'),
+  ('⌉', '⌉'), ('⌋', '⌋'), ('〉', '〉'), ('❩', '❩'),
+  ('❫', '❫'), ('❭', '❭'), ('❯', '❯'), ('❱', '❱'),
+  ('❳', '❳'), ('❵', '❵'), ('⟆', '⟆'), ('⟧', '⟧'),
+  ('⟩', '⟩'), ('⟫', '⟫'), ('⟭', '⟭'), ('⟯', '⟯'),
+  ('⦄', '⦄'), ('⦆', '⦆'), ('⦈', '⦈'), ('⦊', '⦊'),
+  ('⦌', '⦌'), ('⦎', '⦎'), ('⦐', '⦐'), ('⦒', '⦒'),
+  ('⦔', '⦔'), ('⦖', '⦖'), ('⦘', '⦘'), ('⧙', '⧙'),
+  ('⧛', '⧛'), ('⧽', '⧽'), ('⸣', '⸣'), ('⸥', '⸥'),
+  ('⸧', '⸧'), ('⸩', '⸩'), ('〉', '〉'), ('》', '》'),
+  ('」', '」'), ('』', '』'), ('】', '】'), ('〕', '〕'),
+  ('〗', '〗'), ('〙', '〙'), ('〛', '〛'), ('〞', '〟'),
+  ('﴾', '﴾'), ('︘', '︘'), ('︶', '︶'), ('︸', '︸'),
+  ('︺', '︺'), ('︼', '︼'), ('︾', '︾'), ('﹀', '﹀'),
+  ('﹂', '﹂'), ('﹄', '﹄'), ('﹈', '﹈'), ('﹚', '﹚'),
+  ('﹜', '﹜'), ('﹞', '﹞'), (')', ')'), (']', ']'),
+  ('}', '}'), ('⦆', '⦆'), ('」', '」'),
+];
+
+pub const CONNECTOR_PUNCTUATION: &'static [(char, char)] = &[
+  ('_', '_'), ('‿', '⁀'), ('⁔', '⁔'), ('︳', '︴'), ('﹍', '﹏'),
+  ('_', '_'),
+];
+
+pub const CONTROL: &'static [(char, char)] = &[
+  ('\u{0}', '\u{1f}'), ('\u{7f}', '\u{9f}'),
+];
+
+pub const CURRENCY_SYMBOL: &'static [(char, char)] = &[
+  ('$', '$'), ('¢', '¥'), ('֏', '֏'), ('؋', '؋'), ('৲', '৳'),
+  ('৻', '৻'), ('૱', '૱'), ('௹', '௹'), ('฿', '฿'),
+  ('៛', '៛'), ('₠', '₿'), ('꠸', '꠸'), ('﷼', '﷼'),
+  ('﹩', '﹩'), ('$', '$'), ('¢', '£'), ('¥', '₩'),
+];
+
+pub const DASH_PUNCTUATION: &'static [(char, char)] = &[
+  ('-', '-'), ('֊', '֊'), ('־', '־'), ('᐀', '᐀'), ('᠆', '᠆'),
+  ('‐', '―'), ('⸗', '⸗'), ('⸚', '⸚'), ('⸺', '⸻'),
+  ('⹀', '⹀'), ('〜', '〜'), ('〰', '〰'), ('゠', '゠'),
+  ('︱', '︲'), ('﹘', '﹘'), ('﹣', '﹣'), ('-', '-'),
+];
+
+pub const DECIMAL_NUMBER: &'static [(char, char)] = &[
+  ('0', '9'), ('٠', '٩'), ('۰', '۹'), ('߀', '߉'), ('०', '९'),
+  ('০', '৯'), ('੦', '੯'), ('૦', '૯'), ('୦', '୯'),
+  ('௦', '௯'), ('౦', '౯'), ('೦', '೯'), ('൦', '൯'),
+  ('෦', '෯'), ('๐', '๙'), ('໐', '໙'), ('༠', '༩'),
+  ('၀', '၉'), ('႐', '႙'), ('០', '៩'), ('᠐', '᠙'),
+  ('᥆', '᥏'), ('᧐', '᧙'), ('᪀', '᪉'), ('᪐', '᪙'),
+  ('᭐', '᭙'), ('᮰', '᮹'), ('᱀', '᱉'), ('᱐', '᱙'),
+  ('꘠', '꘩'), ('꣐', '꣙'), ('꤀', '꤉'), ('꧐', '꧙'),
+  ('꧰', '꧹'), ('꩐', '꩙'), ('꯰', '꯹'), ('0', '9'),
+  ('𐒠', '𐒩'), ('𑁦', '𑁯'), ('𑃰', '𑃹'), ('𑄶', '𑄿'),
+  ('𑇐', '𑇙'), ('𑋰', '𑋹'), ('𑑐', '𑑙'), ('𑓐', '𑓙'),
+  ('𑙐', '𑙙'), ('𑛀', '𑛉'), ('𑜰', '𑜹'), ('𑣠', '𑣩'),
+  ('𑱐', '𑱙'), ('𑵐', '𑵙'), ('𖩠', '𖩩'), ('𖭐', '𖭙'),
+  ('𝟎', '𝟿'), ('𞥐', '𞥙'),
+];
+
+pub const ENCLOSING_MARK: &'static [(char, char)] = &[
+  ('҈', '҉'), ('᪾', '᪾'), ('⃝', '⃠'), ('⃢', '⃤'),
+  ('꙰', '꙲'),
+];
+
+pub const FINAL_PUNCTUATION: &'static [(char, char)] = &[
+  ('»', '»'), ('’', '’'), ('”', '”'), ('›', '›'),
+  ('⸃', '⸃'), ('⸅', '⸅'), ('⸊', '⸊'), ('⸍', '⸍'),
+  ('⸝', '⸝'), ('⸡', '⸡'),
+];
+
+pub const FORMAT: &'static [(char, char)] = &[
+  ('\u{ad}', '\u{ad}'), ('\u{600}', '\u{605}'), ('\u{61c}', '\u{61c}'),
+  ('\u{6dd}', '\u{6dd}'), ('\u{70f}', '\u{70f}'), ('\u{8e2}', '\u{8e2}'),
+  ('\u{180e}', '\u{180e}'), ('\u{200b}', '\u{200f}'),
+  ('\u{202a}', '\u{202e}'), ('\u{2060}', '\u{2064}'),
+  ('\u{2066}', '\u{206f}'), ('\u{feff}', '\u{feff}'),
+  ('\u{fff9}', '\u{fffb}'), ('\u{110bd}', '\u{110bd}'),
+  ('\u{1bca0}', '\u{1bca3}'), ('\u{1d173}', '\u{1d17a}'),
+  ('\u{e0001}', '\u{e0001}'), ('\u{e0020}', '\u{e007f}'),
+];
+
+pub const INITIAL_PUNCTUATION: &'static [(char, char)] = &[
+  ('«', '«'), ('‘', '‘'), ('‛', '“'), ('‟', '‟'),
+  ('‹', '‹'), ('⸂', '⸂'), ('⸄', '⸄'), ('⸉', '⸉'),
+  ('⸌', '⸌'), ('⸜', '⸜'), ('⸠', '⸠'),
+];
+
+pub const LETTER: &'static [(char, char)] = &[
+  ('A', 'Z'), ('a', 'z'), ('ª', 'ª'), ('µ', 'µ'), ('º', 'º'),
+  ('À', 'Ö'), ('Ø', 'ö'), ('ø', 'ˁ'), ('ˆ', 'ˑ'), ('ˠ', 'ˤ'),
+  ('ˬ', 'ˬ'), ('ˮ', 'ˮ'), ('Ͱ', 'ʹ'), ('Ͷ', 'ͷ'), ('ͺ', 'ͽ'),
+  ('Ϳ', 'Ϳ'), ('Ά', 'Ά'), ('Έ', 'Ί'), ('Ό', 'Ό'), ('Ύ', 'Ρ'),
+  ('Σ', 'ϵ'), ('Ϸ', 'ҁ'), ('Ҋ', 'ԯ'), ('Ա', 'Ֆ'), ('ՙ', 'ՙ'),
+  ('ա', 'և'), ('א', 'ת'), ('װ', 'ײ'), ('ؠ', 'ي'), ('ٮ', 'ٯ'),
+  ('ٱ', 'ۓ'), ('ە', 'ە'), ('ۥ', 'ۦ'), ('ۮ', 'ۯ'), ('ۺ', 'ۼ'),
+  ('ۿ', 'ۿ'), ('ܐ', 'ܐ'), ('ܒ', 'ܯ'), ('ݍ', 'ޥ'), ('ޱ', 'ޱ'),
+  ('ߊ', 'ߪ'), ('ߴ', 'ߵ'), ('ߺ', 'ߺ'), ('ࠀ', 'ࠕ'), ('ࠚ', 'ࠚ'),
+  ('ࠤ', 'ࠤ'), ('ࠨ', 'ࠨ'), ('ࡀ', 'ࡘ'), ('ࡠ', 'ࡪ'),
+  ('ࢠ', 'ࢴ'), ('ࢶ', 'ࢽ'), ('ऄ', 'ह'), ('ऽ', 'ऽ'),
+  ('ॐ', 'ॐ'), ('क़', 'ॡ'), ('ॱ', 'ঀ'), ('অ', 'ঌ'),
+  ('এ', 'ঐ'), ('ও', 'ন'), ('প', 'র'), ('ল', 'ল'),
+  ('শ', 'হ'), ('ঽ', 'ঽ'), ('ৎ', 'ৎ'), ('ড়', 'ঢ়'),
+  ('য়', 'ৡ'), ('ৰ', 'ৱ'), ('ৼ', 'ৼ'), ('ਅ', 'ਊ'),
+  ('ਏ', 'ਐ'), ('ਓ', 'ਨ'), ('ਪ', 'ਰ'), ('ਲ', 'ਲ਼'),
+  ('ਵ', 'ਸ਼'), ('ਸ', 'ਹ'), ('ਖ਼', 'ੜ'), ('ਫ਼', 'ਫ਼'),
+  ('ੲ', 'ੴ'), ('અ', 'ઍ'), ('એ', 'ઑ'), ('ઓ', 'ન'),
+  ('પ', 'ર'), ('લ', 'ળ'), ('વ', 'હ'), ('ઽ', 'ઽ'),
+  ('ૐ', 'ૐ'), ('ૠ', 'ૡ'), ('ૹ', 'ૹ'), ('ଅ', 'ଌ'),
+  ('ଏ', 'ଐ'), ('ଓ', 'ନ'), ('ପ', 'ର'), ('ଲ', 'ଳ'),
+  ('ଵ', 'ହ'), ('ଽ', 'ଽ'), ('ଡ଼', 'ଢ଼'), ('ୟ', 'ୡ'),
+  ('ୱ', 'ୱ'), ('ஃ', 'ஃ'), ('அ', 'ஊ'), ('எ', 'ஐ'),
+  ('ஒ', 'க'), ('ங', 'ச'), ('ஜ', 'ஜ'), ('ஞ', 'ட'),
+  ('ண', 'த'), ('ந', 'ப'), ('ம', 'ஹ'), ('ௐ', 'ௐ'),
+  ('అ', 'ఌ'), ('ఎ', 'ఐ'), ('ఒ', 'న'), ('ప', 'హ'),
+  ('ఽ', 'ఽ'), ('ౘ', 'ౚ'), ('ౠ', 'ౡ'), ('ಀ', 'ಀ'),
+  ('ಅ', 'ಌ'), ('ಎ', 'ಐ'), ('ಒ', 'ನ'), ('ಪ', 'ಳ'),
+  ('ವ', 'ಹ'), ('ಽ', 'ಽ'), ('ೞ', 'ೞ'), ('ೠ', 'ೡ'),
+  ('ೱ', 'ೲ'), ('അ', 'ഌ'), ('എ', 'ഐ'), ('ഒ', 'ഺ'),
+  ('ഽ', 'ഽ'), ('ൎ', 'ൎ'), ('ൔ', 'ൖ'), ('ൟ', 'ൡ'),
+  ('ൺ', 'ൿ'), ('අ', 'ඖ'), ('ක', 'න'), ('ඳ', 'ර'),
+  ('ල', 'ල'), ('ව', 'ෆ'), ('ก', 'ะ'), ('า', 'ำ'),
+  ('เ', 'ๆ'), ('ກ', 'ຂ'), ('ຄ', 'ຄ'), ('ງ', 'ຈ'),
+  ('ຊ', 'ຊ'), ('ຍ', 'ຍ'), ('ດ', 'ທ'), ('ນ', 'ຟ'),
+  ('ມ', 'ຣ'), ('ລ', 'ລ'), ('ວ', 'ວ'), ('ສ', 'ຫ'),
+  ('ອ', 'ະ'), ('າ', 'ຳ'), ('ຽ', 'ຽ'), ('ເ', 'ໄ'),
+  ('ໆ', 'ໆ'), ('ໜ', 'ໟ'), ('ༀ', 'ༀ'), ('ཀ', 'ཇ'),
+  ('ཉ', 'ཬ'), ('ྈ', 'ྌ'), ('က', 'ဪ'), ('ဿ', 'ဿ'),
+  ('ၐ', 'ၕ'), ('ၚ', 'ၝ'), ('ၡ', 'ၡ'), ('ၥ', 'ၦ'),
+  ('ၮ', 'ၰ'), ('ၵ', 'ႁ'), ('ႎ', 'ႎ'), ('Ⴀ', 'Ⴥ'),
+  ('Ⴧ', 'Ⴧ'), ('Ⴭ', 'Ⴭ'), ('ა', 'ჺ'), ('ჼ', 'ቈ'),
+  ('ቊ', 'ቍ'), ('ቐ', 'ቖ'), ('ቘ', 'ቘ'), ('ቚ', 'ቝ'),
+  ('በ', 'ኈ'), ('ኊ', 'ኍ'), ('ነ', 'ኰ'), ('ኲ', 'ኵ'),
+  ('ኸ', 'ኾ'), ('ዀ', 'ዀ'), ('ዂ', 'ዅ'), ('ወ', 'ዖ'),
+  ('ዘ', 'ጐ'), ('ጒ', 'ጕ'), ('ጘ', 'ፚ'), ('ᎀ', 'ᎏ'),
+  ('Ꭰ', 'Ᏽ'), ('ᏸ', 'ᏽ'), ('ᐁ', 'ᙬ'), ('ᙯ', 'ᙿ'),
+  ('ᚁ', 'ᚚ'), ('ᚠ', 'ᛪ'), ('ᛱ', 'ᛸ'), ('ᜀ', 'ᜌ'),
+  ('ᜎ', 'ᜑ'), ('ᜠ', 'ᜱ'), ('ᝀ', 'ᝑ'), ('ᝠ', 'ᝬ'),
+  ('ᝮ', 'ᝰ'), ('ក', 'ឳ'), ('ៗ', 'ៗ'), ('ៜ', 'ៜ'),
+  ('ᠠ', 'ᡷ'), ('ᢀ', 'ᢄ'), ('ᢇ', 'ᢨ'), ('ᢪ', 'ᢪ'),
+  ('ᢰ', 'ᣵ'), ('ᤀ', 'ᤞ'), ('ᥐ', 'ᥭ'), ('ᥰ', 'ᥴ'),
+  ('ᦀ', 'ᦫ'), ('ᦰ', 'ᧉ'), ('ᨀ', 'ᨖ'), ('ᨠ', 'ᩔ'),
+  ('ᪧ', 'ᪧ'), ('ᬅ', 'ᬳ'), ('ᭅ', 'ᭋ'), ('ᮃ', 'ᮠ'),
+  ('ᮮ', 'ᮯ'), ('ᮺ', 'ᯥ'), ('ᰀ', 'ᰣ'), ('ᱍ', 'ᱏ'),
+  ('ᱚ', 'ᱽ'), ('ᲀ', 'ᲈ'), ('ᳩ', 'ᳬ'), ('ᳮ', 'ᳱ'),
+  ('ᳵ', 'ᳶ'), ('ᴀ', 'ᶿ'), ('Ḁ', 'ἕ'), ('Ἐ', 'Ἕ'),
+  ('ἠ', 'ὅ'), ('Ὀ', 'Ὅ'), ('ὐ', 'ὗ'), ('Ὑ', 'Ὑ'),
+  ('Ὓ', 'Ὓ'), ('Ὕ', 'Ὕ'), ('Ὗ', 'ώ'), ('ᾀ', 'ᾴ'),
+  ('ᾶ', 'ᾼ'), ('ι', 'ι'), ('ῂ', 'ῄ'), ('ῆ', 'ῌ'),
+  ('ῐ', 'ΐ'), ('ῖ', 'Ί'), ('ῠ', 'Ῥ'), ('ῲ', 'ῴ'),
+  ('ῶ', 'ῼ'), ('ⁱ', 'ⁱ'), ('ⁿ', 'ⁿ'), ('ₐ', 'ₜ'),
+  ('ℂ', 'ℂ'), ('ℇ', 'ℇ'), ('ℊ', 'ℓ'), ('ℕ', 'ℕ'),
+  ('ℙ', 'ℝ'), ('ℤ', 'ℤ'), ('Ω', 'Ω'), ('ℨ', 'ℨ'),
+  ('K', 'ℭ'), ('ℯ', 'ℹ'), ('ℼ', 'ℿ'), ('ⅅ', 'ⅉ'),
+  ('ⅎ', 'ⅎ'), ('Ↄ', 'ↄ'), ('Ⰰ', 'Ⱞ'), ('ⰰ', 'ⱞ'),
+  ('Ⱡ', 'ⳤ'), ('Ⳬ', 'ⳮ'), ('Ⳳ', 'ⳳ'), ('ⴀ', 'ⴥ'),
+  ('ⴧ', 'ⴧ'), ('ⴭ', 'ⴭ'), ('ⴰ', 'ⵧ'), ('ⵯ', 'ⵯ'),
+  ('ⶀ', 'ⶖ'), ('ⶠ', 'ⶦ'), ('ⶨ', 'ⶮ'), ('ⶰ', 'ⶶ'),
+  ('ⶸ', 'ⶾ'), ('ⷀ', 'ⷆ'), ('ⷈ', 'ⷎ'), ('ⷐ', 'ⷖ'),
+  ('ⷘ', 'ⷞ'), ('ⸯ', 'ⸯ'), ('々', '〆'), ('〱', '〵'),
+  ('〻', '〼'), ('ぁ', 'ゖ'), ('ゝ', 'ゟ'), ('ァ', 'ヺ'),
+  ('ー', 'ヿ'), ('ㄅ', 'ㄮ'), ('ㄱ', 'ㆎ'), ('ㆠ', 'ㆺ'),
+  ('ㇰ', 'ㇿ'), ('㐀', '䶵'), ('一', '鿪'), ('ꀀ', 'ꒌ'),
+  ('ꓐ', 'ꓽ'), ('ꔀ', 'ꘌ'), ('ꘐ', 'ꘟ'), ('ꘪ', 'ꘫ'),
+  ('Ꙁ', 'ꙮ'), ('ꙿ', 'ꚝ'), ('ꚠ', 'ꛥ'), ('ꜗ', 'ꜟ'),
+  ('Ꜣ', 'ꞈ'), ('Ꞌ', 'Ɪ'), ('Ʞ', 'ꞷ'), ('ꟷ', 'ꠁ'),
+  ('ꠃ', 'ꠅ'), ('ꠇ', 'ꠊ'), ('ꠌ', 'ꠢ'), ('ꡀ', 'ꡳ'),
+  ('ꢂ', 'ꢳ'), ('ꣲ', 'ꣷ'), ('ꣻ', 'ꣻ'), ('ꣽ', 'ꣽ'),
+  ('ꤊ', 'ꤥ'), ('ꤰ', 'ꥆ'), ('ꥠ', 'ꥼ'), ('ꦄ', 'ꦲ'),
+  ('ꧏ', 'ꧏ'), ('ꧠ', 'ꧤ'), ('ꧦ', 'ꧯ'), ('ꧺ', 'ꧾ'),
+  ('ꨀ', 'ꨨ'), ('ꩀ', 'ꩂ'), ('ꩄ', 'ꩋ'), ('ꩠ', 'ꩶ'),
+  ('ꩺ', 'ꩺ'), ('ꩾ', 'ꪯ'), ('ꪱ', 'ꪱ'), ('ꪵ', 'ꪶ'),
+  ('ꪹ', 'ꪽ'), ('ꫀ', 'ꫀ'), ('ꫂ', 'ꫂ'), ('ꫛ', 'ꫝ'),
+  ('ꫠ', 'ꫪ'), ('ꫲ', 'ꫴ'), ('ꬁ', 'ꬆ'), ('ꬉ', 'ꬎ'),
+  ('ꬑ', 'ꬖ'), ('ꬠ', 'ꬦ'), ('ꬨ', 'ꬮ'), ('ꬰ', 'ꭚ'),
+  ('ꭜ', 'ꭥ'), ('ꭰ', 'ꯢ'), ('가', '힣'), ('ힰ', 'ퟆ'),
+  ('ퟋ', 'ퟻ'), ('豈', '舘'), ('並', '龎'), ('ff', 'st'),
+  ('ﬓ', 'ﬗ'), ('יִ', 'יִ'), ('ײַ', 'ﬨ'), ('שׁ', 'זּ'),
+  ('טּ', 'לּ'), ('מּ', 'מּ'), ('נּ', 'סּ'), ('ףּ', 'פּ'),
+  ('צּ', 'ﮱ'), ('ﯓ', 'ﴽ'), ('ﵐ', 'ﶏ'), ('ﶒ', 'ﷇ'),
+  ('ﷰ', 'ﷻ'), ('ﹰ', 'ﹴ'), ('ﹶ', 'ﻼ'), ('A', 'Z'),
+  ('a', 'z'), ('ヲ', 'ᄒ'), ('ᅡ', 'ᅦ'), ('ᅧ', 'ᅬ'),
+  ('ᅭ', 'ᅲ'), ('ᅳ', 'ᅵ'), ('𐀀', '𐀋'), ('𐀍', '𐀦'),
+  ('𐀨', '𐀺'), ('𐀼', '𐀽'), ('𐀿', '𐁍'), ('𐁐', '𐁝'),
+  ('𐂀', '𐃺'), ('𐊀', '𐊜'), ('𐊠', '𐋐'), ('𐌀', '𐌟'),
+  ('𐌭', '𐍀'), ('𐍂', '𐍉'), ('𐍐', '𐍵'), ('𐎀', '𐎝'),
+  ('𐎠', '𐏃'), ('𐏈', '𐏏'), ('𐐀', '𐒝'), ('𐒰', '𐓓'),
+  ('𐓘', '𐓻'), ('𐔀', '𐔧'), ('𐔰', '𐕣'), ('𐘀', '𐜶'),
+  ('𐝀', '𐝕'), ('𐝠', '𐝧'), ('𐠀', '𐠅'), ('𐠈', '𐠈'),
+  ('𐠊', '𐠵'), ('𐠷', '𐠸'), ('𐠼', '𐠼'), ('𐠿', '𐡕'),
+  ('𐡠', '𐡶'), ('𐢀', '𐢞'), ('𐣠', '𐣲'), ('𐣴', '𐣵'),
+  ('𐤀', '𐤕'), ('𐤠', '𐤹'), ('𐦀', '𐦷'), ('𐦾', '𐦿'),
+  ('𐨀', '𐨀'), ('𐨐', '𐨓'), ('𐨕', '𐨗'), ('𐨙', '𐨳'),
+  ('𐩠', '𐩼'), ('𐪀', '𐪜'), ('𐫀', '𐫇'), ('𐫉', '𐫤'),
+  ('𐬀', '𐬵'), ('𐭀', '𐭕'), ('𐭠', '𐭲'), ('𐮀', '𐮑'),
+  ('𐰀', '𐱈'), ('𐲀', '𐲲'), ('𐳀', '𐳲'), ('𑀃', '𑀷'),
+  ('𑂃', '𑂯'), ('𑃐', '𑃨'), ('𑄃', '𑄦'), ('𑅐', '𑅲'),
+  ('𑅶', '𑅶'), ('𑆃', '𑆲'), ('𑇁', '𑇄'), ('𑇚', '𑇚'),
+  ('𑇜', '𑇜'), ('𑈀', '𑈑'), ('𑈓', '𑈫'), ('𑊀', '𑊆'),
+  ('𑊈', '𑊈'), ('𑊊', '𑊍'), ('𑊏', '𑊝'), ('𑊟', '𑊨'),
+  ('𑊰', '𑋞'), ('𑌅', '𑌌'), ('𑌏', '𑌐'), ('𑌓', '𑌨'),
+  ('𑌪', '𑌰'), ('𑌲', '𑌳'), ('𑌵', '𑌹'), ('𑌽', '𑌽'),
+  ('𑍐', '𑍐'), ('𑍝', '𑍡'), ('𑐀', '𑐴'), ('𑑇', '𑑊'),
+  ('𑒀', '𑒯'), ('𑓄', '𑓅'), ('𑓇', '𑓇'), ('𑖀', '𑖮'),
+  ('𑗘', '𑗛'), ('𑘀', '𑘯'), ('𑙄', '𑙄'), ('𑚀', '𑚪'),
+  ('𑜀', '𑜙'), ('𑢠', '𑣟'), ('𑣿', '𑣿'), ('𑨀', '𑨀'),
+  ('𑨋', '𑨲'), ('𑨺', '𑨺'), ('𑩐', '𑩐'), ('𑩜', '𑪃'),
+  ('𑪆', '𑪉'), ('𑫀', '𑫸'), ('𑰀', '𑰈'), ('𑰊', '𑰮'),
+  ('𑱀', '𑱀'), ('𑱲', '𑲏'), ('𑴀', '𑴆'), ('𑴈', '𑴉'),
+  ('𑴋', '𑴰'), ('𑵆', '𑵆'), ('𒀀', '𒎙'), ('𒒀', '𒕃'),
+  ('𓀀', '𓐮'), ('𔐀', '𔙆'), ('𖠀', '𖨸'), ('𖩀', '𖩞'),
+  ('𖫐', '𖫭'), ('𖬀', '𖬯'), ('𖭀', '𖭃'), ('𖭣', '𖭷'),
+  ('𖭽', '𖮏'), ('𖼀', '𖽄'), ('𖽐', '𖽐'), ('𖾓', '𖾟'),
+  ('𖿠', '𖿡'), ('𗀀', '𘟬'), ('𘠀', '𘫲'), ('𛀀', '𛄞'),
+  ('𛅰', '𛋻'), ('𛰀', '𛱪'), ('𛱰', '𛱼'), ('𛲀', '𛲈'),
+  ('𛲐', '𛲙'), ('𝐀', '𝑔'), ('𝑖', '𝒜'), ('𝒞', '𝒟'),
+  ('𝒢', '𝒢'), ('𝒥', '𝒦'), ('𝒩', '𝒬'), ('𝒮', '𝒹'),
+  ('𝒻', '𝒻'), ('𝒽', '𝓃'), ('𝓅', '𝔅'), ('𝔇', '𝔊'),
+  ('𝔍', '𝔔'), ('𝔖', '𝔜'), ('𝔞', '𝔹'), ('𝔻', '𝔾'),
+  ('𝕀', '𝕄'), ('𝕆', '𝕆'), ('𝕊', '𝕐'), ('𝕒', '𝚥'),
+  ('𝚨', '𝛀'), ('𝛂', '𝛚'), ('𝛜', '𝛺'), ('𝛼', '𝜔'),
+  ('𝜖', '𝜴'), ('𝜶', '𝝎'), ('𝝐', '𝝮'), ('𝝰', '𝞈'),
+  ('𝞊', '𝞨'), ('𝞪', '𝟂'), ('𝟄', '𝟋'), ('𞠀', '𞣄'),
+  ('𞤀', '𞥃'), ('𞸀', '𞸃'), ('𞸅', '𞸟'), ('𞸡', '𞸢'),
+  ('𞸤', '𞸤'), ('𞸧', '𞸧'), ('𞸩', '𞸲'), ('𞸴', '𞸷'),
+  ('𞸹', '𞸹'), ('𞸻', '𞸻'), ('𞹂', '𞹂'), ('𞹇', '𞹇'),
+  ('𞹉', '𞹉'), ('𞹋', '𞹋'), ('𞹍', '𞹏'), ('𞹑', '𞹒'),
+  ('𞹔', '𞹔'), ('𞹗', '𞹗'), ('𞹙', '𞹙'), ('𞹛', '𞹛'),
+  ('𞹝', '𞹝'), ('𞹟', '𞹟'), ('𞹡', '𞹢'), ('𞹤', '𞹤'),
+  ('𞹧', '𞹪'), ('𞹬', '𞹲'), ('𞹴', '𞹷'), ('𞹹', '𞹼'),
+  ('𞹾', '𞹾'), ('𞺀', '𞺉'), ('𞺋', '𞺛'), ('𞺡', '𞺣'),
+  ('𞺥', '𞺩'), ('𞺫', '𞺻'), ('𠀀', '𪛖'), ('𪜀', '𫜴'),
+  ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), ('丽', '𪘀'),
+];
+
+pub const LETTER_NUMBER: &'static [(char, char)] = &[
+  ('ᛮ', 'ᛰ'), ('Ⅰ', 'ↂ'), ('ↅ', 'ↈ'), ('〇', '〇'),
+  ('〡', '〩'), ('〸', '〺'), ('ꛦ', 'ꛯ'), ('𐅀', '𐅴'),
+  ('𐍁', '𐍁'), ('𐍊', '𐍊'), ('𐏑', '𐏕'), ('𒐀', '𒑮'),
+];
+
+pub const LINE_SEPARATOR: &'static [(char, char)] = &[
+  ('\u{2028}', '\u{2028}'),
+];
+
+pub const LOWERCASE_LETTER: &'static [(char, char)] = &[
+  ('a', 'z'), ('µ', 'µ'), ('ß', 'ö'), ('ø', 'ÿ'), ('ā', 'ā'),
+  ('ă', 'ă'), ('ą', 'ą'), ('ć', 'ć'), ('ĉ', 'ĉ'), ('ċ', 'ċ'),
+  ('č', 'č'), ('ď', 'ď'), ('đ', 'đ'), ('ē', 'ē'), ('ĕ', 'ĕ'),
+  ('ė', 'ė'), ('ę', 'ę'), ('ě', 'ě'), ('ĝ', 'ĝ'), ('ğ', 'ğ'),
+  ('ġ', 'ġ'), ('ģ', 'ģ'), ('ĥ', 'ĥ'), ('ħ', 'ħ'), ('ĩ', 'ĩ'),
+  ('ī', 'ī'), ('ĭ', 'ĭ'), ('į', 'į'), ('ı', 'ı'), ('ij', 'ij'),
+  ('ĵ', 'ĵ'), ('ķ', 'ĸ'), ('ĺ', 'ĺ'), ('ļ', 'ļ'), ('ľ', 'ľ'),
+  ('ŀ', 'ŀ'), ('ł', 'ł'), ('ń', 'ń'), ('ņ', 'ņ'), ('ň', 'ʼn'),
+  ('ŋ', 'ŋ'), ('ō', 'ō'), ('ŏ', 'ŏ'), ('ő', 'ő'), ('œ', 'œ'),
+  ('ŕ', 'ŕ'), ('ŗ', 'ŗ'), ('ř', 'ř'), ('ś', 'ś'), ('ŝ', 'ŝ'),
+  ('ş', 'ş'), ('š', 'š'), ('ţ', 'ţ'), ('ť', 'ť'), ('ŧ', 'ŧ'),
+  ('ũ', 'ũ'), ('ū', 'ū'), ('ŭ', 'ŭ'), ('ů', 'ů'), ('ű', 'ű'),
+  ('ų', 'ų'), ('ŵ', 'ŵ'), ('ŷ', 'ŷ'), ('ź', 'ź'), ('ż', 'ż'),
+  ('ž', 'ƀ'), ('ƃ', 'ƃ'), ('ƅ', 'ƅ'), ('ƈ', 'ƈ'), ('ƌ', 'ƍ'),
+  ('ƒ', 'ƒ'), ('ƕ', 'ƕ'), ('ƙ', 'ƛ'), ('ƞ', 'ƞ'), ('ơ', 'ơ'),
+  ('ƣ', 'ƣ'), ('ƥ', 'ƥ'), ('ƨ', 'ƨ'), ('ƪ', 'ƫ'), ('ƭ', 'ƭ'),
+  ('ư', 'ư'), ('ƴ', 'ƴ'), ('ƶ', 'ƶ'), ('ƹ', 'ƺ'), ('ƽ', 'ƿ'),
+  ('dž', 'dž'), ('lj', 'lj'), ('nj', 'nj'), ('ǎ', 'ǎ'), ('ǐ', 'ǐ'),
+  ('ǒ', 'ǒ'), ('ǔ', 'ǔ'), ('ǖ', 'ǖ'), ('ǘ', 'ǘ'), ('ǚ', 'ǚ'),
+  ('ǜ', 'ǝ'), ('ǟ', 'ǟ'), ('ǡ', 'ǡ'), ('ǣ', 'ǣ'), ('ǥ', 'ǥ'),
+  ('ǧ', 'ǧ'), ('ǩ', 'ǩ'), ('ǫ', 'ǫ'), ('ǭ', 'ǭ'), ('ǯ', 'ǰ'),
+  ('dz', 'dz'), ('ǵ', 'ǵ'), ('ǹ', 'ǹ'), ('ǻ', 'ǻ'), ('ǽ', 'ǽ'),
+  ('ǿ', 'ǿ'), ('ȁ', 'ȁ'), ('ȃ', 'ȃ'), ('ȅ', 'ȅ'), ('ȇ', 'ȇ'),
+  ('ȉ', 'ȉ'), ('ȋ', 'ȋ'), ('ȍ', 'ȍ'), ('ȏ', 'ȏ'), ('ȑ', 'ȑ'),
+  ('ȓ', 'ȓ'), ('ȕ', 'ȕ'), ('ȗ', 'ȗ'), ('ș', 'ș'), ('ț', 'ț'),
+  ('ȝ', 'ȝ'), ('ȟ', 'ȟ'), ('ȡ', 'ȡ'), ('ȣ', 'ȣ'), ('ȥ', 'ȥ'),
+  ('ȧ', 'ȧ'), ('ȩ', 'ȩ'), ('ȫ', 'ȫ'), ('ȭ', 'ȭ'), ('ȯ', 'ȯ'),
+  ('ȱ', 'ȱ'), ('ȳ', 'ȹ'), ('ȼ', 'ȼ'), ('ȿ', 'ɀ'), ('ɂ', 'ɂ'),
+  ('ɇ', 'ɇ'), ('ɉ', 'ɉ'), ('ɋ', 'ɋ'), ('ɍ', 'ɍ'), ('ɏ', 'ʓ'),
+  ('ʕ', 'ʯ'), ('ͱ', 'ͱ'), ('ͳ', 'ͳ'), ('ͷ', 'ͷ'), ('ͻ', 'ͽ'),
+  ('ΐ', 'ΐ'), ('ά', 'ώ'), ('ϐ', 'ϑ'), ('ϕ', 'ϗ'), ('ϙ', 'ϙ'),
+  ('ϛ', 'ϛ'), ('ϝ', 'ϝ'), ('ϟ', 'ϟ'), ('ϡ', 'ϡ'), ('ϣ', 'ϣ'),
+  ('ϥ', 'ϥ'), ('ϧ', 'ϧ'), ('ϩ', 'ϩ'), ('ϫ', 'ϫ'), ('ϭ', 'ϭ'),
+  ('ϯ', 'ϳ'), ('ϵ', 'ϵ'), ('ϸ', 'ϸ'), ('ϻ', 'ϼ'), ('а', 'џ'),
+  ('ѡ', 'ѡ'), ('ѣ', 'ѣ'), ('ѥ', 'ѥ'), ('ѧ', 'ѧ'), ('ѩ', 'ѩ'),
+  ('ѫ', 'ѫ'), ('ѭ', 'ѭ'), ('ѯ', 'ѯ'), ('ѱ', 'ѱ'), ('ѳ', 'ѳ'),
+  ('ѵ', 'ѵ'), ('ѷ', 'ѷ'), ('ѹ', 'ѹ'), ('ѻ', 'ѻ'), ('ѽ', 'ѽ'),
+  ('ѿ', 'ѿ'), ('ҁ', 'ҁ'), ('ҋ', 'ҋ'), ('ҍ', 'ҍ'), ('ҏ', 'ҏ'),
+  ('ґ', 'ґ'), ('ғ', 'ғ'), ('ҕ', 'ҕ'), ('җ', 'җ'), ('ҙ', 'ҙ'),
+  ('қ', 'қ'), ('ҝ', 'ҝ'), ('ҟ', 'ҟ'), ('ҡ', 'ҡ'), ('ң', 'ң'),
+  ('ҥ', 'ҥ'), ('ҧ', 'ҧ'), ('ҩ', 'ҩ'), ('ҫ', 'ҫ'), ('ҭ', 'ҭ'),
+  ('ү', 'ү'), ('ұ', 'ұ'), ('ҳ', 'ҳ'), ('ҵ', 'ҵ'), ('ҷ', 'ҷ'),
+  ('ҹ', 'ҹ'), ('һ', 'һ'), ('ҽ', 'ҽ'), ('ҿ', 'ҿ'), ('ӂ', 'ӂ'),
+  ('ӄ', 'ӄ'), ('ӆ', 'ӆ'), ('ӈ', 'ӈ'), ('ӊ', 'ӊ'), ('ӌ', 'ӌ'),
+  ('ӎ', 'ӏ'), ('ӑ', 'ӑ'), ('ӓ', 'ӓ'), ('ӕ', 'ӕ'), ('ӗ', 'ӗ'),
+  ('ә', 'ә'), ('ӛ', 'ӛ'), ('ӝ', 'ӝ'), ('ӟ', 'ӟ'), ('ӡ', 'ӡ'),
+  ('ӣ', 'ӣ'), ('ӥ', 'ӥ'), ('ӧ', 'ӧ'), ('ө', 'ө'), ('ӫ', 'ӫ'),
+  ('ӭ', 'ӭ'), ('ӯ', 'ӯ'), ('ӱ', 'ӱ'), ('ӳ', 'ӳ'), ('ӵ', 'ӵ'),
+  ('ӷ', 'ӷ'), ('ӹ', 'ӹ'), ('ӻ', 'ӻ'), ('ӽ', 'ӽ'), ('ӿ', 'ӿ'),
+  ('ԁ', 'ԁ'), ('ԃ', 'ԃ'), ('ԅ', 'ԅ'), ('ԇ', 'ԇ'), ('ԉ', 'ԉ'),
+  ('ԋ', 'ԋ'), ('ԍ', 'ԍ'), ('ԏ', 'ԏ'), ('ԑ', 'ԑ'), ('ԓ', 'ԓ'),
+  ('ԕ', 'ԕ'), ('ԗ', 'ԗ'), ('ԙ', 'ԙ'), ('ԛ', 'ԛ'), ('ԝ', 'ԝ'),
+  ('ԟ', 'ԟ'), ('ԡ', 'ԡ'), ('ԣ', 'ԣ'), ('ԥ', 'ԥ'), ('ԧ', 'ԧ'),
+  ('ԩ', 'ԩ'), ('ԫ', 'ԫ'), ('ԭ', 'ԭ'), ('ԯ', 'ԯ'), ('ա', 'և'),
+  ('ᏸ', 'ᏽ'), ('ᲀ', 'ᲈ'), ('ᴀ', 'ᴫ'), ('ᵫ', 'ᵷ'),
+  ('ᵹ', 'ᶚ'), ('ḁ', 'ḁ'), ('ḃ', 'ḃ'), ('ḅ', 'ḅ'),
+  ('ḇ', 'ḇ'), ('ḉ', 'ḉ'), ('ḋ', 'ḋ'), ('ḍ', 'ḍ'),
+  ('ḏ', 'ḏ'), ('ḑ', 'ḑ'), ('ḓ', 'ḓ'), ('ḕ', 'ḕ'),
+  ('ḗ', 'ḗ'), ('ḙ', 'ḙ'), ('ḛ', 'ḛ'), ('ḝ', 'ḝ'),
+  ('ḟ', 'ḟ'), ('ḡ', 'ḡ'), ('ḣ', 'ḣ'), ('ḥ', 'ḥ'),
+  ('ḧ', 'ḧ'), ('ḩ', 'ḩ'), ('ḫ', 'ḫ'), ('ḭ', 'ḭ'),
+  ('ḯ', 'ḯ'), ('ḱ', 'ḱ'), ('ḳ', 'ḳ'), ('ḵ', 'ḵ'),
+  ('ḷ', 'ḷ'), ('ḹ', 'ḹ'), ('ḻ', 'ḻ'), ('ḽ', 'ḽ'),
+  ('ḿ', 'ḿ'), ('ṁ', 'ṁ'), ('ṃ', 'ṃ'), ('ṅ', 'ṅ'),
+  ('ṇ', 'ṇ'), ('ṉ', 'ṉ'), ('ṋ', 'ṋ'), ('ṍ', 'ṍ'),
+  ('ṏ', 'ṏ'), ('ṑ', 'ṑ'), ('ṓ', 'ṓ'), ('ṕ', 'ṕ'),
+  ('ṗ', 'ṗ'), ('ṙ', 'ṙ'), ('ṛ', 'ṛ'), ('ṝ', 'ṝ'),
+  ('ṟ', 'ṟ'), ('ṡ', 'ṡ'), ('ṣ', 'ṣ'), ('ṥ', 'ṥ'),
+  ('ṧ', 'ṧ'), ('ṩ', 'ṩ'), ('ṫ', 'ṫ'), ('ṭ', 'ṭ'),
+  ('ṯ', 'ṯ'), ('ṱ', 'ṱ'), ('ṳ', 'ṳ'), ('ṵ', 'ṵ'),
+  ('ṷ', 'ṷ'), ('ṹ', 'ṹ'), ('ṻ', 'ṻ'), ('ṽ', 'ṽ'),
+  ('ṿ', 'ṿ'), ('ẁ', 'ẁ'), ('ẃ', 'ẃ'), ('ẅ', 'ẅ'),
+  ('ẇ', 'ẇ'), ('ẉ', 'ẉ'), ('ẋ', 'ẋ'), ('ẍ', 'ẍ'),
+  ('ẏ', 'ẏ'), ('ẑ', 'ẑ'), ('ẓ', 'ẓ'), ('ẕ', 'ẝ'),
+  ('ẟ', 'ẟ'), ('ạ', 'ạ'), ('ả', 'ả'), ('ấ', 'ấ'),
+  ('ầ', 'ầ'), ('ẩ', 'ẩ'), ('ẫ', 'ẫ'), ('ậ', 'ậ'),
+  ('ắ', 'ắ'), ('ằ', 'ằ'), ('ẳ', 'ẳ'), ('ẵ', 'ẵ'),
+  ('ặ', 'ặ'), ('ẹ', 'ẹ'), ('ẻ', 'ẻ'), ('ẽ', 'ẽ'),
+  ('ế', 'ế'), ('ề', 'ề'), ('ể', 'ể'), ('ễ', 'ễ'),
+  ('ệ', 'ệ'), ('ỉ', 'ỉ'), ('ị', 'ị'), ('ọ', 'ọ'),
+  ('ỏ', 'ỏ'), ('ố', 'ố'), ('ồ', 'ồ'), ('ổ', 'ổ'),
+  ('ỗ', 'ỗ'), ('ộ', 'ộ'), ('ớ', 'ớ'), ('ờ', 'ờ'),
+  ('ở', 'ở'), ('ỡ', 'ỡ'), ('ợ', 'ợ'), ('ụ', 'ụ'),
+  ('ủ', 'ủ'), ('ứ', 'ứ'), ('ừ', 'ừ'), ('ử', 'ử'),
+  ('ữ', 'ữ'), ('ự', 'ự'), ('ỳ', 'ỳ'), ('ỵ', 'ỵ'),
+  ('ỷ', 'ỷ'), ('ỹ', 'ỹ'), ('ỻ', 'ỻ'), ('ỽ', 'ỽ'),
+  ('ỿ', 'ἇ'), ('ἐ', 'ἕ'), ('ἠ', 'ἧ'), ('ἰ', 'ἷ'),
+  ('ὀ', 'ὅ'), ('ὐ', 'ὗ'), ('ὠ', 'ὧ'), ('ὰ', 'ώ'),
+  ('ᾀ', 'ᾇ'), ('ᾐ', 'ᾗ'), ('ᾠ', 'ᾧ'), ('ᾰ', 'ᾴ'),
+  ('ᾶ', 'ᾷ'), ('ι', 'ι'), ('ῂ', 'ῄ'), ('ῆ', 'ῇ'),
+  ('ῐ', 'ΐ'), ('ῖ', 'ῗ'), ('ῠ', 'ῧ'), ('ῲ', 'ῴ'),
+  ('ῶ', 'ῷ'), ('ℊ', 'ℊ'), ('ℎ', 'ℏ'), ('ℓ', 'ℓ'),
+  ('ℯ', 'ℯ'), ('ℴ', 'ℴ'), ('ℹ', 'ℹ'), ('ℼ', 'ℽ'),
+  ('ⅆ', 'ⅉ'), ('ⅎ', 'ⅎ'), ('ↄ', 'ↄ'), ('ⰰ', 'ⱞ'),
+  ('ⱡ', 'ⱡ'), ('ⱥ', 'ⱦ'), ('ⱨ', 'ⱨ'), ('ⱪ', 'ⱪ'),
+  ('ⱬ', 'ⱬ'), ('ⱱ', 'ⱱ'), ('ⱳ', 'ⱴ'), ('ⱶ', 'ⱻ'),
+  ('ⲁ', 'ⲁ'), ('ⲃ', 'ⲃ'), ('ⲅ', 'ⲅ'), ('ⲇ', 'ⲇ'),
+  ('ⲉ', 'ⲉ'), ('ⲋ', 'ⲋ'), ('ⲍ', 'ⲍ'), ('ⲏ', 'ⲏ'),
+  ('ⲑ', 'ⲑ'), ('ⲓ', 'ⲓ'), ('ⲕ', 'ⲕ'), ('ⲗ', 'ⲗ'),
+  ('ⲙ', 'ⲙ'), ('ⲛ', 'ⲛ'), ('ⲝ', 'ⲝ'), ('ⲟ', 'ⲟ'),
+  ('ⲡ', 'ⲡ'), ('ⲣ', 'ⲣ'), ('ⲥ', 'ⲥ'), ('ⲧ', 'ⲧ'),
+  ('ⲩ', 'ⲩ'), ('ⲫ', 'ⲫ'), ('ⲭ', 'ⲭ'), ('ⲯ', 'ⲯ'),
+  ('ⲱ', 'ⲱ'), ('ⲳ', 'ⲳ'), ('ⲵ', 'ⲵ'), ('ⲷ', 'ⲷ'),
+  ('ⲹ', 'ⲹ'), ('ⲻ', 'ⲻ'), ('ⲽ', 'ⲽ'), ('ⲿ', 'ⲿ'),
+  ('ⳁ', 'ⳁ'), ('ⳃ', 'ⳃ'), ('ⳅ', 'ⳅ'), ('ⳇ', 'ⳇ'),
+  ('ⳉ', 'ⳉ'), ('ⳋ', 'ⳋ'), ('ⳍ', 'ⳍ'), ('ⳏ', 'ⳏ'),
+  ('ⳑ', 'ⳑ'), ('ⳓ', 'ⳓ'), ('ⳕ', 'ⳕ'), ('ⳗ', 'ⳗ'),
+  ('ⳙ', 'ⳙ'), ('ⳛ', 'ⳛ'), ('ⳝ', 'ⳝ'), ('ⳟ', 'ⳟ'),
+  ('ⳡ', 'ⳡ'), ('ⳣ', 'ⳤ'), ('ⳬ', 'ⳬ'), ('ⳮ', 'ⳮ'),
+  ('ⳳ', 'ⳳ'), ('ⴀ', 'ⴥ'), ('ⴧ', 'ⴧ'), ('ⴭ', 'ⴭ'),
+  ('ꙁ', 'ꙁ'), ('ꙃ', 'ꙃ'), ('ꙅ', 'ꙅ'), ('ꙇ', 'ꙇ'),
+  ('ꙉ', 'ꙉ'), ('ꙋ', 'ꙋ'), ('ꙍ', 'ꙍ'), ('ꙏ', 'ꙏ'),
+  ('ꙑ', 'ꙑ'), ('ꙓ', 'ꙓ'), ('ꙕ', 'ꙕ'), ('ꙗ', 'ꙗ'),
+  ('ꙙ', 'ꙙ'), ('ꙛ', 'ꙛ'), ('ꙝ', 'ꙝ'), ('ꙟ', 'ꙟ'),
+  ('ꙡ', 'ꙡ'), ('ꙣ', 'ꙣ'), ('ꙥ', 'ꙥ'), ('ꙧ', 'ꙧ'),
+  ('ꙩ', 'ꙩ'), ('ꙫ', 'ꙫ'), ('ꙭ', 'ꙭ'), ('ꚁ', 'ꚁ'),
+  ('ꚃ', 'ꚃ'), ('ꚅ', 'ꚅ'), ('ꚇ', 'ꚇ'), ('ꚉ', 'ꚉ'),
+  ('ꚋ', 'ꚋ'), ('ꚍ', 'ꚍ'), ('ꚏ', 'ꚏ'), ('ꚑ', 'ꚑ'),
+  ('ꚓ', 'ꚓ'), ('ꚕ', 'ꚕ'), ('ꚗ', 'ꚗ'), ('ꚙ', 'ꚙ'),
+  ('ꚛ', 'ꚛ'), ('ꜣ', 'ꜣ'), ('ꜥ', 'ꜥ'), ('ꜧ', 'ꜧ'),
+  ('ꜩ', 'ꜩ'), ('ꜫ', 'ꜫ'), ('ꜭ', 'ꜭ'), ('ꜯ', 'ꜱ'),
+  ('ꜳ', 'ꜳ'), ('ꜵ', 'ꜵ'), ('ꜷ', 'ꜷ'), ('ꜹ', 'ꜹ'),
+  ('ꜻ', 'ꜻ'), ('ꜽ', 'ꜽ'), ('ꜿ', 'ꜿ'), ('ꝁ', 'ꝁ'),
+  ('ꝃ', 'ꝃ'), ('ꝅ', 'ꝅ'), ('ꝇ', 'ꝇ'), ('ꝉ', 'ꝉ'),
+  ('ꝋ', 'ꝋ'), ('ꝍ', 'ꝍ'), ('ꝏ', 'ꝏ'), ('ꝑ', 'ꝑ'),
+  ('ꝓ', 'ꝓ'), ('ꝕ', 'ꝕ'), ('ꝗ', 'ꝗ'), ('ꝙ', 'ꝙ'),
+  ('ꝛ', 'ꝛ'), ('ꝝ', 'ꝝ'), ('ꝟ', 'ꝟ'), ('ꝡ', 'ꝡ'),
+  ('ꝣ', 'ꝣ'), ('ꝥ', 'ꝥ'), ('ꝧ', 'ꝧ'), ('ꝩ', 'ꝩ'),
+  ('ꝫ', 'ꝫ'), ('ꝭ', 'ꝭ'), ('ꝯ', 'ꝯ'), ('ꝱ', 'ꝸ'),
+  ('ꝺ', 'ꝺ'), ('ꝼ', 'ꝼ'), ('ꝿ', 'ꝿ'), ('ꞁ', 'ꞁ'),
+  ('ꞃ', 'ꞃ'), ('ꞅ', 'ꞅ'), ('ꞇ', 'ꞇ'), ('ꞌ', 'ꞌ'),
+  ('ꞎ', 'ꞎ'), ('ꞑ', 'ꞑ'), ('ꞓ', 'ꞕ'), ('ꞗ', 'ꞗ'),
+  ('ꞙ', 'ꞙ'), ('ꞛ', 'ꞛ'), ('ꞝ', 'ꞝ'), ('ꞟ', 'ꞟ'),
+  ('ꞡ', 'ꞡ'), ('ꞣ', 'ꞣ'), ('ꞥ', 'ꞥ'), ('ꞧ', 'ꞧ'),
+  ('ꞩ', 'ꞩ'), ('ꞵ', 'ꞵ'), ('ꞷ', 'ꞷ'), ('ꟺ', 'ꟺ'),
+  ('ꬰ', 'ꭚ'), ('ꭠ', 'ꭥ'), ('ꭰ', 'ꮿ'), ('ff', 'st'),
+  ('ﬓ', 'ﬗ'), ('a', 'z'), ('𐐨', '𐑏'), ('𐓘', '𐓻'),
+  ('𐳀', '𐳲'), ('𑣀', '𑣟'), ('𝐚', '𝐳'), ('𝑎', '𝑔'),
+  ('𝑖', '𝑧'), ('𝒂', '𝒛'), ('𝒶', '𝒹'), ('𝒻', '𝒻'),
+  ('𝒽', '𝓃'), ('𝓅', '𝓏'), ('𝓪', '𝔃'), ('𝔞', '𝔷'),
+  ('𝕒', '𝕫'), ('𝖆', '𝖟'), ('𝖺', '𝗓'), ('𝗮', '𝘇'),
+  ('𝘢', '𝘻'), ('𝙖', '𝙯'), ('𝚊', '𝚥'), ('𝛂', '𝛚'),
+  ('𝛜', '𝛡'), ('𝛼', '𝜔'), ('𝜖', '𝜛'), ('𝜶', '𝝎'),
+  ('𝝐', '𝝕'), ('𝝰', '𝞈'), ('𝞊', '𝞏'), ('𝞪', '𝟂'),
+  ('𝟄', '𝟉'), ('𝟋', '𝟋'), ('𞤢', '𞥃'),
+];
+
+pub const MARK: &'static [(char, char)] = &[
+  ('̀', 'ͯ'), ('҃', '҉'), ('֑', 'ֽ'), ('ֿ', 'ֿ'), ('ׁ', 'ׂ'),
+  ('ׄ', 'ׅ'), ('ׇ', 'ׇ'), ('ؐ', 'ؚ'), ('ً', 'ٟ'), ('ٰ', 'ٰ'),
+  ('ۖ', 'ۜ'), ('۟', 'ۤ'), ('ۧ', 'ۨ'), ('۪', 'ۭ'), ('ܑ', 'ܑ'),
+  ('ܰ', '݊'), ('ަ', 'ް'), ('߫', '߳'), ('ࠖ', '࠙'), ('ࠛ', 'ࠣ'),
+  ('ࠥ', 'ࠧ'), ('ࠩ', '࠭'), ('࡙', '࡛'), ('ࣔ', '࣡'),
+  ('ࣣ', 'ः'), ('ऺ', '़'), ('ा', 'ॏ'), ('॑', 'ॗ'),
+  ('ॢ', 'ॣ'), ('ঁ', 'ঃ'), ('়', '়'), ('া', 'ৄ'),
+  ('ে', 'ৈ'), ('ো', '্'), ('ৗ', 'ৗ'), ('ৢ', 'ৣ'),
+  ('ਁ', 'ਃ'), ('਼', '਼'), ('ਾ', 'ੂ'), ('ੇ', 'ੈ'),
+  ('ੋ', '੍'), ('ੑ', 'ੑ'), ('ੰ', 'ੱ'), ('ੵ', 'ੵ'),
+  ('ઁ', 'ઃ'), ('઼', '઼'), ('ા', 'ૅ'), ('ે', 'ૉ'),
+  ('ો', '્'), ('ૢ', 'ૣ'), ('ૺ', '૿'), ('ଁ', 'ଃ'),
+  ('଼', '଼'), ('ା', 'ୄ'), ('େ', 'ୈ'), ('ୋ', '୍'),
+  ('ୖ', 'ୗ'), ('ୢ', 'ୣ'), ('ஂ', 'ஂ'), ('ா', 'ூ'),
+  ('ெ', 'ை'), ('ொ', '்'), ('ௗ', 'ௗ'), ('ఀ', 'ః'),
+  ('ా', 'ౄ'), ('ె', 'ై'), ('ొ', '్'), ('ౕ', 'ౖ'),
+  ('ౢ', 'ౣ'), ('ಁ', 'ಃ'), ('಼', '಼'), ('ಾ', 'ೄ'),
+  ('ೆ', 'ೈ'), ('ೊ', '್'), ('ೕ', 'ೖ'), ('ೢ', 'ೣ'),
+  ('ഀ', 'ഃ'), ('഻', '഼'), ('ാ', 'ൄ'), ('െ', 'ൈ'),
+  ('ൊ', '്'), ('ൗ', 'ൗ'), ('ൢ', 'ൣ'), ('ං', 'ඃ'),
+  ('්', '්'), ('ා', 'ු'), ('ූ', 'ූ'), ('ෘ', 'ෟ'),
+  ('ෲ', 'ෳ'), ('ั', 'ั'), ('ิ', 'ฺ'), ('็', '๎'),
+  ('ັ', 'ັ'), ('ິ', 'ູ'), ('ົ', 'ຼ'), ('່', 'ໍ'),
+  ('༘', '༙'), ('༵', '༵'), ('༷', '༷'), ('༹', '༹'),
+  ('༾', '༿'), ('ཱ', '྄'), ('྆', '྇'), ('ྍ', 'ྗ'),
+  ('ྙ', 'ྼ'), ('࿆', '࿆'), ('ါ', 'ှ'), ('ၖ', 'ၙ'),
+  ('ၞ', 'ၠ'), ('ၢ', 'ၤ'), ('ၧ', 'ၭ'), ('ၱ', 'ၴ'),
+  ('ႂ', 'ႍ'), ('ႏ', 'ႏ'), ('ႚ', 'ႝ'), ('፝', '፟'),
+  ('ᜒ', '᜔'), ('ᜲ', '᜴'), ('ᝒ', 'ᝓ'), ('ᝲ', 'ᝳ'),
+  ('឴', '៓'), ('៝', '៝'), ('᠋', '᠍'), ('ᢅ', 'ᢆ'),
+  ('ᢩ', 'ᢩ'), ('ᤠ', 'ᤫ'), ('ᤰ', '᤻'), ('ᨗ', 'ᨛ'),
+  ('ᩕ', 'ᩞ'), ('᩠', '᩼'), ('᩿', '᩿'), ('᪰', '᪾'),
+  ('ᬀ', 'ᬄ'), ('᬴', '᭄'), ('᭫', '᭳'), ('ᮀ', 'ᮂ'),
+  ('ᮡ', 'ᮭ'), ('᯦', '᯳'), ('ᰤ', '᰷'), ('᳐', '᳒'),
+  ('᳔', '᳨'), ('᳭', '᳭'), ('ᳲ', '᳴'), ('᳷', '᳹'),
+  ('᷀', '᷹'), ('᷻', '᷿'), ('⃐', '⃰'), ('⳯', '⳱'),
+  ('⵿', '⵿'), ('ⷠ', 'ⷿ'), ('〪', '〯'), ('゙', '゚'),
+  ('꙯', '꙲'), ('ꙴ', '꙽'), ('ꚞ', 'ꚟ'), ('꛰', '꛱'),
+  ('ꠂ', 'ꠂ'), ('꠆', '꠆'), ('ꠋ', 'ꠋ'), ('ꠣ', 'ꠧ'),
+  ('ꢀ', 'ꢁ'), ('ꢴ', 'ꣅ'), ('꣠', '꣱'), ('ꤦ', '꤭'),
+  ('ꥇ', '꥓'), ('ꦀ', 'ꦃ'), ('꦳', '꧀'), ('ꧥ', 'ꧥ'),
+  ('ꨩ', 'ꨶ'), ('ꩃ', 'ꩃ'), ('ꩌ', 'ꩍ'), ('ꩻ', 'ꩽ'),
+  ('ꪰ', 'ꪰ'), ('ꪲ', 'ꪴ'), ('ꪷ', 'ꪸ'), ('ꪾ', '꪿'),
+  ('꫁', '꫁'), ('ꫫ', 'ꫯ'), ('ꫵ', '꫶'), ('ꯣ', 'ꯪ'),
+  ('꯬', '꯭'), ('ﬞ', 'ﬞ'), ('︀', '️'), ('︠', '︯'),
+  ('𐇽', '𐇽'), ('𐋠', '𐋠'), ('𐍶', '𐍺'), ('𐨁', '𐨃'),
+  ('𐨅', '𐨆'), ('𐨌', '𐨏'), ('𐨸', '𐨺'), ('𐨿', '𐨿'),
+  ('𐫥', '𐫦'), ('𑀀', '𑀂'), ('𑀸', '𑁆'), ('𑁿', '𑂂'),
+  ('𑂰', '𑂺'), ('𑄀', '𑄂'), ('𑄧', '𑄴'), ('𑅳', '𑅳'),
+  ('𑆀', '𑆂'), ('𑆳', '𑇀'), ('𑇊', '𑇌'), ('𑈬', '𑈷'),
+  ('𑈾', '𑈾'), ('𑋟', '𑋪'), ('𑌀', '𑌃'), ('𑌼', '𑌼'),
+  ('𑌾', '𑍄'), ('𑍇', '𑍈'), ('𑍋', '𑍍'), ('𑍗', '𑍗'),
+  ('𑍢', '𑍣'), ('𑍦', '𑍬'), ('𑍰', '𑍴'), ('𑐵', '𑑆'),
+  ('𑒰', '𑓃'), ('𑖯', '𑖵'), ('𑖸', '𑗀'), ('𑗜', '𑗝'),
+  ('𑘰', '𑙀'), ('𑚫', '𑚷'), ('𑜝', '𑜫'), ('𑨁', '𑨊'),
+  ('𑨳', '𑨹'), ('𑨻', '𑨾'), ('𑩇', '𑩇'), ('𑩑', '𑩛'),
+  ('𑪊', '𑪙'), ('𑰯', '𑰶'), ('𑰸', '𑰿'), ('𑲒', '𑲧'),
+  ('𑲩', '𑲶'), ('𑴱', '𑴶'), ('𑴺', '𑴺'), ('𑴼', '𑴽'),
+  ('𑴿', '𑵅'), ('𑵇', '𑵇'), ('𖫰', '𖫴'), ('𖬰', '𖬶'),
+  ('𖽑', '𖽾'), ('𖾏', '𖾒'), ('𛲝', '𛲞'), ('𝅥', '𝅩'),
+  ('𝅭', '𝅲'), ('𝅻', '𝆂'), ('𝆅', '𝆋'), ('𝆪', '𝆭'),
+  ('𝉂', '𝉄'), ('𝨀', '𝨶'), ('𝨻', '𝩬'), ('𝩵', '𝩵'),
+  ('𝪄', '𝪄'), ('𝪛', '𝪟'), ('𝪡', '𝪯'), ('𞀀', '𞀆'),
+  ('𞀈', '𞀘'), ('𞀛', '𞀡'), ('𞀣', '𞀤'), ('𞀦', '𞀪'),
+  ('𞣐', '𞣖'), ('𞥄', '𞥊'), ('󠄀', '󠇯'),
+];
+
+pub const MATH_SYMBOL: &'static [(char, char)] = &[
+  ('+', '+'), ('<', '>'), ('|', '|'), ('~', '~'), ('¬', '¬'), ('±', '±'),
+  ('×', '×'), ('÷', '÷'), ('϶', '϶'), ('؆', '؈'), ('⁄', '⁄'),
+  ('⁒', '⁒'), ('⁺', '⁼'), ('₊', '₌'), ('℘', '℘'),
+  ('⅀', '⅄'), ('⅋', '⅋'), ('←', '↔'), ('↚', '↛'),
+  ('↠', '↠'), ('↣', '↣'), ('↦', '↦'), ('↮', '↮'),
+  ('⇎', '⇏'), ('⇒', '⇒'), ('⇔', '⇔'), ('⇴', '⋿'),
+  ('⌠', '⌡'), ('⍼', '⍼'), ('⎛', '⎳'), ('⏜', '⏡'),
+  ('▷', '▷'), ('◁', '◁'), ('◸', '◿'), ('♯', '♯'),
+  ('⟀', '⟄'), ('⟇', '⟥'), ('⟰', '⟿'), ('⤀', '⦂'),
+  ('⦙', '⧗'), ('⧜', '⧻'), ('⧾', '⫿'), ('⬰', '⭄'),
+  ('⭇', '⭌'), ('﬩', '﬩'), ('﹢', '﹢'), ('﹤', '﹦'),
+  ('+', '+'), ('<', '>'), ('|', '|'), ('~', '~'),
+  ('¬', '¬'), ('←', '↓'), ('𝛁', '𝛁'), ('𝛛', '𝛛'),
+  ('𝛻', '𝛻'), ('𝜕', '𝜕'), ('𝜵', '𝜵'), ('𝝏', '𝝏'),
+  ('𝝯', '𝝯'), ('𝞉', '𝞉'), ('𝞩', '𝞩'), ('𝟃', '𝟃'),
+  ('𞻰', '𞻱'),
+];
+
+pub const MODIFIER_LETTER: &'static [(char, char)] = &[
+  ('ʰ', 'ˁ'), ('ˆ', 'ˑ'), ('ˠ', 'ˤ'), ('ˬ', 'ˬ'), ('ˮ', 'ˮ'),
+  ('ʹ', 'ʹ'), ('ͺ', 'ͺ'), ('ՙ', 'ՙ'), ('ـ', 'ـ'), ('ۥ', 'ۦ'),
+  ('ߴ', 'ߵ'), ('ߺ', 'ߺ'), ('ࠚ', 'ࠚ'), ('ࠤ', 'ࠤ'), ('ࠨ', 'ࠨ'),
+  ('ॱ', 'ॱ'), ('ๆ', 'ๆ'), ('ໆ', 'ໆ'), ('ჼ', 'ჼ'),
+  ('ៗ', 'ៗ'), ('ᡃ', 'ᡃ'), ('ᪧ', 'ᪧ'), ('ᱸ', 'ᱽ'),
+  ('ᴬ', 'ᵪ'), ('ᵸ', 'ᵸ'), ('ᶛ', 'ᶿ'), ('ⁱ', 'ⁱ'),
+  ('ⁿ', 'ⁿ'), ('ₐ', 'ₜ'), ('ⱼ', 'ⱽ'), ('ⵯ', 'ⵯ'),
+  ('ⸯ', 'ⸯ'), ('々', '々'), ('〱', '〵'), ('〻', '〻'),
+  ('ゝ', 'ゞ'), ('ー', 'ヾ'), ('ꀕ', 'ꀕ'), ('ꓸ', 'ꓽ'),
+  ('ꘌ', 'ꘌ'), ('ꙿ', 'ꙿ'), ('ꚜ', 'ꚝ'), ('ꜗ', 'ꜟ'),
+  ('ꝰ', 'ꝰ'), ('ꞈ', 'ꞈ'), ('ꟸ', 'ꟹ'), ('ꧏ', 'ꧏ'),
+  ('ꧦ', 'ꧦ'), ('ꩰ', 'ꩰ'), ('ꫝ', 'ꫝ'), ('ꫳ', 'ꫴ'),
+  ('ꭜ', 'ꭟ'), ('ー', 'ー'), ('゙', '゚'), ('𖭀', '𖭃'),
+  ('𖾓', '𖾟'), ('𖿠', '𖿡'),
+];
+
+pub const MODIFIER_SYMBOL: &'static [(char, char)] = &[
+  ('^', '^'), ('`', '`'), ('¨', '¨'), ('¯', '¯'), ('´', '´'),
+  ('¸', '¸'), ('˂', '˅'), ('˒', '˟'), ('˥', '˫'), ('˭', '˭'),
+  ('˯', '˿'), ('͵', '͵'), ('΄', '΅'), ('᾽', '᾽'), ('᾿', '῁'),
+  ('῍', '῏'), ('῝', '῟'), ('῭', '`'), ('´', '῾'),
+  ('゛', '゜'), ('꜀', '꜖'), ('꜠', '꜡'), ('꞉', '꞊'),
+  ('꭛', '꭛'), ('﮲', '﯁'), ('^', '^'), ('`', '`'),
+  (' ̄', ' ̄'), ('🏻', '🏿'),
+];
+
+pub const NONSPACING_MARK: &'static [(char, char)] = &[
+  ('̀', 'ͯ'), ('҃', '҇'), ('֑', 'ֽ'), ('ֿ', 'ֿ'), ('ׁ', 'ׂ'),
+  ('ׄ', 'ׅ'), ('ׇ', 'ׇ'), ('ؐ', 'ؚ'), ('ً', 'ٟ'), ('ٰ', 'ٰ'),
+  ('ۖ', 'ۜ'), ('۟', 'ۤ'), ('ۧ', 'ۨ'), ('۪', 'ۭ'), ('ܑ', 'ܑ'),
+  ('ܰ', '݊'), ('ަ', 'ް'), ('߫', '߳'), ('ࠖ', '࠙'), ('ࠛ', 'ࠣ'),
+  ('ࠥ', 'ࠧ'), ('ࠩ', '࠭'), ('࡙', '࡛'), ('ࣔ', '࣡'),
+  ('ࣣ', 'ं'), ('ऺ', 'ऺ'), ('़', '़'), ('ु', 'ै'),
+  ('्', '्'), ('॑', 'ॗ'), ('ॢ', 'ॣ'), ('ঁ', 'ঁ'),
+  ('়', '়'), ('ু', 'ৄ'), ('্', '্'), ('ৢ', 'ৣ'),
+  ('ਁ', 'ਂ'), ('਼', '਼'), ('ੁ', 'ੂ'), ('ੇ', 'ੈ'),
+  ('ੋ', '੍'), ('ੑ', 'ੑ'), ('ੰ', 'ੱ'), ('ੵ', 'ੵ'),
+  ('ઁ', 'ં'), ('઼', '઼'), ('ુ', 'ૅ'), ('ે', 'ૈ'),
+  ('્', '્'), ('ૢ', 'ૣ'), ('ૺ', '૿'), ('ଁ', 'ଁ'),
+  ('଼', '଼'), ('ି', 'ି'), ('ୁ', 'ୄ'), ('୍', '୍'),
+  ('ୖ', 'ୖ'), ('ୢ', 'ୣ'), ('ஂ', 'ஂ'), ('ீ', 'ீ'),
+  ('்', '்'), ('ఀ', 'ఀ'), ('ా', 'ీ'), ('ె', 'ై'),
+  ('ొ', '్'), ('ౕ', 'ౖ'), ('ౢ', 'ౣ'), ('ಁ', 'ಁ'),
+  ('಼', '಼'), ('ಿ', 'ಿ'), ('ೆ', 'ೆ'), ('ೌ', '್'),
+  ('ೢ', 'ೣ'), ('ഀ', 'ഁ'), ('഻', '഼'), ('ു', 'ൄ'),
+  ('്', '്'), ('ൢ', 'ൣ'), ('්', '්'), ('ි', 'ු'),
+  ('ූ', 'ූ'), ('ั', 'ั'), ('ิ', 'ฺ'), ('็', '๎'),
+  ('ັ', 'ັ'), ('ິ', 'ູ'), ('ົ', 'ຼ'), ('່', 'ໍ'),
+  ('༘', '༙'), ('༵', '༵'), ('༷', '༷'), ('༹', '༹'),
+  ('ཱ', 'ཾ'), ('ྀ', '྄'), ('྆', '྇'), ('ྍ', 'ྗ'),
+  ('ྙ', 'ྼ'), ('࿆', '࿆'), ('ိ', 'ူ'), ('ဲ', '့'),
+  ('္', '်'), ('ွ', 'ှ'), ('ၘ', 'ၙ'), ('ၞ', 'ၠ'),
+  ('ၱ', 'ၴ'), ('ႂ', 'ႂ'), ('ႅ', 'ႆ'), ('ႍ', 'ႍ'),
+  ('ႝ', 'ႝ'), ('፝', '፟'), ('ᜒ', '᜔'), ('ᜲ', '᜴'),
+  ('ᝒ', 'ᝓ'), ('ᝲ', 'ᝳ'), ('឴', '឵'), ('ិ', 'ួ'),
+  ('ំ', 'ំ'), ('៉', '៓'), ('៝', '៝'), ('᠋', '᠍'),
+  ('ᢅ', 'ᢆ'), ('ᢩ', 'ᢩ'), ('ᤠ', 'ᤢ'), ('ᤧ', 'ᤨ'),
+  ('ᤲ', 'ᤲ'), ('᤹', '᤻'), ('ᨗ', 'ᨘ'), ('ᨛ', 'ᨛ'),
+  ('ᩖ', 'ᩖ'), ('ᩘ', 'ᩞ'), ('᩠', '᩠'), ('ᩢ', 'ᩢ'),
+  ('ᩥ', 'ᩬ'), ('ᩳ', '᩼'), ('᩿', '᩿'), ('᪰', '᪽'),
+  ('ᬀ', 'ᬃ'), ('᬴', '᬴'), ('ᬶ', 'ᬺ'), ('ᬼ', 'ᬼ'),
+  ('ᭂ', 'ᭂ'), ('᭫', '᭳'), ('ᮀ', 'ᮁ'), ('ᮢ', 'ᮥ'),
+  ('ᮨ', 'ᮩ'), ('᮫', 'ᮭ'), ('᯦', '᯦'), ('ᯨ', 'ᯩ'),
+  ('ᯭ', 'ᯭ'), ('ᯯ', 'ᯱ'), ('ᰬ', 'ᰳ'), ('ᰶ', '᰷'),
+  ('᳐', '᳒'), ('᳔', '᳠'), ('᳢', '᳨'), ('᳭', '᳭'),
+  ('᳴', '᳴'), ('᳸', '᳹'), ('᷀', '᷹'), ('᷻', '᷿'),
+  ('⃐', '⃜'), ('⃡', '⃡'), ('⃥', '⃰'), ('⳯', '⳱'),
+  ('⵿', '⵿'), ('ⷠ', 'ⷿ'), ('〪', '〭'), ('゙', '゚'),
+  ('꙯', '꙯'), ('ꙴ', '꙽'), ('ꚞ', 'ꚟ'), ('꛰', '꛱'),
+  ('ꠂ', 'ꠂ'), ('꠆', '꠆'), ('ꠋ', 'ꠋ'), ('ꠥ', 'ꠦ'),
+  ('꣄', 'ꣅ'), ('꣠', '꣱'), ('ꤦ', '꤭'), ('ꥇ', 'ꥑ'),
+  ('ꦀ', 'ꦂ'), ('꦳', '꦳'), ('ꦶ', 'ꦹ'), ('ꦼ', 'ꦼ'),
+  ('ꧥ', 'ꧥ'), ('ꨩ', 'ꨮ'), ('ꨱ', 'ꨲ'), ('ꨵ', 'ꨶ'),
+  ('ꩃ', 'ꩃ'), ('ꩌ', 'ꩌ'), ('ꩼ', 'ꩼ'), ('ꪰ', 'ꪰ'),
+  ('ꪲ', 'ꪴ'), ('ꪷ', 'ꪸ'), ('ꪾ', '꪿'), ('꫁', '꫁'),
+  ('ꫬ', 'ꫭ'), ('꫶', '꫶'), ('ꯥ', 'ꯥ'), ('ꯨ', 'ꯨ'),
+  ('꯭', '꯭'), ('ﬞ', 'ﬞ'), ('︀', '️'), ('︠', '︯'),
+  ('𐇽', '𐇽'), ('𐋠', '𐋠'), ('𐍶', '𐍺'), ('𐨁', '𐨃'),
+  ('𐨅', '𐨆'), ('𐨌', '𐨏'), ('𐨸', '𐨺'), ('𐨿', '𐨿'),
+  ('𐫥', '𐫦'), ('𑀁', '𑀁'), ('𑀸', '𑁆'), ('𑁿', '𑂁'),
+  ('𑂳', '𑂶'), ('𑂹', '𑂺'), ('𑄀', '𑄂'), ('𑄧', '𑄫'),
+  ('𑄭', '𑄴'), ('𑅳', '𑅳'), ('𑆀', '𑆁'), ('𑆶', '𑆾'),
+  ('𑇊', '𑇌'), ('𑈯', '𑈱'), ('𑈴', '𑈴'), ('𑈶', '𑈷'),
+  ('𑈾', '𑈾'), ('𑋟', '𑋟'), ('𑋣', '𑋪'), ('𑌀', '𑌁'),
+  ('𑌼', '𑌼'), ('𑍀', '𑍀'), ('𑍦', '𑍬'), ('𑍰', '𑍴'),
+  ('𑐸', '𑐿'), ('𑑂', '𑑄'), ('𑑆', '𑑆'), ('𑒳', '𑒸'),
+  ('𑒺', '𑒺'), ('𑒿', '𑓀'), ('𑓂', '𑓃'), ('𑖲', '𑖵'),
+  ('𑖼', '𑖽'), ('𑖿', '𑗀'), ('𑗜', '𑗝'), ('𑘳', '𑘺'),
+  ('𑘽', '𑘽'), ('𑘿', '𑙀'), ('𑚫', '𑚫'), ('𑚭', '𑚭'),
+  ('𑚰', '𑚵'), ('𑚷', '𑚷'), ('𑜝', '𑜟'), ('𑜢', '𑜥'),
+  ('𑜧', '𑜫'), ('𑨁', '𑨆'), ('𑨉', '𑨊'), ('𑨳', '𑨸'),
+  ('𑨻', '𑨾'), ('𑩇', '𑩇'), ('𑩑', '𑩖'), ('𑩙', '𑩛'),
+  ('𑪊', '𑪖'), ('𑪘', '𑪙'), ('𑰰', '𑰶'), ('𑰸', '𑰽'),
+  ('𑰿', '𑰿'), ('𑲒', '𑲧'), ('𑲪', '𑲰'), ('𑲲', '𑲳'),
+  ('𑲵', '𑲶'), ('𑴱', '𑴶'), ('𑴺', '𑴺'), ('𑴼', '𑴽'),
+  ('𑴿', '𑵅'), ('𑵇', '𑵇'), ('𖫰', '𖫴'), ('𖬰', '𖬶'),
+  ('𖾏', '𖾒'), ('𛲝', '𛲞'), ('𝅧', '𝅩'), ('𝅻', '𝆂'),
+  ('𝆅', '𝆋'), ('𝆪', '𝆭'), ('𝉂', '𝉄'), ('𝨀', '𝨶'),
+  ('𝨻', '𝩬'), ('𝩵', '𝩵'), ('𝪄', '𝪄'), ('𝪛', '𝪟'),
+  ('𝪡', '𝪯'), ('𞀀', '𞀆'), ('𞀈', '𞀘'), ('𞀛', '𞀡'),
+  ('𞀣', '𞀤'), ('𞀦', '𞀪'), ('𞣐', '𞣖'), ('𞥄', '𞥊'),
+  ('󠄀', '󠇯'),
+];
+
+pub const NUMBER: &'static [(char, char)] = &[
+  ('0', '9'), ('²', '³'), ('¹', '¹'), ('¼', '¾'), ('٠', '٩'),
+  ('۰', '۹'), ('߀', '߉'), ('०', '९'), ('০', '৯'), ('৴', '৹'),
+  ('੦', '੯'), ('૦', '૯'), ('୦', '୯'), ('୲', '୷'),
+  ('௦', '௲'), ('౦', '౯'), ('౸', '౾'), ('೦', '೯'),
+  ('൘', '൞'), ('൦', '൸'), ('෦', '෯'), ('๐', '๙'),
+  ('໐', '໙'), ('༠', '༳'), ('၀', '၉'), ('႐', '႙'),
+  ('፩', '፼'), ('ᛮ', 'ᛰ'), ('០', '៩'), ('៰', '៹'),
+  ('᠐', '᠙'), ('᥆', '᥏'), ('᧐', '᧚'), ('᪀', '᪉'),
+  ('᪐', '᪙'), ('᭐', '᭙'), ('᮰', '᮹'), ('᱀', '᱉'),
+  ('᱐', '᱙'), ('⁰', '⁰'), ('⁴', '⁹'), ('₀', '₉'),
+  ('⅐', 'ↂ'), ('ↅ', '↉'), ('①', '⒛'), ('⓪', '⓿'),
+  ('❶', '➓'), ('⳽', '⳽'), ('〇', '〇'), ('〡', '〩'),
+  ('〸', '〺'), ('㆒', '㆕'), ('㈠', '㈩'), ('㉈', '㉏'),
+  ('㉑', '㉟'), ('㊀', '㊉'), ('㊱', '㊿'), ('꘠', '꘩'),
+  ('ꛦ', 'ꛯ'), ('꠰', '꠵'), ('꣐', '꣙'), ('꤀', '꤉'),
+  ('꧐', '꧙'), ('꧰', '꧹'), ('꩐', '꩙'), ('꯰', '꯹'),
+  ('0', '9'), ('𐄇', '𐄳'), ('𐅀', '𐅸'), ('𐆊', '𐆋'),
+  ('𐋡', '𐋻'), ('𐌠', '𐌣'), ('𐍁', '𐍁'), ('𐍊', '𐍊'),
+  ('𐏑', '𐏕'), ('𐒠', '𐒩'), ('𐡘', '𐡟'), ('𐡹', '𐡿'),
+  ('𐢧', '𐢯'), ('𐣻', '𐣿'), ('𐤖', '𐤛'), ('𐦼', '𐦽'),
+  ('𐧀', '𐧏'), ('𐧒', '𐧿'), ('𐩀', '𐩇'), ('𐩽', '𐩾'),
+  ('𐪝', '𐪟'), ('𐫫', '𐫯'), ('𐭘', '𐭟'), ('𐭸', '𐭿'),
+  ('𐮩', '𐮯'), ('𐳺', '𐳿'), ('𐹠', '𐹾'), ('𑁒', '𑁯'),
+  ('𑃰', '𑃹'), ('𑄶', '𑄿'), ('𑇐', '𑇙'), ('𑇡', '𑇴'),
+  ('𑋰', '𑋹'), ('𑑐', '𑑙'), ('𑓐', '𑓙'), ('𑙐', '𑙙'),
+  ('𑛀', '𑛉'), ('𑜰', '𑜻'), ('𑣠', '𑣲'), ('𑱐', '𑱬'),
+  ('𑵐', '𑵙'), ('𒐀', '𒑮'), ('𖩠', '𖩩'), ('𖭐', '𖭙'),
+  ('𖭛', '𖭡'), ('𝍠', '𝍱'), ('𝟎', '𝟿'), ('𞣇', '𞣏'),
+  ('𞥐', '𞥙'), ('🄀', '🄌'),
+];
+
+pub const OPEN_PUNCTUATION: &'static [(char, char)] = &[
+  ('(', '('), ('[', '['), ('{', '{'), ('༺', '༺'), ('༼', '༼'),
+  ('᚛', '᚛'), ('‚', '‚'), ('„', '„'), ('⁅', '⁅'),
+  ('⁽', '⁽'), ('₍', '₍'), ('⌈', '⌈'), ('⌊', '⌊'),
+  ('〈', '〈'), ('❨', '❨'), ('❪', '❪'), ('❬', '❬'),
+  ('❮', '❮'), ('❰', '❰'), ('❲', '❲'), ('❴', '❴'),
+  ('⟅', '⟅'), ('⟦', '⟦'), ('⟨', '⟨'), ('⟪', '⟪'),
+  ('⟬', '⟬'), ('⟮', '⟮'), ('⦃', '⦃'), ('⦅', '⦅'),
+  ('⦇', '⦇'), ('⦉', '⦉'), ('⦋', '⦋'), ('⦍', '⦍'),
+  ('⦏', '⦏'), ('⦑', '⦑'), ('⦓', '⦓'), ('⦕', '⦕'),
+  ('⦗', '⦗'), ('⧘', '⧘'), ('⧚', '⧚'), ('⧼', '⧼'),
+  ('⸢', '⸢'), ('⸤', '⸤'), ('⸦', '⸦'), ('⸨', '⸨'),
+  ('⹂', '⹂'), ('〈', '〈'), ('《', '《'), ('「', '「'),
+  ('『', '『'), ('【', '【'), ('〔', '〔'), ('〖', '〖'),
+  ('〘', '〘'), ('〚', '〚'), ('〝', '〝'), ('﴿', '﴿'),
+  ('︗', '︗'), ('︵', '︵'), ('︷', '︷'), ('︹', '︹'),
+  ('︻', '︻'), ('︽', '︽'), ('︿', '︿'), ('﹁', '﹁'),
+  ('﹃', '﹃'), ('﹇', '﹇'), ('﹙', '﹙'), ('﹛', '﹛'),
+  ('﹝', '﹝'), ('(', '('), ('[', '['), ('{', '{'),
+  ('⦅', '⦅'), ('「', '「'),
+];
+
+pub const OTHER: &'static [(char, char)] = &[
+  ('\u{0}', '\u{1f}'), ('\u{7f}', '\u{9f}'), ('\u{ad}', '\u{ad}'),
+  ('\u{378}', '\u{379}'), ('\u{380}', '\u{383}'), ('\u{38b}', '\u{38b}'),
+  ('\u{38d}', '\u{38d}'), ('\u{3a2}', '\u{3a2}'), ('\u{530}', '\u{530}'),
+  ('\u{557}', '\u{558}'), ('\u{560}', '\u{560}'), ('\u{588}', '\u{588}'),
+  ('\u{58b}', '\u{58c}'), ('\u{590}', '\u{590}'), ('\u{5c8}', '\u{5cf}'),
+  ('\u{5eb}', '\u{5ef}'), ('\u{5f5}', '\u{605}'), ('\u{61c}', '\u{61d}'),
+  ('\u{6dd}', '\u{6dd}'), ('\u{70e}', '\u{70f}'), ('\u{74b}', '\u{74c}'),
+  ('\u{7b2}', '\u{7bf}'), ('\u{7fb}', '\u{7ff}'), ('\u{82e}', '\u{82f}'),
+  ('\u{83f}', '\u{83f}'), ('\u{85c}', '\u{85d}'), ('\u{85f}', '\u{85f}'),
+  ('\u{86b}', '\u{89f}'), ('\u{8b5}', '\u{8b5}'), ('\u{8be}', '\u{8d3}'),
+  ('\u{8e2}', '\u{8e2}'), ('\u{984}', '\u{984}'), ('\u{98d}', '\u{98e}'),
+  ('\u{991}', '\u{992}'), ('\u{9a9}', '\u{9a9}'), ('\u{9b1}', '\u{9b1}'),
+  ('\u{9b3}', '\u{9b5}'), ('\u{9ba}', '\u{9bb}'), ('\u{9c5}', '\u{9c6}'),
+  ('\u{9c9}', '\u{9ca}'), ('\u{9cf}', '\u{9d6}'), ('\u{9d8}', '\u{9db}'),
+  ('\u{9de}', '\u{9de}'), ('\u{9e4}', '\u{9e5}'), ('\u{9fe}', '\u{a00}'),
+  ('\u{a04}', '\u{a04}'), ('\u{a0b}', '\u{a0e}'), ('\u{a11}', '\u{a12}'),
+  ('\u{a29}', '\u{a29}'), ('\u{a31}', '\u{a31}'), ('\u{a34}', '\u{a34}'),
+  ('\u{a37}', '\u{a37}'), ('\u{a3a}', '\u{a3b}'), ('\u{a3d}', '\u{a3d}'),
+  ('\u{a43}', '\u{a46}'), ('\u{a49}', '\u{a4a}'), ('\u{a4e}', '\u{a50}'),
+  ('\u{a52}', '\u{a58}'), ('\u{a5d}', '\u{a5d}'), ('\u{a5f}', '\u{a65}'),
+  ('\u{a76}', '\u{a80}'), ('\u{a84}', '\u{a84}'), ('\u{a8e}', '\u{a8e}'),
+  ('\u{a92}', '\u{a92}'), ('\u{aa9}', '\u{aa9}'), ('\u{ab1}', '\u{ab1}'),
+  ('\u{ab4}', '\u{ab4}'), ('\u{aba}', '\u{abb}'), ('\u{ac6}', '\u{ac6}'),
+  ('\u{aca}', '\u{aca}'), ('\u{ace}', '\u{acf}'), ('\u{ad1}', '\u{adf}'),
+  ('\u{ae4}', '\u{ae5}'), ('\u{af2}', '\u{af8}'), ('\u{b00}', '\u{b00}'),
+  ('\u{b04}', '\u{b04}'), ('\u{b0d}', '\u{b0e}'), ('\u{b11}', '\u{b12}'),
+  ('\u{b29}', '\u{b29}'), ('\u{b31}', '\u{b31}'), ('\u{b34}', '\u{b34}'),
+  ('\u{b3a}', '\u{b3b}'), ('\u{b45}', '\u{b46}'), ('\u{b49}', '\u{b4a}'),
+  ('\u{b4e}', '\u{b55}'), ('\u{b58}', '\u{b5b}'), ('\u{b5e}', '\u{b5e}'),
+  ('\u{b64}', '\u{b65}'), ('\u{b78}', '\u{b81}'), ('\u{b84}', '\u{b84}'),
+  ('\u{b8b}', '\u{b8d}'), ('\u{b91}', '\u{b91}'), ('\u{b96}', '\u{b98}'),
+  ('\u{b9b}', '\u{b9b}'), ('\u{b9d}', '\u{b9d}'), ('\u{ba0}', '\u{ba2}'),
+  ('\u{ba5}', '\u{ba7}'), ('\u{bab}', '\u{bad}'), ('\u{bba}', '\u{bbd}'),
+  ('\u{bc3}', '\u{bc5}'), ('\u{bc9}', '\u{bc9}'), ('\u{bce}', '\u{bcf}'),
+  ('\u{bd1}', '\u{bd6}'), ('\u{bd8}', '\u{be5}'), ('\u{bfb}', '\u{bff}'),
+  ('\u{c04}', '\u{c04}'), ('\u{c0d}', '\u{c0d}'), ('\u{c11}', '\u{c11}'),
+  ('\u{c29}', '\u{c29}'), ('\u{c3a}', '\u{c3c}'), ('\u{c45}', '\u{c45}'),
+  ('\u{c49}', '\u{c49}'), ('\u{c4e}', '\u{c54}'), ('\u{c57}', '\u{c57}'),
+  ('\u{c5b}', '\u{c5f}'), ('\u{c64}', '\u{c65}'), ('\u{c70}', '\u{c77}'),
+  ('\u{c84}', '\u{c84}'), ('\u{c8d}', '\u{c8d}'), ('\u{c91}', '\u{c91}'),
+  ('\u{ca9}', '\u{ca9}'), ('\u{cb4}', '\u{cb4}'), ('\u{cba}', '\u{cbb}'),
+  ('\u{cc5}', '\u{cc5}'), ('\u{cc9}', '\u{cc9}'), ('\u{cce}', '\u{cd4}'),
+  ('\u{cd7}', '\u{cdd}'), ('\u{cdf}', '\u{cdf}'), ('\u{ce4}', '\u{ce5}'),
+  ('\u{cf0}', '\u{cf0}'), ('\u{cf3}', '\u{cff}'), ('\u{d04}', '\u{d04}'),
+  ('\u{d0d}', '\u{d0d}'), ('\u{d11}', '\u{d11}'), ('\u{d45}', '\u{d45}'),
+  ('\u{d49}', '\u{d49}'), ('\u{d50}', '\u{d53}'), ('\u{d64}', '\u{d65}'),
+  ('\u{d80}', '\u{d81}'), ('\u{d84}', '\u{d84}'), ('\u{d97}', '\u{d99}'),
+  ('\u{db2}', '\u{db2}'), ('\u{dbc}', '\u{dbc}'), ('\u{dbe}', '\u{dbf}'),
+  ('\u{dc7}', '\u{dc9}'), ('\u{dcb}', '\u{dce}'), ('\u{dd5}', '\u{dd5}'),
+  ('\u{dd7}', '\u{dd7}'), ('\u{de0}', '\u{de5}'), ('\u{df0}', '\u{df1}'),
+  ('\u{df5}', '\u{e00}'), ('\u{e3b}', '\u{e3e}'), ('\u{e5c}', '\u{e80}'),
+  ('\u{e83}', '\u{e83}'), ('\u{e85}', '\u{e86}'), ('\u{e89}', '\u{e89}'),
+  ('\u{e8b}', '\u{e8c}'), ('\u{e8e}', '\u{e93}'), ('\u{e98}', '\u{e98}'),
+  ('\u{ea0}', '\u{ea0}'), ('\u{ea4}', '\u{ea4}'), ('\u{ea6}', '\u{ea6}'),
+  ('\u{ea8}', '\u{ea9}'), ('\u{eac}', '\u{eac}'), ('\u{eba}', '\u{eba}'),
+  ('\u{ebe}', '\u{ebf}'), ('\u{ec5}', '\u{ec5}'), ('\u{ec7}', '\u{ec7}'),
+  ('\u{ece}', '\u{ecf}'), ('\u{eda}', '\u{edb}'), ('\u{ee0}', '\u{eff}'),
+  ('\u{f48}', '\u{f48}'), ('\u{f6d}', '\u{f70}'), ('\u{f98}', '\u{f98}'),
+  ('\u{fbd}', '\u{fbd}'), ('\u{fcd}', '\u{fcd}'), ('\u{fdb}', '\u{fff}'),
+  ('\u{10c6}', '\u{10c6}'), ('\u{10c8}', '\u{10cc}'),
+  ('\u{10ce}', '\u{10cf}'), ('\u{1249}', '\u{1249}'),
+  ('\u{124e}', '\u{124f}'), ('\u{1257}', '\u{1257}'),
+  ('\u{1259}', '\u{1259}'), ('\u{125e}', '\u{125f}'),
+  ('\u{1289}', '\u{1289}'), ('\u{128e}', '\u{128f}'),
+  ('\u{12b1}', '\u{12b1}'), ('\u{12b6}', '\u{12b7}'),
+  ('\u{12bf}', '\u{12bf}'), ('\u{12c1}', '\u{12c1}'),
+  ('\u{12c6}', '\u{12c7}'), ('\u{12d7}', '\u{12d7}'),
+  ('\u{1311}', '\u{1311}'), ('\u{1316}', '\u{1317}'),
+  ('\u{135b}', '\u{135c}'), ('\u{137d}', '\u{137f}'),
+  ('\u{139a}', '\u{139f}'), ('\u{13f6}', '\u{13f7}'),
+  ('\u{13fe}', '\u{13ff}'), ('\u{169d}', '\u{169f}'),
+  ('\u{16f9}', '\u{16ff}'), ('\u{170d}', '\u{170d}'),
+  ('\u{1715}', '\u{171f}'), ('\u{1737}', '\u{173f}'),
+  ('\u{1754}', '\u{175f}'), ('\u{176d}', '\u{176d}'),
+  ('\u{1771}', '\u{1771}'), ('\u{1774}', '\u{177f}'),
+  ('\u{17de}', '\u{17df}'), ('\u{17ea}', '\u{17ef}'),
+  ('\u{17fa}', '\u{17ff}'), ('\u{180e}', '\u{180f}'),
+  ('\u{181a}', '\u{181f}'), ('\u{1878}', '\u{187f}'),
+  ('\u{18ab}', '\u{18af}'), ('\u{18f6}', '\u{18ff}'),
+  ('\u{191f}', '\u{191f}'), ('\u{192c}', '\u{192f}'),
+  ('\u{193c}', '\u{193f}'), ('\u{1941}', '\u{1943}'),
+  ('\u{196e}', '\u{196f}'), ('\u{1975}', '\u{197f}'),
+  ('\u{19ac}', '\u{19af}'), ('\u{19ca}', '\u{19cf}'),
+  ('\u{19db}', '\u{19dd}'), ('\u{1a1c}', '\u{1a1d}'),
+  ('\u{1a5f}', '\u{1a5f}'), ('\u{1a7d}', '\u{1a7e}'),
+  ('\u{1a8a}', '\u{1a8f}'), ('\u{1a9a}', '\u{1a9f}'),
+  ('\u{1aae}', '\u{1aaf}'), ('\u{1abf}', '\u{1aff}'),
+  ('\u{1b4c}', '\u{1b4f}'), ('\u{1b7d}', '\u{1b7f}'),
+  ('\u{1bf4}', '\u{1bfb}'), ('\u{1c38}', '\u{1c3a}'),
+  ('\u{1c4a}', '\u{1c4c}'), ('\u{1c89}', '\u{1cbf}'),
+  ('\u{1cc8}', '\u{1ccf}'), ('\u{1cfa}', '\u{1cff}'),
+  ('\u{1dfa}', '\u{1dfa}'), ('\u{1f16}', '\u{1f17}'),
+  ('\u{1f1e}', '\u{1f1f}'), ('\u{1f46}', '\u{1f47}'),
+  ('\u{1f4e}', '\u{1f4f}'), ('\u{1f58}', '\u{1f58}'),
+  ('\u{1f5a}', '\u{1f5a}'), ('\u{1f5c}', '\u{1f5c}'),
+  ('\u{1f5e}', '\u{1f5e}'), ('\u{1f7e}', '\u{1f7f}'),
+  ('\u{1fb5}', '\u{1fb5}'), ('\u{1fc5}', '\u{1fc5}'),
+  ('\u{1fd4}', '\u{1fd5}'), ('\u{1fdc}', '\u{1fdc}'),
+  ('\u{1ff0}', '\u{1ff1}'), ('\u{1ff5}', '\u{1ff5}'),
+  ('\u{1fff}', '\u{1fff}'), ('\u{200b}', '\u{200f}'),
+  ('\u{202a}', '\u{202e}'), ('\u{2060}', '\u{206f}'),
+  ('\u{2072}', '\u{2073}'), ('\u{208f}', '\u{208f}'),
+  ('\u{209d}', '\u{209f}'), ('\u{20c0}', '\u{20cf}'),
+  ('\u{20f1}', '\u{20ff}'), ('\u{218c}', '\u{218f}'),
+  ('\u{2427}', '\u{243f}'), ('\u{244b}', '\u{245f}'),
+  ('\u{2b74}', '\u{2b75}'), ('\u{2b96}', '\u{2b97}'),
+  ('\u{2bba}', '\u{2bbc}'), ('\u{2bc9}', '\u{2bc9}'),
+  ('\u{2bd3}', '\u{2beb}'), ('\u{2bf0}', '\u{2bff}'),
+  ('\u{2c2f}', '\u{2c2f}'), ('\u{2c5f}', '\u{2c5f}'),
+  ('\u{2cf4}', '\u{2cf8}'), ('\u{2d26}', '\u{2d26}'),
+  ('\u{2d28}', '\u{2d2c}'), ('\u{2d2e}', '\u{2d2f}'),
+  ('\u{2d68}', '\u{2d6e}'), ('\u{2d71}', '\u{2d7e}'),
+  ('\u{2d97}', '\u{2d9f}'), ('\u{2da7}', '\u{2da7}'),
+  ('\u{2daf}', '\u{2daf}'), ('\u{2db7}', '\u{2db7}'),
+  ('\u{2dbf}', '\u{2dbf}'), ('\u{2dc7}', '\u{2dc7}'),
+  ('\u{2dcf}', '\u{2dcf}'), ('\u{2dd7}', '\u{2dd7}'),
+  ('\u{2ddf}', '\u{2ddf}'), ('\u{2e4a}', '\u{2e7f}'),
+  ('\u{2e9a}', '\u{2e9a}'), ('\u{2ef4}', '\u{2eff}'),
+  ('\u{2fd6}', '\u{2fef}'), ('\u{2ffc}', '\u{2fff}'),
+  ('\u{3040}', '\u{3040}'), ('\u{3097}', '\u{3098}'),
+  ('\u{3100}', '\u{3104}'), ('\u{312f}', '\u{3130}'),
+  ('\u{318f}', '\u{318f}'), ('\u{31bb}', '\u{31bf}'),
+  ('\u{31e4}', '\u{31ef}'), ('\u{321f}', '\u{321f}'),
+  ('\u{32ff}', '\u{32ff}'), ('\u{4db6}', '\u{4dbf}'),
+  ('\u{9feb}', '\u{9fff}'), ('\u{a48d}', '\u{a48f}'),
+  ('\u{a4c7}', '\u{a4cf}'), ('\u{a62c}', '\u{a63f}'),
+  ('\u{a6f8}', '\u{a6ff}'), ('\u{a7af}', '\u{a7af}'),
+  ('\u{a7b8}', '\u{a7f6}'), ('\u{a82c}', '\u{a82f}'),
+  ('\u{a83a}', '\u{a83f}'), ('\u{a878}', '\u{a87f}'),
+  ('\u{a8c6}', '\u{a8cd}'), ('\u{a8da}', '\u{a8df}'),
+  ('\u{a8fe}', '\u{a8ff}'), ('\u{a954}', '\u{a95e}'),
+  ('\u{a97d}', '\u{a97f}'), ('\u{a9ce}', '\u{a9ce}'),
+  ('\u{a9da}', '\u{a9dd}'), ('\u{a9ff}', '\u{a9ff}'),
+  ('\u{aa37}', '\u{aa3f}'), ('\u{aa4e}', '\u{aa4f}'),
+  ('\u{aa5a}', '\u{aa5b}'), ('\u{aac3}', '\u{aada}'),
+  ('\u{aaf7}', '\u{ab00}'), ('\u{ab07}', '\u{ab08}'),
+  ('\u{ab0f}', '\u{ab10}'), ('\u{ab17}', '\u{ab1f}'),
+  ('\u{ab27}', '\u{ab27}'), ('\u{ab2f}', '\u{ab2f}'),
+  ('\u{ab66}', '\u{ab6f}'), ('\u{abee}', '\u{abef}'),
+  ('\u{abfa}', '\u{abff}'), ('\u{d7a4}', '\u{d7af}'),
+  ('\u{d7c7}', '\u{d7ca}'), ('\u{d7fc}', '\u{f8ff}'),
+  ('\u{fa6e}', '\u{fa6f}'), ('\u{fada}', '\u{faff}'),
+  ('\u{fb07}', '\u{fb12}'), ('\u{fb18}', '\u{fb1c}'),
+  ('\u{fb37}', '\u{fb37}'), ('\u{fb3d}', '\u{fb3d}'),
+  ('\u{fb3f}', '\u{fb3f}'), ('\u{fb42}', '\u{fb42}'),
+  ('\u{fb45}', '\u{fb45}'), ('\u{fbc2}', '\u{fbd2}'),
+  ('\u{fd40}', '\u{fd4f}'), ('\u{fd90}', '\u{fd91}'),
+  ('\u{fdc8}', '\u{fdef}'), ('\u{fdfe}', '\u{fdff}'),
+  ('\u{fe1a}', '\u{fe1f}'), ('\u{fe53}', '\u{fe53}'),
+  ('\u{fe67}', '\u{fe67}'), ('\u{fe6c}', '\u{fe6f}'),
+  ('\u{fe75}', '\u{fe75}'), ('\u{fefd}', '\u{ff00}'),
+  ('\u{ffbf}', '\u{ffc1}'), ('\u{ffc8}', '\u{ffc9}'),
+  ('\u{ffd0}', '\u{ffd1}'), ('\u{ffd8}', '\u{ffd9}'),
+  ('\u{ffdd}', '\u{ffdf}'), ('\u{ffe7}', '\u{ffe7}'),
+  ('\u{ffef}', '\u{fffb}'), ('\u{fffe}', '\u{ffff}'),
+  ('\u{1000c}', '\u{1000c}'), ('\u{10027}', '\u{10027}'),
+  ('\u{1003b}', '\u{1003b}'), ('\u{1003e}', '\u{1003e}'),
+  ('\u{1004e}', '\u{1004f}'), ('\u{1005e}', '\u{1007f}'),
+  ('\u{100fb}', '\u{100ff}'), ('\u{10103}', '\u{10106}'),
+  ('\u{10134}', '\u{10136}'), ('\u{1018f}', '\u{1018f}'),
+  ('\u{1019c}', '\u{1019f}'), ('\u{101a1}', '\u{101cf}'),
+  ('\u{101fe}', '\u{1027f}'), ('\u{1029d}', '\u{1029f}'),
+  ('\u{102d1}', '\u{102df}'), ('\u{102fc}', '\u{102ff}'),
+  ('\u{10324}', '\u{1032c}'), ('\u{1034b}', '\u{1034f}'),
+  ('\u{1037b}', '\u{1037f}'), ('\u{1039e}', '\u{1039e}'),
+  ('\u{103c4}', '\u{103c7}'), ('\u{103d6}', '\u{103ff}'),
+  ('\u{1049e}', '\u{1049f}'), ('\u{104aa}', '\u{104af}'),
+  ('\u{104d4}', '\u{104d7}'), ('\u{104fc}', '\u{104ff}'),
+  ('\u{10528}', '\u{1052f}'), ('\u{10564}', '\u{1056e}'),
+  ('\u{10570}', '\u{105ff}'), ('\u{10737}', '\u{1073f}'),
+  ('\u{10756}', '\u{1075f}'), ('\u{10768}', '\u{107ff}'),
+  ('\u{10806}', '\u{10807}'), ('\u{10809}', '\u{10809}'),
+  ('\u{10836}', '\u{10836}'), ('\u{10839}', '\u{1083b}'),
+  ('\u{1083d}', '\u{1083e}'), ('\u{10856}', '\u{10856}'),
+  ('\u{1089f}', '\u{108a6}'), ('\u{108b0}', '\u{108df}'),
+  ('\u{108f3}', '\u{108f3}'), ('\u{108f6}', '\u{108fa}'),
+  ('\u{1091c}', '\u{1091e}'), ('\u{1093a}', '\u{1093e}'),
+  ('\u{10940}', '\u{1097f}'), ('\u{109b8}', '\u{109bb}'),
+  ('\u{109d0}', '\u{109d1}'), ('\u{10a04}', '\u{10a04}'),
+  ('\u{10a07}', '\u{10a0b}'), ('\u{10a14}', '\u{10a14}'),
+  ('\u{10a18}', '\u{10a18}'), ('\u{10a34}', '\u{10a37}'),
+  ('\u{10a3b}', '\u{10a3e}'), ('\u{10a48}', '\u{10a4f}'),
+  ('\u{10a59}', '\u{10a5f}'), ('\u{10aa0}', '\u{10abf}'),
+  ('\u{10ae7}', '\u{10aea}'), ('\u{10af7}', '\u{10aff}'),
+  ('\u{10b36}', '\u{10b38}'), ('\u{10b56}', '\u{10b57}'),
+  ('\u{10b73}', '\u{10b77}'), ('\u{10b92}', '\u{10b98}'),
+  ('\u{10b9d}', '\u{10ba8}'), ('\u{10bb0}', '\u{10bff}'),
+  ('\u{10c49}', '\u{10c7f}'), ('\u{10cb3}', '\u{10cbf}'),
+  ('\u{10cf3}', '\u{10cf9}'), ('\u{10d00}', '\u{10e5f}'),
+  ('\u{10e7f}', '\u{10fff}'), ('\u{1104e}', '\u{11051}'),
+  ('\u{11070}', '\u{1107e}'), ('\u{110bd}', '\u{110bd}'),
+  ('\u{110c2}', '\u{110cf}'), ('\u{110e9}', '\u{110ef}'),
+  ('\u{110fa}', '\u{110ff}'), ('\u{11135}', '\u{11135}'),
+  ('\u{11144}', '\u{1114f}'), ('\u{11177}', '\u{1117f}'),
+  ('\u{111ce}', '\u{111cf}'), ('\u{111e0}', '\u{111e0}'),
+  ('\u{111f5}', '\u{111ff}'), ('\u{11212}', '\u{11212}'),
+  ('\u{1123f}', '\u{1127f}'), ('\u{11287}', '\u{11287}'),
+  ('\u{11289}', '\u{11289}'), ('\u{1128e}', '\u{1128e}'),
+  ('\u{1129e}', '\u{1129e}'), ('\u{112aa}', '\u{112af}'),
+  ('\u{112eb}', '\u{112ef}'), ('\u{112fa}', '\u{112ff}'),
+  ('\u{11304}', '\u{11304}'), ('\u{1130d}', '\u{1130e}'),
+  ('\u{11311}', '\u{11312}'), ('\u{11329}', '\u{11329}'),
+  ('\u{11331}', '\u{11331}'), ('\u{11334}', '\u{11334}'),
+  ('\u{1133a}', '\u{1133b}'), ('\u{11345}', '\u{11346}'),
+  ('\u{11349}', '\u{1134a}'), ('\u{1134e}', '\u{1134f}'),
+  ('\u{11351}', '\u{11356}'), ('\u{11358}', '\u{1135c}'),
+  ('\u{11364}', '\u{11365}'), ('\u{1136d}', '\u{1136f}'),
+  ('\u{11375}', '\u{113ff}'), ('\u{1145a}', '\u{1145a}'),
+  ('\u{1145c}', '\u{1145c}'), ('\u{1145e}', '\u{1147f}'),
+  ('\u{114c8}', '\u{114cf}'), ('\u{114da}', '\u{1157f}'),
+  ('\u{115b6}', '\u{115b7}'), ('\u{115de}', '\u{115ff}'),
+  ('\u{11645}', '\u{1164f}'), ('\u{1165a}', '\u{1165f}'),
+  ('\u{1166d}', '\u{1167f}'), ('\u{116b8}', '\u{116bf}'),
+  ('\u{116ca}', '\u{116ff}'), ('\u{1171a}', '\u{1171c}'),
+  ('\u{1172c}', '\u{1172f}'), ('\u{11740}', '\u{1189f}'),
+  ('\u{118f3}', '\u{118fe}'), ('\u{11900}', '\u{119ff}'),
+  ('\u{11a48}', '\u{11a4f}'), ('\u{11a84}', '\u{11a85}'),
+  ('\u{11a9d}', '\u{11a9d}'), ('\u{11aa3}', '\u{11abf}'),
+  ('\u{11af9}', '\u{11bff}'), ('\u{11c09}', '\u{11c09}'),
+  ('\u{11c37}', '\u{11c37}'), ('\u{11c46}', '\u{11c4f}'),
+  ('\u{11c6d}', '\u{11c6f}'), ('\u{11c90}', '\u{11c91}'),
+  ('\u{11ca8}', '\u{11ca8}'), ('\u{11cb7}', '\u{11cff}'),
+  ('\u{11d07}', '\u{11d07}'), ('\u{11d0a}', '\u{11d0a}'),
+  ('\u{11d37}', '\u{11d39}'), ('\u{11d3b}', '\u{11d3b}'),
+  ('\u{11d3e}', '\u{11d3e}'), ('\u{11d48}', '\u{11d4f}'),
+  ('\u{11d5a}', '\u{11fff}'), ('\u{1239a}', '\u{123ff}'),
+  ('\u{1246f}', '\u{1246f}'), ('\u{12475}', '\u{1247f}'),
+  ('\u{12544}', '\u{12fff}'), ('\u{1342f}', '\u{143ff}'),
+  ('\u{14647}', '\u{167ff}'), ('\u{16a39}', '\u{16a3f}'),
+  ('\u{16a5f}', '\u{16a5f}'), ('\u{16a6a}', '\u{16a6d}'),
+  ('\u{16a70}', '\u{16acf}'), ('\u{16aee}', '\u{16aef}'),
+  ('\u{16af6}', '\u{16aff}'), ('\u{16b46}', '\u{16b4f}'),
+  ('\u{16b5a}', '\u{16b5a}'), ('\u{16b62}', '\u{16b62}'),
+  ('\u{16b78}', '\u{16b7c}'), ('\u{16b90}', '\u{16eff}'),
+  ('\u{16f45}', '\u{16f4f}'), ('\u{16f7f}', '\u{16f8e}'),
+  ('\u{16fa0}', '\u{16fdf}'), ('\u{16fe2}', '\u{16fff}'),
+  ('\u{187ed}', '\u{187ff}'), ('\u{18af3}', '\u{1afff}'),
+  ('\u{1b11f}', '\u{1b16f}'), ('\u{1b2fc}', '\u{1bbff}'),
+  ('\u{1bc6b}', '\u{1bc6f}'), ('\u{1bc7d}', '\u{1bc7f}'),
+  ('\u{1bc89}', '\u{1bc8f}'), ('\u{1bc9a}', '\u{1bc9b}'),
+  ('\u{1bca0}', '\u{1cfff}'), ('\u{1d0f6}', '\u{1d0ff}'),
+  ('\u{1d127}', '\u{1d128}'), ('\u{1d173}', '\u{1d17a}'),
+  ('\u{1d1e9}', '\u{1d1ff}'), ('\u{1d246}', '\u{1d2ff}'),
+  ('\u{1d357}', '\u{1d35f}'), ('\u{1d372}', '\u{1d3ff}'),
+  ('\u{1d455}', '\u{1d455}'), ('\u{1d49d}', '\u{1d49d}'),
+  ('\u{1d4a0}', '\u{1d4a1}'), ('\u{1d4a3}', '\u{1d4a4}'),
+  ('\u{1d4a7}', '\u{1d4a8}'), ('\u{1d4ad}', '\u{1d4ad}'),
+  ('\u{1d4ba}', '\u{1d4ba}'), ('\u{1d4bc}', '\u{1d4bc}'),
+  ('\u{1d4c4}', '\u{1d4c4}'), ('\u{1d506}', '\u{1d506}'),
+  ('\u{1d50b}', '\u{1d50c}'), ('\u{1d515}', '\u{1d515}'),
+  ('\u{1d51d}', '\u{1d51d}'), ('\u{1d53a}', '\u{1d53a}'),
+  ('\u{1d53f}', '\u{1d53f}'), ('\u{1d545}', '\u{1d545}'),
+  ('\u{1d547}', '\u{1d549}'), ('\u{1d551}', '\u{1d551}'),
+  ('\u{1d6a6}', '\u{1d6a7}'), ('\u{1d7cc}', '\u{1d7cd}'),
+  ('\u{1da8c}', '\u{1da9a}'), ('\u{1daa0}', '\u{1daa0}'),
+  ('\u{1dab0}', '\u{1dfff}'), ('\u{1e007}', '\u{1e007}'),
+  ('\u{1e019}', '\u{1e01a}'), ('\u{1e022}', '\u{1e022}'),
+  ('\u{1e025}', '\u{1e025}'), ('\u{1e02b}', '\u{1e7ff}'),
+  ('\u{1e8c5}', '\u{1e8c6}'), ('\u{1e8d7}', '\u{1e8ff}'),
+  ('\u{1e94b}', '\u{1e94f}'), ('\u{1e95a}', '\u{1e95d}'),
+  ('\u{1e960}', '\u{1edff}'), ('\u{1ee04}', '\u{1ee04}'),
+  ('\u{1ee20}', '\u{1ee20}'), ('\u{1ee23}', '\u{1ee23}'),
+  ('\u{1ee25}', '\u{1ee26}'), ('\u{1ee28}', '\u{1ee28}'),
+  ('\u{1ee33}', '\u{1ee33}'), ('\u{1ee38}', '\u{1ee38}'),
+  ('\u{1ee3a}', '\u{1ee3a}'), ('\u{1ee3c}', '\u{1ee41}'),
+  ('\u{1ee43}', '\u{1ee46}'), ('\u{1ee48}', '\u{1ee48}'),
+  ('\u{1ee4a}', '\u{1ee4a}'), ('\u{1ee4c}', '\u{1ee4c}'),
+  ('\u{1ee50}', '\u{1ee50}'), ('\u{1ee53}', '\u{1ee53}'),
+  ('\u{1ee55}', '\u{1ee56}'), ('\u{1ee58}', '\u{1ee58}'),
+  ('\u{1ee5a}', '\u{1ee5a}'), ('\u{1ee5c}', '\u{1ee5c}'),
+  ('\u{1ee5e}', '\u{1ee5e}'), ('\u{1ee60}', '\u{1ee60}'),
+  ('\u{1ee63}', '\u{1ee63}'), ('\u{1ee65}', '\u{1ee66}'),
+  ('\u{1ee6b}', '\u{1ee6b}'), ('\u{1ee73}', '\u{1ee73}'),
+  ('\u{1ee78}', '\u{1ee78}'), ('\u{1ee7d}', '\u{1ee7d}'),
+  ('\u{1ee7f}', '\u{1ee7f}'), ('\u{1ee8a}', '\u{1ee8a}'),
+  ('\u{1ee9c}', '\u{1eea0}'), ('\u{1eea4}', '\u{1eea4}'),
+  ('\u{1eeaa}', '\u{1eeaa}'), ('\u{1eebc}', '\u{1eeef}'),
+  ('\u{1eef2}', '\u{1efff}'), ('\u{1f02c}', '\u{1f02f}'),
+  ('\u{1f094}', '\u{1f09f}'), ('\u{1f0af}', '\u{1f0b0}'),
+  ('\u{1f0c0}', '\u{1f0c0}'), ('\u{1f0d0}', '\u{1f0d0}'),
+  ('\u{1f0f6}', '\u{1f0ff}'), ('\u{1f10d}', '\u{1f10f}'),
+  ('\u{1f12f}', '\u{1f12f}'), ('\u{1f16c}', '\u{1f16f}'),
+  ('\u{1f1ad}', '\u{1f1e5}'), ('\u{1f203}', '\u{1f20f}'),
+  ('\u{1f23c}', '\u{1f23f}'), ('\u{1f249}', '\u{1f24f}'),
+  ('\u{1f252}', '\u{1f25f}'), ('\u{1f266}', '\u{1f2ff}'),
+  ('\u{1f6d5}', '\u{1f6df}'), ('\u{1f6ed}', '\u{1f6ef}'),
+  ('\u{1f6f9}', '\u{1f6ff}'), ('\u{1f774}', '\u{1f77f}'),
+  ('\u{1f7d5}', '\u{1f7ff}'), ('\u{1f80c}', '\u{1f80f}'),
+  ('\u{1f848}', '\u{1f84f}'), ('\u{1f85a}', '\u{1f85f}'),
+  ('\u{1f888}', '\u{1f88f}'), ('\u{1f8ae}', '\u{1f8ff}'),
+  ('\u{1f90c}', '\u{1f90f}'), ('\u{1f93f}', '\u{1f93f}'),
+  ('\u{1f94d}', '\u{1f94f}'), ('\u{1f96c}', '\u{1f97f}'),
+  ('\u{1f998}', '\u{1f9bf}'), ('\u{1f9c1}', '\u{1f9cf}'),
+  ('\u{1f9e7}', '\u{1ffff}'), ('\u{2a6d7}', '\u{2a6ff}'),
+  ('\u{2b735}', '\u{2b73f}'), ('\u{2b81e}', '\u{2b81f}'),
+  ('\u{2cea2}', '\u{2ceaf}'), ('\u{2ebe1}', '\u{2f7ff}'),
+  ('\u{2fa1e}', '\u{e00ff}'), ('\u{e01f0}', '\u{10ffff}'),
+];
+
+pub const OTHER_LETTER: &'static [(char, char)] = &[
+  ('ª', 'ª'), ('º', 'º'), ('ƻ', 'ƻ'), ('ǀ', 'ǃ'), ('ʔ', 'ʔ'),
+  ('א', 'ת'), ('װ', 'ײ'), ('ؠ', 'ؿ'), ('ف', 'ي'), ('ٮ', 'ٯ'),
+  ('ٱ', 'ۓ'), ('ە', 'ە'), ('ۮ', 'ۯ'), ('ۺ', 'ۼ'), ('ۿ', 'ۿ'),
+  ('ܐ', 'ܐ'), ('ܒ', 'ܯ'), ('ݍ', 'ޥ'), ('ޱ', 'ޱ'), ('ߊ', 'ߪ'),
+  ('ࠀ', 'ࠕ'), ('ࡀ', 'ࡘ'), ('ࡠ', 'ࡪ'), ('ࢠ', 'ࢴ'),
+  ('ࢶ', 'ࢽ'), ('ऄ', 'ह'), ('ऽ', 'ऽ'), ('ॐ', 'ॐ'),
+  ('क़', 'ॡ'), ('ॲ', 'ঀ'), ('অ', 'ঌ'), ('এ', 'ঐ'),
+  ('ও', 'ন'), ('প', 'র'), ('ল', 'ল'), ('শ', 'হ'),
+  ('ঽ', 'ঽ'), ('ৎ', 'ৎ'), ('ড়', 'ঢ়'), ('য়', 'ৡ'),
+  ('ৰ', 'ৱ'), ('ৼ', 'ৼ'), ('ਅ', 'ਊ'), ('ਏ', 'ਐ'),
+  ('ਓ', 'ਨ'), ('ਪ', 'ਰ'), ('ਲ', 'ਲ਼'), ('ਵ', 'ਸ਼'),
+  ('ਸ', 'ਹ'), ('ਖ਼', 'ੜ'), ('ਫ਼', 'ਫ਼'), ('ੲ', 'ੴ'),
+  ('અ', 'ઍ'), ('એ', 'ઑ'), ('ઓ', 'ન'), ('પ', 'ર'),
+  ('લ', 'ળ'), ('વ', 'હ'), ('ઽ', 'ઽ'), ('ૐ', 'ૐ'),
+  ('ૠ', 'ૡ'), ('ૹ', 'ૹ'), ('ଅ', 'ଌ'), ('ଏ', 'ଐ'),
+  ('ଓ', 'ନ'), ('ପ', 'ର'), ('ଲ', 'ଳ'), ('ଵ', 'ହ'),
+  ('ଽ', 'ଽ'), ('ଡ଼', 'ଢ଼'), ('ୟ', 'ୡ'), ('ୱ', 'ୱ'),
+  ('ஃ', 'ஃ'), ('அ', 'ஊ'), ('எ', 'ஐ'), ('ஒ', 'க'),
+  ('ங', 'ச'), ('ஜ', 'ஜ'), ('ஞ', 'ட'), ('ண', 'த'),
+  ('ந', 'ப'), ('ம', 'ஹ'), ('ௐ', 'ௐ'), ('అ', 'ఌ'),
+  ('ఎ', 'ఐ'), ('ఒ', 'న'), ('ప', 'హ'), ('ఽ', 'ఽ'),
+  ('ౘ', 'ౚ'), ('ౠ', 'ౡ'), ('ಀ', 'ಀ'), ('ಅ', 'ಌ'),
+  ('ಎ', 'ಐ'), ('ಒ', 'ನ'), ('ಪ', 'ಳ'), ('ವ', 'ಹ'),
+  ('ಽ', 'ಽ'), ('ೞ', 'ೞ'), ('ೠ', 'ೡ'), ('ೱ', 'ೲ'),
+  ('അ', 'ഌ'), ('എ', 'ഐ'), ('ഒ', 'ഺ'), ('ഽ', 'ഽ'),
+  ('ൎ', 'ൎ'), ('ൔ', 'ൖ'), ('ൟ', 'ൡ'), ('ൺ', 'ൿ'),
+  ('අ', 'ඖ'), ('ක', 'න'), ('ඳ', 'ර'), ('ල', 'ල'),
+  ('ව', 'ෆ'), ('ก', 'ะ'), ('า', 'ำ'), ('เ', 'ๅ'),
+  ('ກ', 'ຂ'), ('ຄ', 'ຄ'), ('ງ', 'ຈ'), ('ຊ', 'ຊ'),
+  ('ຍ', 'ຍ'), ('ດ', 'ທ'), ('ນ', 'ຟ'), ('ມ', 'ຣ'),
+  ('ລ', 'ລ'), ('ວ', 'ວ'), ('ສ', 'ຫ'), ('ອ', 'ະ'),
+  ('າ', 'ຳ'), ('ຽ', 'ຽ'), ('ເ', 'ໄ'), ('ໜ', 'ໟ'),
+  ('ༀ', 'ༀ'), ('ཀ', 'ཇ'), ('ཉ', 'ཬ'), ('ྈ', 'ྌ'),
+  ('က', 'ဪ'), ('ဿ', 'ဿ'), ('ၐ', 'ၕ'), ('ၚ', 'ၝ'),
+  ('ၡ', 'ၡ'), ('ၥ', 'ၦ'), ('ၮ', 'ၰ'), ('ၵ', 'ႁ'),
+  ('ႎ', 'ႎ'), ('ა', 'ჺ'), ('ჽ', 'ቈ'), ('ቊ', 'ቍ'),
+  ('ቐ', 'ቖ'), ('ቘ', 'ቘ'), ('ቚ', 'ቝ'), ('በ', 'ኈ'),
+  ('ኊ', 'ኍ'), ('ነ', 'ኰ'), ('ኲ', 'ኵ'), ('ኸ', 'ኾ'),
+  ('ዀ', 'ዀ'), ('ዂ', 'ዅ'), ('ወ', 'ዖ'), ('ዘ', 'ጐ'),
+  ('ጒ', 'ጕ'), ('ጘ', 'ፚ'), ('ᎀ', 'ᎏ'), ('ᐁ', 'ᙬ'),
+  ('ᙯ', 'ᙿ'), ('ᚁ', 'ᚚ'), ('ᚠ', 'ᛪ'), ('ᛱ', 'ᛸ'),
+  ('ᜀ', 'ᜌ'), ('ᜎ', 'ᜑ'), ('ᜠ', 'ᜱ'), ('ᝀ', 'ᝑ'),
+  ('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), ('ក', 'ឳ'), ('ៜ', 'ៜ'),
+  ('ᠠ', 'ᡂ'), ('ᡄ', 'ᡷ'), ('ᢀ', 'ᢄ'), ('ᢇ', 'ᢨ'),
+  ('ᢪ', 'ᢪ'), ('ᢰ', 'ᣵ'), ('ᤀ', 'ᤞ'), ('ᥐ', 'ᥭ'),
+  ('ᥰ', 'ᥴ'), ('ᦀ', 'ᦫ'), ('ᦰ', 'ᧉ'), ('ᨀ', 'ᨖ'),
+  ('ᨠ', 'ᩔ'), ('ᬅ', 'ᬳ'), ('ᭅ', 'ᭋ'), ('ᮃ', 'ᮠ'),
+  ('ᮮ', 'ᮯ'), ('ᮺ', 'ᯥ'), ('ᰀ', 'ᰣ'), ('ᱍ', 'ᱏ'),
+  ('ᱚ', 'ᱷ'), ('ᳩ', 'ᳬ'), ('ᳮ', 'ᳱ'), ('ᳵ', 'ᳶ'),
+  ('ℵ', 'ℸ'), ('ⴰ', 'ⵧ'), ('ⶀ', 'ⶖ'), ('ⶠ', 'ⶦ'),
+  ('ⶨ', 'ⶮ'), ('ⶰ', 'ⶶ'), ('ⶸ', 'ⶾ'), ('ⷀ', 'ⷆ'),
+  ('ⷈ', 'ⷎ'), ('ⷐ', 'ⷖ'), ('ⷘ', 'ⷞ'), ('〆', '〆'),
+  ('〼', '〼'), ('ぁ', 'ゖ'), ('ゟ', 'ゟ'), ('ァ', 'ヺ'),
+  ('ヿ', 'ヿ'), ('ㄅ', 'ㄮ'), ('ㄱ', 'ㆎ'), ('ㆠ', 'ㆺ'),
+  ('ㇰ', 'ㇿ'), ('㐀', '䶵'), ('一', '鿪'), ('ꀀ', 'ꀔ'),
+  ('ꀖ', 'ꒌ'), ('ꓐ', 'ꓷ'), ('ꔀ', 'ꘋ'), ('ꘐ', 'ꘟ'),
+  ('ꘪ', 'ꘫ'), ('ꙮ', 'ꙮ'), ('ꚠ', 'ꛥ'), ('ꞏ', 'ꞏ'),
+  ('ꟷ', 'ꟷ'), ('ꟻ', 'ꠁ'), ('ꠃ', 'ꠅ'), ('ꠇ', 'ꠊ'),
+  ('ꠌ', 'ꠢ'), ('ꡀ', 'ꡳ'), ('ꢂ', 'ꢳ'), ('ꣲ', 'ꣷ'),
+  ('ꣻ', 'ꣻ'), ('ꣽ', 'ꣽ'), ('ꤊ', 'ꤥ'), ('ꤰ', 'ꥆ'),
+  ('ꥠ', 'ꥼ'), ('ꦄ', 'ꦲ'), ('ꧠ', 'ꧤ'), ('ꧧ', 'ꧯ'),
+  ('ꧺ', 'ꧾ'), ('ꨀ', 'ꨨ'), ('ꩀ', 'ꩂ'), ('ꩄ', 'ꩋ'),
+  ('ꩠ', 'ꩯ'), ('ꩱ', 'ꩶ'), ('ꩺ', 'ꩺ'), ('ꩾ', 'ꪯ'),
+  ('ꪱ', 'ꪱ'), ('ꪵ', 'ꪶ'), ('ꪹ', 'ꪽ'), ('ꫀ', 'ꫀ'),
+  ('ꫂ', 'ꫂ'), ('ꫛ', 'ꫜ'), ('ꫠ', 'ꫪ'), ('ꫲ', 'ꫲ'),
+  ('ꬁ', 'ꬆ'), ('ꬉ', 'ꬎ'), ('ꬑ', 'ꬖ'), ('ꬠ', 'ꬦ'),
+  ('ꬨ', 'ꬮ'), ('ꯀ', 'ꯢ'), ('가', '힣'), ('ힰ', 'ퟆ'),
+  ('ퟋ', 'ퟻ'), ('豈', '舘'), ('並', '龎'), ('יִ', 'יִ'),
+  ('ײַ', 'ﬨ'), ('שׁ', 'זּ'), ('טּ', 'לּ'), ('מּ', 'מּ'),
+  ('נּ', 'סּ'), ('ףּ', 'פּ'), ('צּ', 'ﮱ'), ('ﯓ', 'ﴽ'),
+  ('ﵐ', 'ﶏ'), ('ﶒ', 'ﷇ'), ('ﷰ', 'ﷻ'), ('ﹰ', 'ﹴ'),
+  ('ﹶ', 'ﻼ'), ('ヲ', 'ッ'), ('ア', 'ン'), ('ᅠ', 'ᄒ'),
+  ('ᅡ', 'ᅦ'), ('ᅧ', 'ᅬ'), ('ᅭ', 'ᅲ'), ('ᅳ', 'ᅵ'),
+  ('𐀀', '𐀋'), ('𐀍', '𐀦'), ('𐀨', '𐀺'), ('𐀼', '𐀽'),
+  ('𐀿', '𐁍'), ('𐁐', '𐁝'), ('𐂀', '𐃺'), ('𐊀', '𐊜'),
+  ('𐊠', '𐋐'), ('𐌀', '𐌟'), ('𐌭', '𐍀'), ('𐍂', '𐍉'),
+  ('𐍐', '𐍵'), ('𐎀', '𐎝'), ('𐎠', '𐏃'), ('𐏈', '𐏏'),
+  ('𐑐', '𐒝'), ('𐔀', '𐔧'), ('𐔰', '𐕣'), ('𐘀', '𐜶'),
+  ('𐝀', '𐝕'), ('𐝠', '𐝧'), ('𐠀', '𐠅'), ('𐠈', '𐠈'),
+  ('𐠊', '𐠵'), ('𐠷', '𐠸'), ('𐠼', '𐠼'), ('𐠿', '𐡕'),
+  ('𐡠', '𐡶'), ('𐢀', '𐢞'), ('𐣠', '𐣲'), ('𐣴', '𐣵'),
+  ('𐤀', '𐤕'), ('𐤠', '𐤹'), ('𐦀', '𐦷'), ('𐦾', '𐦿'),
+  ('𐨀', '𐨀'), ('𐨐', '𐨓'), ('𐨕', '𐨗'), ('𐨙', '𐨳'),
+  ('𐩠', '𐩼'), ('𐪀', '𐪜'), ('𐫀', '𐫇'), ('𐫉', '𐫤'),
+  ('𐬀', '𐬵'), ('𐭀', '𐭕'), ('𐭠', '𐭲'), ('𐮀', '𐮑'),
+  ('𐰀', '𐱈'), ('𑀃', '𑀷'), ('𑂃', '𑂯'), ('𑃐', '𑃨'),
+  ('𑄃', '𑄦'), ('𑅐', '𑅲'), ('𑅶', '𑅶'), ('𑆃', '𑆲'),
+  ('𑇁', '𑇄'), ('𑇚', '𑇚'), ('𑇜', '𑇜'), ('𑈀', '𑈑'),
+  ('𑈓', '𑈫'), ('𑊀', '𑊆'), ('𑊈', '𑊈'), ('𑊊', '𑊍'),
+  ('𑊏', '𑊝'), ('𑊟', '𑊨'), ('𑊰', '𑋞'), ('𑌅', '𑌌'),
+  ('𑌏', '𑌐'), ('𑌓', '𑌨'), ('𑌪', '𑌰'), ('𑌲', '𑌳'),
+  ('𑌵', '𑌹'), ('𑌽', '𑌽'), ('𑍐', '𑍐'), ('𑍝', '𑍡'),
+  ('𑐀', '𑐴'), ('𑑇', '𑑊'), ('𑒀', '𑒯'), ('𑓄', '𑓅'),
+  ('𑓇', '𑓇'), ('𑖀', '𑖮'), ('𑗘', '𑗛'), ('𑘀', '𑘯'),
+  ('𑙄', '𑙄'), ('𑚀', '𑚪'), ('𑜀', '𑜙'), ('𑣿', '𑣿'),
+  ('𑨀', '𑨀'), ('𑨋', '𑨲'), ('𑨺', '𑨺'), ('𑩐', '𑩐'),
+  ('𑩜', '𑪃'), ('𑪆', '𑪉'), ('𑫀', '𑫸'), ('𑰀', '𑰈'),
+  ('𑰊', '𑰮'), ('𑱀', '𑱀'), ('𑱲', '𑲏'), ('𑴀', '𑴆'),
+  ('𑴈', '𑴉'), ('𑴋', '𑴰'), ('𑵆', '𑵆'), ('𒀀', '𒎙'),
+  ('𒒀', '𒕃'), ('𓀀', '𓐮'), ('𔐀', '𔙆'), ('𖠀', '𖨸'),
+  ('𖩀', '𖩞'), ('𖫐', '𖫭'), ('𖬀', '𖬯'), ('𖭣', '𖭷'),
+  ('𖭽', '𖮏'), ('𖼀', '𖽄'), ('𖽐', '𖽐'), ('𗀀', '𘟬'),
+  ('𘠀', '𘫲'), ('𛀀', '𛄞'), ('𛅰', '𛋻'), ('𛰀', '𛱪'),
+  ('𛱰', '𛱼'), ('𛲀', '𛲈'), ('𛲐', '𛲙'), ('𞠀', '𞣄'),
+  ('𞸀', '𞸃'), ('𞸅', '𞸟'), ('𞸡', '𞸢'), ('𞸤', '𞸤'),
+  ('𞸧', '𞸧'), ('𞸩', '𞸲'), ('𞸴', '𞸷'), ('𞸹', '𞸹'),
+  ('𞸻', '𞸻'), ('𞹂', '𞹂'), ('𞹇', '𞹇'), ('𞹉', '𞹉'),
+  ('𞹋', '𞹋'), ('𞹍', '𞹏'), ('𞹑', '𞹒'), ('𞹔', '𞹔'),
+  ('𞹗', '𞹗'), ('𞹙', '𞹙'), ('𞹛', '𞹛'), ('𞹝', '𞹝'),
+  ('𞹟', '𞹟'), ('𞹡', '𞹢'), ('𞹤', '𞹤'), ('𞹧', '𞹪'),
+  ('𞹬', '𞹲'), ('𞹴', '𞹷'), ('𞹹', '𞹼'), ('𞹾', '𞹾'),
+  ('𞺀', '𞺉'), ('𞺋', '𞺛'), ('𞺡', '𞺣'), ('𞺥', '𞺩'),
+  ('𞺫', '𞺻'), ('𠀀', '𪛖'), ('𪜀', '𫜴'), ('𫝀', '𫠝'),
+  ('𫠠', '𬺡'), ('𬺰', '𮯠'), ('丽', '𪘀'),
+];
+
+pub const OTHER_NUMBER: &'static [(char, char)] = &[
+  ('²', '³'), ('¹', '¹'), ('¼', '¾'), ('৴', '৹'), ('୲', '୷'),
+  ('௰', '௲'), ('౸', '౾'), ('൘', '൞'), ('൰', '൸'),
+  ('༪', '༳'), ('፩', '፼'), ('៰', '៹'), ('᧚', '᧚'),
+  ('⁰', '⁰'), ('⁴', '⁹'), ('₀', '₉'), ('⅐', '⅟'),
+  ('↉', '↉'), ('①', '⒛'), ('⓪', '⓿'), ('❶', '➓'),
+  ('⳽', '⳽'), ('㆒', '㆕'), ('㈠', '㈩'), ('㉈', '㉏'),
+  ('㉑', '㉟'), ('㊀', '㊉'), ('㊱', '㊿'), ('꠰', '꠵'),
+  ('𐄇', '𐄳'), ('𐅵', '𐅸'), ('𐆊', '𐆋'), ('𐋡', '𐋻'),
+  ('𐌠', '𐌣'), ('𐡘', '𐡟'), ('𐡹', '𐡿'), ('𐢧', '𐢯'),
+  ('𐣻', '𐣿'), ('𐤖', '𐤛'), ('𐦼', '𐦽'), ('𐧀', '𐧏'),
+  ('𐧒', '𐧿'), ('𐩀', '𐩇'), ('𐩽', '𐩾'), ('𐪝', '𐪟'),
+  ('𐫫', '𐫯'), ('𐭘', '𐭟'), ('𐭸', '𐭿'), ('𐮩', '𐮯'),
+  ('𐳺', '𐳿'), ('𐹠', '𐹾'), ('𑁒', '𑁥'), ('𑇡', '𑇴'),
+  ('𑜺', '𑜻'), ('𑣪', '𑣲'), ('𑱚', '𑱬'), ('𖭛', '𖭡'),
+  ('𝍠', '𝍱'), ('𞣇', '𞣏'), ('🄀', '🄌'),
+];
+
+pub const OTHER_PUNCTUATION: &'static [(char, char)] = &[
+  ('!', '#'), ('%', '\''), ('*', '*'), (',', ','), ('.', '/'), (':', ';'),
+  ('?', '@'), ('\\', '\\'), ('¡', '¡'), ('§', '§'), ('¶', '·'),
+  ('¿', '¿'), (';', ';'), ('·', '·'), ('՚', '՟'), ('։', '։'),
+  ('׀', '׀'), ('׃', '׃'), ('׆', '׆'), ('׳', '״'), ('؉', '؊'),
+  ('،', '؍'), ('؛', '؛'), ('؞', '؟'), ('٪', '٭'), ('۔', '۔'),
+  ('܀', '܍'), ('߷', '߹'), ('࠰', '࠾'), ('࡞', '࡞'), ('।', '॥'),
+  ('॰', '॰'), ('৽', '৽'), ('૰', '૰'), ('෴', '෴'),
+  ('๏', '๏'), ('๚', '๛'), ('༄', '༒'), ('༔', '༔'),
+  ('྅', '྅'), ('࿐', '࿔'), ('࿙', '࿚'), ('၊', '၏'),
+  ('჻', '჻'), ('፠', '፨'), ('᙭', '᙮'), ('᛫', '᛭'),
+  ('᜵', '᜶'), ('។', '៖'), ('៘', '៚'), ('᠀', '᠅'),
+  ('᠇', '᠊'), ('᥄', '᥅'), ('᨞', '᨟'), ('᪠', '᪦'),
+  ('᪨', '᪭'), ('᭚', '᭠'), ('᯼', '᯿'), ('᰻', '᰿'),
+  ('᱾', '᱿'), ('᳀', '᳇'), ('᳓', '᳓'), ('‖', '‗'),
+  ('†', '‧'), ('‰', '‸'), ('※', '‾'), ('⁁', '⁃'),
+  ('⁇', '⁑'), ('⁓', '⁓'), ('⁕', '⁞'), ('⳹', '⳼'),
+  ('⳾', '⳿'), ('⵰', '⵰'), ('⸀', '⸁'), ('⸆', '⸈'),
+  ('⸋', '⸋'), ('⸎', '⸖'), ('⸘', '⸙'), ('⸛', '⸛'),
+  ('⸞', '⸟'), ('⸪', '⸮'), ('⸰', '⸹'), ('⸼', '⸿'),
+  ('⹁', '⹁'), ('⹃', '⹉'), ('、', '〃'), ('〽', '〽'),
+  ('・', '・'), ('꓾', '꓿'), ('꘍', '꘏'), ('꙳', '꙳'),
+  ('꙾', '꙾'), ('꛲', '꛷'), ('꡴', '꡷'), ('꣎', '꣏'),
+  ('꣸', '꣺'), ('꣼', '꣼'), ('꤮', '꤯'), ('꥟', '꥟'),
+  ('꧁', '꧍'), ('꧞', '꧟'), ('꩜', '꩟'), ('꫞', '꫟'),
+  ('꫰', '꫱'), ('꯫', '꯫'), ('︐', '︖'), ('︙', '︙'),
+  ('︰', '︰'), ('﹅', '﹆'), ('﹉', '﹌'), ('﹐', '﹒'),
+  ('﹔', '﹗'), ('﹟', '﹡'), ('﹨', '﹨'), ('﹪', '﹫'),
+  ('!', '#'), ('%', '''), ('*', '*'), (',', ','),
+  ('.', '/'), (':', ';'), ('?', '@'), ('\', '\'),
+  ('。', '。'), ('、', '・'), ('𐄀', '𐄂'), ('𐎟', '𐎟'),
+  ('𐏐', '𐏐'), ('𐕯', '𐕯'), ('𐡗', '𐡗'), ('𐤟', '𐤟'),
+  ('𐤿', '𐤿'), ('𐩐', '𐩘'), ('𐩿', '𐩿'), ('𐫰', '𐫶'),
+  ('𐬹', '𐬿'), ('𐮙', '𐮜'), ('𑁇', '𑁍'), ('𑂻', '𑂼'),
+  ('𑂾', '𑃁'), ('𑅀', '𑅃'), ('𑅴', '𑅵'), ('𑇅', '𑇉'),
+  ('𑇍', '𑇍'), ('𑇛', '𑇛'), ('𑇝', '𑇟'), ('𑈸', '𑈽'),
+  ('𑊩', '𑊩'), ('𑑋', '𑑏'), ('𑑛', '𑑛'), ('𑑝', '𑑝'),
+  ('𑓆', '𑓆'), ('𑗁', '𑗗'), ('𑙁', '𑙃'), ('𑙠', '𑙬'),
+  ('𑜼', '𑜾'), ('𑨿', '𑩆'), ('𑪚', '𑪜'), ('𑪞', '𑪢'),
+  ('𑱁', '𑱅'), ('𑱰', '𑱱'), ('𒑰', '𒑴'), ('𖩮', '𖩯'),
+  ('𖫵', '𖫵'), ('𖬷', '𖬻'), ('𖭄', '𖭄'), ('𛲟', '𛲟'),
+  ('𝪇', '𝪋'), ('𞥞', '𞥟'),
+];
+
+pub const OTHER_SYMBOL: &'static [(char, char)] = &[
+  ('¦', '¦'), ('©', '©'), ('®', '®'), ('°', '°'), ('҂', '҂'),
+  ('֍', '֎'), ('؎', '؏'), ('۞', '۞'), ('۩', '۩'), ('۽', '۾'),
+  ('߶', '߶'), ('৺', '৺'), ('୰', '୰'), ('௳', '௸'),
+  ('௺', '௺'), ('౿', '౿'), ('൏', '൏'), ('൹', '൹'),
+  ('༁', '༃'), ('༓', '༓'), ('༕', '༗'), ('༚', '༟'),
+  ('༴', '༴'), ('༶', '༶'), ('༸', '༸'), ('྾', '࿅'),
+  ('࿇', '࿌'), ('࿎', '࿏'), ('࿕', '࿘'), ('႞', '႟'),
+  ('᎐', '᎙'), ('᥀', '᥀'), ('᧞', '᧿'), ('᭡', '᭪'),
+  ('᭴', '᭼'), ('℀', '℁'), ('℃', '℆'), ('℈', '℉'),
+  ('℔', '℔'), ('№', '℗'), ('℞', '℣'), ('℥', '℥'),
+  ('℧', '℧'), ('℩', '℩'), ('℮', '℮'), ('℺', '℻'),
+  ('⅊', '⅊'), ('⅌', '⅍'), ('⅏', '⅏'), ('↊', '↋'),
+  ('↕', '↙'), ('↜', '↟'), ('↡', '↢'), ('↤', '↥'),
+  ('↧', '↭'), ('↯', '⇍'), ('⇐', '⇑'), ('⇓', '⇓'),
+  ('⇕', '⇳'), ('⌀', '⌇'), ('⌌', '⌟'), ('⌢', '⌨'),
+  ('⌫', '⍻'), ('⍽', '⎚'), ('⎴', '⏛'), ('⏢', '␦'),
+  ('⑀', '⑊'), ('⒜', 'ⓩ'), ('─', '▶'), ('▸', '◀'),
+  ('◂', '◷'), ('☀', '♮'), ('♰', '❧'), ('➔', '➿'),
+  ('⠀', '⣿'), ('⬀', '⬯'), ('⭅', '⭆'), ('⭍', '⭳'),
+  ('⭶', '⮕'), ('⮘', '⮹'), ('⮽', '⯈'), ('⯊', '⯒'),
+  ('⯬', '⯯'), ('⳥', '⳪'), ('⺀', '⺙'), ('⺛', '⻳'),
+  ('⼀', '⿕'), ('⿰', '⿻'), ('〄', '〄'), ('〒', '〓'),
+  ('〠', '〠'), ('〶', '〷'), ('〾', '〿'), ('㆐', '㆑'),
+  ('㆖', '㆟'), ('㇀', '㇣'), ('㈀', '㈞'), ('㈪', '㉇'),
+  ('㉐', '㉐'), ('㉠', '㉿'), ('㊊', '㊰'), ('㋀', '㋾'),
+  ('㌀', '㏿'), ('䷀', '䷿'), ('꒐', '꓆'), ('꠨', '꠫'),
+  ('꠶', '꠷'), ('꠹', '꠹'), ('꩷', '꩹'), ('﷽', '﷽'),
+  ('¦', '¦'), ('│', '│'), ('■', '○'), ('', '�'),
+  ('𐄷', '𐄿'), ('𐅹', '𐆉'), ('𐆌', '𐆎'), ('𐆐', '𐆛'),
+  ('𐆠', '𐆠'), ('𐇐', '𐇼'), ('𐡷', '𐡸'), ('𐫈', '𐫈'),
+  ('𑜿', '𑜿'), ('𖬼', '𖬿'), ('𖭅', '𖭅'), ('𛲜', '𛲜'),
+  ('𝀀', '𝃵'), ('𝄀', '𝄦'), ('𝄩', '𝅘𝅥𝅲'), ('𝅪', '𝅬'),
+  ('𝆃', '𝆄'), ('𝆌', '𝆩'), ('𝆮', '𝇨'), ('𝈀', '𝉁'),
+  ('𝉅', '𝉅'), ('𝌀', '𝍖'), ('𝠀', '𝧿'), ('𝨷', '𝨺'),
+  ('𝩭', '𝩴'), ('𝩶', '𝪃'), ('𝪅', '𝪆'), ('🀀', '🀫'),
+  ('🀰', '🂓'), ('🂠', '🂮'), ('🂱', '🂿'), ('🃁', '🃏'),
+  ('🃑', '🃵'), ('🄐', '🄮'), ('🄰', '🅫'), ('🅰', '🆬'),
+  ('🇦', '🈂'), ('🈐', '🈻'), ('🉀', '🉈'), ('🉐', '🉑'),
+  ('🉠', '🉥'), ('🌀', '🏺'), ('🐀', '🛔'), ('🛠', '🛬'),
+  ('🛰', '🛸'), ('🜀', '🝳'), ('🞀', '🟔'), ('🠀', '🠋'),
+  ('🠐', '🡇'), ('🡐', '🡙'), ('🡠', '🢇'), ('🢐', '🢭'),
+  ('🤀', '🤋'), ('🤐', '🤾'), ('🥀', '🥌'), ('🥐', '🥫'),
+  ('🦀', '🦗'), ('🧀', '🧀'), ('🧐', '🧦'),
+];
+
+pub const PARAGRAPH_SEPARATOR: &'static [(char, char)] = &[
+  ('\u{2029}', '\u{2029}'),
+];
+
+pub const PRIVATE_USE: &'static [(char, char)] = &[
+  ('\u{e000}', '\u{f8ff}'), ('\u{f0000}', '\u{ffffd}'),
+  ('\u{100000}', '\u{10fffd}'),
+];
+
+pub const PUNCTUATION: &'static [(char, char)] = &[
+  ('!', '#'), ('%', '*'), (',', '/'), (':', ';'), ('?', '@'), ('[', ']'),
+  ('_', '_'), ('{', '{'), ('}', '}'), ('¡', '¡'), ('§', '§'),
+  ('«', '«'), ('¶', '·'), ('»', '»'), ('¿', '¿'), (';', ';'),
+  ('·', '·'), ('՚', '՟'), ('։', '֊'), ('־', '־'), ('׀', '׀'),
+  ('׃', '׃'), ('׆', '׆'), ('׳', '״'), ('؉', '؊'), ('،', '؍'),
+  ('؛', '؛'), ('؞', '؟'), ('٪', '٭'), ('۔', '۔'), ('܀', '܍'),
+  ('߷', '߹'), ('࠰', '࠾'), ('࡞', '࡞'), ('।', '॥'),
+  ('॰', '॰'), ('৽', '৽'), ('૰', '૰'), ('෴', '෴'),
+  ('๏', '๏'), ('๚', '๛'), ('༄', '༒'), ('༔', '༔'),
+  ('༺', '༽'), ('྅', '྅'), ('࿐', '࿔'), ('࿙', '࿚'),
+  ('၊', '၏'), ('჻', '჻'), ('፠', '፨'), ('᐀', '᐀'),
+  ('᙭', '᙮'), ('᚛', '᚜'), ('᛫', '᛭'), ('᜵', '᜶'),
+  ('។', '៖'), ('៘', '៚'), ('᠀', '᠊'), ('᥄', '᥅'),
+  ('᨞', '᨟'), ('᪠', '᪦'), ('᪨', '᪭'), ('᭚', '᭠'),
+  ('᯼', '᯿'), ('᰻', '᰿'), ('᱾', '᱿'), ('᳀', '᳇'),
+  ('᳓', '᳓'), ('‐', '‧'), ('‰', '⁃'), ('⁅', '⁑'),
+  ('⁓', '⁞'), ('⁽', '⁾'), ('₍', '₎'), ('⌈', '⌋'),
+  ('〈', '〉'), ('❨', '❵'), ('⟅', '⟆'), ('⟦', '⟯'),
+  ('⦃', '⦘'), ('⧘', '⧛'), ('⧼', '⧽'), ('⳹', '⳼'),
+  ('⳾', '⳿'), ('⵰', '⵰'), ('⸀', '⸮'), ('⸰', '⹉'),
+  ('、', '〃'), ('〈', '】'), ('〔', '〟'), ('〰', '〰'),
+  ('〽', '〽'), ('゠', '゠'), ('・', '・'), ('꓾', '꓿'),
+  ('꘍', '꘏'), ('꙳', '꙳'), ('꙾', '꙾'), ('꛲', '꛷'),
+  ('꡴', '꡷'), ('꣎', '꣏'), ('꣸', '꣺'), ('꣼', '꣼'),
+  ('꤮', '꤯'), ('꥟', '꥟'), ('꧁', '꧍'), ('꧞', '꧟'),
+  ('꩜', '꩟'), ('꫞', '꫟'), ('꫰', '꫱'), ('꯫', '꯫'),
+  ('﴾', '﴿'), ('︐', '︙'), ('︰', '﹒'), ('﹔', '﹡'),
+  ('﹣', '﹣'), ('﹨', '﹨'), ('﹪', '﹫'), ('!', '#'),
+  ('%', '*'), (',', '/'), (':', ';'), ('?', '@'),
+  ('[', ']'), ('_', '_'), ('{', '{'), ('}', '}'),
+  ('⦅', '・'), ('𐄀', '𐄂'), ('𐎟', '𐎟'), ('𐏐', '𐏐'),
+  ('𐕯', '𐕯'), ('𐡗', '𐡗'), ('𐤟', '𐤟'), ('𐤿', '𐤿'),
+  ('𐩐', '𐩘'), ('𐩿', '𐩿'), ('𐫰', '𐫶'), ('𐬹', '𐬿'),
+  ('𐮙', '𐮜'), ('𑁇', '𑁍'), ('𑂻', '𑂼'), ('𑂾', '𑃁'),
+  ('𑅀', '𑅃'), ('𑅴', '𑅵'), ('𑇅', '𑇉'), ('𑇍', '𑇍'),
+  ('𑇛', '𑇛'), ('𑇝', '𑇟'), ('𑈸', '𑈽'), ('𑊩', '𑊩'),
+  ('𑑋', '𑑏'), ('𑑛', '𑑛'), ('𑑝', '𑑝'), ('𑓆', '𑓆'),
+  ('𑗁', '𑗗'), ('𑙁', '𑙃'), ('𑙠', '𑙬'), ('𑜼', '𑜾'),
+  ('𑨿', '𑩆'), ('𑪚', '𑪜'), ('𑪞', '𑪢'), ('𑱁', '𑱅'),
+  ('𑱰', '𑱱'), ('𒑰', '𒑴'), ('𖩮', '𖩯'), ('𖫵', '𖫵'),
+  ('𖬷', '𖬻'), ('𖭄', '𖭄'), ('𛲟', '𛲟'), ('𝪇', '𝪋'),
+  ('𞥞', '𞥟'),
+];
+
+pub const SEPARATOR: &'static [(char, char)] = &[
+  (' ', ' '), ('\u{a0}', '\u{a0}'), ('\u{1680}', '\u{1680}'),
+  ('\u{2000}', '\u{200a}'), ('\u{2028}', '\u{2029}'),
+  ('\u{202f}', '\u{202f}'), ('\u{205f}', '\u{205f}'),
+  ('\u{3000}', '\u{3000}'),
+];
+
+pub const SPACE_SEPARATOR: &'static [(char, char)] = &[
+  (' ', ' '), ('\u{a0}', '\u{a0}'), ('\u{1680}', '\u{1680}'),
+  ('\u{2000}', '\u{200a}'), ('\u{202f}', '\u{202f}'),
+  ('\u{205f}', '\u{205f}'), ('\u{3000}', '\u{3000}'),
+];
+
+pub const SPACING_MARK: &'static [(char, char)] = &[
+  ('ः', 'ः'), ('ऻ', 'ऻ'), ('ा', 'ी'), ('ॉ', 'ौ'),
+  ('ॎ', 'ॏ'), ('ং', 'ঃ'), ('া', 'ী'), ('ে', 'ৈ'),
+  ('ো', 'ৌ'), ('ৗ', 'ৗ'), ('ਃ', 'ਃ'), ('ਾ', 'ੀ'),
+  ('ઃ', 'ઃ'), ('ા', 'ી'), ('ૉ', 'ૉ'), ('ો', 'ૌ'),
+  ('ଂ', 'ଃ'), ('ା', 'ା'), ('ୀ', 'ୀ'), ('େ', 'ୈ'),
+  ('ୋ', 'ୌ'), ('ୗ', 'ୗ'), ('ா', 'ி'), ('ு', 'ூ'),
+  ('ெ', 'ை'), ('ொ', 'ௌ'), ('ௗ', 'ௗ'), ('ఁ', 'ః'),
+  ('ు', 'ౄ'), ('ಂ', 'ಃ'), ('ಾ', 'ಾ'), ('ೀ', 'ೄ'),
+  ('ೇ', 'ೈ'), ('ೊ', 'ೋ'), ('ೕ', 'ೖ'), ('ം', 'ഃ'),
+  ('ാ', 'ീ'), ('െ', 'ൈ'), ('ൊ', 'ൌ'), ('ൗ', 'ൗ'),
+  ('ං', 'ඃ'), ('ා', 'ෑ'), ('ෘ', 'ෟ'), ('ෲ', 'ෳ'),
+  ('༾', '༿'), ('ཿ', 'ཿ'), ('ါ', 'ာ'), ('ေ', 'ေ'),
+  ('း', 'း'), ('ျ', 'ြ'), ('ၖ', 'ၗ'), ('ၢ', 'ၤ'),
+  ('ၧ', 'ၭ'), ('ႃ', 'ႄ'), ('ႇ', 'ႌ'), ('ႏ', 'ႏ'),
+  ('ႚ', 'ႜ'), ('ា', 'ា'), ('ើ', 'ៅ'), ('ះ', 'ៈ'),
+  ('ᤣ', 'ᤦ'), ('ᤩ', 'ᤫ'), ('ᤰ', 'ᤱ'), ('ᤳ', 'ᤸ'),
+  ('ᨙ', 'ᨚ'), ('ᩕ', 'ᩕ'), ('ᩗ', 'ᩗ'), ('ᩡ', 'ᩡ'),
+  ('ᩣ', 'ᩤ'), ('ᩭ', 'ᩲ'), ('ᬄ', 'ᬄ'), ('ᬵ', 'ᬵ'),
+  ('ᬻ', 'ᬻ'), ('ᬽ', 'ᭁ'), ('ᭃ', '᭄'), ('ᮂ', 'ᮂ'),
+  ('ᮡ', 'ᮡ'), ('ᮦ', 'ᮧ'), ('᮪', '᮪'), ('ᯧ', 'ᯧ'),
+  ('ᯪ', 'ᯬ'), ('ᯮ', 'ᯮ'), ('᯲', '᯳'), ('ᰤ', 'ᰫ'),
+  ('ᰴ', 'ᰵ'), ('᳡', '᳡'), ('ᳲ', 'ᳳ'), ('᳷', '᳷'),
+  ('〮', '〯'), ('ꠣ', 'ꠤ'), ('ꠧ', 'ꠧ'), ('ꢀ', 'ꢁ'),
+  ('ꢴ', 'ꣃ'), ('ꥒ', '꥓'), ('ꦃ', 'ꦃ'), ('ꦴ', 'ꦵ'),
+  ('ꦺ', 'ꦻ'), ('ꦽ', '꧀'), ('ꨯ', 'ꨰ'), ('ꨳ', 'ꨴ'),
+  ('ꩍ', 'ꩍ'), ('ꩻ', 'ꩻ'), ('ꩽ', 'ꩽ'), ('ꫫ', 'ꫫ'),
+  ('ꫮ', 'ꫯ'), ('ꫵ', 'ꫵ'), ('ꯣ', 'ꯤ'), ('ꯦ', 'ꯧ'),
+  ('ꯩ', 'ꯪ'), ('꯬', '꯬'), ('𑀀', '𑀀'), ('𑀂', '𑀂'),
+  ('𑂂', '𑂂'), ('𑂰', '𑂲'), ('𑂷', '𑂸'), ('𑄬', '𑄬'),
+  ('𑆂', '𑆂'), ('𑆳', '𑆵'), ('𑆿', '𑇀'), ('𑈬', '𑈮'),
+  ('𑈲', '𑈳'), ('𑈵', '𑈵'), ('𑋠', '𑋢'), ('𑌂', '𑌃'),
+  ('𑌾', '𑌿'), ('𑍁', '𑍄'), ('𑍇', '𑍈'), ('𑍋', '𑍍'),
+  ('𑍗', '𑍗'), ('𑍢', '𑍣'), ('𑐵', '𑐷'), ('𑑀', '𑑁'),
+  ('𑑅', '𑑅'), ('𑒰', '𑒲'), ('𑒹', '𑒹'), ('𑒻', '𑒾'),
+  ('𑓁', '𑓁'), ('𑖯', '𑖱'), ('𑖸', '𑖻'), ('𑖾', '𑖾'),
+  ('𑘰', '𑘲'), ('𑘻', '𑘼'), ('𑘾', '𑘾'), ('𑚬', '𑚬'),
+  ('𑚮', '𑚯'), ('𑚶', '𑚶'), ('𑜠', '𑜡'), ('𑜦', '𑜦'),
+  ('𑨇', '𑨈'), ('𑨹', '𑨹'), ('𑩗', '𑩘'), ('𑪗', '𑪗'),
+  ('𑰯', '𑰯'), ('𑰾', '𑰾'), ('𑲩', '𑲩'), ('𑲱', '𑲱'),
+  ('𑲴', '𑲴'), ('𖽑', '𖽾'), ('𝅥', '𝅦'), ('𝅭', '𝅲'),
+];
+
+pub const SYMBOL: &'static [(char, char)] = &[
+  ('$', '$'), ('+', '+'), ('<', '>'), ('^', '^'), ('`', '`'), ('|', '|'),
+  ('~', '~'), ('¢', '¦'), ('¨', '©'), ('¬', '¬'), ('®', '±'),
+  ('´', '´'), ('¸', '¸'), ('×', '×'), ('÷', '÷'), ('˂', '˅'),
+  ('˒', '˟'), ('˥', '˫'), ('˭', '˭'), ('˯', '˿'), ('͵', '͵'),
+  ('΄', '΅'), ('϶', '϶'), ('҂', '҂'), ('֍', '֏'), ('؆', '؈'),
+  ('؋', '؋'), ('؎', '؏'), ('۞', '۞'), ('۩', '۩'), ('۽', '۾'),
+  ('߶', '߶'), ('৲', '৳'), ('৺', '৻'), ('૱', '૱'),
+  ('୰', '୰'), ('௳', '௺'), ('౿', '౿'), ('൏', '൏'),
+  ('൹', '൹'), ('฿', '฿'), ('༁', '༃'), ('༓', '༓'),
+  ('༕', '༗'), ('༚', '༟'), ('༴', '༴'), ('༶', '༶'),
+  ('༸', '༸'), ('྾', '࿅'), ('࿇', '࿌'), ('࿎', '࿏'),
+  ('࿕', '࿘'), ('႞', '႟'), ('᎐', '᎙'), ('៛', '៛'),
+  ('᥀', '᥀'), ('᧞', '᧿'), ('᭡', '᭪'), ('᭴', '᭼'),
+  ('᾽', '᾽'), ('᾿', '῁'), ('῍', '῏'), ('῝', '῟'),
+  ('῭', '`'), ('´', '῾'), ('⁄', '⁄'), ('⁒', '⁒'),
+  ('⁺', '⁼'), ('₊', '₌'), ('₠', '₿'), ('℀', '℁'),
+  ('℃', '℆'), ('℈', '℉'), ('℔', '℔'), ('№', '℘'),
+  ('℞', '℣'), ('℥', '℥'), ('℧', '℧'), ('℩', '℩'),
+  ('℮', '℮'), ('℺', '℻'), ('⅀', '⅄'), ('⅊', '⅍'),
+  ('⅏', '⅏'), ('↊', '↋'), ('←', '⌇'), ('⌌', '⌨'),
+  ('⌫', '␦'), ('⑀', '⑊'), ('⒜', 'ⓩ'), ('─', '❧'),
+  ('➔', '⟄'), ('⟇', '⟥'), ('⟰', '⦂'), ('⦙', '⧗'),
+  ('⧜', '⧻'), ('⧾', '⭳'), ('⭶', '⮕'), ('⮘', '⮹'),
+  ('⮽', '⯈'), ('⯊', '⯒'), ('⯬', '⯯'), ('⳥', '⳪'),
+  ('⺀', '⺙'), ('⺛', '⻳'), ('⼀', '⿕'), ('⿰', '⿻'),
+  ('〄', '〄'), ('〒', '〓'), ('〠', '〠'), ('〶', '〷'),
+  ('〾', '〿'), ('゛', '゜'), ('㆐', '㆑'), ('㆖', '㆟'),
+  ('㇀', '㇣'), ('㈀', '㈞'), ('㈪', '㉇'), ('㉐', '㉐'),
+  ('㉠', '㉿'), ('㊊', '㊰'), ('㋀', '㋾'), ('㌀', '㏿'),
+  ('䷀', '䷿'), ('꒐', '꓆'), ('꜀', '꜖'), ('꜠', '꜡'),
+  ('꞉', '꞊'), ('꠨', '꠫'), ('꠶', '꠹'), ('꩷', '꩹'),
+  ('꭛', '꭛'), ('﬩', '﬩'), ('﮲', '﯁'), ('﷼', '﷽'),
+  ('﹢', '﹢'), ('﹤', '﹦'), ('﹩', '﹩'), ('$', '$'),
+  ('+', '+'), ('<', '>'), ('^', '^'), ('`', '`'),
+  ('|', '|'), ('~', '~'), ('¢', '₩'), ('│', '○'),
+  ('', '�'), ('𐄷', '𐄿'), ('𐅹', '𐆉'), ('𐆌', '𐆎'),
+  ('𐆐', '𐆛'), ('𐆠', '𐆠'), ('𐇐', '𐇼'), ('𐡷', '𐡸'),
+  ('𐫈', '𐫈'), ('𑜿', '𑜿'), ('𖬼', '𖬿'), ('𖭅', '𖭅'),
+  ('𛲜', '𛲜'), ('𝀀', '𝃵'), ('𝄀', '𝄦'), ('𝄩', '𝅘𝅥𝅲'),
+  ('𝅪', '𝅬'), ('𝆃', '𝆄'), ('𝆌', '𝆩'), ('𝆮', '𝇨'),
+  ('𝈀', '𝉁'), ('𝉅', '𝉅'), ('𝌀', '𝍖'), ('𝛁', '𝛁'),
+  ('𝛛', '𝛛'), ('𝛻', '𝛻'), ('𝜕', '𝜕'), ('𝜵', '𝜵'),
+  ('𝝏', '𝝏'), ('𝝯', '𝝯'), ('𝞉', '𝞉'), ('𝞩', '𝞩'),
+  ('𝟃', '𝟃'), ('𝠀', '𝧿'), ('𝨷', '𝨺'), ('𝩭', '𝩴'),
+  ('𝩶', '𝪃'), ('𝪅', '𝪆'), ('𞻰', '𞻱'), ('🀀', '🀫'),
+  ('🀰', '🂓'), ('🂠', '🂮'), ('🂱', '🂿'), ('🃁', '🃏'),
+  ('🃑', '🃵'), ('🄐', '🄮'), ('🄰', '🅫'), ('🅰', '🆬'),
+  ('🇦', '🈂'), ('🈐', '🈻'), ('🉀', '🉈'), ('🉐', '🉑'),
+  ('🉠', '🉥'), ('🌀', '🛔'), ('🛠', '🛬'), ('🛰', '🛸'),
+  ('🜀', '🝳'), ('🞀', '🟔'), ('🠀', '🠋'), ('🠐', '🡇'),
+  ('🡐', '🡙'), ('🡠', '🢇'), ('🢐', '🢭'), ('🤀', '🤋'),
+  ('🤐', '🤾'), ('🥀', '🥌'), ('🥐', '🥫'), ('🦀', '🦗'),
+  ('🧀', '🧀'), ('🧐', '🧦'),
+];
+
+pub const TITLECASE_LETTER: &'static [(char, char)] = &[
+  ('Dž', 'Dž'), ('Lj', 'Lj'), ('Nj', 'Nj'), ('Dz', 'Dz'), ('ᾈ', 'ᾏ'),
+  ('ᾘ', 'ᾟ'), ('ᾨ', 'ᾯ'), ('ᾼ', 'ᾼ'), ('ῌ', 'ῌ'),
+  ('ῼ', 'ῼ'),
+];
+
+pub const UNASSIGNED: &'static [(char, char)] = &[
+  ('\u{378}', '\u{379}'), ('\u{380}', '\u{383}'), ('\u{38b}', '\u{38b}'),
+  ('\u{38d}', '\u{38d}'), ('\u{3a2}', '\u{3a2}'), ('\u{530}', '\u{530}'),
+  ('\u{557}', '\u{558}'), ('\u{560}', '\u{560}'), ('\u{588}', '\u{588}'),
+  ('\u{58b}', '\u{58c}'), ('\u{590}', '\u{590}'), ('\u{5c8}', '\u{5cf}'),
+  ('\u{5eb}', '\u{5ef}'), ('\u{5f5}', '\u{5ff}'), ('\u{61d}', '\u{61d}'),
+  ('\u{70e}', '\u{70e}'), ('\u{74b}', '\u{74c}'), ('\u{7b2}', '\u{7bf}'),
+  ('\u{7fb}', '\u{7ff}'), ('\u{82e}', '\u{82f}'), ('\u{83f}', '\u{83f}'),
+  ('\u{85c}', '\u{85d}'), ('\u{85f}', '\u{85f}'), ('\u{86b}', '\u{89f}'),
+  ('\u{8b5}', '\u{8b5}'), ('\u{8be}', '\u{8d3}'), ('\u{984}', '\u{984}'),
+  ('\u{98d}', '\u{98e}'), ('\u{991}', '\u{992}'), ('\u{9a9}', '\u{9a9}'),
+  ('\u{9b1}', '\u{9b1}'), ('\u{9b3}', '\u{9b5}'), ('\u{9ba}', '\u{9bb}'),
+  ('\u{9c5}', '\u{9c6}'), ('\u{9c9}', '\u{9ca}'), ('\u{9cf}', '\u{9d6}'),
+  ('\u{9d8}', '\u{9db}'), ('\u{9de}', '\u{9de}'), ('\u{9e4}', '\u{9e5}'),
+  ('\u{9fe}', '\u{a00}'), ('\u{a04}', '\u{a04}'), ('\u{a0b}', '\u{a0e}'),
+  ('\u{a11}', '\u{a12}'), ('\u{a29}', '\u{a29}'), ('\u{a31}', '\u{a31}'),
+  ('\u{a34}', '\u{a34}'), ('\u{a37}', '\u{a37}'), ('\u{a3a}', '\u{a3b}'),
+  ('\u{a3d}', '\u{a3d}'), ('\u{a43}', '\u{a46}'), ('\u{a49}', '\u{a4a}'),
+  ('\u{a4e}', '\u{a50}'), ('\u{a52}', '\u{a58}'), ('\u{a5d}', '\u{a5d}'),
+  ('\u{a5f}', '\u{a65}'), ('\u{a76}', '\u{a80}'), ('\u{a84}', '\u{a84}'),
+  ('\u{a8e}', '\u{a8e}'), ('\u{a92}', '\u{a92}'), ('\u{aa9}', '\u{aa9}'),
+  ('\u{ab1}', '\u{ab1}'), ('\u{ab4}', '\u{ab4}'), ('\u{aba}', '\u{abb}'),
+  ('\u{ac6}', '\u{ac6}'), ('\u{aca}', '\u{aca}'), ('\u{ace}', '\u{acf}'),
+  ('\u{ad1}', '\u{adf}'), ('\u{ae4}', '\u{ae5}'), ('\u{af2}', '\u{af8}'),
+  ('\u{b00}', '\u{b00}'), ('\u{b04}', '\u{b04}'), ('\u{b0d}', '\u{b0e}'),
+  ('\u{b11}', '\u{b12}'), ('\u{b29}', '\u{b29}'), ('\u{b31}', '\u{b31}'),
+  ('\u{b34}', '\u{b34}'), ('\u{b3a}', '\u{b3b}'), ('\u{b45}', '\u{b46}'),
+  ('\u{b49}', '\u{b4a}'), ('\u{b4e}', '\u{b55}'), ('\u{b58}', '\u{b5b}'),
+  ('\u{b5e}', '\u{b5e}'), ('\u{b64}', '\u{b65}'), ('\u{b78}', '\u{b81}'),
+  ('\u{b84}', '\u{b84}'), ('\u{b8b}', '\u{b8d}'), ('\u{b91}', '\u{b91}'),
+  ('\u{b96}', '\u{b98}'), ('\u{b9b}', '\u{b9b}'), ('\u{b9d}', '\u{b9d}'),
+  ('\u{ba0}', '\u{ba2}'), ('\u{ba5}', '\u{ba7}'), ('\u{bab}', '\u{bad}'),
+  ('\u{bba}', '\u{bbd}'), ('\u{bc3}', '\u{bc5}'), ('\u{bc9}', '\u{bc9}'),
+  ('\u{bce}', '\u{bcf}'), ('\u{bd1}', '\u{bd6}'), ('\u{bd8}', '\u{be5}'),
+  ('\u{bfb}', '\u{bff}'), ('\u{c04}', '\u{c04}'), ('\u{c0d}', '\u{c0d}'),
+  ('\u{c11}', '\u{c11}'), ('\u{c29}', '\u{c29}'), ('\u{c3a}', '\u{c3c}'),
+  ('\u{c45}', '\u{c45}'), ('\u{c49}', '\u{c49}'), ('\u{c4e}', '\u{c54}'),
+  ('\u{c57}', '\u{c57}'), ('\u{c5b}', '\u{c5f}'), ('\u{c64}', '\u{c65}'),
+  ('\u{c70}', '\u{c77}'), ('\u{c84}', '\u{c84}'), ('\u{c8d}', '\u{c8d}'),
+  ('\u{c91}', '\u{c91}'), ('\u{ca9}', '\u{ca9}'), ('\u{cb4}', '\u{cb4}'),
+  ('\u{cba}', '\u{cbb}'), ('\u{cc5}', '\u{cc5}'), ('\u{cc9}', '\u{cc9}'),
+  ('\u{cce}', '\u{cd4}'), ('\u{cd7}', '\u{cdd}'), ('\u{cdf}', '\u{cdf}'),
+  ('\u{ce4}', '\u{ce5}'), ('\u{cf0}', '\u{cf0}'), ('\u{cf3}', '\u{cff}'),
+  ('\u{d04}', '\u{d04}'), ('\u{d0d}', '\u{d0d}'), ('\u{d11}', '\u{d11}'),
+  ('\u{d45}', '\u{d45}'), ('\u{d49}', '\u{d49}'), ('\u{d50}', '\u{d53}'),
+  ('\u{d64}', '\u{d65}'), ('\u{d80}', '\u{d81}'), ('\u{d84}', '\u{d84}'),
+  ('\u{d97}', '\u{d99}'), ('\u{db2}', '\u{db2}'), ('\u{dbc}', '\u{dbc}'),
+  ('\u{dbe}', '\u{dbf}'), ('\u{dc7}', '\u{dc9}'), ('\u{dcb}', '\u{dce}'),
+  ('\u{dd5}', '\u{dd5}'), ('\u{dd7}', '\u{dd7}'), ('\u{de0}', '\u{de5}'),
+  ('\u{df0}', '\u{df1}'), ('\u{df5}', '\u{e00}'), ('\u{e3b}', '\u{e3e}'),
+  ('\u{e5c}', '\u{e80}'), ('\u{e83}', '\u{e83}'), ('\u{e85}', '\u{e86}'),
+  ('\u{e89}', '\u{e89}'), ('\u{e8b}', '\u{e8c}'), ('\u{e8e}', '\u{e93}'),
+  ('\u{e98}', '\u{e98}'), ('\u{ea0}', '\u{ea0}'), ('\u{ea4}', '\u{ea4}'),
+  ('\u{ea6}', '\u{ea6}'), ('\u{ea8}', '\u{ea9}'), ('\u{eac}', '\u{eac}'),
+  ('\u{eba}', '\u{eba}'), ('\u{ebe}', '\u{ebf}'), ('\u{ec5}', '\u{ec5}'),
+  ('\u{ec7}', '\u{ec7}'), ('\u{ece}', '\u{ecf}'), ('\u{eda}', '\u{edb}'),
+  ('\u{ee0}', '\u{eff}'), ('\u{f48}', '\u{f48}'), ('\u{f6d}', '\u{f70}'),
+  ('\u{f98}', '\u{f98}'), ('\u{fbd}', '\u{fbd}'), ('\u{fcd}', '\u{fcd}'),
+  ('\u{fdb}', '\u{fff}'), ('\u{10c6}', '\u{10c6}'), ('\u{10c8}', '\u{10cc}'),
+  ('\u{10ce}', '\u{10cf}'), ('\u{1249}', '\u{1249}'),
+  ('\u{124e}', '\u{124f}'), ('\u{1257}', '\u{1257}'),
+  ('\u{1259}', '\u{1259}'), ('\u{125e}', '\u{125f}'),
+  ('\u{1289}', '\u{1289}'), ('\u{128e}', '\u{128f}'),
+  ('\u{12b1}', '\u{12b1}'), ('\u{12b6}', '\u{12b7}'),
+  ('\u{12bf}', '\u{12bf}'), ('\u{12c1}', '\u{12c1}'),
+  ('\u{12c6}', '\u{12c7}'), ('\u{12d7}', '\u{12d7}'),
+  ('\u{1311}', '\u{1311}'), ('\u{1316}', '\u{1317}'),
+  ('\u{135b}', '\u{135c}'), ('\u{137d}', '\u{137f}'),
+  ('\u{139a}', '\u{139f}'), ('\u{13f6}', '\u{13f7}'),
+  ('\u{13fe}', '\u{13ff}'), ('\u{169d}', '\u{169f}'),
+  ('\u{16f9}', '\u{16ff}'), ('\u{170d}', '\u{170d}'),
+  ('\u{1715}', '\u{171f}'), ('\u{1737}', '\u{173f}'),
+  ('\u{1754}', '\u{175f}'), ('\u{176d}', '\u{176d}'),
+  ('\u{1771}', '\u{1771}'), ('\u{1774}', '\u{177f}'),
+  ('\u{17de}', '\u{17df}'), ('\u{17ea}', '\u{17ef}'),
+  ('\u{17fa}', '\u{17ff}'), ('\u{180f}', '\u{180f}'),
+  ('\u{181a}', '\u{181f}'), ('\u{1878}', '\u{187f}'),
+  ('\u{18ab}', '\u{18af}'), ('\u{18f6}', '\u{18ff}'),
+  ('\u{191f}', '\u{191f}'), ('\u{192c}', '\u{192f}'),
+  ('\u{193c}', '\u{193f}'), ('\u{1941}', '\u{1943}'),
+  ('\u{196e}', '\u{196f}'), ('\u{1975}', '\u{197f}'),
+  ('\u{19ac}', '\u{19af}'), ('\u{19ca}', '\u{19cf}'),
+  ('\u{19db}', '\u{19dd}'), ('\u{1a1c}', '\u{1a1d}'),
+  ('\u{1a5f}', '\u{1a5f}'), ('\u{1a7d}', '\u{1a7e}'),
+  ('\u{1a8a}', '\u{1a8f}'), ('\u{1a9a}', '\u{1a9f}'),
+  ('\u{1aae}', '\u{1aaf}'), ('\u{1abf}', '\u{1aff}'),
+  ('\u{1b4c}', '\u{1b4f}'), ('\u{1b7d}', '\u{1b7f}'),
+  ('\u{1bf4}', '\u{1bfb}'), ('\u{1c38}', '\u{1c3a}'),
+  ('\u{1c4a}', '\u{1c4c}'), ('\u{1c89}', '\u{1cbf}'),
+  ('\u{1cc8}', '\u{1ccf}'), ('\u{1cfa}', '\u{1cff}'),
+  ('\u{1dfa}', '\u{1dfa}'), ('\u{1f16}', '\u{1f17}'),
+  ('\u{1f1e}', '\u{1f1f}'), ('\u{1f46}', '\u{1f47}'),
+  ('\u{1f4e}', '\u{1f4f}'), ('\u{1f58}', '\u{1f58}'),
+  ('\u{1f5a}', '\u{1f5a}'), ('\u{1f5c}', '\u{1f5c}'),
+  ('\u{1f5e}', '\u{1f5e}'), ('\u{1f7e}', '\u{1f7f}'),
+  ('\u{1fb5}', '\u{1fb5}'), ('\u{1fc5}', '\u{1fc5}'),
+  ('\u{1fd4}', '\u{1fd5}'), ('\u{1fdc}', '\u{1fdc}'),
+  ('\u{1ff0}', '\u{1ff1}'), ('\u{1ff5}', '\u{1ff5}'),
+  ('\u{1fff}', '\u{1fff}'), ('\u{2065}', '\u{2065}'),
+  ('\u{2072}', '\u{2073}'), ('\u{208f}', '\u{208f}'),
+  ('\u{209d}', '\u{209f}'), ('\u{20c0}', '\u{20cf}'),
+  ('\u{20f1}', '\u{20ff}'), ('\u{218c}', '\u{218f}'),
+  ('\u{2427}', '\u{243f}'), ('\u{244b}', '\u{245f}'),
+  ('\u{2b74}', '\u{2b75}'), ('\u{2b96}', '\u{2b97}'),
+  ('\u{2bba}', '\u{2bbc}'), ('\u{2bc9}', '\u{2bc9}'),
+  ('\u{2bd3}', '\u{2beb}'), ('\u{2bf0}', '\u{2bff}'),
+  ('\u{2c2f}', '\u{2c2f}'), ('\u{2c5f}', '\u{2c5f}'),
+  ('\u{2cf4}', '\u{2cf8}'), ('\u{2d26}', '\u{2d26}'),
+  ('\u{2d28}', '\u{2d2c}'), ('\u{2d2e}', '\u{2d2f}'),
+  ('\u{2d68}', '\u{2d6e}'), ('\u{2d71}', '\u{2d7e}'),
+  ('\u{2d97}', '\u{2d9f}'), ('\u{2da7}', '\u{2da7}'),
+  ('\u{2daf}', '\u{2daf}'), ('\u{2db7}', '\u{2db7}'),
+  ('\u{2dbf}', '\u{2dbf}'), ('\u{2dc7}', '\u{2dc7}'),
+  ('\u{2dcf}', '\u{2dcf}'), ('\u{2dd7}', '\u{2dd7}'),
+  ('\u{2ddf}', '\u{2ddf}'), ('\u{2e4a}', '\u{2e7f}'),
+  ('\u{2e9a}', '\u{2e9a}'), ('\u{2ef4}', '\u{2eff}'),
+  ('\u{2fd6}', '\u{2fef}'), ('\u{2ffc}', '\u{2fff}'),
+  ('\u{3040}', '\u{3040}'), ('\u{3097}', '\u{3098}'),
+  ('\u{3100}', '\u{3104}'), ('\u{312f}', '\u{3130}'),
+  ('\u{318f}', '\u{318f}'), ('\u{31bb}', '\u{31bf}'),
+  ('\u{31e4}', '\u{31ef}'), ('\u{321f}', '\u{321f}'),
+  ('\u{32ff}', '\u{32ff}'), ('\u{4db6}', '\u{4dbf}'),
+  ('\u{9feb}', '\u{9fff}'), ('\u{a48d}', '\u{a48f}'),
+  ('\u{a4c7}', '\u{a4cf}'), ('\u{a62c}', '\u{a63f}'),
+  ('\u{a6f8}', '\u{a6ff}'), ('\u{a7af}', '\u{a7af}'),
+  ('\u{a7b8}', '\u{a7f6}'), ('\u{a82c}', '\u{a82f}'),
+  ('\u{a83a}', '\u{a83f}'), ('\u{a878}', '\u{a87f}'),
+  ('\u{a8c6}', '\u{a8cd}'), ('\u{a8da}', '\u{a8df}'),
+  ('\u{a8fe}', '\u{a8ff}'), ('\u{a954}', '\u{a95e}'),
+  ('\u{a97d}', '\u{a97f}'), ('\u{a9ce}', '\u{a9ce}'),
+  ('\u{a9da}', '\u{a9dd}'), ('\u{a9ff}', '\u{a9ff}'),
+  ('\u{aa37}', '\u{aa3f}'), ('\u{aa4e}', '\u{aa4f}'),
+  ('\u{aa5a}', '\u{aa5b}'), ('\u{aac3}', '\u{aada}'),
+  ('\u{aaf7}', '\u{ab00}'), ('\u{ab07}', '\u{ab08}'),
+  ('\u{ab0f}', '\u{ab10}'), ('\u{ab17}', '\u{ab1f}'),
+  ('\u{ab27}', '\u{ab27}'), ('\u{ab2f}', '\u{ab2f}'),
+  ('\u{ab66}', '\u{ab6f}'), ('\u{abee}', '\u{abef}'),
+  ('\u{abfa}', '\u{abff}'), ('\u{d7a4}', '\u{d7af}'),
+  ('\u{d7c7}', '\u{d7ca}'), ('\u{d7fc}', '\u{d7ff}'),
+  ('\u{fa6e}', '\u{fa6f}'), ('\u{fada}', '\u{faff}'),
+  ('\u{fb07}', '\u{fb12}'), ('\u{fb18}', '\u{fb1c}'),
+  ('\u{fb37}', '\u{fb37}'), ('\u{fb3d}', '\u{fb3d}'),
+  ('\u{fb3f}', '\u{fb3f}'), ('\u{fb42}', '\u{fb42}'),
+  ('\u{fb45}', '\u{fb45}'), ('\u{fbc2}', '\u{fbd2}'),
+  ('\u{fd40}', '\u{fd4f}'), ('\u{fd90}', '\u{fd91}'),
+  ('\u{fdc8}', '\u{fdef}'), ('\u{fdfe}', '\u{fdff}'),
+  ('\u{fe1a}', '\u{fe1f}'), ('\u{fe53}', '\u{fe53}'),
+  ('\u{fe67}', '\u{fe67}'), ('\u{fe6c}', '\u{fe6f}'),
+  ('\u{fe75}', '\u{fe75}'), ('\u{fefd}', '\u{fefe}'),
+  ('\u{ff00}', '\u{ff00}'), ('\u{ffbf}', '\u{ffc1}'),
+  ('\u{ffc8}', '\u{ffc9}'), ('\u{ffd0}', '\u{ffd1}'),
+  ('\u{ffd8}', '\u{ffd9}'), ('\u{ffdd}', '\u{ffdf}'),
+  ('\u{ffe7}', '\u{ffe7}'), ('\u{ffef}', '\u{fff8}'),
+  ('\u{fffe}', '\u{ffff}'), ('\u{1000c}', '\u{1000c}'),
+  ('\u{10027}', '\u{10027}'), ('\u{1003b}', '\u{1003b}'),
+  ('\u{1003e}', '\u{1003e}'), ('\u{1004e}', '\u{1004f}'),
+  ('\u{1005e}', '\u{1007f}'), ('\u{100fb}', '\u{100ff}'),
+  ('\u{10103}', '\u{10106}'), ('\u{10134}', '\u{10136}'),
+  ('\u{1018f}', '\u{1018f}'), ('\u{1019c}', '\u{1019f}'),
+  ('\u{101a1}', '\u{101cf}'), ('\u{101fe}', '\u{1027f}'),
+  ('\u{1029d}', '\u{1029f}'), ('\u{102d1}', '\u{102df}'),
+  ('\u{102fc}', '\u{102ff}'), ('\u{10324}', '\u{1032c}'),
+  ('\u{1034b}', '\u{1034f}'), ('\u{1037b}', '\u{1037f}'),
+  ('\u{1039e}', '\u{1039e}'), ('\u{103c4}', '\u{103c7}'),
+  ('\u{103d6}', '\u{103ff}'), ('\u{1049e}', '\u{1049f}'),
+  ('\u{104aa}', '\u{104af}'), ('\u{104d4}', '\u{104d7}'),
+  ('\u{104fc}', '\u{104ff}'), ('\u{10528}', '\u{1052f}'),
+  ('\u{10564}', '\u{1056e}'), ('\u{10570}', '\u{105ff}'),
+  ('\u{10737}', '\u{1073f}'), ('\u{10756}', '\u{1075f}'),
+  ('\u{10768}', '\u{107ff}'), ('\u{10806}', '\u{10807}'),
+  ('\u{10809}', '\u{10809}'), ('\u{10836}', '\u{10836}'),
+  ('\u{10839}', '\u{1083b}'), ('\u{1083d}', '\u{1083e}'),
+  ('\u{10856}', '\u{10856}'), ('\u{1089f}', '\u{108a6}'),
+  ('\u{108b0}', '\u{108df}'), ('\u{108f3}', '\u{108f3}'),
+  ('\u{108f6}', '\u{108fa}'), ('\u{1091c}', '\u{1091e}'),
+  ('\u{1093a}', '\u{1093e}'), ('\u{10940}', '\u{1097f}'),
+  ('\u{109b8}', '\u{109bb}'), ('\u{109d0}', '\u{109d1}'),
+  ('\u{10a04}', '\u{10a04}'), ('\u{10a07}', '\u{10a0b}'),
+  ('\u{10a14}', '\u{10a14}'), ('\u{10a18}', '\u{10a18}'),
+  ('\u{10a34}', '\u{10a37}'), ('\u{10a3b}', '\u{10a3e}'),
+  ('\u{10a48}', '\u{10a4f}'), ('\u{10a59}', '\u{10a5f}'),
+  ('\u{10aa0}', '\u{10abf}'), ('\u{10ae7}', '\u{10aea}'),
+  ('\u{10af7}', '\u{10aff}'), ('\u{10b36}', '\u{10b38}'),
+  ('\u{10b56}', '\u{10b57}'), ('\u{10b73}', '\u{10b77}'),
+  ('\u{10b92}', '\u{10b98}'), ('\u{10b9d}', '\u{10ba8}'),
+  ('\u{10bb0}', '\u{10bff}'), ('\u{10c49}', '\u{10c7f}'),
+  ('\u{10cb3}', '\u{10cbf}'), ('\u{10cf3}', '\u{10cf9}'),
+  ('\u{10d00}', '\u{10e5f}'), ('\u{10e7f}', '\u{10fff}'),
+  ('\u{1104e}', '\u{11051}'), ('\u{11070}', '\u{1107e}'),
+  ('\u{110c2}', '\u{110cf}'), ('\u{110e9}', '\u{110ef}'),
+  ('\u{110fa}', '\u{110ff}'), ('\u{11135}', '\u{11135}'),
+  ('\u{11144}', '\u{1114f}'), ('\u{11177}', '\u{1117f}'),
+  ('\u{111ce}', '\u{111cf}'), ('\u{111e0}', '\u{111e0}'),
+  ('\u{111f5}', '\u{111ff}'), ('\u{11212}', '\u{11212}'),
+  ('\u{1123f}', '\u{1127f}'), ('\u{11287}', '\u{11287}'),
+  ('\u{11289}', '\u{11289}'), ('\u{1128e}', '\u{1128e}'),
+  ('\u{1129e}', '\u{1129e}'), ('\u{112aa}', '\u{112af}'),
+  ('\u{112eb}', '\u{112ef}'), ('\u{112fa}', '\u{112ff}'),
+  ('\u{11304}', '\u{11304}'), ('\u{1130d}', '\u{1130e}'),
+  ('\u{11311}', '\u{11312}'), ('\u{11329}', '\u{11329}'),
+  ('\u{11331}', '\u{11331}'), ('\u{11334}', '\u{11334}'),
+  ('\u{1133a}', '\u{1133b}'), ('\u{11345}', '\u{11346}'),
+  ('\u{11349}', '\u{1134a}'), ('\u{1134e}', '\u{1134f}'),
+  ('\u{11351}', '\u{11356}'), ('\u{11358}', '\u{1135c}'),
+  ('\u{11364}', '\u{11365}'), ('\u{1136d}', '\u{1136f}'),
+  ('\u{11375}', '\u{113ff}'), ('\u{1145a}', '\u{1145a}'),
+  ('\u{1145c}', '\u{1145c}'), ('\u{1145e}', '\u{1147f}'),
+  ('\u{114c8}', '\u{114cf}'), ('\u{114da}', '\u{1157f}'),
+  ('\u{115b6}', '\u{115b7}'), ('\u{115de}', '\u{115ff}'),
+  ('\u{11645}', '\u{1164f}'), ('\u{1165a}', '\u{1165f}'),
+  ('\u{1166d}', '\u{1167f}'), ('\u{116b8}', '\u{116bf}'),
+  ('\u{116ca}', '\u{116ff}'), ('\u{1171a}', '\u{1171c}'),
+  ('\u{1172c}', '\u{1172f}'), ('\u{11740}', '\u{1189f}'),
+  ('\u{118f3}', '\u{118fe}'), ('\u{11900}', '\u{119ff}'),
+  ('\u{11a48}', '\u{11a4f}'), ('\u{11a84}', '\u{11a85}'),
+  ('\u{11a9d}', '\u{11a9d}'), ('\u{11aa3}', '\u{11abf}'),
+  ('\u{11af9}', '\u{11bff}'), ('\u{11c09}', '\u{11c09}'),
+  ('\u{11c37}', '\u{11c37}'), ('\u{11c46}', '\u{11c4f}'),
+  ('\u{11c6d}', '\u{11c6f}'), ('\u{11c90}', '\u{11c91}'),
+  ('\u{11ca8}', '\u{11ca8}'), ('\u{11cb7}', '\u{11cff}'),
+  ('\u{11d07}', '\u{11d07}'), ('\u{11d0a}', '\u{11d0a}'),
+  ('\u{11d37}', '\u{11d39}'), ('\u{11d3b}', '\u{11d3b}'),
+  ('\u{11d3e}', '\u{11d3e}'), ('\u{11d48}', '\u{11d4f}'),
+  ('\u{11d5a}', '\u{11fff}'), ('\u{1239a}', '\u{123ff}'),
+  ('\u{1246f}', '\u{1246f}'), ('\u{12475}', '\u{1247f}'),
+  ('\u{12544}', '\u{12fff}'), ('\u{1342f}', '\u{143ff}'),
+  ('\u{14647}', '\u{167ff}'), ('\u{16a39}', '\u{16a3f}'),
+  ('\u{16a5f}', '\u{16a5f}'), ('\u{16a6a}', '\u{16a6d}'),
+  ('\u{16a70}', '\u{16acf}'), ('\u{16aee}', '\u{16aef}'),
+  ('\u{16af6}', '\u{16aff}'), ('\u{16b46}', '\u{16b4f}'),
+  ('\u{16b5a}', '\u{16b5a}'), ('\u{16b62}', '\u{16b62}'),
+  ('\u{16b78}', '\u{16b7c}'), ('\u{16b90}', '\u{16eff}'),
+  ('\u{16f45}', '\u{16f4f}'), ('\u{16f7f}', '\u{16f8e}'),
+  ('\u{16fa0}', '\u{16fdf}'), ('\u{16fe2}', '\u{16fff}'),
+  ('\u{187ed}', '\u{187ff}'), ('\u{18af3}', '\u{1afff}'),
+  ('\u{1b11f}', '\u{1b16f}'), ('\u{1b2fc}', '\u{1bbff}'),
+  ('\u{1bc6b}', '\u{1bc6f}'), ('\u{1bc7d}', '\u{1bc7f}'),
+  ('\u{1bc89}', '\u{1bc8f}'), ('\u{1bc9a}', '\u{1bc9b}'),
+  ('\u{1bca4}', '\u{1cfff}'), ('\u{1d0f6}', '\u{1d0ff}'),
+  ('\u{1d127}', '\u{1d128}'), ('\u{1d1e9}', '\u{1d1ff}'),
+  ('\u{1d246}', '\u{1d2ff}'), ('\u{1d357}', '\u{1d35f}'),
+  ('\u{1d372}', '\u{1d3ff}'), ('\u{1d455}', '\u{1d455}'),
+  ('\u{1d49d}', '\u{1d49d}'), ('\u{1d4a0}', '\u{1d4a1}'),
+  ('\u{1d4a3}', '\u{1d4a4}'), ('\u{1d4a7}', '\u{1d4a8}'),
+  ('\u{1d4ad}', '\u{1d4ad}'), ('\u{1d4ba}', '\u{1d4ba}'),
+  ('\u{1d4bc}', '\u{1d4bc}'), ('\u{1d4c4}', '\u{1d4c4}'),
+  ('\u{1d506}', '\u{1d506}'), ('\u{1d50b}', '\u{1d50c}'),
+  ('\u{1d515}', '\u{1d515}'), ('\u{1d51d}', '\u{1d51d}'),
+  ('\u{1d53a}', '\u{1d53a}'), ('\u{1d53f}', '\u{1d53f}'),
+  ('\u{1d545}', '\u{1d545}'), ('\u{1d547}', '\u{1d549}'),
+  ('\u{1d551}', '\u{1d551}'), ('\u{1d6a6}', '\u{1d6a7}'),
+  ('\u{1d7cc}', '\u{1d7cd}'), ('\u{1da8c}', '\u{1da9a}'),
+  ('\u{1daa0}', '\u{1daa0}'), ('\u{1dab0}', '\u{1dfff}'),
+  ('\u{1e007}', '\u{1e007}'), ('\u{1e019}', '\u{1e01a}'),
+  ('\u{1e022}', '\u{1e022}'), ('\u{1e025}', '\u{1e025}'),
+  ('\u{1e02b}', '\u{1e7ff}'), ('\u{1e8c5}', '\u{1e8c6}'),
+  ('\u{1e8d7}', '\u{1e8ff}'), ('\u{1e94b}', '\u{1e94f}'),
+  ('\u{1e95a}', '\u{1e95d}'), ('\u{1e960}', '\u{1edff}'),
+  ('\u{1ee04}', '\u{1ee04}'), ('\u{1ee20}', '\u{1ee20}'),
+  ('\u{1ee23}', '\u{1ee23}'), ('\u{1ee25}', '\u{1ee26}'),
+  ('\u{1ee28}', '\u{1ee28}'), ('\u{1ee33}', '\u{1ee33}'),
+  ('\u{1ee38}', '\u{1ee38}'), ('\u{1ee3a}', '\u{1ee3a}'),
+  ('\u{1ee3c}', '\u{1ee41}'), ('\u{1ee43}', '\u{1ee46}'),
+  ('\u{1ee48}', '\u{1ee48}'), ('\u{1ee4a}', '\u{1ee4a}'),
+  ('\u{1ee4c}', '\u{1ee4c}'), ('\u{1ee50}', '\u{1ee50}'),
+  ('\u{1ee53}', '\u{1ee53}'), ('\u{1ee55}', '\u{1ee56}'),
+  ('\u{1ee58}', '\u{1ee58}'), ('\u{1ee5a}', '\u{1ee5a}'),
+  ('\u{1ee5c}', '\u{1ee5c}'), ('\u{1ee5e}', '\u{1ee5e}'),
+  ('\u{1ee60}', '\u{1ee60}'), ('\u{1ee63}', '\u{1ee63}'),
+  ('\u{1ee65}', '\u{1ee66}'), ('\u{1ee6b}', '\u{1ee6b}'),
+  ('\u{1ee73}', '\u{1ee73}'), ('\u{1ee78}', '\u{1ee78}'),
+  ('\u{1ee7d}', '\u{1ee7d}'), ('\u{1ee7f}', '\u{1ee7f}'),
+  ('\u{1ee8a}', '\u{1ee8a}'), ('\u{1ee9c}', '\u{1eea0}'),
+  ('\u{1eea4}', '\u{1eea4}'), ('\u{1eeaa}', '\u{1eeaa}'),
+  ('\u{1eebc}', '\u{1eeef}'), ('\u{1eef2}', '\u{1efff}'),
+  ('\u{1f02c}', '\u{1f02f}'), ('\u{1f094}', '\u{1f09f}'),
+  ('\u{1f0af}', '\u{1f0b0}'), ('\u{1f0c0}', '\u{1f0c0}'),
+  ('\u{1f0d0}', '\u{1f0d0}'), ('\u{1f0f6}', '\u{1f0ff}'),
+  ('\u{1f10d}', '\u{1f10f}'), ('\u{1f12f}', '\u{1f12f}'),
+  ('\u{1f16c}', '\u{1f16f}'), ('\u{1f1ad}', '\u{1f1e5}'),
+  ('\u{1f203}', '\u{1f20f}'), ('\u{1f23c}', '\u{1f23f}'),
+  ('\u{1f249}', '\u{1f24f}'), ('\u{1f252}', '\u{1f25f}'),
+  ('\u{1f266}', '\u{1f2ff}'), ('\u{1f6d5}', '\u{1f6df}'),
+  ('\u{1f6ed}', '\u{1f6ef}'), ('\u{1f6f9}', '\u{1f6ff}'),
+  ('\u{1f774}', '\u{1f77f}'), ('\u{1f7d5}', '\u{1f7ff}'),
+  ('\u{1f80c}', '\u{1f80f}'), ('\u{1f848}', '\u{1f84f}'),
+  ('\u{1f85a}', '\u{1f85f}'), ('\u{1f888}', '\u{1f88f}'),
+  ('\u{1f8ae}', '\u{1f8ff}'), ('\u{1f90c}', '\u{1f90f}'),
+  ('\u{1f93f}', '\u{1f93f}'), ('\u{1f94d}', '\u{1f94f}'),
+  ('\u{1f96c}', '\u{1f97f}'), ('\u{1f998}', '\u{1f9bf}'),
+  ('\u{1f9c1}', '\u{1f9cf}'), ('\u{1f9e7}', '\u{1ffff}'),
+  ('\u{2a6d7}', '\u{2a6ff}'), ('\u{2b735}', '\u{2b73f}'),
+  ('\u{2b81e}', '\u{2b81f}'), ('\u{2cea2}', '\u{2ceaf}'),
+  ('\u{2ebe1}', '\u{2f7ff}'), ('\u{2fa1e}', '\u{e0000}'),
+  ('\u{e0002}', '\u{e001f}'), ('\u{e0080}', '\u{e00ff}'),
+  ('\u{e01f0}', '\u{effff}'), ('\u{ffffe}', '\u{fffff}'),
+  ('\u{10fffe}', '\u{10ffff}'),
+];
+
+pub const UPPERCASE_LETTER: &'static [(char, char)] = &[
+  ('A', 'Z'), ('À', 'Ö'), ('Ø', 'Þ'), ('Ā', 'Ā'), ('Ă', 'Ă'),
+  ('Ą', 'Ą'), ('Ć', 'Ć'), ('Ĉ', 'Ĉ'), ('Ċ', 'Ċ'), ('Č', 'Č'),
+  ('Ď', 'Ď'), ('Đ', 'Đ'), ('Ē', 'Ē'), ('Ĕ', 'Ĕ'), ('Ė', 'Ė'),
+  ('Ę', 'Ę'), ('Ě', 'Ě'), ('Ĝ', 'Ĝ'), ('Ğ', 'Ğ'), ('Ġ', 'Ġ'),
+  ('Ģ', 'Ģ'), ('Ĥ', 'Ĥ'), ('Ħ', 'Ħ'), ('Ĩ', 'Ĩ'), ('Ī', 'Ī'),
+  ('Ĭ', 'Ĭ'), ('Į', 'Į'), ('İ', 'İ'), ('IJ', 'IJ'), ('Ĵ', 'Ĵ'),
+  ('Ķ', 'Ķ'), ('Ĺ', 'Ĺ'), ('Ļ', 'Ļ'), ('Ľ', 'Ľ'), ('Ŀ', 'Ŀ'),
+  ('Ł', 'Ł'), ('Ń', 'Ń'), ('Ņ', 'Ņ'), ('Ň', 'Ň'), ('Ŋ', 'Ŋ'),
+  ('Ō', 'Ō'), ('Ŏ', 'Ŏ'), ('Ő', 'Ő'), ('Œ', 'Œ'), ('Ŕ', 'Ŕ'),
+  ('Ŗ', 'Ŗ'), ('Ř', 'Ř'), ('Ś', 'Ś'), ('Ŝ', 'Ŝ'), ('Ş', 'Ş'),
+  ('Š', 'Š'), ('Ţ', 'Ţ'), ('Ť', 'Ť'), ('Ŧ', 'Ŧ'), ('Ũ', 'Ũ'),
+  ('Ū', 'Ū'), ('Ŭ', 'Ŭ'), ('Ů', 'Ů'), ('Ű', 'Ű'), ('Ų', 'Ų'),
+  ('Ŵ', 'Ŵ'), ('Ŷ', 'Ŷ'), ('Ÿ', 'Ź'), ('Ż', 'Ż'), ('Ž', 'Ž'),
+  ('Ɓ', 'Ƃ'), ('Ƅ', 'Ƅ'), ('Ɔ', 'Ƈ'), ('Ɖ', 'Ƌ'), ('Ǝ', 'Ƒ'),
+  ('Ɠ', 'Ɣ'), ('Ɩ', 'Ƙ'), ('Ɯ', 'Ɲ'), ('Ɵ', 'Ơ'), ('Ƣ', 'Ƣ'),
+  ('Ƥ', 'Ƥ'), ('Ʀ', 'Ƨ'), ('Ʃ', 'Ʃ'), ('Ƭ', 'Ƭ'), ('Ʈ', 'Ư'),
+  ('Ʊ', 'Ƴ'), ('Ƶ', 'Ƶ'), ('Ʒ', 'Ƹ'), ('Ƽ', 'Ƽ'), ('DŽ', 'DŽ'),
+  ('LJ', 'LJ'), ('NJ', 'NJ'), ('Ǎ', 'Ǎ'), ('Ǐ', 'Ǐ'), ('Ǒ', 'Ǒ'),
+  ('Ǔ', 'Ǔ'), ('Ǖ', 'Ǖ'), ('Ǘ', 'Ǘ'), ('Ǚ', 'Ǚ'), ('Ǜ', 'Ǜ'),
+  ('Ǟ', 'Ǟ'), ('Ǡ', 'Ǡ'), ('Ǣ', 'Ǣ'), ('Ǥ', 'Ǥ'), ('Ǧ', 'Ǧ'),
+  ('Ǩ', 'Ǩ'), ('Ǫ', 'Ǫ'), ('Ǭ', 'Ǭ'), ('Ǯ', 'Ǯ'), ('DZ', 'DZ'),
+  ('Ǵ', 'Ǵ'), ('Ƕ', 'Ǹ'), ('Ǻ', 'Ǻ'), ('Ǽ', 'Ǽ'), ('Ǿ', 'Ǿ'),
+  ('Ȁ', 'Ȁ'), ('Ȃ', 'Ȃ'), ('Ȅ', 'Ȅ'), ('Ȇ', 'Ȇ'), ('Ȉ', 'Ȉ'),
+  ('Ȋ', 'Ȋ'), ('Ȍ', 'Ȍ'), ('Ȏ', 'Ȏ'), ('Ȑ', 'Ȑ'), ('Ȓ', 'Ȓ'),
+  ('Ȕ', 'Ȕ'), ('Ȗ', 'Ȗ'), ('Ș', 'Ș'), ('Ț', 'Ț'), ('Ȝ', 'Ȝ'),
+  ('Ȟ', 'Ȟ'), ('Ƞ', 'Ƞ'), ('Ȣ', 'Ȣ'), ('Ȥ', 'Ȥ'), ('Ȧ', 'Ȧ'),
+  ('Ȩ', 'Ȩ'), ('Ȫ', 'Ȫ'), ('Ȭ', 'Ȭ'), ('Ȯ', 'Ȯ'), ('Ȱ', 'Ȱ'),
+  ('Ȳ', 'Ȳ'), ('Ⱥ', 'Ȼ'), ('Ƚ', 'Ⱦ'), ('Ɂ', 'Ɂ'), ('Ƀ', 'Ɇ'),
+  ('Ɉ', 'Ɉ'), ('Ɋ', 'Ɋ'), ('Ɍ', 'Ɍ'), ('Ɏ', 'Ɏ'), ('Ͱ', 'Ͱ'),
+  ('Ͳ', 'Ͳ'), ('Ͷ', 'Ͷ'), ('Ϳ', 'Ϳ'), ('Ά', 'Ά'), ('Έ', 'Ί'),
+  ('Ό', 'Ό'), ('Ύ', 'Ώ'), ('Α', 'Ρ'), ('Σ', 'Ϋ'), ('Ϗ', 'Ϗ'),
+  ('ϒ', 'ϔ'), ('Ϙ', 'Ϙ'), ('Ϛ', 'Ϛ'), ('Ϝ', 'Ϝ'), ('Ϟ', 'Ϟ'),
+  ('Ϡ', 'Ϡ'), ('Ϣ', 'Ϣ'), ('Ϥ', 'Ϥ'), ('Ϧ', 'Ϧ'), ('Ϩ', 'Ϩ'),
+  ('Ϫ', 'Ϫ'), ('Ϭ', 'Ϭ'), ('Ϯ', 'Ϯ'), ('ϴ', 'ϴ'), ('Ϸ', 'Ϸ'),
+  ('Ϲ', 'Ϻ'), ('Ͻ', 'Я'), ('Ѡ', 'Ѡ'), ('Ѣ', 'Ѣ'), ('Ѥ', 'Ѥ'),
+  ('Ѧ', 'Ѧ'), ('Ѩ', 'Ѩ'), ('Ѫ', 'Ѫ'), ('Ѭ', 'Ѭ'), ('Ѯ', 'Ѯ'),
+  ('Ѱ', 'Ѱ'), ('Ѳ', 'Ѳ'), ('Ѵ', 'Ѵ'), ('Ѷ', 'Ѷ'), ('Ѹ', 'Ѹ'),
+  ('Ѻ', 'Ѻ'), ('Ѽ', 'Ѽ'), ('Ѿ', 'Ѿ'), ('Ҁ', 'Ҁ'), ('Ҋ', 'Ҋ'),
+  ('Ҍ', 'Ҍ'), ('Ҏ', 'Ҏ'), ('Ґ', 'Ґ'), ('Ғ', 'Ғ'), ('Ҕ', 'Ҕ'),
+  ('Җ', 'Җ'), ('Ҙ', 'Ҙ'), ('Қ', 'Қ'), ('Ҝ', 'Ҝ'), ('Ҟ', 'Ҟ'),
+  ('Ҡ', 'Ҡ'), ('Ң', 'Ң'), ('Ҥ', 'Ҥ'), ('Ҧ', 'Ҧ'), ('Ҩ', 'Ҩ'),
+  ('Ҫ', 'Ҫ'), ('Ҭ', 'Ҭ'), ('Ү', 'Ү'), ('Ұ', 'Ұ'), ('Ҳ', 'Ҳ'),
+  ('Ҵ', 'Ҵ'), ('Ҷ', 'Ҷ'), ('Ҹ', 'Ҹ'), ('Һ', 'Һ'), ('Ҽ', 'Ҽ'),
+  ('Ҿ', 'Ҿ'), ('Ӏ', 'Ӂ'), ('Ӄ', 'Ӄ'), ('Ӆ', 'Ӆ'), ('Ӈ', 'Ӈ'),
+  ('Ӊ', 'Ӊ'), ('Ӌ', 'Ӌ'), ('Ӎ', 'Ӎ'), ('Ӑ', 'Ӑ'), ('Ӓ', 'Ӓ'),
+  ('Ӕ', 'Ӕ'), ('Ӗ', 'Ӗ'), ('Ә', 'Ә'), ('Ӛ', 'Ӛ'), ('Ӝ', 'Ӝ'),
+  ('Ӟ', 'Ӟ'), ('Ӡ', 'Ӡ'), ('Ӣ', 'Ӣ'), ('Ӥ', 'Ӥ'), ('Ӧ', 'Ӧ'),
+  ('Ө', 'Ө'), ('Ӫ', 'Ӫ'), ('Ӭ', 'Ӭ'), ('Ӯ', 'Ӯ'), ('Ӱ', 'Ӱ'),
+  ('Ӳ', 'Ӳ'), ('Ӵ', 'Ӵ'), ('Ӷ', 'Ӷ'), ('Ӹ', 'Ӹ'), ('Ӻ', 'Ӻ'),
+  ('Ӽ', 'Ӽ'), ('Ӿ', 'Ӿ'), ('Ԁ', 'Ԁ'), ('Ԃ', 'Ԃ'), ('Ԅ', 'Ԅ'),
+  ('Ԇ', 'Ԇ'), ('Ԉ', 'Ԉ'), ('Ԋ', 'Ԋ'), ('Ԍ', 'Ԍ'), ('Ԏ', 'Ԏ'),
+  ('Ԑ', 'Ԑ'), ('Ԓ', 'Ԓ'), ('Ԕ', 'Ԕ'), ('Ԗ', 'Ԗ'), ('Ԙ', 'Ԙ'),
+  ('Ԛ', 'Ԛ'), ('Ԝ', 'Ԝ'), ('Ԟ', 'Ԟ'), ('Ԡ', 'Ԡ'), ('Ԣ', 'Ԣ'),
+  ('Ԥ', 'Ԥ'), ('Ԧ', 'Ԧ'), ('Ԩ', 'Ԩ'), ('Ԫ', 'Ԫ'), ('Ԭ', 'Ԭ'),
+  ('Ԯ', 'Ԯ'), ('Ա', 'Ֆ'), ('Ⴀ', 'Ⴥ'), ('Ⴧ', 'Ⴧ'), ('Ⴭ', 'Ⴭ'),
+  ('Ꭰ', 'Ᏽ'), ('Ḁ', 'Ḁ'), ('Ḃ', 'Ḃ'), ('Ḅ', 'Ḅ'),
+  ('Ḇ', 'Ḇ'), ('Ḉ', 'Ḉ'), ('Ḋ', 'Ḋ'), ('Ḍ', 'Ḍ'),
+  ('Ḏ', 'Ḏ'), ('Ḑ', 'Ḑ'), ('Ḓ', 'Ḓ'), ('Ḕ', 'Ḕ'),
+  ('Ḗ', 'Ḗ'), ('Ḙ', 'Ḙ'), ('Ḛ', 'Ḛ'), ('Ḝ', 'Ḝ'),
+  ('Ḟ', 'Ḟ'), ('Ḡ', 'Ḡ'), ('Ḣ', 'Ḣ'), ('Ḥ', 'Ḥ'),
+  ('Ḧ', 'Ḧ'), ('Ḩ', 'Ḩ'), ('Ḫ', 'Ḫ'), ('Ḭ', 'Ḭ'),
+  ('Ḯ', 'Ḯ'), ('Ḱ', 'Ḱ'), ('Ḳ', 'Ḳ'), ('Ḵ', 'Ḵ'),
+  ('Ḷ', 'Ḷ'), ('Ḹ', 'Ḹ'), ('Ḻ', 'Ḻ'), ('Ḽ', 'Ḽ'),
+  ('Ḿ', 'Ḿ'), ('Ṁ', 'Ṁ'), ('Ṃ', 'Ṃ'), ('Ṅ', 'Ṅ'),
+  ('Ṇ', 'Ṇ'), ('Ṉ', 'Ṉ'), ('Ṋ', 'Ṋ'), ('Ṍ', 'Ṍ'),
+  ('Ṏ', 'Ṏ'), ('Ṑ', 'Ṑ'), ('Ṓ', 'Ṓ'), ('Ṕ', 'Ṕ'),
+  ('Ṗ', 'Ṗ'), ('Ṙ', 'Ṙ'), ('Ṛ', 'Ṛ'), ('Ṝ', 'Ṝ'),
+  ('Ṟ', 'Ṟ'), ('Ṡ', 'Ṡ'), ('Ṣ', 'Ṣ'), ('Ṥ', 'Ṥ'),
+  ('Ṧ', 'Ṧ'), ('Ṩ', 'Ṩ'), ('Ṫ', 'Ṫ'), ('Ṭ', 'Ṭ'),
+  ('Ṯ', 'Ṯ'), ('Ṱ', 'Ṱ'), ('Ṳ', 'Ṳ'), ('Ṵ', 'Ṵ'),
+  ('Ṷ', 'Ṷ'), ('Ṹ', 'Ṹ'), ('Ṻ', 'Ṻ'), ('Ṽ', 'Ṽ'),
+  ('Ṿ', 'Ṿ'), ('Ẁ', 'Ẁ'), ('Ẃ', 'Ẃ'), ('Ẅ', 'Ẅ'),
+  ('Ẇ', 'Ẇ'), ('Ẉ', 'Ẉ'), ('Ẋ', 'Ẋ'), ('Ẍ', 'Ẍ'),
+  ('Ẏ', 'Ẏ'), ('Ẑ', 'Ẑ'), ('Ẓ', 'Ẓ'), ('Ẕ', 'Ẕ'),
+  ('ẞ', 'ẞ'), ('Ạ', 'Ạ'), ('Ả', 'Ả'), ('Ấ', 'Ấ'),
+  ('Ầ', 'Ầ'), ('Ẩ', 'Ẩ'), ('Ẫ', 'Ẫ'), ('Ậ', 'Ậ'),
+  ('Ắ', 'Ắ'), ('Ằ', 'Ằ'), ('Ẳ', 'Ẳ'), ('Ẵ', 'Ẵ'),
+  ('Ặ', 'Ặ'), ('Ẹ', 'Ẹ'), ('Ẻ', 'Ẻ'), ('Ẽ', 'Ẽ'),
+  ('Ế', 'Ế'), ('Ề', 'Ề'), ('Ể', 'Ể'), ('Ễ', 'Ễ'),
+  ('Ệ', 'Ệ'), ('Ỉ', 'Ỉ'), ('Ị', 'Ị'), ('Ọ', 'Ọ'),
+  ('Ỏ', 'Ỏ'), ('Ố', 'Ố'), ('Ồ', 'Ồ'), ('Ổ', 'Ổ'),
+  ('Ỗ', 'Ỗ'), ('Ộ', 'Ộ'), ('Ớ', 'Ớ'), ('Ờ', 'Ờ'),
+  ('Ở', 'Ở'), ('Ỡ', 'Ỡ'), ('Ợ', 'Ợ'), ('Ụ', 'Ụ'),
+  ('Ủ', 'Ủ'), ('Ứ', 'Ứ'), ('Ừ', 'Ừ'), ('Ử', 'Ử'),
+  ('Ữ', 'Ữ'), ('Ự', 'Ự'), ('Ỳ', 'Ỳ'), ('Ỵ', 'Ỵ'),
+  ('Ỷ', 'Ỷ'), ('Ỹ', 'Ỹ'), ('Ỻ', 'Ỻ'), ('Ỽ', 'Ỽ'),
+  ('Ỿ', 'Ỿ'), ('Ἀ', 'Ἇ'), ('Ἐ', 'Ἕ'), ('Ἠ', 'Ἧ'),
+  ('Ἰ', 'Ἷ'), ('Ὀ', 'Ὅ'), ('Ὑ', 'Ὑ'), ('Ὓ', 'Ὓ'),
+  ('Ὕ', 'Ὕ'), ('Ὗ', 'Ὗ'), ('Ὠ', 'Ὧ'), ('Ᾰ', 'Ά'),
+  ('Ὲ', 'Ή'), ('Ῐ', 'Ί'), ('Ῠ', 'Ῥ'), ('Ὸ', 'Ώ'),
+  ('ℂ', 'ℂ'), ('ℇ', 'ℇ'), ('ℋ', 'ℍ'), ('ℐ', 'ℒ'),
+  ('ℕ', 'ℕ'), ('ℙ', 'ℝ'), ('ℤ', 'ℤ'), ('Ω', 'Ω'),
+  ('ℨ', 'ℨ'), ('K', 'ℭ'), ('ℰ', 'ℳ'), ('ℾ', 'ℿ'),
+  ('ⅅ', 'ⅅ'), ('Ↄ', 'Ↄ'), ('Ⰰ', 'Ⱞ'), ('Ⱡ', 'Ⱡ'),
+  ('Ɫ', 'Ɽ'), ('Ⱨ', 'Ⱨ'), ('Ⱪ', 'Ⱪ'), ('Ⱬ', 'Ⱬ'),
+  ('Ɑ', 'Ɒ'), ('Ⱳ', 'Ⱳ'), ('Ⱶ', 'Ⱶ'), ('Ȿ', 'Ⲁ'),
+  ('Ⲃ', 'Ⲃ'), ('Ⲅ', 'Ⲅ'), ('Ⲇ', 'Ⲇ'), ('Ⲉ', 'Ⲉ'),
+  ('Ⲋ', 'Ⲋ'), ('Ⲍ', 'Ⲍ'), ('Ⲏ', 'Ⲏ'), ('Ⲑ', 'Ⲑ'),
+  ('Ⲓ', 'Ⲓ'), ('Ⲕ', 'Ⲕ'), ('Ⲗ', 'Ⲗ'), ('Ⲙ', 'Ⲙ'),
+  ('Ⲛ', 'Ⲛ'), ('Ⲝ', 'Ⲝ'), ('Ⲟ', 'Ⲟ'), ('Ⲡ', 'Ⲡ'),
+  ('Ⲣ', 'Ⲣ'), ('Ⲥ', 'Ⲥ'), ('Ⲧ', 'Ⲧ'), ('Ⲩ', 'Ⲩ'),
+  ('Ⲫ', 'Ⲫ'), ('Ⲭ', 'Ⲭ'), ('Ⲯ', 'Ⲯ'), ('Ⲱ', 'Ⲱ'),
+  ('Ⲳ', 'Ⲳ'), ('Ⲵ', 'Ⲵ'), ('Ⲷ', 'Ⲷ'), ('Ⲹ', 'Ⲹ'),
+  ('Ⲻ', 'Ⲻ'), ('Ⲽ', 'Ⲽ'), ('Ⲿ', 'Ⲿ'), ('Ⳁ', 'Ⳁ'),
+  ('Ⳃ', 'Ⳃ'), ('Ⳅ', 'Ⳅ'), ('Ⳇ', 'Ⳇ'), ('Ⳉ', 'Ⳉ'),
+  ('Ⳋ', 'Ⳋ'), ('Ⳍ', 'Ⳍ'), ('Ⳏ', 'Ⳏ'), ('Ⳑ', 'Ⳑ'),
+  ('Ⳓ', 'Ⳓ'), ('Ⳕ', 'Ⳕ'), ('Ⳗ', 'Ⳗ'), ('Ⳙ', 'Ⳙ'),
+  ('Ⳛ', 'Ⳛ'), ('Ⳝ', 'Ⳝ'), ('Ⳟ', 'Ⳟ'), ('Ⳡ', 'Ⳡ'),
+  ('Ⳣ', 'Ⳣ'), ('Ⳬ', 'Ⳬ'), ('Ⳮ', 'Ⳮ'), ('Ⳳ', 'Ⳳ'),
+  ('Ꙁ', 'Ꙁ'), ('Ꙃ', 'Ꙃ'), ('Ꙅ', 'Ꙅ'), ('Ꙇ', 'Ꙇ'),
+  ('Ꙉ', 'Ꙉ'), ('Ꙋ', 'Ꙋ'), ('Ꙍ', 'Ꙍ'), ('Ꙏ', 'Ꙏ'),
+  ('Ꙑ', 'Ꙑ'), ('Ꙓ', 'Ꙓ'), ('Ꙕ', 'Ꙕ'), ('Ꙗ', 'Ꙗ'),
+  ('Ꙙ', 'Ꙙ'), ('Ꙛ', 'Ꙛ'), ('Ꙝ', 'Ꙝ'), ('Ꙟ', 'Ꙟ'),
+  ('Ꙡ', 'Ꙡ'), ('Ꙣ', 'Ꙣ'), ('Ꙥ', 'Ꙥ'), ('Ꙧ', 'Ꙧ'),
+  ('Ꙩ', 'Ꙩ'), ('Ꙫ', 'Ꙫ'), ('Ꙭ', 'Ꙭ'), ('Ꚁ', 'Ꚁ'),
+  ('Ꚃ', 'Ꚃ'), ('Ꚅ', 'Ꚅ'), ('Ꚇ', 'Ꚇ'), ('Ꚉ', 'Ꚉ'),
+  ('Ꚋ', 'Ꚋ'), ('Ꚍ', 'Ꚍ'), ('Ꚏ', 'Ꚏ'), ('Ꚑ', 'Ꚑ'),
+  ('Ꚓ', 'Ꚓ'), ('Ꚕ', 'Ꚕ'), ('Ꚗ', 'Ꚗ'), ('Ꚙ', 'Ꚙ'),
+  ('Ꚛ', 'Ꚛ'), ('Ꜣ', 'Ꜣ'), ('Ꜥ', 'Ꜥ'), ('Ꜧ', 'Ꜧ'),
+  ('Ꜩ', 'Ꜩ'), ('Ꜫ', 'Ꜫ'), ('Ꜭ', 'Ꜭ'), ('Ꜯ', 'Ꜯ'),
+  ('Ꜳ', 'Ꜳ'), ('Ꜵ', 'Ꜵ'), ('Ꜷ', 'Ꜷ'), ('Ꜹ', 'Ꜹ'),
+  ('Ꜻ', 'Ꜻ'), ('Ꜽ', 'Ꜽ'), ('Ꜿ', 'Ꜿ'), ('Ꝁ', 'Ꝁ'),
+  ('Ꝃ', 'Ꝃ'), ('Ꝅ', 'Ꝅ'), ('Ꝇ', 'Ꝇ'), ('Ꝉ', 'Ꝉ'),
+  ('Ꝋ', 'Ꝋ'), ('Ꝍ', 'Ꝍ'), ('Ꝏ', 'Ꝏ'), ('Ꝑ', 'Ꝑ'),
+  ('Ꝓ', 'Ꝓ'), ('Ꝕ', 'Ꝕ'), ('Ꝗ', 'Ꝗ'), ('Ꝙ', 'Ꝙ'),
+  ('Ꝛ', 'Ꝛ'), ('Ꝝ', 'Ꝝ'), ('Ꝟ', 'Ꝟ'), ('Ꝡ', 'Ꝡ'),
+  ('Ꝣ', 'Ꝣ'), ('Ꝥ', 'Ꝥ'), ('Ꝧ', 'Ꝧ'), ('Ꝩ', 'Ꝩ'),
+  ('Ꝫ', 'Ꝫ'), ('Ꝭ', 'Ꝭ'), ('Ꝯ', 'Ꝯ'), ('Ꝺ', 'Ꝺ'),
+  ('Ꝼ', 'Ꝼ'), ('Ᵹ', 'Ꝿ'), ('Ꞁ', 'Ꞁ'), ('Ꞃ', 'Ꞃ'),
+  ('Ꞅ', 'Ꞅ'), ('Ꞇ', 'Ꞇ'), ('Ꞌ', 'Ꞌ'), ('Ɥ', 'Ɥ'),
+  ('Ꞑ', 'Ꞑ'), ('Ꞓ', 'Ꞓ'), ('Ꞗ', 'Ꞗ'), ('Ꞙ', 'Ꞙ'),
+  ('Ꞛ', 'Ꞛ'), ('Ꞝ', 'Ꞝ'), ('Ꞟ', 'Ꞟ'), ('Ꞡ', 'Ꞡ'),
+  ('Ꞣ', 'Ꞣ'), ('Ꞥ', 'Ꞥ'), ('Ꞧ', 'Ꞧ'), ('Ꞩ', 'Ꞩ'),
+  ('Ɦ', 'Ɪ'), ('Ʞ', 'Ꞵ'), ('Ꞷ', 'Ꞷ'), ('A', 'Z'),
+  ('𐐀', '𐐧'), ('𐒰', '𐓓'), ('𐲀', '𐲲'), ('𑢠', '𑢿'),
+  ('𝐀', '𝐙'), ('𝐴', '𝑍'), ('𝑨', '𝒁'), ('𝒜', '𝒜'),
+  ('𝒞', '𝒟'), ('𝒢', '𝒢'), ('𝒥', '𝒦'), ('𝒩', '𝒬'),
+  ('𝒮', '𝒵'), ('𝓐', '𝓩'), ('𝔄', '𝔅'), ('𝔇', '𝔊'),
+  ('𝔍', '𝔔'), ('𝔖', '𝔜'), ('𝔸', '𝔹'), ('𝔻', '𝔾'),
+  ('𝕀', '𝕄'), ('𝕆', '𝕆'), ('𝕊', '𝕐'), ('𝕬', '𝖅'),
+  ('𝖠', '𝖹'), ('𝗔', '𝗭'), ('𝘈', '𝘡'), ('𝘼', '𝙕'),
+  ('𝙰', '𝚉'), ('𝚨', '𝛀'), ('𝛢', '𝛺'), ('𝜜', '𝜴'),
+  ('𝝖', '𝝮'), ('𝞐', '𝞨'), ('𝟊', '𝟊'), ('𞤀', '𞤡'),
+];
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-syntax/src/unicode_tables/mod.rs
@@ -0,0 +1,9 @@
+pub mod age;
+pub mod case_folding_simple;
+pub mod general_category;
+pub mod perl_word;
+pub mod property_bool;
+pub mod property_names;
+pub mod property_values;
+pub mod script_extension;
+pub mod script;
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-syntax/src/unicode_tables/perl_word.rs
@@ -0,0 +1,179 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+//  ucd-generate perl-word /home/andrew/tmp/ucd-10.0.0/ --chars
+//
+// ucd-generate is available on crates.io.
+
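+// Illustrative sketch only (not emitted by ucd-generate): each table in these
+// generated modules is a sorted, non-overlapping list of inclusive
+// (start, end) char ranges, so a membership test can be done with a binary
+// search over the pairs. The `contains` helper below is hypothetical, shown
+// only to indicate how such a table could be queried:
+//
+//   fn contains(table: &[(char, char)], c: char) -> bool {
+//       table.binary_search_by(|&(start, end)| {
+//           use std::cmp::Ordering;
+//           // Range entirely before c => Less; entirely after c => Greater;
+//           // otherwise c falls inside the range.
+//           if end < c { Ordering::Less }
+//           else if start > c { Ordering::Greater }
+//           else { Ordering::Equal }
+//       }).is_ok()
+//   }
+//
+// e.g. contains(PERL_WORD, 'a') evaluates to true, contains(PERL_WORD, ' ')
+// to false.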
+pub const PERL_WORD: &'static [(char, char)] = &[
+  ('0', '9'), ('A', 'Z'), ('_', '_'), ('a', 'z'), ('ª', 'ª'), ('µ', 'µ'),
+  ('º', 'º'), ('À', 'Ö'), ('Ø', 'ö'), ('ø', 'ˁ'), ('ˆ', 'ˑ'),
+  ('ˠ', 'ˤ'), ('ˬ', 'ˬ'), ('ˮ', 'ˮ'), ('̀', 'ʹ'), ('Ͷ', 'ͷ'),
+  ('ͺ', 'ͽ'), ('Ϳ', 'Ϳ'), ('Ά', 'Ά'), ('Έ', 'Ί'), ('Ό', 'Ό'),
+  ('Ύ', 'Ρ'), ('Σ', 'ϵ'), ('Ϸ', 'ҁ'), ('҃', 'ԯ'), ('Ա', 'Ֆ'),
+  ('ՙ', 'ՙ'), ('ա', 'և'), ('֑', 'ֽ'), ('ֿ', 'ֿ'), ('ׁ', 'ׂ'),
+  ('ׄ', 'ׅ'), ('ׇ', 'ׇ'), ('א', 'ת'), ('װ', 'ײ'), ('ؐ', 'ؚ'),
+  ('ؠ', '٩'), ('ٮ', 'ۓ'), ('ە', 'ۜ'), ('۟', 'ۨ'), ('۪', 'ۼ'),
+  ('ۿ', 'ۿ'), ('ܐ', '݊'), ('ݍ', 'ޱ'), ('߀', 'ߵ'), ('ߺ', 'ߺ'),
+  ('ࠀ', '࠭'), ('ࡀ', '࡛'), ('ࡠ', 'ࡪ'), ('ࢠ', 'ࢴ'),
+  ('ࢶ', 'ࢽ'), ('ࣔ', '࣡'), ('ࣣ', 'ॣ'), ('०', '९'),
+  ('ॱ', 'ঃ'), ('অ', 'ঌ'), ('এ', 'ঐ'), ('ও', 'ন'),
+  ('প', 'র'), ('ল', 'ল'), ('শ', 'হ'), ('়', 'ৄ'),
+  ('ে', 'ৈ'), ('ো', 'ৎ'), ('ৗ', 'ৗ'), ('ড়', 'ঢ়'),
+  ('য়', 'ৣ'), ('০', 'ৱ'), ('ৼ', 'ৼ'), ('ਁ', 'ਃ'),
+  ('ਅ', 'ਊ'), ('ਏ', 'ਐ'), ('ਓ', 'ਨ'), ('ਪ', 'ਰ'),
+  ('ਲ', 'ਲ਼'), ('ਵ', 'ਸ਼'), ('ਸ', 'ਹ'), ('਼', '਼'),
+  ('ਾ', 'ੂ'), ('ੇ', 'ੈ'), ('ੋ', '੍'), ('ੑ', 'ੑ'),
+  ('ਖ਼', 'ੜ'), ('ਫ਼', 'ਫ਼'), ('੦', 'ੵ'), ('ઁ', 'ઃ'),
+  ('અ', 'ઍ'), ('એ', 'ઑ'), ('ઓ', 'ન'), ('પ', 'ર'),
+  ('લ', 'ળ'), ('વ', 'હ'), ('઼', 'ૅ'), ('ે', 'ૉ'),
+  ('ો', '્'), ('ૐ', 'ૐ'), ('ૠ', 'ૣ'), ('૦', '૯'),
+  ('ૹ', '૿'), ('ଁ', 'ଃ'), ('ଅ', 'ଌ'), ('ଏ', 'ଐ'),
+  ('ଓ', 'ନ'), ('ପ', 'ର'), ('ଲ', 'ଳ'), ('ଵ', 'ହ'),
+  ('଼', 'ୄ'), ('େ', 'ୈ'), ('ୋ', '୍'), ('ୖ', 'ୗ'),
+  ('ଡ଼', 'ଢ଼'), ('ୟ', 'ୣ'), ('୦', '୯'), ('ୱ', 'ୱ'),
+  ('ஂ', 'ஃ'), ('அ', 'ஊ'), ('எ', 'ஐ'), ('ஒ', 'க'),
+  ('ங', 'ச'), ('ஜ', 'ஜ'), ('ஞ', 'ட'), ('ண', 'த'),
+  ('ந', 'ப'), ('ம', 'ஹ'), ('ா', 'ூ'), ('ெ', 'ை'),
+  ('ொ', '்'), ('ௐ', 'ௐ'), ('ௗ', 'ௗ'), ('௦', '௯'),
+  ('ఀ', 'ః'), ('అ', 'ఌ'), ('ఎ', 'ఐ'), ('ఒ', 'న'),
+  ('ప', 'హ'), ('ఽ', 'ౄ'), ('ె', 'ై'), ('ొ', '్'),
+  ('ౕ', 'ౖ'), ('ౘ', 'ౚ'), ('ౠ', 'ౣ'), ('౦', '౯'),
+  ('ಀ', 'ಃ'), ('ಅ', 'ಌ'), ('ಎ', 'ಐ'), ('ಒ', 'ನ'),
+  ('ಪ', 'ಳ'), ('ವ', 'ಹ'), ('಼', 'ೄ'), ('ೆ', 'ೈ'),
+  ('ೊ', '್'), ('ೕ', 'ೖ'), ('ೞ', 'ೞ'), ('ೠ', 'ೣ'),
+  ('೦', '೯'), ('ೱ', 'ೲ'), ('ഀ', 'ഃ'), ('അ', 'ഌ'),
+  ('എ', 'ഐ'), ('ഒ', 'ൄ'), ('െ', 'ൈ'), ('ൊ', 'ൎ'),
+  ('ൔ', 'ൗ'), ('ൟ', 'ൣ'), ('൦', '൯'), ('ൺ', 'ൿ'),
+  ('ං', 'ඃ'), ('අ', 'ඖ'), ('ක', 'න'), ('ඳ', 'ර'),
+  ('ල', 'ල'), ('ව', 'ෆ'), ('්', '්'), ('ා', 'ු'),
+  ('ූ', 'ූ'), ('ෘ', 'ෟ'), ('෦', '෯'), ('ෲ', 'ෳ'),
+  ('ก', 'ฺ'), ('เ', '๎'), ('๐', '๙'), ('ກ', 'ຂ'),
+  ('ຄ', 'ຄ'), ('ງ', 'ຈ'), ('ຊ', 'ຊ'), ('ຍ', 'ຍ'),
+  ('ດ', 'ທ'), ('ນ', 'ຟ'), ('ມ', 'ຣ'), ('ລ', 'ລ'),
+  ('ວ', 'ວ'), ('ສ', 'ຫ'), ('ອ', 'ູ'), ('ົ', 'ຽ'),
+  ('ເ', 'ໄ'), ('ໆ', 'ໆ'), ('່', 'ໍ'), ('໐', '໙'),
+  ('ໜ', 'ໟ'), ('ༀ', 'ༀ'), ('༘', '༙'), ('༠', '༩'),
+  ('༵', '༵'), ('༷', '༷'), ('༹', '༹'), ('༾', 'ཇ'),
+  ('ཉ', 'ཬ'), ('ཱ', '྄'), ('྆', 'ྗ'), ('ྙ', 'ྼ'),
+  ('࿆', '࿆'), ('က', '၉'), ('ၐ', 'ႝ'), ('Ⴀ', 'Ⴥ'),
+  ('Ⴧ', 'Ⴧ'), ('Ⴭ', 'Ⴭ'), ('ა', 'ჺ'), ('ჼ', 'ቈ'),
+  ('ቊ', 'ቍ'), ('ቐ', 'ቖ'), ('ቘ', 'ቘ'), ('ቚ', 'ቝ'),
+  ('በ', 'ኈ'), ('ኊ', 'ኍ'), ('ነ', 'ኰ'), ('ኲ', 'ኵ'),
+  ('ኸ', 'ኾ'), ('ዀ', 'ዀ'), ('ዂ', 'ዅ'), ('ወ', 'ዖ'),
+  ('ዘ', 'ጐ'), ('ጒ', 'ጕ'), ('ጘ', 'ፚ'), ('፝', '፟'),
+  ('ᎀ', 'ᎏ'), ('Ꭰ', 'Ᏽ'), ('ᏸ', 'ᏽ'), ('ᐁ', 'ᙬ'),
+  ('ᙯ', 'ᙿ'), ('ᚁ', 'ᚚ'), ('ᚠ', 'ᛪ'), ('ᛮ', 'ᛸ'),
+  ('ᜀ', 'ᜌ'), ('ᜎ', '᜔'), ('ᜠ', '᜴'), ('ᝀ', 'ᝓ'),
+  ('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), ('ᝲ', 'ᝳ'), ('ក', '៓'),
+  ('ៗ', 'ៗ'), ('ៜ', '៝'), ('០', '៩'), ('᠋', '᠍'),
+  ('᠐', '᠙'), ('ᠠ', 'ᡷ'), ('ᢀ', 'ᢪ'), ('ᢰ', 'ᣵ'),
+  ('ᤀ', 'ᤞ'), ('ᤠ', 'ᤫ'), ('ᤰ', '᤻'), ('᥆', 'ᥭ'),
+  ('ᥰ', 'ᥴ'), ('ᦀ', 'ᦫ'), ('ᦰ', 'ᧉ'), ('᧐', '᧙'),
+  ('ᨀ', 'ᨛ'), ('ᨠ', 'ᩞ'), ('᩠', '᩼'), ('᩿', '᪉'),
+  ('᪐', '᪙'), ('ᪧ', 'ᪧ'), ('᪰', '᪾'), ('ᬀ', 'ᭋ'),
+  ('᭐', '᭙'), ('᭫', '᭳'), ('ᮀ', '᯳'), ('ᰀ', '᰷'),
+  ('᱀', '᱉'), ('ᱍ', 'ᱽ'), ('ᲀ', 'ᲈ'), ('᳐', '᳒'),
+  ('᳔', '᳹'), ('ᴀ', '᷹'), ('᷻', 'ἕ'), ('Ἐ', 'Ἕ'),
+  ('ἠ', 'ὅ'), ('Ὀ', 'Ὅ'), ('ὐ', 'ὗ'), ('Ὑ', 'Ὑ'),
+  ('Ὓ', 'Ὓ'), ('Ὕ', 'Ὕ'), ('Ὗ', 'ώ'), ('ᾀ', 'ᾴ'),
+  ('ᾶ', 'ᾼ'), ('ι', 'ι'), ('ῂ', 'ῄ'), ('ῆ', 'ῌ'),
+  ('ῐ', 'ΐ'), ('ῖ', 'Ί'), ('ῠ', 'Ῥ'), ('ῲ', 'ῴ'),
+  ('ῶ', 'ῼ'), ('\u{200c}', '\u{200d}'), ('‿', '⁀'), ('⁔', '⁔'),
+  ('ⁱ', 'ⁱ'), ('ⁿ', 'ⁿ'), ('ₐ', 'ₜ'), ('⃐', '⃰'),
+  ('ℂ', 'ℂ'), ('ℇ', 'ℇ'), ('ℊ', 'ℓ'), ('ℕ', 'ℕ'),
+  ('ℙ', 'ℝ'), ('ℤ', 'ℤ'), ('Ω', 'Ω'), ('ℨ', 'ℨ'),
+  ('K', 'ℭ'), ('ℯ', 'ℹ'), ('ℼ', 'ℿ'), ('ⅅ', 'ⅉ'),
+  ('ⅎ', 'ⅎ'), ('Ⅰ', 'ↈ'), ('Ⓐ', 'ⓩ'), ('Ⰰ', 'Ⱞ'),
+  ('ⰰ', 'ⱞ'), ('Ⱡ', 'ⳤ'), ('Ⳬ', 'ⳳ'), ('ⴀ', 'ⴥ'),
+  ('ⴧ', 'ⴧ'), ('ⴭ', 'ⴭ'), ('ⴰ', 'ⵧ'), ('ⵯ', 'ⵯ'),
+  ('⵿', 'ⶖ'), ('ⶠ', 'ⶦ'), ('ⶨ', 'ⶮ'), ('ⶰ', 'ⶶ'),
+  ('ⶸ', 'ⶾ'), ('ⷀ', 'ⷆ'), ('ⷈ', 'ⷎ'), ('ⷐ', 'ⷖ'),
+  ('ⷘ', 'ⷞ'), ('ⷠ', 'ⷿ'), ('ⸯ', 'ⸯ'), ('々', '〇'),
+  ('〡', '〯'), ('〱', '〵'), ('〸', '〼'), ('ぁ', 'ゖ'),
+  ('゙', '゚'), ('ゝ', 'ゟ'), ('ァ', 'ヺ'), ('ー', 'ヿ'),
+  ('ㄅ', 'ㄮ'), ('ㄱ', 'ㆎ'), ('ㆠ', 'ㆺ'), ('ㇰ', 'ㇿ'),
+  ('㐀', '䶵'), ('一', '鿪'), ('ꀀ', 'ꒌ'), ('ꓐ', 'ꓽ'),
+  ('ꔀ', 'ꘌ'), ('ꘐ', 'ꘫ'), ('Ꙁ', '꙲'), ('ꙴ', '꙽'),
+  ('ꙿ', '꛱'), ('ꜗ', 'ꜟ'), ('Ꜣ', 'ꞈ'), ('Ꞌ', 'Ɪ'),
+  ('Ʞ', 'ꞷ'), ('ꟷ', 'ꠧ'), ('ꡀ', 'ꡳ'), ('ꢀ', 'ꣅ'),
+  ('꣐', '꣙'), ('꣠', 'ꣷ'), ('ꣻ', 'ꣻ'), ('ꣽ', 'ꣽ'),
+  ('꤀', '꤭'), ('ꤰ', '꥓'), ('ꥠ', 'ꥼ'), ('ꦀ', '꧀'),
+  ('ꧏ', '꧙'), ('ꧠ', 'ꧾ'), ('ꨀ', 'ꨶ'), ('ꩀ', 'ꩍ'),
+  ('꩐', '꩙'), ('ꩠ', 'ꩶ'), ('ꩺ', 'ꫂ'), ('ꫛ', 'ꫝ'),
+  ('ꫠ', 'ꫯ'), ('ꫲ', '꫶'), ('ꬁ', 'ꬆ'), ('ꬉ', 'ꬎ'),
+  ('ꬑ', 'ꬖ'), ('ꬠ', 'ꬦ'), ('ꬨ', 'ꬮ'), ('ꬰ', 'ꭚ'),
+  ('ꭜ', 'ꭥ'), ('ꭰ', 'ꯪ'), ('꯬', '꯭'), ('꯰', '꯹'),
+  ('가', '힣'), ('ힰ', 'ퟆ'), ('ퟋ', 'ퟻ'), ('豈', '舘'),
+  ('並', '龎'), ('ff', 'st'), ('ﬓ', 'ﬗ'), ('יִ', 'ﬨ'),
+  ('שׁ', 'זּ'), ('טּ', 'לּ'), ('מּ', 'מּ'), ('נּ', 'סּ'),
+  ('ףּ', 'פּ'), ('צּ', 'ﮱ'), ('ﯓ', 'ﴽ'), ('ﵐ', 'ﶏ'),
+  ('ﶒ', 'ﷇ'), ('ﷰ', 'ﷻ'), ('︀', '️'), ('︠', '︯'),
+  ('︳', '︴'), ('﹍', '﹏'), ('ﹰ', 'ﹴ'), ('ﹶ', 'ﻼ'),
+  ('0', '9'), ('A', 'Z'), ('_', '_'), ('a', 'z'),
+  ('ヲ', 'ᄒ'), ('ᅡ', 'ᅦ'), ('ᅧ', 'ᅬ'), ('ᅭ', 'ᅲ'),
+  ('ᅳ', 'ᅵ'), ('𐀀', '𐀋'), ('𐀍', '𐀦'), ('𐀨', '𐀺'),
+  ('𐀼', '𐀽'), ('𐀿', '𐁍'), ('𐁐', '𐁝'), ('𐂀', '𐃺'),
+  ('𐅀', '𐅴'), ('𐇽', '𐇽'), ('𐊀', '𐊜'), ('𐊠', '𐋐'),
+  ('𐋠', '𐋠'), ('𐌀', '𐌟'), ('𐌭', '𐍊'), ('𐍐', '𐍺'),
+  ('𐎀', '𐎝'), ('𐎠', '𐏃'), ('𐏈', '𐏏'), ('𐏑', '𐏕'),
+  ('𐐀', '𐒝'), ('𐒠', '𐒩'), ('𐒰', '𐓓'), ('𐓘', '𐓻'),
+  ('𐔀', '𐔧'), ('𐔰', '𐕣'), ('𐘀', '𐜶'), ('𐝀', '𐝕'),
+  ('𐝠', '𐝧'), ('𐠀', '𐠅'), ('𐠈', '𐠈'), ('𐠊', '𐠵'),
+  ('𐠷', '𐠸'), ('𐠼', '𐠼'), ('𐠿', '𐡕'), ('𐡠', '𐡶'),
+  ('𐢀', '𐢞'), ('𐣠', '𐣲'), ('𐣴', '𐣵'), ('𐤀', '𐤕'),
+  ('𐤠', '𐤹'), ('𐦀', '𐦷'), ('𐦾', '𐦿'), ('𐨀', '𐨃'),
+  ('𐨅', '𐨆'), ('𐨌', '𐨓'), ('𐨕', '𐨗'), ('𐨙', '𐨳'),
+  ('𐨸', '𐨺'), ('𐨿', '𐨿'), ('𐩠', '𐩼'), ('𐪀', '𐪜'),
+  ('𐫀', '𐫇'), ('𐫉', '𐫦'), ('𐬀', '𐬵'), ('𐭀', '𐭕'),
+  ('𐭠', '𐭲'), ('𐮀', '𐮑'), ('𐰀', '𐱈'), ('𐲀', '𐲲'),
+  ('𐳀', '𐳲'), ('𑀀', '𑁆'), ('𑁦', '𑁯'), ('𑁿', '𑂺'),
+  ('𑃐', '𑃨'), ('𑃰', '𑃹'), ('𑄀', '𑄴'), ('𑄶', '𑄿'),
+  ('𑅐', '𑅳'), ('𑅶', '𑅶'), ('𑆀', '𑇄'), ('𑇊', '𑇌'),
+  ('𑇐', '𑇚'), ('𑇜', '𑇜'), ('𑈀', '𑈑'), ('𑈓', '𑈷'),
+  ('𑈾', '𑈾'), ('𑊀', '𑊆'), ('𑊈', '𑊈'), ('𑊊', '𑊍'),
+  ('𑊏', '𑊝'), ('𑊟', '𑊨'), ('𑊰', '𑋪'), ('𑋰', '𑋹'),
+  ('𑌀', '𑌃'), ('𑌅', '𑌌'), ('𑌏', '𑌐'), ('𑌓', '𑌨'),
+  ('𑌪', '𑌰'), ('𑌲', '𑌳'), ('𑌵', '𑌹'), ('𑌼', '𑍄'),
+  ('𑍇', '𑍈'), ('𑍋', '𑍍'), ('𑍐', '𑍐'), ('𑍗', '𑍗'),
+  ('𑍝', '𑍣'), ('𑍦', '𑍬'), ('𑍰', '𑍴'), ('𑐀', '𑑊'),
+  ('𑑐', '𑑙'), ('𑒀', '𑓅'), ('𑓇', '𑓇'), ('𑓐', '𑓙'),
+  ('𑖀', '𑖵'), ('𑖸', '𑗀'), ('𑗘', '𑗝'), ('𑘀', '𑙀'),
+  ('𑙄', '𑙄'), ('𑙐', '𑙙'), ('𑚀', '𑚷'), ('𑛀', '𑛉'),
+  ('𑜀', '𑜙'), ('𑜝', '𑜫'), ('𑜰', '𑜹'), ('𑢠', '𑣩'),
+  ('𑣿', '𑣿'), ('𑨀', '𑨾'), ('𑩇', '𑩇'), ('𑩐', '𑪃'),
+  ('𑪆', '𑪙'), ('𑫀', '𑫸'), ('𑰀', '𑰈'), ('𑰊', '𑰶'),
+  ('𑰸', '𑱀'), ('𑱐', '𑱙'), ('𑱲', '𑲏'), ('𑲒', '𑲧'),
+  ('𑲩', '𑲶'), ('𑴀', '𑴆'), ('𑴈', '𑴉'), ('𑴋', '𑴶'),
+  ('𑴺', '𑴺'), ('𑴼', '𑴽'), ('𑴿', '𑵇'), ('𑵐', '𑵙'),
+  ('𒀀', '𒎙'), ('𒐀', '𒑮'), ('𒒀', '𒕃'), ('𓀀', '𓐮'),
+  ('𔐀', '𔙆'), ('𖠀', '𖨸'), ('𖩀', '𖩞'), ('𖩠', '𖩩'),
+  ('𖫐', '𖫭'), ('𖫰', '𖫴'), ('𖬀', '𖬶'), ('𖭀', '𖭃'),
+  ('𖭐', '𖭙'), ('𖭣', '𖭷'), ('𖭽', '𖮏'), ('𖼀', '𖽄'),
+  ('𖽐', '𖽾'), ('𖾏', '𖾟'), ('𖿠', '𖿡'), ('𗀀', '𘟬'),
+  ('𘠀', '𘫲'), ('𛀀', '𛄞'), ('𛅰', '𛋻'), ('𛰀', '𛱪'),
+  ('𛱰', '𛱼'), ('𛲀', '𛲈'), ('𛲐', '𛲙'), ('𛲝', '𛲞'),
+  ('𝅥', '𝅩'), ('𝅭', '𝅲'), ('𝅻', '𝆂'), ('𝆅', '𝆋'),
+  ('𝆪', '𝆭'), ('𝉂', '𝉄'), ('𝐀', '𝑔'), ('𝑖', '𝒜'),
+  ('𝒞', '𝒟'), ('𝒢', '𝒢'), ('𝒥', '𝒦'), ('𝒩', '𝒬'),
+  ('𝒮', '𝒹'), ('𝒻', '𝒻'), ('𝒽', '𝓃'), ('𝓅', '𝔅'),
+  ('𝔇', '𝔊'), ('𝔍', '𝔔'), ('𝔖', '𝔜'), ('𝔞', '𝔹'),
+  ('𝔻', '𝔾'), ('𝕀', '𝕄'), ('𝕆', '𝕆'), ('𝕊', '𝕐'),
+  ('𝕒', '𝚥'), ('𝚨', '𝛀'), ('𝛂', '𝛚'), ('𝛜', '𝛺'),
+  ('𝛼', '𝜔'), ('𝜖', '𝜴'), ('𝜶', '𝝎'), ('𝝐', '𝝮'),
+  ('𝝰', '𝞈'), ('𝞊', '𝞨'), ('𝞪', '𝟂'), ('𝟄', '𝟋'),
+  ('𝟎', '𝟿'), ('𝨀', '𝨶'), ('𝨻', '𝩬'), ('𝩵', '𝩵'),
+  ('𝪄', '𝪄'), ('𝪛', '𝪟'), ('𝪡', '𝪯'), ('𞀀', '𞀆'),
+  ('𞀈', '𞀘'), ('𞀛', '𞀡'), ('𞀣', '𞀤'), ('𞀦', '𞀪'),
+  ('𞠀', '𞣄'), ('𞣐', '𞣖'), ('𞤀', '𞥊'), ('𞥐', '𞥙'),
+  ('𞸀', '𞸃'), ('𞸅', '𞸟'), ('𞸡', '𞸢'), ('𞸤', '𞸤'),
+  ('𞸧', '𞸧'), ('𞸩', '𞸲'), ('𞸴', '𞸷'), ('𞸹', '𞸹'),
+  ('𞸻', '𞸻'), ('𞹂', '𞹂'), ('𞹇', '𞹇'), ('𞹉', '𞹉'),
+  ('𞹋', '𞹋'), ('𞹍', '𞹏'), ('𞹑', '𞹒'), ('𞹔', '𞹔'),
+  ('𞹗', '𞹗'), ('𞹙', '𞹙'), ('𞹛', '𞹛'), ('𞹝', '𞹝'),
+  ('𞹟', '𞹟'), ('𞹡', '𞹢'), ('𞹤', '𞹤'), ('𞹧', '𞹪'),
+  ('𞹬', '𞹲'), ('𞹴', '𞹷'), ('𞹹', '𞹼'), ('𞹾', '𞹾'),
+  ('𞺀', '𞺉'), ('𞺋', '𞺛'), ('𞺡', '𞺣'), ('𞺥', '𞺩'),
+  ('𞺫', '𞺻'), ('🄰', '🅉'), ('🅐', '🅩'), ('🅰', '🆉'),
+  ('𠀀', '𪛖'), ('𪜀', '𫜴'), ('𫝀', '𫠝'), ('𫠠', '𬺡'),
+  ('𬺰', '𮯠'), ('丽', '𪘀'), ('󠄀', '󠇯'),
+];
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-syntax/src/unicode_tables/property_bool.rs
@@ -0,0 +1,2576 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+//  ucd-generate property-bool tmp/ucd-10.0.0/ --chars
+//
+// ucd-generate is available on crates.io.
+
+pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
+  ("ASCII_Hex_Digit", ASCII_HEX_DIGIT), ("Alphabetic", ALPHABETIC),
+  ("Bidi_Control", BIDI_CONTROL), ("Case_Ignorable", CASE_IGNORABLE),
+  ("Cased", CASED), ("Changes_When_Casefolded", CHANGES_WHEN_CASEFOLDED),
+  ("Changes_When_Casemapped", CHANGES_WHEN_CASEMAPPED),
+  ("Changes_When_Lowercased", CHANGES_WHEN_LOWERCASED),
+  ("Changes_When_Titlecased", CHANGES_WHEN_TITLECASED),
+  ("Changes_When_Uppercased", CHANGES_WHEN_UPPERCASED), ("Dash", DASH),
+  ("Default_Ignorable_Code_Point", DEFAULT_IGNORABLE_CODE_POINT),
+  ("Deprecated", DEPRECATED), ("Diacritic", DIACRITIC),
+  ("Extender", EXTENDER), ("Grapheme_Base", GRAPHEME_BASE),
+  ("Grapheme_Extend", GRAPHEME_EXTEND), ("Grapheme_Link", GRAPHEME_LINK),
+  ("Hex_Digit", HEX_DIGIT), ("Hyphen", HYPHEN),
+  ("IDS_Binary_Operator", IDS_BINARY_OPERATOR),
+  ("IDS_Trinary_Operator", IDS_TRINARY_OPERATOR),
+  ("ID_Continue", ID_CONTINUE), ("ID_Start", ID_START),
+  ("Ideographic", IDEOGRAPHIC), ("Join_Control", JOIN_CONTROL),
+  ("Logical_Order_Exception", LOGICAL_ORDER_EXCEPTION),
+  ("Lowercase", LOWERCASE), ("Math", MATH),
+  ("Noncharacter_Code_Point", NONCHARACTER_CODE_POINT),
+  ("Other_Alphabetic", OTHER_ALPHABETIC),
+  ("Other_Default_Ignorable_Code_Point", OTHER_DEFAULT_IGNORABLE_CODE_POINT),
+  ("Other_Grapheme_Extend", OTHER_GRAPHEME_EXTEND),
+  ("Other_ID_Continue", OTHER_ID_CONTINUE),
+  ("Other_ID_Start", OTHER_ID_START), ("Other_Lowercase", OTHER_LOWERCASE),
+  ("Other_Math", OTHER_MATH), ("Other_Uppercase", OTHER_UPPERCASE),
+  ("Pattern_Syntax", PATTERN_SYNTAX),
+  ("Pattern_White_Space", PATTERN_WHITE_SPACE),
+  ("Prepended_Concatenation_Mark", PREPENDED_CONCATENATION_MARK),
+  ("Quotation_Mark", QUOTATION_MARK), ("Radical", RADICAL),
+  ("Regional_Indicator", REGIONAL_INDICATOR),
+  ("Sentence_Terminal", SENTENCE_TERMINAL), ("Soft_Dotted", SOFT_DOTTED),
+  ("Terminal_Punctuation", TERMINAL_PUNCTUATION),
+  ("Unified_Ideograph", UNIFIED_IDEOGRAPH), ("Uppercase", UPPERCASE),
+  ("Variation_Selector", VARIATION_SELECTOR), ("White_Space", WHITE_SPACE),
+  ("XID_Continue", XID_CONTINUE), ("XID_Start", XID_START),
+];
+
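+// Illustrative sketch, not emitted by ucd-generate: one way a caller might
+// resolve a property table from BY_NAME and test whether a scalar value is
+// covered by it, assuming BY_NAME stays sorted by property name and each
+// range list stays sorted by code point with inclusive, non-overlapping
+// ranges. The helpers `property` and `contains` are hypothetical names used
+// only for this example.
+#[allow(dead_code)]
+fn property(name: &str) -> Option<&'static [(char, char)]> {
+  // A linear scan keeps the sketch short; since BY_NAME is sorted by its
+  // first field, a binary search on that field would also work.
+  BY_NAME.iter().find(|&&(n, _)| n == name).map(|&(_, ranges)| ranges)
+}
+
+#[allow(dead_code)]
+fn contains(ranges: &[(char, char)], c: char) -> bool {
+  // Each entry is an inclusive (start, end) pair, so a binary search can
+  // locate the single candidate range in O(log n).
+  ranges
+    .binary_search_by(|&(start, end)| {
+      if end < c {
+        std::cmp::Ordering::Less
+      } else if start > c {
+        std::cmp::Ordering::Greater
+      } else {
+        std::cmp::Ordering::Equal
+      }
+    })
+    .is_ok()
+}
+
+// Under these assumptions, contains(property("ASCII_Hex_Digit").unwrap(), 'f')
+// would be expected to return true, while passing 'g' would return false.
+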
+pub const ASCII_HEX_DIGIT: &'static [(char, char)] = &[
+  ('0', '9'), ('A', 'F'), ('a', 'f'),
+];
+
+pub const ALPHABETIC: &'static [(char, char)] = &[
+  ('A', 'Z'), ('a', 'z'), ('ª', 'ª'), ('µ', 'µ'), ('º', 'º'),
+  ('À', 'Ö'), ('Ø', 'ö'), ('ø', 'ˁ'), ('ˆ', 'ˑ'), ('ˠ', 'ˤ'),
+  ('ˬ', 'ˬ'), ('ˮ', 'ˮ'), ('ͅ', 'ͅ'), ('Ͱ', 'ʹ'), ('Ͷ', 'ͷ'),
+  ('ͺ', 'ͽ'), ('Ϳ', 'Ϳ'), ('Ά', 'Ά'), ('Έ', 'Ί'), ('Ό', 'Ό'),
+  ('Ύ', 'Ρ'), ('Σ', 'ϵ'), ('Ϸ', 'ҁ'), ('Ҋ', 'ԯ'), ('Ա', 'Ֆ'),
+  ('ՙ', 'ՙ'), ('ա', 'և'), ('ְ', 'ֽ'), ('ֿ', 'ֿ'), ('ׁ', 'ׂ'),
+  ('ׄ', 'ׅ'), ('ׇ', 'ׇ'), ('א', 'ת'), ('װ', 'ײ'), ('ؐ', 'ؚ'),
+  ('ؠ', 'ٗ'), ('ٙ', 'ٟ'), ('ٮ', 'ۓ'), ('ە', 'ۜ'), ('ۡ', 'ۨ'),
+  ('ۭ', 'ۯ'), ('ۺ', 'ۼ'), ('ۿ', 'ۿ'), ('ܐ', 'ܿ'), ('ݍ', 'ޱ'),
+  ('ߊ', 'ߪ'), ('ߴ', 'ߵ'), ('ߺ', 'ߺ'), ('ࠀ', 'ࠗ'), ('ࠚ', 'ࠬ'),
+  ('ࡀ', 'ࡘ'), ('ࡠ', 'ࡪ'), ('ࢠ', 'ࢴ'), ('ࢶ', 'ࢽ'),
+  ('ࣔ', 'ࣟ'), ('ࣣ', 'ࣩ'), ('ࣰ', 'ऻ'), ('ऽ', 'ौ'),
+  ('ॎ', 'ॐ'), ('ॕ', 'ॣ'), ('ॱ', 'ঃ'), ('অ', 'ঌ'),
+  ('এ', 'ঐ'), ('ও', 'ন'), ('প', 'র'), ('ল', 'ল'),
+  ('শ', 'হ'), ('ঽ', 'ৄ'), ('ে', 'ৈ'), ('ো', 'ৌ'),
+  ('ৎ', 'ৎ'), ('ৗ', 'ৗ'), ('ড়', 'ঢ়'), ('য়', 'ৣ'),
+  ('ৰ', 'ৱ'), ('ৼ', 'ৼ'), ('ਁ', 'ਃ'), ('ਅ', 'ਊ'),
+  ('ਏ', 'ਐ'), ('ਓ', 'ਨ'), ('ਪ', 'ਰ'), ('ਲ', 'ਲ਼'),
+  ('ਵ', 'ਸ਼'), ('ਸ', 'ਹ'), ('ਾ', 'ੂ'), ('ੇ', 'ੈ'),
+  ('ੋ', 'ੌ'), ('ੑ', 'ੑ'), ('ਖ਼', 'ੜ'), ('ਫ਼', 'ਫ਼'),
+  ('ੰ', 'ੵ'), ('ઁ', 'ઃ'), ('અ', 'ઍ'), ('એ', 'ઑ'),
+  ('ઓ', 'ન'), ('પ', 'ર'), ('લ', 'ળ'), ('વ', 'હ'),
+  ('ઽ', 'ૅ'), ('ે', 'ૉ'), ('ો', 'ૌ'), ('ૐ', 'ૐ'),
+  ('ૠ', 'ૣ'), ('ૹ', 'ૼ'), ('ଁ', 'ଃ'), ('ଅ', 'ଌ'),
+  ('ଏ', 'ଐ'), ('ଓ', 'ନ'), ('ପ', 'ର'), ('ଲ', 'ଳ'),
+  ('ଵ', 'ହ'), ('ଽ', 'ୄ'), ('େ', 'ୈ'), ('ୋ', 'ୌ'),
+  ('ୖ', 'ୗ'), ('ଡ଼', 'ଢ଼'), ('ୟ', 'ୣ'), ('ୱ', 'ୱ'),
+  ('ஂ', 'ஃ'), ('அ', 'ஊ'), ('எ', 'ஐ'), ('ஒ', 'க'),
+  ('ங', 'ச'), ('ஜ', 'ஜ'), ('ஞ', 'ட'), ('ண', 'த'),
+  ('ந', 'ப'), ('ம', 'ஹ'), ('ா', 'ூ'), ('ெ', 'ை'),
+  ('ொ', 'ௌ'), ('ௐ', 'ௐ'), ('ௗ', 'ௗ'), ('ఀ', 'ః'),
+  ('అ', 'ఌ'), ('ఎ', 'ఐ'), ('ఒ', 'న'), ('ప', 'హ'),
+  ('ఽ', 'ౄ'), ('ె', 'ై'), ('ొ', 'ౌ'), ('ౕ', 'ౖ'),
+  ('ౘ', 'ౚ'), ('ౠ', 'ౣ'), ('ಀ', 'ಃ'), ('ಅ', 'ಌ'),
+  ('ಎ', 'ಐ'), ('ಒ', 'ನ'), ('ಪ', 'ಳ'), ('ವ', 'ಹ'),
+  ('ಽ', 'ೄ'), ('ೆ', 'ೈ'), ('ೊ', 'ೌ'), ('ೕ', 'ೖ'),
+  ('ೞ', 'ೞ'), ('ೠ', 'ೣ'), ('ೱ', 'ೲ'), ('ഀ', 'ഃ'),
+  ('അ', 'ഌ'), ('എ', 'ഐ'), ('ഒ', 'ഺ'), ('ഽ', 'ൄ'),
+  ('െ', 'ൈ'), ('ൊ', 'ൌ'), ('ൎ', 'ൎ'), ('ൔ', 'ൗ'),
+  ('ൟ', 'ൣ'), ('ൺ', 'ൿ'), ('ං', 'ඃ'), ('අ', 'ඖ'),
+  ('ක', 'න'), ('ඳ', 'ර'), ('ල', 'ල'), ('ව', 'ෆ'),
+  ('ා', 'ු'), ('ූ', 'ූ'), ('ෘ', 'ෟ'), ('ෲ', 'ෳ'),
+  ('ก', 'ฺ'), ('เ', 'ๆ'), ('ํ', 'ํ'), ('ກ', 'ຂ'),
+  ('ຄ', 'ຄ'), ('ງ', 'ຈ'), ('ຊ', 'ຊ'), ('ຍ', 'ຍ'),
+  ('ດ', 'ທ'), ('ນ', 'ຟ'), ('ມ', 'ຣ'), ('ລ', 'ລ'),
+  ('ວ', 'ວ'), ('ສ', 'ຫ'), ('ອ', 'ູ'), ('ົ', 'ຽ'),
+  ('ເ', 'ໄ'), ('ໆ', 'ໆ'), ('ໍ', 'ໍ'), ('ໜ', 'ໟ'),
+  ('ༀ', 'ༀ'), ('ཀ', 'ཇ'), ('ཉ', 'ཬ'), ('ཱ', 'ཱྀ'),
+  ('ྈ', 'ྗ'), ('ྙ', 'ྼ'), ('က', 'ံ'), ('း', 'း'),
+  ('ျ', 'ဿ'), ('ၐ', 'ၢ'), ('ၥ', 'ၨ'), ('ၮ', 'ႆ'),
+  ('ႎ', 'ႎ'), ('ႜ', 'ႝ'), ('Ⴀ', 'Ⴥ'), ('Ⴧ', 'Ⴧ'),
+  ('Ⴭ', 'Ⴭ'), ('ა', 'ჺ'), ('ჼ', 'ቈ'), ('ቊ', 'ቍ'),
+  ('ቐ', 'ቖ'), ('ቘ', 'ቘ'), ('ቚ', 'ቝ'), ('በ', 'ኈ'),
+  ('ኊ', 'ኍ'), ('ነ', 'ኰ'), ('ኲ', 'ኵ'), ('ኸ', 'ኾ'),
+  ('ዀ', 'ዀ'), ('ዂ', 'ዅ'), ('ወ', 'ዖ'), ('ዘ', 'ጐ'),
+  ('ጒ', 'ጕ'), ('ጘ', 'ፚ'), ('፟', '፟'), ('ᎀ', 'ᎏ'),
+  ('Ꭰ', 'Ᏽ'), ('ᏸ', 'ᏽ'), ('ᐁ', 'ᙬ'), ('ᙯ', 'ᙿ'),
+  ('ᚁ', 'ᚚ'), ('ᚠ', 'ᛪ'), ('ᛮ', 'ᛸ'), ('ᜀ', 'ᜌ'),
+  ('ᜎ', 'ᜓ'), ('ᜠ', 'ᜳ'), ('ᝀ', 'ᝓ'), ('ᝠ', 'ᝬ'),
+  ('ᝮ', 'ᝰ'), ('ᝲ', 'ᝳ'), ('ក', 'ឳ'), ('ា', 'ៈ'),
+  ('ៗ', 'ៗ'), ('ៜ', 'ៜ'), ('ᠠ', 'ᡷ'), ('ᢀ', 'ᢪ'),
+  ('ᢰ', 'ᣵ'), ('ᤀ', 'ᤞ'), ('ᤠ', 'ᤫ'), ('ᤰ', 'ᤸ'),
+  ('ᥐ', 'ᥭ'), ('ᥰ', 'ᥴ'), ('ᦀ', 'ᦫ'), ('ᦰ', 'ᧉ'),
+  ('ᨀ', 'ᨛ'), ('ᨠ', 'ᩞ'), ('ᩡ', 'ᩴ'), ('ᪧ', 'ᪧ'),
+  ('ᬀ', 'ᬳ'), ('ᬵ', 'ᭃ'), ('ᭅ', 'ᭋ'), ('ᮀ', 'ᮩ'),
+  ('ᮬ', 'ᮯ'), ('ᮺ', 'ᯥ'), ('ᯧ', 'ᯱ'), ('ᰀ', 'ᰵ'),
+  ('ᱍ', 'ᱏ'), ('ᱚ', 'ᱽ'), ('ᲀ', 'ᲈ'), ('ᳩ', 'ᳬ'),
+  ('ᳮ', 'ᳳ'), ('ᳵ', 'ᳶ'), ('ᴀ', 'ᶿ'), ('ᷧ', 'ᷴ'),
+  ('Ḁ', 'ἕ'), ('Ἐ', 'Ἕ'), ('ἠ', 'ὅ'), ('Ὀ', 'Ὅ'),
+  ('ὐ', 'ὗ'), ('Ὑ', 'Ὑ'), ('Ὓ', 'Ὓ'), ('Ὕ', 'Ὕ'),
+  ('Ὗ', 'ώ'), ('ᾀ', 'ᾴ'), ('ᾶ', 'ᾼ'), ('ι', 'ι'),
+  ('ῂ', 'ῄ'), ('ῆ', 'ῌ'), ('ῐ', 'ΐ'), ('ῖ', 'Ί'),
+  ('ῠ', 'Ῥ'), ('ῲ', 'ῴ'), ('ῶ', 'ῼ'), ('ⁱ', 'ⁱ'),
+  ('ⁿ', 'ⁿ'), ('ₐ', 'ₜ'), ('ℂ', 'ℂ'), ('ℇ', 'ℇ'),
+  ('ℊ', 'ℓ'), ('ℕ', 'ℕ'), ('ℙ', 'ℝ'), ('ℤ', 'ℤ'),
+  ('Ω', 'Ω'), ('ℨ', 'ℨ'), ('K', 'ℭ'), ('ℯ', 'ℹ'),
+  ('ℼ', 'ℿ'), ('ⅅ', 'ⅉ'), ('ⅎ', 'ⅎ'), ('Ⅰ', 'ↈ'),
+  ('Ⓐ', 'ⓩ'), ('Ⰰ', 'Ⱞ'), ('ⰰ', 'ⱞ'), ('Ⱡ', 'ⳤ'),
+  ('Ⳬ', 'ⳮ'), ('Ⳳ', 'ⳳ'), ('ⴀ', 'ⴥ'), ('ⴧ', 'ⴧ'),
+  ('ⴭ', 'ⴭ'), ('ⴰ', 'ⵧ'), ('ⵯ', 'ⵯ'), ('ⶀ', 'ⶖ'),
+  ('ⶠ', 'ⶦ'), ('ⶨ', 'ⶮ'), ('ⶰ', 'ⶶ'), ('ⶸ', 'ⶾ'),
+  ('ⷀ', 'ⷆ'), ('ⷈ', 'ⷎ'), ('ⷐ', 'ⷖ'), ('ⷘ', 'ⷞ'),
+  ('ⷠ', 'ⷿ'), ('ⸯ', 'ⸯ'), ('々', '〇'), ('〡', '〩'),
+  ('〱', '〵'), ('〸', '〼'), ('ぁ', 'ゖ'), ('ゝ', 'ゟ'),
+  ('ァ', 'ヺ'), ('ー', 'ヿ'), ('ㄅ', 'ㄮ'), ('ㄱ', 'ㆎ'),
+  ('ㆠ', 'ㆺ'), ('ㇰ', 'ㇿ'), ('㐀', '䶵'), ('一', '鿪'),
+  ('ꀀ', 'ꒌ'), ('ꓐ', 'ꓽ'), ('ꔀ', 'ꘌ'), ('ꘐ', 'ꘟ'),
+  ('ꘪ', 'ꘫ'), ('Ꙁ', 'ꙮ'), ('ꙴ', 'ꙻ'), ('ꙿ', 'ꛯ'),
+  ('ꜗ', 'ꜟ'), ('Ꜣ', 'ꞈ'), ('Ꞌ', 'Ɪ'), ('Ʞ', 'ꞷ'),
+  ('ꟷ', 'ꠁ'), ('ꠃ', 'ꠅ'), ('ꠇ', 'ꠊ'), ('ꠌ', 'ꠧ'),
+  ('ꡀ', 'ꡳ'), ('ꢀ', 'ꣃ'), ('ꣅ', 'ꣅ'), ('ꣲ', 'ꣷ'),
+  ('ꣻ', 'ꣻ'), ('ꣽ', 'ꣽ'), ('ꤊ', 'ꤪ'), ('ꤰ', 'ꥒ'),
+  ('ꥠ', 'ꥼ'), ('ꦀ', 'ꦲ'), ('ꦴ', 'ꦿ'), ('ꧏ', 'ꧏ'),
+  ('ꧠ', 'ꧤ'), ('ꧦ', 'ꧯ'), ('ꧺ', 'ꧾ'), ('ꨀ', 'ꨶ'),
+  ('ꩀ', 'ꩍ'), ('ꩠ', 'ꩶ'), ('ꩺ', 'ꩺ'), ('ꩾ', 'ꪾ'),
+  ('ꫀ', 'ꫀ'), ('ꫂ', 'ꫂ'), ('ꫛ', 'ꫝ'), ('ꫠ', 'ꫯ'),
+  ('ꫲ', 'ꫵ'), ('ꬁ', 'ꬆ'), ('ꬉ', 'ꬎ'), ('ꬑ', 'ꬖ'),
+  ('ꬠ', 'ꬦ'), ('ꬨ', 'ꬮ'), ('ꬰ', 'ꭚ'), ('ꭜ', 'ꭥ'),
+  ('ꭰ', 'ꯪ'), ('가', '힣'), ('ힰ', 'ퟆ'), ('ퟋ', 'ퟻ'),
+  ('豈', '舘'), ('並', '龎'), ('ff', 'st'), ('ﬓ', 'ﬗ'),
+  ('יִ', 'ﬨ'), ('שׁ', 'זּ'), ('טּ', 'לּ'), ('מּ', 'מּ'),
+  ('נּ', 'סּ'), ('ףּ', 'פּ'), ('צּ', 'ﮱ'), ('ﯓ', 'ﴽ'),
+  ('ﵐ', 'ﶏ'), ('ﶒ', 'ﷇ'), ('ﷰ', 'ﷻ'), ('ﹰ', 'ﹴ'),
+  ('ﹶ', 'ﻼ'), ('A', 'Z'), ('a', 'z'), ('ヲ', 'ᄒ'),
+  ('ᅡ', 'ᅦ'), ('ᅧ', 'ᅬ'), ('ᅭ', 'ᅲ'), ('ᅳ', 'ᅵ'),
+  ('𐀀', '𐀋'), ('𐀍', '𐀦'), ('𐀨', '𐀺'), ('𐀼', '𐀽'),
+  ('𐀿', '𐁍'), ('𐁐', '𐁝'), ('𐂀', '𐃺'), ('𐅀', '𐅴'),
+  ('𐊀', '𐊜'), ('𐊠', '𐋐'), ('𐌀', '𐌟'), ('𐌭', '𐍊'),
+  ('𐍐', '𐍺'), ('𐎀', '𐎝'), ('𐎠', '𐏃'), ('𐏈', '𐏏'),
+  ('𐏑', '𐏕'), ('𐐀', '𐒝'), ('𐒰', '𐓓'), ('𐓘', '𐓻'),
+  ('𐔀', '𐔧'), ('𐔰', '𐕣'), ('𐘀', '𐜶'), ('𐝀', '𐝕'),
+  ('𐝠', '𐝧'), ('𐠀', '𐠅'), ('𐠈', '𐠈'), ('𐠊', '𐠵'),
+  ('𐠷', '𐠸'), ('𐠼', '𐠼'), ('𐠿', '𐡕'), ('𐡠', '𐡶'),
+  ('𐢀', '𐢞'), ('𐣠', '𐣲'), ('𐣴', '𐣵'), ('𐤀', '𐤕'),
+  ('𐤠', '𐤹'), ('𐦀', '𐦷'), ('𐦾', '𐦿'), ('𐨀', '𐨃'),
+  ('𐨅', '𐨆'), ('𐨌', '𐨓'), ('𐨕', '𐨗'), ('𐨙', '𐨳'),
+  ('𐩠', '𐩼'), ('𐪀', '𐪜'), ('𐫀', '𐫇'), ('𐫉', '𐫤'),
+  ('𐬀', '𐬵'), ('𐭀', '𐭕'), ('𐭠', '𐭲'), ('𐮀', '𐮑'),
+  ('𐰀', '𐱈'), ('𐲀', '𐲲'), ('𐳀', '𐳲'), ('𑀀', '𑁅'),
+  ('𑂂', '𑂸'), ('𑃐', '𑃨'), ('𑄀', '𑄲'), ('𑅐', '𑅲'),
+  ('𑅶', '𑅶'), ('𑆀', '𑆿'), ('𑇁', '𑇄'), ('𑇚', '𑇚'),
+  ('𑇜', '𑇜'), ('𑈀', '𑈑'), ('𑈓', '𑈴'), ('𑈷', '𑈷'),
+  ('𑈾', '𑈾'), ('𑊀', '𑊆'), ('𑊈', '𑊈'), ('𑊊', '𑊍'),
+  ('𑊏', '𑊝'), ('𑊟', '𑊨'), ('𑊰', '𑋨'), ('𑌀', '𑌃'),
+  ('𑌅', '𑌌'), ('𑌏', '𑌐'), ('𑌓', '𑌨'), ('𑌪', '𑌰'),
+  ('𑌲', '𑌳'), ('𑌵', '𑌹'), ('𑌽', '𑍄'), ('𑍇', '𑍈'),
+  ('𑍋', '𑍌'), ('𑍐', '𑍐'), ('𑍗', '𑍗'), ('𑍝', '𑍣'),
+  ('𑐀', '𑑁'), ('𑑃', '𑑅'), ('𑑇', '𑑊'), ('𑒀', '𑓁'),
+  ('𑓄', '𑓅'), ('𑓇', '𑓇'), ('𑖀', '𑖵'), ('𑖸', '𑖾'),
+  ('𑗘', '𑗝'), ('𑘀', '𑘾'), ('𑙀', '𑙀'), ('𑙄', '𑙄'),
+  ('𑚀', '𑚵'), ('𑜀', '𑜙'), ('𑜝', '𑜪'), ('𑢠', '𑣟'),
+  ('𑣿', '𑣿'), ('𑨀', '𑨲'), ('𑨵', '𑨾'), ('𑩐', '𑪃'),
+  ('𑪆', '𑪗'), ('𑫀', '𑫸'), ('𑰀', '𑰈'), ('𑰊', '𑰶'),
+  ('𑰸', '𑰾'), ('𑱀', '𑱀'), ('𑱲', '𑲏'), ('𑲒', '𑲧'),
+  ('𑲩', '𑲶'), ('𑴀', '𑴆'), ('𑴈', '𑴉'), ('𑴋', '𑴶'),
+  ('𑴺', '𑴺'), ('𑴼', '𑴽'), ('𑴿', '𑵁'), ('𑵃', '𑵃'),
+  ('𑵆', '𑵇'), ('𒀀', '𒎙'), ('𒐀', '𒑮'), ('𒒀', '𒕃'),
+  ('𓀀', '𓐮'), ('𔐀', '𔙆'), ('𖠀', '𖨸'), ('𖩀', '𖩞'),
+  ('𖫐', '𖫭'), ('𖬀', '𖬶'), ('𖭀', '𖭃'), ('𖭣', '𖭷'),
+  ('𖭽', '𖮏'), ('𖼀', '𖽄'), ('𖽐', '𖽾'), ('𖾓', '𖾟'),
+  ('𖿠', '𖿡'), ('𗀀', '𘟬'), ('𘠀', '𘫲'), ('𛀀', '𛄞'),
+  ('𛅰', '𛋻'), ('𛰀', '𛱪'), ('𛱰', '𛱼'), ('𛲀', '𛲈'),
+  ('𛲐', '𛲙'), ('𛲞', '𛲞'), ('𝐀', '𝑔'), ('𝑖', '𝒜'),
+  ('𝒞', '𝒟'), ('𝒢', '𝒢'), ('𝒥', '𝒦'), ('𝒩', '𝒬'),
+  ('𝒮', '𝒹'), ('𝒻', '𝒻'), ('𝒽', '𝓃'), ('𝓅', '𝔅'),
+  ('𝔇', '𝔊'), ('𝔍', '𝔔'), ('𝔖', '𝔜'), ('𝔞', '𝔹'),
+  ('𝔻', '𝔾'), ('𝕀', '𝕄'), ('𝕆', '𝕆'), ('𝕊', '𝕐'),
+  ('𝕒', '𝚥'), ('𝚨', '𝛀'), ('𝛂', '𝛚'), ('𝛜', '𝛺'),
+  ('𝛼', '𝜔'), ('𝜖', '𝜴'), ('𝜶', '𝝎'), ('𝝐', '𝝮'),
+  ('𝝰', '𝞈'), ('𝞊', '𝞨'), ('𝞪', '𝟂'), ('𝟄', '𝟋'),
+  ('𞀀', '𞀆'), ('𞀈', '𞀘'), ('𞀛', '𞀡'), ('𞀣', '𞀤'),
+  ('𞀦', '𞀪'), ('𞠀', '𞣄'), ('𞤀', '𞥃'), ('𞥇', '𞥇'),
+  ('𞸀', '𞸃'), ('𞸅', '𞸟'), ('𞸡', '𞸢'), ('𞸤', '𞸤'),
+  ('𞸧', '𞸧'), ('𞸩', '𞸲'), ('𞸴', '𞸷'), ('𞸹', '𞸹'),
+  ('𞸻', '𞸻'), ('𞹂', '𞹂'), ('𞹇', '𞹇'), ('𞹉', '𞹉'),
+  ('𞹋', '𞹋'), ('𞹍', '𞹏'), ('𞹑', '𞹒'), ('𞹔', '𞹔'),
+  ('𞹗', '𞹗'), ('𞹙', '𞹙'), ('𞹛', '𞹛'), ('𞹝', '𞹝'),
+  ('𞹟', '𞹟'), ('𞹡', '𞹢'), ('𞹤', '𞹤'), ('𞹧', '𞹪'),
+  ('𞹬', '𞹲'), ('𞹴', '𞹷'), ('𞹹', '𞹼'), ('𞹾', '𞹾'),
+  ('𞺀', '𞺉'), ('𞺋', '𞺛'), ('𞺡', '𞺣'), ('𞺥', '𞺩'),
+  ('𞺫', '𞺻'), ('🄰', '🅉'), ('🅐', '🅩'), ('🅰', '🆉'),
+  ('𠀀', '𪛖'), ('𪜀', '𫜴'), ('𫝀', '𫠝'), ('𫠠', '𬺡'),
+  ('𬺰', '𮯠'), ('丽', '𪘀'),
+];
+
+pub const BIDI_CONTROL: &'static [(char, char)] = &[
+  ('\u{61c}', '\u{61c}'), ('\u{200e}', '\u{200f}'), ('\u{202a}', '\u{202e}'),
+  ('\u{2066}', '\u{2069}'),
+];
+
+pub const CASE_IGNORABLE: &'static [(char, char)] = &[
+  ('\'', '\''), ('.', '.'), (':', ':'), ('^', '^'), ('`', '`'), ('¨', '¨'),
+  ('\u{ad}', '\u{ad}'), ('¯', '¯'), ('´', '´'), ('·', '¸'),
+  ('ʰ', 'ͯ'), ('ʹ', '͵'), ('ͺ', 'ͺ'), ('΄', '΅'), ('·', '·'),
+  ('҃', '҉'), ('ՙ', 'ՙ'), ('֑', 'ֽ'), ('ֿ', 'ֿ'), ('ׁ', 'ׂ'),
+  ('ׄ', 'ׅ'), ('ׇ', 'ׇ'), ('״', '״'), ('\u{600}', '\u{605}'),
+  ('ؐ', 'ؚ'), ('\u{61c}', '\u{61c}'), ('ـ', 'ـ'), ('ً', 'ٟ'),
+  ('ٰ', 'ٰ'), ('ۖ', '\u{6dd}'), ('۟', 'ۨ'), ('۪', 'ۭ'),
+  ('\u{70f}', '\u{70f}'), ('ܑ', 'ܑ'), ('ܰ', '݊'), ('ަ', 'ް'),
+  ('߫', 'ߵ'), ('ߺ', 'ߺ'), ('ࠖ', '࠭'), ('࡙', '࡛'), ('ࣔ', 'ं'),
+  ('ऺ', 'ऺ'), ('़', '़'), ('ु', 'ै'), ('्', '्'),
+  ('॑', 'ॗ'), ('ॢ', 'ॣ'), ('ॱ', 'ॱ'), ('ঁ', 'ঁ'),
+  ('়', '়'), ('ু', 'ৄ'), ('্', '্'), ('ৢ', 'ৣ'),
+  ('ਁ', 'ਂ'), ('਼', '਼'), ('ੁ', 'ੂ'), ('ੇ', 'ੈ'),
+  ('ੋ', '੍'), ('ੑ', 'ੑ'), ('ੰ', 'ੱ'), ('ੵ', 'ੵ'),
+  ('ઁ', 'ં'), ('઼', '઼'), ('ુ', 'ૅ'), ('ે', 'ૈ'),
+  ('્', '્'), ('ૢ', 'ૣ'), ('ૺ', '૿'), ('ଁ', 'ଁ'),
+  ('଼', '଼'), ('ି', 'ି'), ('ୁ', 'ୄ'), ('୍', '୍'),
+  ('ୖ', 'ୖ'), ('ୢ', 'ୣ'), ('ஂ', 'ஂ'), ('ீ', 'ீ'),
+  ('்', '்'), ('ఀ', 'ఀ'), ('ా', 'ీ'), ('ె', 'ై'),
+  ('ొ', '్'), ('ౕ', 'ౖ'), ('ౢ', 'ౣ'), ('ಁ', 'ಁ'),
+  ('಼', '಼'), ('ಿ', 'ಿ'), ('ೆ', 'ೆ'), ('ೌ', '್'),
+  ('ೢ', 'ೣ'), ('ഀ', 'ഁ'), ('഻', '഼'), ('ു', 'ൄ'),
+  ('്', '്'), ('ൢ', 'ൣ'), ('්', '්'), ('ි', 'ු'),
+  ('ූ', 'ූ'), ('ั', 'ั'), ('ิ', 'ฺ'), ('ๆ', '๎'),
+  ('ັ', 'ັ'), ('ິ', 'ູ'), ('ົ', 'ຼ'), ('ໆ', 'ໆ'),
+  ('່', 'ໍ'), ('༘', '༙'), ('༵', '༵'), ('༷', '༷'),
+  ('༹', '༹'), ('ཱ', 'ཾ'), ('ྀ', '྄'), ('྆', '྇'),
+  ('ྍ', 'ྗ'), ('ྙ', 'ྼ'), ('࿆', '࿆'), ('ိ', 'ူ'),
+  ('ဲ', '့'), ('္', '်'), ('ွ', 'ှ'), ('ၘ', 'ၙ'),
+  ('ၞ', 'ၠ'), ('ၱ', 'ၴ'), ('ႂ', 'ႂ'), ('ႅ', 'ႆ'),
+  ('ႍ', 'ႍ'), ('ႝ', 'ႝ'), ('ჼ', 'ჼ'), ('፝', '፟'),
+  ('ᜒ', '᜔'), ('ᜲ', '᜴'), ('ᝒ', 'ᝓ'), ('ᝲ', 'ᝳ'),
+  ('឴', '឵'), ('ិ', 'ួ'), ('ំ', 'ំ'), ('៉', '៓'),
+  ('ៗ', 'ៗ'), ('៝', '៝'), ('᠋', '\u{180e}'), ('ᡃ', 'ᡃ'),
+  ('ᢅ', 'ᢆ'), ('ᢩ', 'ᢩ'), ('ᤠ', 'ᤢ'), ('ᤧ', 'ᤨ'),
+  ('ᤲ', 'ᤲ'), ('᤹', '᤻'), ('ᨗ', 'ᨘ'), ('ᨛ', 'ᨛ'),
+  ('ᩖ', 'ᩖ'), ('ᩘ', 'ᩞ'), ('᩠', '᩠'), ('ᩢ', 'ᩢ'),
+  ('ᩥ', 'ᩬ'), ('ᩳ', '᩼'), ('᩿', '᩿'), ('ᪧ', 'ᪧ'),
+  ('᪰', '᪾'), ('ᬀ', 'ᬃ'), ('᬴', '᬴'), ('ᬶ', 'ᬺ'),
+  ('ᬼ', 'ᬼ'), ('ᭂ', 'ᭂ'), ('᭫', '᭳'), ('ᮀ', 'ᮁ'),
+  ('ᮢ', 'ᮥ'), ('ᮨ', 'ᮩ'), ('᮫', 'ᮭ'), ('᯦', '᯦'),
+  ('ᯨ', 'ᯩ'), ('ᯭ', 'ᯭ'), ('ᯯ', 'ᯱ'), ('ᰬ', 'ᰳ'),
+  ('ᰶ', '᰷'), ('ᱸ', 'ᱽ'), ('᳐', '᳒'), ('᳔', '᳠'),
+  ('᳢', '᳨'), ('᳭', '᳭'), ('᳴', '᳴'), ('᳸', '᳹'),
+  ('ᴬ', 'ᵪ'), ('ᵸ', 'ᵸ'), ('ᶛ', '᷹'), ('᷻', '᷿'),
+  ('᾽', '᾽'), ('᾿', '῁'), ('῍', '῏'), ('῝', '῟'),
+  ('῭', '`'), ('´', '῾'), ('\u{200b}', '\u{200f}'), ('‘', '’'),
+  ('․', '․'), ('‧', '‧'), ('\u{202a}', '\u{202e}'),
+  ('\u{2060}', '\u{2064}'), ('\u{2066}', '\u{206f}'), ('ⁱ', 'ⁱ'),
+  ('ⁿ', 'ⁿ'), ('ₐ', 'ₜ'), ('⃐', '⃰'), ('ⱼ', 'ⱽ'),
+  ('⳯', '⳱'), ('ⵯ', 'ⵯ'), ('⵿', '⵿'), ('ⷠ', 'ⷿ'),
+  ('ⸯ', 'ⸯ'), ('々', '々'), ('〪', '〭'), ('〱', '〵'),
+  ('〻', '〻'), ('゙', 'ゞ'), ('ー', 'ヾ'), ('ꀕ', 'ꀕ'),
+  ('ꓸ', 'ꓽ'), ('ꘌ', 'ꘌ'), ('꙯', '꙲'), ('ꙴ', '꙽'),
+  ('ꙿ', 'ꙿ'), ('ꚜ', 'ꚟ'), ('꛰', '꛱'), ('꜀', '꜡'),
+  ('ꝰ', 'ꝰ'), ('ꞈ', '꞊'), ('ꟸ', 'ꟹ'), ('ꠂ', 'ꠂ'),
+  ('꠆', '꠆'), ('ꠋ', 'ꠋ'), ('ꠥ', 'ꠦ'), ('꣄', 'ꣅ'),
+  ('꣠', '꣱'), ('ꤦ', '꤭'), ('ꥇ', 'ꥑ'), ('ꦀ', 'ꦂ'),
+  ('꦳', '꦳'), ('ꦶ', 'ꦹ'), ('ꦼ', 'ꦼ'), ('ꧏ', 'ꧏ'),
+  ('ꧥ', 'ꧦ'), ('ꨩ', 'ꨮ'), ('ꨱ', 'ꨲ'), ('ꨵ', 'ꨶ'),
+  ('ꩃ', 'ꩃ'), ('ꩌ', 'ꩌ'), ('ꩰ', 'ꩰ'), ('ꩼ', 'ꩼ'),
+  ('ꪰ', 'ꪰ'), ('ꪲ', 'ꪴ'), ('ꪷ', 'ꪸ'), ('ꪾ', '꪿'),
+  ('꫁', '꫁'), ('ꫝ', 'ꫝ'), ('ꫬ', 'ꫭ'), ('ꫳ', 'ꫴ'),
+  ('꫶', '꫶'), ('꭛', 'ꭟ'), ('ꯥ', 'ꯥ'), ('ꯨ', 'ꯨ'),
+  ('꯭', '꯭'), ('ﬞ', 'ﬞ'), ('﮲', '﯁'), ('︀', '️'),
+  ('︓', '︓'), ('︠', '︯'), ('﹒', '﹒'), ('﹕', '﹕'),
+  ('\u{feff}', '\u{feff}'), (''', '''), ('.', '.'), (':', ':'),
+  ('^', '^'), ('`', '`'), ('ー', 'ー'), ('゙', '゚'),
+  (' ̄', ' ̄'), ('\u{fff9}', '\u{fffb}'), ('𐇽', '𐇽'),
+  ('𐋠', '𐋠'), ('𐍶', '𐍺'), ('𐨁', '𐨃'), ('𐨅', '𐨆'),
+  ('𐨌', '𐨏'), ('𐨸', '𐨺'), ('𐨿', '𐨿'), ('𐫥', '𐫦'),
+  ('𑀁', '𑀁'), ('𑀸', '𑁆'), ('𑁿', '𑂁'), ('𑂳', '𑂶'),
+  ('𑂹', '𑂺'), ('\u{110bd}', '\u{110bd}'), ('𑄀', '𑄂'),
+  ('𑄧', '𑄫'), ('𑄭', '𑄴'), ('𑅳', '𑅳'), ('𑆀', '𑆁'),
+  ('𑆶', '𑆾'), ('𑇊', '𑇌'), ('𑈯', '𑈱'), ('𑈴', '𑈴'),
+  ('𑈶', '𑈷'), ('𑈾', '𑈾'), ('𑋟', '𑋟'), ('𑋣', '𑋪'),
+  ('𑌀', '𑌁'), ('𑌼', '𑌼'), ('𑍀', '𑍀'), ('𑍦', '𑍬'),
+  ('𑍰', '𑍴'), ('𑐸', '𑐿'), ('𑑂', '𑑄'), ('𑑆', '𑑆'),
+  ('𑒳', '𑒸'), ('𑒺', '𑒺'), ('𑒿', '𑓀'), ('𑓂', '𑓃'),
+  ('𑖲', '𑖵'), ('𑖼', '𑖽'), ('𑖿', '𑗀'), ('𑗜', '𑗝'),
+  ('𑘳', '𑘺'), ('𑘽', '𑘽'), ('𑘿', '𑙀'), ('𑚫', '𑚫'),
+  ('𑚭', '𑚭'), ('𑚰', '𑚵'), ('𑚷', '𑚷'), ('𑜝', '𑜟'),
+  ('𑜢', '𑜥'), ('𑜧', '𑜫'), ('𑨁', '𑨆'), ('𑨉', '𑨊'),
+  ('𑨳', '𑨸'), ('𑨻', '𑨾'), ('𑩇', '𑩇'), ('𑩑', '𑩖'),
+  ('𑩙', '𑩛'), ('𑪊', '𑪖'), ('𑪘', '𑪙'), ('𑰰', '𑰶'),
+  ('𑰸', '𑰽'), ('𑰿', '𑰿'), ('𑲒', '𑲧'), ('𑲪', '𑲰'),
+  ('𑲲', '𑲳'), ('𑲵', '𑲶'), ('𑴱', '𑴶'), ('𑴺', '𑴺'),
+  ('𑴼', '𑴽'), ('𑴿', '𑵅'), ('𑵇', '𑵇'), ('𖫰', '𖫴'),
+  ('𖬰', '𖬶'), ('𖭀', '𖭃'), ('𖾏', '𖾟'), ('𖿠', '𖿡'),
+  ('𛲝', '𛲞'), ('\u{1bca0}', '\u{1bca3}'), ('𝅧', '𝅩'),
+  ('\u{1d173}', '𝆂'), ('𝆅', '𝆋'), ('𝆪', '𝆭'), ('𝉂', '𝉄'),
+  ('𝨀', '𝨶'), ('𝨻', '𝩬'), ('𝩵', '𝩵'), ('𝪄', '𝪄'),
+  ('𝪛', '𝪟'), ('𝪡', '𝪯'), ('𞀀', '𞀆'), ('𞀈', '𞀘'),
+  ('𞀛', '𞀡'), ('𞀣', '𞀤'), ('𞀦', '𞀪'), ('𞣐', '𞣖'),
+  ('𞥄', '𞥊'), ('🏻', '🏿'), ('\u{e0001}', '\u{e0001}'),
+  ('\u{e0020}', '\u{e007f}'), ('󠄀', '󠇯'),
+];
+
+pub const CASED: &'static [(char, char)] = &[
+  ('A', 'Z'), ('a', 'z'), ('ª', 'ª'), ('µ', 'µ'), ('º', 'º'),
+  ('À', 'Ö'), ('Ø', 'ö'), ('ø', 'ƺ'), ('Ƽ', 'ƿ'), ('DŽ', 'ʓ'),
+  ('ʕ', 'ʸ'), ('ˀ', 'ˁ'), ('ˠ', 'ˤ'), ('ͅ', 'ͅ'), ('Ͱ', 'ͳ'),
+  ('Ͷ', 'ͷ'), ('ͺ', 'ͽ'), ('Ϳ', 'Ϳ'), ('Ά', 'Ά'), ('Έ', 'Ί'),
+  ('Ό', 'Ό'), ('Ύ', 'Ρ'), ('Σ', 'ϵ'), ('Ϸ', 'ҁ'), ('Ҋ', 'ԯ'),
+  ('Ա', 'Ֆ'), ('ա', 'և'), ('Ⴀ', 'Ⴥ'), ('Ⴧ', 'Ⴧ'), ('Ⴭ', 'Ⴭ'),
+  ('Ꭰ', 'Ᏽ'), ('ᏸ', 'ᏽ'), ('ᲀ', 'ᲈ'), ('ᴀ', 'ᶿ'),
+  ('Ḁ', 'ἕ'), ('Ἐ', 'Ἕ'), ('ἠ', 'ὅ'), ('Ὀ', 'Ὅ'),
+  ('ὐ', 'ὗ'), ('Ὑ', 'Ὑ'), ('Ὓ', 'Ὓ'), ('Ὕ', 'Ὕ'),
+  ('Ὗ', 'ώ'), ('ᾀ', 'ᾴ'), ('ᾶ', 'ᾼ'), ('ι', 'ι'),
+  ('ῂ', 'ῄ'), ('ῆ', 'ῌ'), ('ῐ', 'ΐ'), ('ῖ', 'Ί'),
+  ('ῠ', 'Ῥ'), ('ῲ', 'ῴ'), ('ῶ', 'ῼ'), ('ⁱ', 'ⁱ'),
+  ('ⁿ', 'ⁿ'), ('ₐ', 'ₜ'), ('ℂ', 'ℂ'), ('ℇ', 'ℇ'),
+  ('ℊ', 'ℓ'), ('ℕ', 'ℕ'), ('ℙ', 'ℝ'), ('ℤ', 'ℤ'),
+  ('Ω', 'Ω'), ('ℨ', 'ℨ'), ('K', 'ℭ'), ('ℯ', 'ℴ'),
+  ('ℹ', 'ℹ'), ('ℼ', 'ℿ'), ('ⅅ', 'ⅉ'), ('ⅎ', 'ⅎ'),
+  ('Ⅰ', 'ⅿ'), ('Ↄ', 'ↄ'), ('Ⓐ', 'ⓩ'), ('Ⰰ', 'Ⱞ'),
+  ('ⰰ', 'ⱞ'), ('Ⱡ', 'ⳤ'), ('Ⳬ', 'ⳮ'), ('Ⳳ', 'ⳳ'),
+  ('ⴀ', 'ⴥ'), ('ⴧ', 'ⴧ'), ('ⴭ', 'ⴭ'), ('Ꙁ', 'ꙭ'),
+  ('Ꚁ', 'ꚝ'), ('Ꜣ', 'ꞇ'), ('Ꞌ', 'ꞎ'), ('Ꞑ', 'Ɪ'),
+  ('Ʞ', 'ꞷ'), ('ꟸ', 'ꟺ'), ('ꬰ', 'ꭚ'), ('ꭜ', 'ꭥ'),
+  ('ꭰ', 'ꮿ'), ('ff', 'st'), ('ﬓ', 'ﬗ'), ('A', 'Z'),
+  ('a', 'z'), ('𐐀', '𐑏'), ('𐒰', '𐓓'), ('𐓘', '𐓻'),
+  ('𐲀', '𐲲'), ('𐳀', '𐳲'), ('𑢠', '𑣟'), ('𝐀', '𝑔'),
+  ('𝑖', '𝒜'), ('𝒞', '𝒟'), ('𝒢', '𝒢'), ('𝒥', '𝒦'),
+  ('𝒩', '𝒬'), ('𝒮', '𝒹'), ('𝒻', '𝒻'), ('𝒽', '𝓃'),
+  ('𝓅', '𝔅'), ('𝔇', '𝔊'), ('𝔍', '𝔔'), ('𝔖', '𝔜'),
+  ('𝔞', '𝔹'), ('𝔻', '𝔾'), ('𝕀', '𝕄'), ('𝕆', '𝕆'),
+  ('𝕊', '𝕐'), ('𝕒', '𝚥'), ('𝚨', '𝛀'), ('𝛂', '𝛚'),
+  ('𝛜', '𝛺'), ('𝛼', '𝜔'), ('𝜖', '𝜴'), ('𝜶', '𝝎'),
+  ('𝝐', '𝝮'), ('𝝰', '𝞈'), ('𝞊', '𝞨'), ('𝞪', '𝟂'),
+  ('𝟄', '𝟋'), ('𞤀', '𞥃'), ('🄰', '🅉'), ('🅐', '🅩'),
+  ('🅰', '🆉'),
+];
+
+pub const CHANGES_WHEN_CASEFOLDED: &'static [(char, char)] = &[
+  ('A', 'Z'), ('µ', 'µ'), ('À', 'Ö'), ('Ø', 'ß'), ('Ā', 'Ā'),
+  ('Ă', 'Ă'), ('Ą', 'Ą'), ('Ć', 'Ć'), ('Ĉ', 'Ĉ'), ('Ċ', 'Ċ'),
+  ('Č', 'Č'), ('Ď', 'Ď'), ('Đ', 'Đ'), ('Ē', 'Ē'), ('Ĕ', 'Ĕ'),
+  ('Ė', 'Ė'), ('Ę', 'Ę'), ('Ě', 'Ě'), ('Ĝ', 'Ĝ'), ('Ğ', 'Ğ'),
+  ('Ġ', 'Ġ'), ('Ģ', 'Ģ'), ('Ĥ', 'Ĥ'), ('Ħ', 'Ħ'), ('Ĩ', 'Ĩ'),
+  ('Ī', 'Ī'), ('Ĭ', 'Ĭ'), ('Į', 'Į'), ('İ', 'İ'), ('IJ', 'IJ'),
+  ('Ĵ', 'Ĵ'), ('Ķ', 'Ķ'), ('Ĺ', 'Ĺ'), ('Ļ', 'Ļ'), ('Ľ', 'Ľ'),
+  ('Ŀ', 'Ŀ'), ('Ł', 'Ł'), ('Ń', 'Ń'), ('Ņ', 'Ņ'), ('Ň', 'Ň'),
+  ('ʼn', 'Ŋ'), ('Ō', 'Ō'), ('Ŏ', 'Ŏ'), ('Ő', 'Ő'), ('Œ', 'Œ'),
+  ('Ŕ', 'Ŕ'), ('Ŗ', 'Ŗ'), ('Ř', 'Ř'), ('Ś', 'Ś'), ('Ŝ', 'Ŝ'),
+  ('Ş', 'Ş'), ('Š', 'Š'), ('Ţ', 'Ţ'), ('Ť', 'Ť'), ('Ŧ', 'Ŧ'),
+  ('Ũ', 'Ũ'), ('Ū', 'Ū'), ('Ŭ', 'Ŭ'), ('Ů', 'Ů'), ('Ű', 'Ű'),
+  ('Ų', 'Ų'), ('Ŵ', 'Ŵ'), ('Ŷ', 'Ŷ'), ('Ÿ', 'Ź'), ('Ż', 'Ż'),
+  ('Ž', 'Ž'), ('ſ', 'ſ'), ('Ɓ', 'Ƃ'), ('Ƅ', 'Ƅ'), ('Ɔ', 'Ƈ'),
+  ('Ɖ', 'Ƌ'), ('Ǝ', 'Ƒ'), ('Ɠ', 'Ɣ'), ('Ɩ', 'Ƙ'), ('Ɯ', 'Ɲ'),
+  ('Ɵ', 'Ơ'), ('Ƣ', 'Ƣ'), ('Ƥ', 'Ƥ'), ('Ʀ', 'Ƨ'), ('Ʃ', 'Ʃ'),
+  ('Ƭ', 'Ƭ'), ('Ʈ', 'Ư'), ('Ʊ', 'Ƴ'), ('Ƶ', 'Ƶ'), ('Ʒ', 'Ƹ'),
+  ('Ƽ', 'Ƽ'), ('DŽ', 'Dž'), ('LJ', 'Lj'), ('NJ', 'Nj'), ('Ǎ', 'Ǎ'),
+  ('Ǐ', 'Ǐ'), ('Ǒ', 'Ǒ'), ('Ǔ', 'Ǔ'), ('Ǖ', 'Ǖ'), ('Ǘ', 'Ǘ'),
+  ('Ǚ', 'Ǚ'), ('Ǜ', 'Ǜ'), ('Ǟ', 'Ǟ'), ('Ǡ', 'Ǡ'), ('Ǣ', 'Ǣ'),
+  ('Ǥ', 'Ǥ'), ('Ǧ', 'Ǧ'), ('Ǩ', 'Ǩ'), ('Ǫ', 'Ǫ'), ('Ǭ', 'Ǭ'),
+  ('Ǯ', 'Ǯ'), ('DZ', 'Dz'), ('Ǵ', 'Ǵ'), ('Ƕ', 'Ǹ'), ('Ǻ', 'Ǻ'),
+  ('Ǽ', 'Ǽ'), ('Ǿ', 'Ǿ'), ('Ȁ', 'Ȁ'), ('Ȃ', 'Ȃ'), ('Ȅ', 'Ȅ'),
+  ('Ȇ', 'Ȇ'), ('Ȉ', 'Ȉ'), ('Ȋ', 'Ȋ'), ('Ȍ', 'Ȍ'), ('Ȏ', 'Ȏ'),
+  ('Ȑ', 'Ȑ'), ('Ȓ', 'Ȓ'), ('Ȕ', 'Ȕ'), ('Ȗ', 'Ȗ'), ('Ș', 'Ș'),
+  ('Ț', 'Ț'), ('Ȝ', 'Ȝ'), ('Ȟ', 'Ȟ'), ('Ƞ', 'Ƞ'), ('Ȣ', 'Ȣ'),
+  ('Ȥ', 'Ȥ'), ('Ȧ', 'Ȧ'), ('Ȩ', 'Ȩ'), ('Ȫ', 'Ȫ'), ('Ȭ', 'Ȭ'),
+  ('Ȯ', 'Ȯ'), ('Ȱ', 'Ȱ'), ('Ȳ', 'Ȳ'), ('Ⱥ', 'Ȼ'), ('Ƚ', 'Ⱦ'),
+  ('Ɂ', 'Ɂ'), ('Ƀ', 'Ɇ'), ('Ɉ', 'Ɉ'), ('Ɋ', 'Ɋ'), ('Ɍ', 'Ɍ'),
+  ('Ɏ', 'Ɏ'), ('ͅ', 'ͅ'), ('Ͱ', 'Ͱ'), ('Ͳ', 'Ͳ'), ('Ͷ', 'Ͷ'),
+  ('Ϳ', 'Ϳ'), ('Ά', 'Ά'), ('Έ', 'Ί'), ('Ό', 'Ό'), ('Ύ', 'Ώ'),
+  ('Α', 'Ρ'), ('Σ', 'Ϋ'), ('ς', 'ς'), ('Ϗ', 'ϑ'), ('ϕ', 'ϖ'),
+  ('Ϙ', 'Ϙ'), ('Ϛ', 'Ϛ'), ('Ϝ', 'Ϝ'), ('Ϟ', 'Ϟ'), ('Ϡ', 'Ϡ'),
+  ('Ϣ', 'Ϣ'), ('Ϥ', 'Ϥ'), ('Ϧ', 'Ϧ'), ('Ϩ', 'Ϩ'), ('Ϫ', 'Ϫ'),
+  ('Ϭ', 'Ϭ'), ('Ϯ', 'Ϯ'), ('ϰ', 'ϱ'), ('ϴ', 'ϵ'), ('Ϸ', 'Ϸ'),
+  ('Ϲ', 'Ϻ'), ('Ͻ', 'Я'), ('Ѡ', 'Ѡ'), ('Ѣ', 'Ѣ'), ('Ѥ', 'Ѥ'),
+  ('Ѧ', 'Ѧ'), ('Ѩ', 'Ѩ'), ('Ѫ', 'Ѫ'), ('Ѭ', 'Ѭ'), ('Ѯ', 'Ѯ'),
+  ('Ѱ', 'Ѱ'), ('Ѳ', 'Ѳ'), ('Ѵ', 'Ѵ'), ('Ѷ', 'Ѷ'), ('Ѹ', 'Ѹ'),
+  ('Ѻ', 'Ѻ'), ('Ѽ', 'Ѽ'), ('Ѿ', 'Ѿ'), ('Ҁ', 'Ҁ'), ('Ҋ', 'Ҋ'),
+  ('Ҍ', 'Ҍ'), ('Ҏ', 'Ҏ'), ('Ґ', 'Ґ'), ('Ғ', 'Ғ'), ('Ҕ', 'Ҕ'),
+  ('Җ', 'Җ'), ('Ҙ', 'Ҙ'), ('Қ', 'Қ'), ('Ҝ', 'Ҝ'), ('Ҟ', 'Ҟ'),
+  ('Ҡ', 'Ҡ'), ('Ң', 'Ң'), ('Ҥ', 'Ҥ'), ('Ҧ', 'Ҧ'), ('Ҩ', 'Ҩ'),
+  ('Ҫ', 'Ҫ'), ('Ҭ', 'Ҭ'), ('Ү', 'Ү'), ('Ұ', 'Ұ'), ('Ҳ', 'Ҳ'),
+  ('Ҵ', 'Ҵ'), ('Ҷ', 'Ҷ'), ('Ҹ', 'Ҹ'), ('Һ', 'Һ'), ('Ҽ', 'Ҽ'),
+  ('Ҿ', 'Ҿ'), ('Ӏ', 'Ӂ'), ('Ӄ', 'Ӄ'), ('Ӆ', 'Ӆ'), ('Ӈ', 'Ӈ'),
+  ('Ӊ', 'Ӊ'), ('Ӌ', 'Ӌ'), ('Ӎ', 'Ӎ'), ('Ӑ', 'Ӑ'), ('Ӓ', 'Ӓ'),
+  ('Ӕ', 'Ӕ'), ('Ӗ', 'Ӗ'), ('Ә', 'Ә'), ('Ӛ', 'Ӛ'), ('Ӝ', 'Ӝ'),
+  ('Ӟ', 'Ӟ'), ('Ӡ', 'Ӡ'), ('Ӣ', 'Ӣ'), ('Ӥ', 'Ӥ'), ('Ӧ', 'Ӧ'),
+  ('Ө', 'Ө'), ('Ӫ', 'Ӫ'), ('Ӭ', 'Ӭ'), ('Ӯ', 'Ӯ'), ('Ӱ', 'Ӱ'),
+  ('Ӳ', 'Ӳ'), ('Ӵ', 'Ӵ'), ('Ӷ', 'Ӷ'), ('Ӹ', 'Ӹ'), ('Ӻ', 'Ӻ'),
+  ('Ӽ', 'Ӽ'), ('Ӿ', 'Ӿ'), ('Ԁ', 'Ԁ'), ('Ԃ', 'Ԃ'), ('Ԅ', 'Ԅ'),
+  ('Ԇ', 'Ԇ'), ('Ԉ', 'Ԉ'), ('Ԋ', 'Ԋ'), ('Ԍ', 'Ԍ'), ('Ԏ', 'Ԏ'),
+  ('Ԑ', 'Ԑ'), ('Ԓ', 'Ԓ'), ('Ԕ', 'Ԕ'), ('Ԗ', 'Ԗ'), ('Ԙ', 'Ԙ'),
+  ('Ԛ', 'Ԛ'), ('Ԝ', 'Ԝ'), ('Ԟ', 'Ԟ'), ('Ԡ', 'Ԡ'), ('Ԣ', 'Ԣ'),
+  ('Ԥ', 'Ԥ'), ('Ԧ', 'Ԧ'), ('Ԩ', 'Ԩ'), ('Ԫ', 'Ԫ'), ('Ԭ', 'Ԭ'),
+  ('Ԯ', 'Ԯ'), ('Ա', 'Ֆ'), ('և', 'և'), ('Ⴀ', 'Ⴥ'), ('Ⴧ', 'Ⴧ'),
+  ('Ⴭ', 'Ⴭ'), ('ᏸ', 'ᏽ'), ('ᲀ', 'ᲈ'), ('Ḁ', 'Ḁ'),
+  ('Ḃ', 'Ḃ'), ('Ḅ', 'Ḅ'), ('Ḇ', 'Ḇ'), ('Ḉ', 'Ḉ'),
+  ('Ḋ', 'Ḋ'), ('Ḍ', 'Ḍ'), ('Ḏ', 'Ḏ'), ('Ḑ', 'Ḑ'),
+  ('Ḓ', 'Ḓ'), ('Ḕ', 'Ḕ'), ('Ḗ', 'Ḗ'), ('Ḙ', 'Ḙ'),
+  ('Ḛ', 'Ḛ'), ('Ḝ', 'Ḝ'), ('Ḟ', 'Ḟ'), ('Ḡ', 'Ḡ'),
+  ('Ḣ', 'Ḣ'), ('Ḥ', 'Ḥ'), ('Ḧ', 'Ḧ'), ('Ḩ', 'Ḩ'),
+  ('Ḫ', 'Ḫ'), ('Ḭ', 'Ḭ'), ('Ḯ', 'Ḯ'), ('Ḱ', 'Ḱ'),
+  ('Ḳ', 'Ḳ'), ('Ḵ', 'Ḵ'), ('Ḷ', 'Ḷ'), ('Ḹ', 'Ḹ'),
+  ('Ḻ', 'Ḻ'), ('Ḽ', 'Ḽ'), ('Ḿ', 'Ḿ'), ('Ṁ', 'Ṁ'),
+  ('Ṃ', 'Ṃ'), ('Ṅ', 'Ṅ'), ('Ṇ', 'Ṇ'), ('Ṉ', 'Ṉ'),
+  ('Ṋ', 'Ṋ'), ('Ṍ', 'Ṍ'), ('Ṏ', 'Ṏ'), ('Ṑ', 'Ṑ'),
+  ('Ṓ', 'Ṓ'), ('Ṕ', 'Ṕ'), ('Ṗ', 'Ṗ'), ('Ṙ', 'Ṙ'),
+  ('Ṛ', 'Ṛ'), ('Ṝ', 'Ṝ'), ('Ṟ', 'Ṟ'), ('Ṡ', 'Ṡ'),
+  ('Ṣ', 'Ṣ'), ('Ṥ', 'Ṥ'), ('Ṧ', 'Ṧ'), ('Ṩ', 'Ṩ'),
+  ('Ṫ', 'Ṫ'), ('Ṭ', 'Ṭ'), ('Ṯ', 'Ṯ'), ('Ṱ', 'Ṱ'),
+  ('Ṳ', 'Ṳ'), ('Ṵ', 'Ṵ'), ('Ṷ', 'Ṷ'), ('Ṹ', 'Ṹ'),
+  ('Ṻ', 'Ṻ'), ('Ṽ', 'Ṽ'), ('Ṿ', 'Ṿ'), ('Ẁ', 'Ẁ'),
+  ('Ẃ', 'Ẃ'), ('Ẅ', 'Ẅ'), ('Ẇ', 'Ẇ'), ('Ẉ', 'Ẉ'),
+  ('Ẋ', 'Ẋ'), ('Ẍ', 'Ẍ'), ('Ẏ', 'Ẏ'), ('Ẑ', 'Ẑ'),
+  ('Ẓ', 'Ẓ'), ('Ẕ', 'Ẕ'), ('ẚ', 'ẛ'), ('ẞ', 'ẞ'),
+  ('Ạ', 'Ạ'), ('Ả', 'Ả'), ('Ấ', 'Ấ'), ('Ầ', 'Ầ'),
+  ('Ẩ', 'Ẩ'), ('Ẫ', 'Ẫ'), ('Ậ', 'Ậ'), ('Ắ', 'Ắ'),
+  ('Ằ', 'Ằ'), ('Ẳ', 'Ẳ'), ('Ẵ', 'Ẵ'), ('Ặ', 'Ặ'),
+  ('Ẹ', 'Ẹ'), ('Ẻ', 'Ẻ'), ('Ẽ', 'Ẽ'), ('Ế', 'Ế'),
+  ('Ề', 'Ề'), ('Ể', 'Ể'), ('Ễ', 'Ễ'), ('Ệ', 'Ệ'),
+  ('Ỉ', 'Ỉ'), ('Ị', 'Ị'), ('Ọ', 'Ọ'), ('Ỏ', 'Ỏ'),
+  ('Ố', 'Ố'), ('Ồ', 'Ồ'), ('Ổ', 'Ổ'), ('Ỗ', 'Ỗ'),
+  ('Ộ', 'Ộ'), ('Ớ', 'Ớ'), ('Ờ', 'Ờ'), ('Ở', 'Ở'),
+  ('Ỡ', 'Ỡ'), ('Ợ', 'Ợ'), ('Ụ', 'Ụ'), ('Ủ', 'Ủ'),
+  ('Ứ', 'Ứ'), ('Ừ', 'Ừ'), ('Ử', 'Ử'), ('Ữ', 'Ữ'),
+  ('Ự', 'Ự'), ('Ỳ', 'Ỳ'), ('Ỵ', 'Ỵ'), ('Ỷ', 'Ỷ'),
+  ('Ỹ', 'Ỹ'), ('Ỻ', 'Ỻ'), ('Ỽ', 'Ỽ'), ('Ỿ', 'Ỿ'),
+  ('Ἀ', 'Ἇ'), ('Ἐ', 'Ἕ'), ('Ἠ', 'Ἧ'), ('Ἰ', 'Ἷ'),
+  ('Ὀ', 'Ὅ'), ('Ὑ', 'Ὑ'), ('Ὓ', 'Ὓ'), ('Ὕ', 'Ὕ'),
+  ('Ὗ', 'Ὗ'), ('Ὠ', 'Ὧ'), ('ᾀ', 'ᾯ'), ('ᾲ', 'ᾴ'),
+  ('ᾷ', 'ᾼ'), ('ῂ', 'ῄ'), ('ῇ', 'ῌ'), ('Ῐ', 'Ί'),
+  ('Ῠ', 'Ῥ'), ('ῲ', 'ῴ'), ('ῷ', 'ῼ'), ('Ω', 'Ω'),
+  ('K', 'Å'), ('Ⅎ', 'Ⅎ'), ('Ⅰ', 'Ⅿ'), ('Ↄ', 'Ↄ'),
+  ('Ⓐ', 'Ⓩ'), ('Ⰰ', 'Ⱞ'), ('Ⱡ', 'Ⱡ'), ('Ɫ', 'Ɽ'),
+  ('Ⱨ', 'Ⱨ'), ('Ⱪ', 'Ⱪ'), ('Ⱬ', 'Ⱬ'), ('Ɑ', 'Ɒ'),
+  ('Ⱳ', 'Ⱳ'), ('Ⱶ', 'Ⱶ'), ('Ȿ', 'Ⲁ'), ('Ⲃ', 'Ⲃ'),
+  ('Ⲅ', 'Ⲅ'), ('Ⲇ', 'Ⲇ'), ('Ⲉ', 'Ⲉ'), ('Ⲋ', 'Ⲋ'),
+  ('Ⲍ', 'Ⲍ'), ('Ⲏ', 'Ⲏ'), ('Ⲑ', 'Ⲑ'), ('Ⲓ', 'Ⲓ'),
+  ('Ⲕ', 'Ⲕ'), ('Ⲗ', 'Ⲗ'), ('Ⲙ', 'Ⲙ'), ('Ⲛ', 'Ⲛ'),
+  ('Ⲝ', 'Ⲝ'), ('Ⲟ', 'Ⲟ'), ('Ⲡ', 'Ⲡ'), ('Ⲣ', 'Ⲣ'),
+  ('Ⲥ', 'Ⲥ'), ('Ⲧ', 'Ⲧ'), ('Ⲩ', 'Ⲩ'), ('Ⲫ', 'Ⲫ'),
+  ('Ⲭ', 'Ⲭ'), ('Ⲯ', 'Ⲯ'), ('Ⲱ', 'Ⲱ'), ('Ⲳ', 'Ⲳ'),
+  ('Ⲵ', 'Ⲵ'), ('Ⲷ', 'Ⲷ'), ('Ⲹ', 'Ⲹ'), ('Ⲻ', 'Ⲻ'),
+  ('Ⲽ', 'Ⲽ'), ('Ⲿ', 'Ⲿ'), ('Ⳁ', 'Ⳁ'), ('Ⳃ', 'Ⳃ'),
+  ('Ⳅ', 'Ⳅ'), ('Ⳇ', 'Ⳇ'), ('Ⳉ', 'Ⳉ'), ('Ⳋ', 'Ⳋ'),
+  ('Ⳍ', 'Ⳍ'), ('Ⳏ', 'Ⳏ'), ('Ⳑ', 'Ⳑ'), ('Ⳓ', 'Ⳓ'),
+  ('Ⳕ', 'Ⳕ'), ('Ⳗ', 'Ⳗ'), ('Ⳙ', 'Ⳙ'), ('Ⳛ', 'Ⳛ'),
+  ('Ⳝ', 'Ⳝ'), ('Ⳟ', 'Ⳟ'), ('Ⳡ', 'Ⳡ'), ('Ⳣ', 'Ⳣ'),
+  ('Ⳬ', 'Ⳬ'), ('Ⳮ', 'Ⳮ'), ('Ⳳ', 'Ⳳ'), ('Ꙁ', 'Ꙁ'),
+  ('Ꙃ', 'Ꙃ'), ('Ꙅ', 'Ꙅ'), ('Ꙇ', 'Ꙇ'), ('Ꙉ', 'Ꙉ'),
+  ('Ꙋ', 'Ꙋ'), ('Ꙍ', 'Ꙍ'), ('Ꙏ', 'Ꙏ'), ('Ꙑ', 'Ꙑ'),
+  ('Ꙓ', 'Ꙓ'), ('Ꙕ', 'Ꙕ'), ('Ꙗ', 'Ꙗ'), ('Ꙙ', 'Ꙙ'),
+  ('Ꙛ', 'Ꙛ'), ('Ꙝ', 'Ꙝ'), ('Ꙟ', 'Ꙟ'), ('Ꙡ', 'Ꙡ'),
+  ('Ꙣ', 'Ꙣ'), ('Ꙥ', 'Ꙥ'), ('Ꙧ', 'Ꙧ'), ('Ꙩ', 'Ꙩ'),
+  ('Ꙫ', 'Ꙫ'), ('Ꙭ', 'Ꙭ'), ('Ꚁ', 'Ꚁ'), ('Ꚃ', 'Ꚃ'),
+  ('Ꚅ', 'Ꚅ'), ('Ꚇ', 'Ꚇ'), ('Ꚉ', 'Ꚉ'), ('Ꚋ', 'Ꚋ'),
+  ('Ꚍ', 'Ꚍ'), ('Ꚏ', 'Ꚏ'), ('Ꚑ', 'Ꚑ'), ('Ꚓ', 'Ꚓ'),
+  ('Ꚕ', 'Ꚕ'), ('Ꚗ', 'Ꚗ'), ('Ꚙ', 'Ꚙ'), ('Ꚛ', 'Ꚛ'),
+  ('Ꜣ', 'Ꜣ'), ('Ꜥ', 'Ꜥ'), ('Ꜧ', 'Ꜧ'), ('Ꜩ', 'Ꜩ'),
+  ('Ꜫ', 'Ꜫ'), ('Ꜭ', 'Ꜭ'), ('Ꜯ', 'Ꜯ'), ('Ꜳ', 'Ꜳ'),
+  ('Ꜵ', 'Ꜵ'), ('Ꜷ', 'Ꜷ'), ('Ꜹ', 'Ꜹ'), ('Ꜻ', 'Ꜻ'),
+  ('Ꜽ', 'Ꜽ'), ('Ꜿ', 'Ꜿ'), ('Ꝁ', 'Ꝁ'), ('Ꝃ', 'Ꝃ'),
+  ('Ꝅ', 'Ꝅ'), ('Ꝇ', 'Ꝇ'), ('Ꝉ', 'Ꝉ'), ('Ꝋ', 'Ꝋ'),
+  ('Ꝍ', 'Ꝍ'), ('Ꝏ', 'Ꝏ'), ('Ꝑ', 'Ꝑ'), ('Ꝓ', 'Ꝓ'),
+  ('Ꝕ', 'Ꝕ'), ('Ꝗ', 'Ꝗ'), ('Ꝙ', 'Ꝙ'), ('Ꝛ', 'Ꝛ'),
+  ('Ꝝ', 'Ꝝ'), ('Ꝟ', 'Ꝟ'), ('Ꝡ', 'Ꝡ'), ('Ꝣ', 'Ꝣ'),
+  ('Ꝥ', 'Ꝥ'), ('Ꝧ', 'Ꝧ'), ('Ꝩ', 'Ꝩ'), ('Ꝫ', 'Ꝫ'),
+  ('Ꝭ', 'Ꝭ'), ('Ꝯ', 'Ꝯ'), ('Ꝺ', 'Ꝺ'), ('Ꝼ', 'Ꝼ'),
+  ('Ᵹ', 'Ꝿ'), ('Ꞁ', 'Ꞁ'), ('Ꞃ', 'Ꞃ'), ('Ꞅ', 'Ꞅ'),
+  ('Ꞇ', 'Ꞇ'), ('Ꞌ', 'Ꞌ'), ('Ɥ', 'Ɥ'), ('Ꞑ', 'Ꞑ'),
+  ('Ꞓ', 'Ꞓ'), ('Ꞗ', 'Ꞗ'), ('Ꞙ', 'Ꞙ'), ('Ꞛ', 'Ꞛ'),
+  ('Ꞝ', 'Ꞝ'), ('Ꞟ', 'Ꞟ'), ('Ꞡ', 'Ꞡ'), ('Ꞣ', 'Ꞣ'),
+  ('Ꞥ', 'Ꞥ'), ('Ꞧ', 'Ꞧ'), ('Ꞩ', 'Ꞩ'), ('Ɦ', 'Ɪ'),
+  ('Ʞ', 'Ꞵ'), ('Ꞷ', 'Ꞷ'), ('ꭰ', 'ꮿ'), ('ff', 'st'),
+  ('ﬓ', 'ﬗ'), ('A', 'Z'), ('𐐀', '𐐧'), ('𐒰', '𐓓'),
+  ('𐲀', '𐲲'), ('𑢠', '𑢿'), ('𞤀', '𞤡'),
+];
+
+pub const CHANGES_WHEN_CASEMAPPED: &'static [(char, char)] = &[
+  ('A', 'Z'), ('a', 'z'), ('µ', 'µ'), ('À', 'Ö'), ('Ø', 'ö'),
+  ('ø', 'ķ'), ('Ĺ', 'ƌ'), ('Ǝ', 'ƚ'), ('Ɯ', 'Ʃ'), ('Ƭ', 'ƹ'),
+  ('Ƽ', 'ƽ'), ('ƿ', 'ƿ'), ('DŽ', 'Ƞ'), ('Ȣ', 'ȳ'), ('Ⱥ', 'ɔ'),
+  ('ɖ', 'ɗ'), ('ə', 'ə'), ('ɛ', 'ɜ'), ('ɠ', 'ɡ'), ('ɣ', 'ɣ'),
+  ('ɥ', 'ɦ'), ('ɨ', 'ɬ'), ('ɯ', 'ɯ'), ('ɱ', 'ɲ'), ('ɵ', 'ɵ'),
+  ('ɽ', 'ɽ'), ('ʀ', 'ʀ'), ('ʃ', 'ʃ'), ('ʇ', 'ʌ'), ('ʒ', 'ʒ'),
+  ('ʝ', 'ʞ'), ('ͅ', 'ͅ'), ('Ͱ', 'ͳ'), ('Ͷ', 'ͷ'), ('ͻ', 'ͽ'),
+  ('Ϳ', 'Ϳ'), ('Ά', 'Ά'), ('Έ', 'Ί'), ('Ό', 'Ό'), ('Ύ', 'Ρ'),
+  ('Σ', 'ϑ'), ('ϕ', 'ϵ'), ('Ϸ', 'ϻ'), ('Ͻ', 'ҁ'), ('Ҋ', 'ԯ'),
+  ('Ա', 'Ֆ'), ('ա', 'և'), ('Ⴀ', 'Ⴥ'), ('Ⴧ', 'Ⴧ'), ('Ⴭ', 'Ⴭ'),
+  ('Ꭰ', 'Ᏽ'), ('ᏸ', 'ᏽ'), ('ᲀ', 'ᲈ'), ('ᵹ', 'ᵹ'),
+  ('ᵽ', 'ᵽ'), ('Ḁ', 'ẛ'), ('ẞ', 'ẞ'), ('Ạ', 'ἕ'),
+  ('Ἐ', 'Ἕ'), ('ἠ', 'ὅ'), ('Ὀ', 'Ὅ'), ('ὐ', 'ὗ'),
+  ('Ὑ', 'Ὑ'), ('Ὓ', 'Ὓ'), ('Ὕ', 'Ὕ'), ('Ὗ', 'ώ'),
+  ('ᾀ', 'ᾴ'), ('ᾶ', 'ᾼ'), ('ι', 'ι'), ('ῂ', 'ῄ'),
+  ('ῆ', 'ῌ'), ('ῐ', 'ΐ'), ('ῖ', 'Ί'), ('ῠ', 'Ῥ'),
+  ('ῲ', 'ῴ'), ('ῶ', 'ῼ'), ('Ω', 'Ω'), ('K', 'Å'),
+  ('Ⅎ', 'Ⅎ'), ('ⅎ', 'ⅎ'), ('Ⅰ', 'ⅿ'), ('Ↄ', 'ↄ'),
+  ('Ⓐ', 'ⓩ'), ('Ⰰ', 'Ⱞ'), ('ⰰ', 'ⱞ'), ('Ⱡ', 'Ɒ'),
+  ('Ⱳ', 'ⱳ'), ('Ⱶ', 'ⱶ'), ('Ȿ', 'ⳣ'), ('Ⳬ', 'ⳮ'),
+  ('Ⳳ', 'ⳳ'), ('ⴀ', 'ⴥ'), ('ⴧ', 'ⴧ'), ('ⴭ', 'ⴭ'),
+  ('Ꙁ', 'ꙭ'), ('Ꚁ', 'ꚛ'), ('Ꜣ', 'ꜯ'), ('Ꜳ', 'ꝯ'),
+  ('Ꝺ', 'ꞇ'), ('Ꞌ', 'Ɥ'), ('Ꞑ', 'ꞓ'), ('Ꞗ', 'Ɪ'),
+  ('Ʞ', 'ꞷ'), ('ꭓ', 'ꭓ'), ('ꭰ', 'ꮿ'), ('ff', 'st'),
+  ('ﬓ', 'ﬗ'), ('A', 'Z'), ('a', 'z'), ('𐐀', '𐑏'),
+  ('𐒰', '𐓓'), ('𐓘', '𐓻'), ('𐲀', '𐲲'), ('𐳀', '𐳲'),
+  ('𑢠', '𑣟'), ('𞤀', '𞥃'),
+];
+
+pub const CHANGES_WHEN_LOWERCASED: &'static [(char, char)] = &[
+  ('A', 'Z'), ('À', 'Ö'), ('Ø', 'Þ'), ('Ā', 'Ā'), ('Ă', 'Ă'),
+  ('Ą', 'Ą'), ('Ć', 'Ć'), ('Ĉ', 'Ĉ'), ('Ċ', 'Ċ'), ('Č', 'Č'),
+  ('Ď', 'Ď'), ('Đ', 'Đ'), ('Ē', 'Ē'), ('Ĕ', 'Ĕ'), ('Ė', 'Ė'),
+  ('Ę', 'Ę'), ('Ě', 'Ě'), ('Ĝ', 'Ĝ'), ('Ğ', 'Ğ'), ('Ġ', 'Ġ'),
+  ('Ģ', 'Ģ'), ('Ĥ', 'Ĥ'), ('Ħ', 'Ħ'), ('Ĩ', 'Ĩ'), ('Ī', 'Ī'),
+  ('Ĭ', 'Ĭ'), ('Į', 'Į'), ('İ', 'İ'), ('IJ', 'IJ'), ('Ĵ', 'Ĵ'),
+  ('Ķ', 'Ķ'), ('Ĺ', 'Ĺ'), ('Ļ', 'Ļ'), ('Ľ', 'Ľ'), ('Ŀ', 'Ŀ'),
+  ('Ł', 'Ł'), ('Ń', 'Ń'), ('Ņ', 'Ņ'), ('Ň', 'Ň'), ('Ŋ', 'Ŋ'),
+  ('Ō', 'Ō'), ('Ŏ', 'Ŏ'), ('Ő', 'Ő'), ('Œ', 'Œ'), ('Ŕ', 'Ŕ'),
+  ('Ŗ', 'Ŗ'), ('Ř', 'Ř'), ('Ś', 'Ś'), ('Ŝ', 'Ŝ'), ('Ş', 'Ş'),
+  ('Š', 'Š'), ('Ţ', 'Ţ'), ('Ť', 'Ť'), ('Ŧ', 'Ŧ'), ('Ũ', 'Ũ'),
+  ('Ū', 'Ū'), ('Ŭ', 'Ŭ'), ('Ů', 'Ů'), ('Ű', 'Ű'), ('Ų', 'Ų'),
+  ('Ŵ', 'Ŵ'), ('Ŷ', 'Ŷ'), ('Ÿ', 'Ź'), ('Ż', 'Ż'), ('Ž', 'Ž'),
+  ('Ɓ', 'Ƃ'), ('Ƅ', 'Ƅ'), ('Ɔ', 'Ƈ'), ('Ɖ', 'Ƌ'), ('Ǝ', 'Ƒ'),
+  ('Ɠ', 'Ɣ'), ('Ɩ', 'Ƙ'), ('Ɯ', 'Ɲ'), ('Ɵ', 'Ơ'), ('Ƣ', 'Ƣ'),
+  ('Ƥ', 'Ƥ'), ('Ʀ', 'Ƨ'), ('Ʃ', 'Ʃ'), ('Ƭ', 'Ƭ'), ('Ʈ', 'Ư'),
+  ('Ʊ', 'Ƴ'), ('Ƶ', 'Ƶ'), ('Ʒ', 'Ƹ'), ('Ƽ', 'Ƽ'), ('DŽ', 'Dž'),
+  ('LJ', 'Lj'), ('NJ', 'Nj'), ('Ǎ', 'Ǎ'), ('Ǐ', 'Ǐ'), ('Ǒ', 'Ǒ'),
+  ('Ǔ', 'Ǔ'), ('Ǖ', 'Ǖ'), ('Ǘ', 'Ǘ'), ('Ǚ', 'Ǚ'), ('Ǜ', 'Ǜ'),
+  ('Ǟ', 'Ǟ'), ('Ǡ', 'Ǡ'), ('Ǣ', 'Ǣ'), ('Ǥ', 'Ǥ'), ('Ǧ', 'Ǧ'),
+  ('Ǩ', 'Ǩ'), ('Ǫ', 'Ǫ'), ('Ǭ', 'Ǭ'), ('Ǯ', 'Ǯ'), ('DZ', 'Dz'),
+  ('Ǵ', 'Ǵ'), ('Ƕ', 'Ǹ'), ('Ǻ', 'Ǻ'), ('Ǽ', 'Ǽ'), ('Ǿ', 'Ǿ'),
+  ('Ȁ', 'Ȁ'), ('Ȃ', 'Ȃ'), ('Ȅ', 'Ȅ'), ('Ȇ', 'Ȇ'), ('Ȉ', 'Ȉ'),
+  ('Ȋ', 'Ȋ'), ('Ȍ', 'Ȍ'), ('Ȏ', 'Ȏ'), ('Ȑ', 'Ȑ'), ('Ȓ', 'Ȓ'),
+  ('Ȕ', 'Ȕ'), ('Ȗ', 'Ȗ'), ('Ș', 'Ș'), ('Ț', 'Ț'), ('Ȝ', 'Ȝ'),
+  ('Ȟ', 'Ȟ'), ('Ƞ', 'Ƞ'), ('Ȣ', 'Ȣ'), ('Ȥ', 'Ȥ'), ('Ȧ', 'Ȧ'),
+  ('Ȩ', 'Ȩ'), ('Ȫ', 'Ȫ'), ('Ȭ', 'Ȭ'), ('Ȯ', 'Ȯ'), ('Ȱ', 'Ȱ'),
+  ('Ȳ', 'Ȳ'), ('Ⱥ', 'Ȼ'), ('Ƚ', 'Ⱦ'), ('Ɂ', 'Ɂ'), ('Ƀ', 'Ɇ'),
+  ('Ɉ', 'Ɉ'), ('Ɋ', 'Ɋ'), ('Ɍ', 'Ɍ'), ('Ɏ', 'Ɏ'), ('Ͱ', 'Ͱ'),
+  ('Ͳ', 'Ͳ'), ('Ͷ', 'Ͷ'), ('Ϳ', 'Ϳ'), ('Ά', 'Ά'), ('Έ', 'Ί'),
+  ('Ό', 'Ό'), ('Ύ', 'Ώ'), ('Α', 'Ρ'), ('Σ', 'Ϋ'), ('Ϗ', 'Ϗ'),
+  ('Ϙ', 'Ϙ'), ('Ϛ', 'Ϛ'), ('Ϝ', 'Ϝ'), ('Ϟ', 'Ϟ'), ('Ϡ', 'Ϡ'),
+  ('Ϣ', 'Ϣ'), ('Ϥ', 'Ϥ'), ('Ϧ', 'Ϧ'), ('Ϩ', 'Ϩ'), ('Ϫ', 'Ϫ'),
+  ('Ϭ', 'Ϭ'), ('Ϯ', 'Ϯ'), ('ϴ', 'ϴ'), ('Ϸ', 'Ϸ'), ('Ϲ', 'Ϻ'),
+  ('Ͻ', 'Я'), ('Ѡ', 'Ѡ'), ('Ѣ', 'Ѣ'), ('Ѥ', 'Ѥ'), ('Ѧ', 'Ѧ'),
+  ('Ѩ', 'Ѩ'), ('Ѫ', 'Ѫ'), ('Ѭ', 'Ѭ'), ('Ѯ', 'Ѯ'), ('Ѱ', 'Ѱ'),
+  ('Ѳ', 'Ѳ'), ('Ѵ', 'Ѵ'), ('Ѷ', 'Ѷ'), ('Ѹ', 'Ѹ'), ('Ѻ', 'Ѻ'),
+  ('Ѽ', 'Ѽ'), ('Ѿ', 'Ѿ'), ('Ҁ', 'Ҁ'), ('Ҋ', 'Ҋ'), ('Ҍ', 'Ҍ'),
+  ('Ҏ', 'Ҏ'), ('Ґ', 'Ґ'), ('Ғ', 'Ғ'), ('Ҕ', 'Ҕ'), ('Җ', 'Җ'),
+  ('Ҙ', 'Ҙ'), ('Қ', 'Қ'), ('Ҝ', 'Ҝ'), ('Ҟ', 'Ҟ'), ('Ҡ', 'Ҡ'),
+  ('Ң', 'Ң'), ('Ҥ', 'Ҥ'), ('Ҧ', 'Ҧ'), ('Ҩ', 'Ҩ'), ('Ҫ', 'Ҫ'),
+  ('Ҭ', 'Ҭ'), ('Ү', 'Ү'), ('Ұ', 'Ұ'), ('Ҳ', 'Ҳ'), ('Ҵ', 'Ҵ'),
+  ('Ҷ', 'Ҷ'), ('Ҹ', 'Ҹ'), ('Һ', 'Һ'), ('Ҽ', 'Ҽ'), ('Ҿ', 'Ҿ'),
+  ('Ӏ', 'Ӂ'), ('Ӄ', 'Ӄ'), ('Ӆ', 'Ӆ'), ('Ӈ', 'Ӈ'), ('Ӊ', 'Ӊ'),
+  ('Ӌ', 'Ӌ'), ('Ӎ', 'Ӎ'), ('Ӑ', 'Ӑ'), ('Ӓ', 'Ӓ'), ('Ӕ', 'Ӕ'),
+  ('Ӗ', 'Ӗ'), ('Ә', 'Ә'), ('Ӛ', 'Ӛ'), ('Ӝ', 'Ӝ'), ('Ӟ', 'Ӟ'),
+  ('Ӡ', 'Ӡ'), ('Ӣ', 'Ӣ'), ('Ӥ', 'Ӥ'), ('Ӧ', 'Ӧ'), ('Ө', 'Ө'),
+  ('Ӫ', 'Ӫ'), ('Ӭ', 'Ӭ'), ('Ӯ', 'Ӯ'), ('Ӱ', 'Ӱ'), ('Ӳ', 'Ӳ'),
+  ('Ӵ', 'Ӵ'), ('Ӷ', 'Ӷ'), ('Ӹ', 'Ӹ'), ('Ӻ', 'Ӻ'), ('Ӽ', 'Ӽ'),
+  ('Ӿ', 'Ӿ'), ('Ԁ', 'Ԁ'), ('Ԃ', 'Ԃ'), ('Ԅ', 'Ԅ'), ('Ԇ', 'Ԇ'),
+  ('Ԉ', 'Ԉ'), ('Ԋ', 'Ԋ'), ('Ԍ', 'Ԍ'), ('Ԏ', 'Ԏ'), ('Ԑ', 'Ԑ'),
+  ('Ԓ', 'Ԓ'), ('Ԕ', 'Ԕ'), ('Ԗ', 'Ԗ'), ('Ԙ', 'Ԙ'), ('Ԛ', 'Ԛ'),
+  ('Ԝ', 'Ԝ'), ('Ԟ', 'Ԟ'), ('Ԡ', 'Ԡ'), ('Ԣ', 'Ԣ'), ('Ԥ', 'Ԥ'),
+  ('Ԧ', 'Ԧ'), ('Ԩ', 'Ԩ'), ('Ԫ', 'Ԫ'), ('Ԭ', 'Ԭ'), ('Ԯ', 'Ԯ'),
+  ('Ա', 'Ֆ'), ('Ⴀ', 'Ⴥ'), ('Ⴧ', 'Ⴧ'), ('Ⴭ', 'Ⴭ'),
+  ('Ꭰ', 'Ᏽ'), ('Ḁ', 'Ḁ'), ('Ḃ', 'Ḃ'), ('Ḅ', 'Ḅ'),
+  ('Ḇ', 'Ḇ'), ('Ḉ', 'Ḉ'), ('Ḋ', 'Ḋ'), ('Ḍ', 'Ḍ'),
+  ('Ḏ', 'Ḏ'), ('Ḑ', 'Ḑ'), ('Ḓ', 'Ḓ'), ('Ḕ', 'Ḕ'),
+  ('Ḗ', 'Ḗ'), ('Ḙ', 'Ḙ'), ('Ḛ', 'Ḛ'), ('Ḝ', 'Ḝ'),
+  ('Ḟ', 'Ḟ'), ('Ḡ', 'Ḡ'), ('Ḣ', 'Ḣ'), ('Ḥ', 'Ḥ'),
+  ('Ḧ', 'Ḧ'), ('Ḩ', 'Ḩ'), ('Ḫ', 'Ḫ'), ('Ḭ', 'Ḭ'),
+  ('Ḯ', 'Ḯ'), ('Ḱ', 'Ḱ'), ('Ḳ', 'Ḳ'), ('Ḵ', 'Ḵ'),
+  ('Ḷ', 'Ḷ'), ('Ḹ', 'Ḹ'), ('Ḻ', 'Ḻ'), ('Ḽ', 'Ḽ'),
+  ('Ḿ', 'Ḿ'), ('Ṁ', 'Ṁ'), ('Ṃ', 'Ṃ'), ('Ṅ', 'Ṅ'),
+  ('Ṇ', 'Ṇ'), ('Ṉ', 'Ṉ'), ('Ṋ', 'Ṋ'), ('Ṍ', 'Ṍ'),
+  ('Ṏ', 'Ṏ'), ('Ṑ', 'Ṑ'), ('Ṓ', 'Ṓ'), ('Ṕ', 'Ṕ'),
+  ('Ṗ', 'Ṗ'), ('Ṙ', 'Ṙ'), ('Ṛ', 'Ṛ'), ('Ṝ', 'Ṝ'),
+  ('Ṟ', 'Ṟ'), ('Ṡ', 'Ṡ'), ('Ṣ', 'Ṣ'), ('Ṥ', 'Ṥ'),
+  ('Ṧ', 'Ṧ'), ('Ṩ', 'Ṩ'), ('Ṫ', 'Ṫ'), ('Ṭ', 'Ṭ'),
+  ('Ṯ', 'Ṯ'), ('Ṱ', 'Ṱ'), ('Ṳ', 'Ṳ'), ('Ṵ', 'Ṵ'),
+  ('Ṷ', 'Ṷ'), ('Ṹ', 'Ṹ'), ('Ṻ', 'Ṻ'), ('Ṽ', 'Ṽ'),
+  ('Ṿ', 'Ṿ'), ('Ẁ', 'Ẁ'), ('Ẃ', 'Ẃ'), ('Ẅ', 'Ẅ'),
+  ('Ẇ', 'Ẇ'), ('Ẉ', 'Ẉ'), ('Ẋ', 'Ẋ'), ('Ẍ', 'Ẍ'),
+  ('Ẏ', 'Ẏ'), ('Ẑ', 'Ẑ'), ('Ẓ', 'Ẓ'), ('Ẕ', 'Ẕ'),
+  ('ẞ', 'ẞ'), ('Ạ', 'Ạ'), ('Ả', 'Ả'), ('Ấ', 'Ấ'),
+  ('Ầ', 'Ầ'), ('Ẩ', 'Ẩ'), ('Ẫ', 'Ẫ'), ('Ậ', 'Ậ'),
+  ('Ắ', 'Ắ'), ('Ằ', 'Ằ'), ('Ẳ', 'Ẳ'), ('Ẵ', 'Ẵ'),
+  ('Ặ', 'Ặ'), ('Ẹ', 'Ẹ'), ('Ẻ', 'Ẻ'), ('Ẽ', 'Ẽ'),
+  ('Ế', 'Ế'), ('Ề', 'Ề'), ('Ể', 'Ể'), ('Ễ', 'Ễ'),
+  ('Ệ', 'Ệ'), ('Ỉ', 'Ỉ'), ('Ị', 'Ị'), ('Ọ', 'Ọ'),
+  ('Ỏ', 'Ỏ'), ('Ố', 'Ố'), ('Ồ', 'Ồ'), ('Ổ', 'Ổ'),
+  ('Ỗ', 'Ỗ'), ('Ộ', 'Ộ'), ('Ớ', 'Ớ'), ('Ờ', 'Ờ'),
+  ('Ở', 'Ở'), ('Ỡ', 'Ỡ'), ('Ợ', 'Ợ'), ('Ụ', 'Ụ'),
+  ('Ủ', 'Ủ'), ('Ứ', 'Ứ'), ('Ừ', 'Ừ'), ('Ử', 'Ử'),
+  ('Ữ', 'Ữ'), ('Ự', 'Ự'), ('Ỳ', 'Ỳ'), ('Ỵ', 'Ỵ'),
+  ('Ỷ', 'Ỷ'), ('Ỹ', 'Ỹ'), ('Ỻ', 'Ỻ'), ('Ỽ', 'Ỽ'),
+  ('Ỿ', 'Ỿ'), ('Ἀ', 'Ἇ'), ('Ἐ', 'Ἕ'), ('Ἠ', 'Ἧ'),
+  ('Ἰ', 'Ἷ'), ('Ὀ', 'Ὅ'), ('Ὑ', 'Ὑ'), ('Ὓ', 'Ὓ'),
+  ('Ὕ', 'Ὕ'), ('Ὗ', 'Ὗ'), ('Ὠ', 'Ὧ'), ('ᾈ', 'ᾏ'),
+  ('ᾘ', 'ᾟ'), ('ᾨ', 'ᾯ'), ('Ᾰ', 'ᾼ'), ('Ὲ', 'ῌ'),
+  ('Ῐ', 'Ί'), ('Ῠ', 'Ῥ'), ('Ὸ', 'ῼ'), ('Ω', 'Ω'),
+  ('K', 'Å'), ('Ⅎ', 'Ⅎ'), ('Ⅰ', 'Ⅿ'), ('Ↄ', 'Ↄ'),
+  ('Ⓐ', 'Ⓩ'), ('Ⰰ', 'Ⱞ'), ('Ⱡ', 'Ⱡ'), ('Ɫ', 'Ɽ'),
+  ('Ⱨ', 'Ⱨ'), ('Ⱪ', 'Ⱪ'), ('Ⱬ', 'Ⱬ'), ('Ɑ', 'Ɒ'),
+  ('Ⱳ', 'Ⱳ'), ('Ⱶ', 'Ⱶ'), ('Ȿ', 'Ⲁ'), ('Ⲃ', 'Ⲃ'),
+  ('Ⲅ', 'Ⲅ'), ('Ⲇ', 'Ⲇ'), ('Ⲉ', 'Ⲉ'), ('Ⲋ', 'Ⲋ'),
+  ('Ⲍ', 'Ⲍ'), ('Ⲏ', 'Ⲏ'), ('Ⲑ', 'Ⲑ'), ('Ⲓ', 'Ⲓ'),
+  ('Ⲕ', 'Ⲕ'), ('Ⲗ', 'Ⲗ'), ('Ⲙ', 'Ⲙ'), ('Ⲛ', 'Ⲛ'),
+  ('Ⲝ', 'Ⲝ'), ('Ⲟ', 'Ⲟ'), ('Ⲡ', 'Ⲡ'), ('Ⲣ', 'Ⲣ'),
+  ('Ⲥ', 'Ⲥ'), ('Ⲧ', 'Ⲧ'), ('Ⲩ', 'Ⲩ'), ('Ⲫ', 'Ⲫ'),
+  ('Ⲭ', 'Ⲭ'), ('Ⲯ', 'Ⲯ'), ('Ⲱ', 'Ⲱ'), ('Ⲳ', 'Ⲳ'),
+  ('Ⲵ', 'Ⲵ'), ('Ⲷ', 'Ⲷ'), ('Ⲹ', 'Ⲹ'), ('Ⲻ', 'Ⲻ'),
+  ('Ⲽ', 'Ⲽ'), ('Ⲿ', 'Ⲿ'), ('Ⳁ', 'Ⳁ'), ('Ⳃ', 'Ⳃ'),
+  ('Ⳅ', 'Ⳅ'), ('Ⳇ', 'Ⳇ'), ('Ⳉ', 'Ⳉ'), ('Ⳋ', 'Ⳋ'),
+  ('Ⳍ', 'Ⳍ'), ('Ⳏ', 'Ⳏ'), ('Ⳑ', 'Ⳑ'), ('Ⳓ', 'Ⳓ'),
+  ('Ⳕ', 'Ⳕ'), ('Ⳗ', 'Ⳗ'), ('Ⳙ', 'Ⳙ'), ('Ⳛ', 'Ⳛ'),
+  ('Ⳝ', 'Ⳝ'), ('Ⳟ', 'Ⳟ'), ('Ⳡ', 'Ⳡ'), ('Ⳣ', 'Ⳣ'),
+  ('Ⳬ', 'Ⳬ'), ('Ⳮ', 'Ⳮ'), ('Ⳳ', 'Ⳳ'), ('Ꙁ', 'Ꙁ'),
+  ('Ꙃ', 'Ꙃ'), ('Ꙅ', 'Ꙅ'), ('Ꙇ', 'Ꙇ'), ('Ꙉ', 'Ꙉ'),
+  ('Ꙋ', 'Ꙋ'), ('Ꙍ', 'Ꙍ'), ('Ꙏ', 'Ꙏ'), ('Ꙑ', 'Ꙑ'),
+  ('Ꙓ', 'Ꙓ'), ('Ꙕ', 'Ꙕ'), ('Ꙗ', 'Ꙗ'), ('Ꙙ', 'Ꙙ'),
+  ('Ꙛ', 'Ꙛ'), ('Ꙝ', 'Ꙝ'), ('Ꙟ', 'Ꙟ'), ('Ꙡ', 'Ꙡ'),
+  ('Ꙣ', 'Ꙣ'), ('Ꙥ', 'Ꙥ'), ('Ꙧ', 'Ꙧ'), ('Ꙩ', 'Ꙩ'),
+  ('Ꙫ', 'Ꙫ'), ('Ꙭ', 'Ꙭ'), ('Ꚁ', 'Ꚁ'), ('Ꚃ', 'Ꚃ'),
+  ('Ꚅ', 'Ꚅ'), ('Ꚇ', 'Ꚇ'), ('Ꚉ', 'Ꚉ'), ('Ꚋ', 'Ꚋ'),
+  ('Ꚍ', 'Ꚍ'), ('Ꚏ', 'Ꚏ'), ('Ꚑ', 'Ꚑ'), ('Ꚓ', 'Ꚓ'),
+  ('Ꚕ', 'Ꚕ'), ('Ꚗ', 'Ꚗ'), ('Ꚙ', 'Ꚙ'), ('Ꚛ', 'Ꚛ'),
+  ('Ꜣ', 'Ꜣ'), ('Ꜥ', 'Ꜥ'), ('Ꜧ', 'Ꜧ'), ('Ꜩ', 'Ꜩ'),
+  ('Ꜫ', 'Ꜫ'), ('Ꜭ', 'Ꜭ'), ('Ꜯ', 'Ꜯ'), ('Ꜳ', 'Ꜳ'),
+  ('Ꜵ', 'Ꜵ'), ('Ꜷ', 'Ꜷ'), ('Ꜹ', 'Ꜹ'), ('Ꜻ', 'Ꜻ'),
+  ('Ꜽ', 'Ꜽ'), ('Ꜿ', 'Ꜿ'), ('Ꝁ', 'Ꝁ'), ('Ꝃ', 'Ꝃ'),
+  ('Ꝅ', 'Ꝅ'), ('Ꝇ', 'Ꝇ'), ('Ꝉ', 'Ꝉ'), ('Ꝋ', 'Ꝋ'),
+  ('Ꝍ', 'Ꝍ'), ('Ꝏ', 'Ꝏ'), ('Ꝑ', 'Ꝑ'), ('Ꝓ', 'Ꝓ'),
+  ('Ꝕ', 'Ꝕ'), ('Ꝗ', 'Ꝗ'), ('Ꝙ', 'Ꝙ'), ('Ꝛ', 'Ꝛ'),
+  ('Ꝝ', 'Ꝝ'), ('Ꝟ', 'Ꝟ'), ('Ꝡ', 'Ꝡ'), ('Ꝣ', 'Ꝣ'),
+  ('Ꝥ', 'Ꝥ'), ('Ꝧ', 'Ꝧ'), ('Ꝩ', 'Ꝩ'), ('Ꝫ', 'Ꝫ'),
+  ('Ꝭ', 'Ꝭ'), ('Ꝯ', 'Ꝯ'), ('Ꝺ', 'Ꝺ'), ('Ꝼ', 'Ꝼ'),
+  ('Ᵹ', 'Ꝿ'), ('Ꞁ', 'Ꞁ'), ('Ꞃ', 'Ꞃ'), ('Ꞅ', 'Ꞅ'),
+  ('Ꞇ', 'Ꞇ'), ('Ꞌ', 'Ꞌ'), ('Ɥ', 'Ɥ'), ('Ꞑ', 'Ꞑ'),
+  ('Ꞓ', 'Ꞓ'), ('Ꞗ', 'Ꞗ'), ('Ꞙ', 'Ꞙ'), ('Ꞛ', 'Ꞛ'),
+  ('Ꞝ', 'Ꞝ'), ('Ꞟ', 'Ꞟ'), ('Ꞡ', 'Ꞡ'), ('Ꞣ', 'Ꞣ'),
+  ('Ꞥ', 'Ꞥ'), ('Ꞧ', 'Ꞧ'), ('Ꞩ', 'Ꞩ'), ('Ɦ', 'Ɪ'),
+  ('Ʞ', 'Ꞵ'), ('Ꞷ', 'Ꞷ'), ('A', 'Z'), ('𐐀', '𐐧'),
+  ('𐒰', '𐓓'), ('𐲀', '𐲲'), ('𑢠', '𑢿'), ('𞤀', '𞤡'),
+];
+
+pub const CHANGES_WHEN_TITLECASED: &'static [(char, char)] = &[
+  ('a', 'z'), ('µ', 'µ'), ('ß', 'ö'), ('ø', 'ÿ'), ('ā', 'ā'),
+  ('ă', 'ă'), ('ą', 'ą'), ('ć', 'ć'), ('ĉ', 'ĉ'), ('ċ', 'ċ'),
+  ('č', 'č'), ('ď', 'ď'), ('đ', 'đ'), ('ē', 'ē'), ('ĕ', 'ĕ'),
+  ('ė', 'ė'), ('ę', 'ę'), ('ě', 'ě'), ('ĝ', 'ĝ'), ('ğ', 'ğ'),
+  ('ġ', 'ġ'), ('ģ', 'ģ'), ('ĥ', 'ĥ'), ('ħ', 'ħ'), ('ĩ', 'ĩ'),
+  ('ī', 'ī'), ('ĭ', 'ĭ'), ('į', 'į'), ('ı', 'ı'), ('ij', 'ij'),
+  ('ĵ', 'ĵ'), ('ķ', 'ķ'), ('ĺ', 'ĺ'), ('ļ', 'ļ'), ('ľ', 'ľ'),
+  ('ŀ', 'ŀ'), ('ł', 'ł'), ('ń', 'ń'), ('ņ', 'ņ'), ('ň', 'ʼn'),
+  ('ŋ', 'ŋ'), ('ō', 'ō'), ('ŏ', 'ŏ'), ('ő', 'ő'), ('œ', 'œ'),
+  ('ŕ', 'ŕ'), ('ŗ', 'ŗ'), ('ř', 'ř'), ('ś', 'ś'), ('ŝ', 'ŝ'),
+  ('ş', 'ş'), ('š', 'š'), ('ţ', 'ţ'), ('ť', 'ť'), ('ŧ', 'ŧ'),
+  ('ũ', 'ũ'), ('ū', 'ū'), ('ŭ', 'ŭ'), ('ů', 'ů'), ('ű', 'ű'),
+  ('ų', 'ų'), ('ŵ', 'ŵ'), ('ŷ', 'ŷ'), ('ź', 'ź'), ('ż', 'ż'),
+  ('ž', 'ƀ'), ('ƃ', 'ƃ'), ('ƅ', 'ƅ'), ('ƈ', 'ƈ'), ('ƌ', 'ƌ'),
+  ('ƒ', 'ƒ'), ('ƕ', 'ƕ'), ('ƙ', 'ƚ'), ('ƞ', 'ƞ'), ('ơ', 'ơ'),
+  ('ƣ', 'ƣ'), ('ƥ', 'ƥ'), ('ƨ', 'ƨ'), ('ƭ', 'ƭ'), ('ư', 'ư'),
+  ('ƴ', 'ƴ'), ('ƶ', 'ƶ'), ('ƹ', 'ƹ'), ('ƽ', 'ƽ'), ('ƿ', 'ƿ'),
+  ('DŽ', 'DŽ'), ('dž', 'LJ'), ('lj', 'NJ'), ('nj', 'nj'), ('ǎ', 'ǎ'),
+  ('ǐ', 'ǐ'), ('ǒ', 'ǒ'), ('ǔ', 'ǔ'), ('ǖ', 'ǖ'), ('ǘ', 'ǘ'),
+  ('ǚ', 'ǚ'), ('ǜ', 'ǝ'), ('ǟ', 'ǟ'), ('ǡ', 'ǡ'), ('ǣ', 'ǣ'),
+  ('ǥ', 'ǥ'), ('ǧ', 'ǧ'), ('ǩ', 'ǩ'), ('ǫ', 'ǫ'), ('ǭ', 'ǭ'),
+  ('ǯ', 'DZ'), ('dz', 'dz'), ('ǵ', 'ǵ'), ('ǹ', 'ǹ'), ('ǻ', 'ǻ'),
+  ('ǽ', 'ǽ'), ('ǿ', 'ǿ'), ('ȁ', 'ȁ'), ('ȃ', 'ȃ'), ('ȅ', 'ȅ'),
+  ('ȇ', 'ȇ'), ('ȉ', 'ȉ'), ('ȋ', 'ȋ'), ('ȍ', 'ȍ'), ('ȏ', 'ȏ'),
+  ('ȑ', 'ȑ'), ('ȓ', 'ȓ'), ('ȕ', 'ȕ'), ('ȗ', 'ȗ'), ('ș', 'ș'),
+  ('ț', 'ț'), ('ȝ', 'ȝ'), ('ȟ', 'ȟ'), ('ȣ', 'ȣ'), ('ȥ', 'ȥ'),
+  ('ȧ', 'ȧ'), ('ȩ', 'ȩ'), ('ȫ', 'ȫ'), ('ȭ', 'ȭ'), ('ȯ', 'ȯ'),
+  ('ȱ', 'ȱ'), ('ȳ', 'ȳ'), ('ȼ', 'ȼ'), ('ȿ', 'ɀ'), ('ɂ', 'ɂ'),
+  ('ɇ', 'ɇ'), ('ɉ', 'ɉ'), ('ɋ', 'ɋ'), ('ɍ', 'ɍ'), ('ɏ', 'ɔ'),
+  ('ɖ', 'ɗ'), ('ə', 'ə'), ('ɛ', 'ɜ'), ('ɠ', 'ɡ'), ('ɣ', 'ɣ'),
+  ('ɥ', 'ɦ'), ('ɨ', 'ɬ'), ('ɯ', 'ɯ'), ('ɱ', 'ɲ'), ('ɵ', 'ɵ'),
+  ('ɽ', 'ɽ'), ('ʀ', 'ʀ'), ('ʃ', 'ʃ'), ('ʇ', 'ʌ'), ('ʒ', 'ʒ'),
+  ('ʝ', 'ʞ'), ('ͅ', 'ͅ'), ('ͱ', 'ͱ'), ('ͳ', 'ͳ'), ('ͷ', 'ͷ'),
+  ('ͻ', 'ͽ'), ('ΐ', 'ΐ'), ('ά', 'ώ'), ('ϐ', 'ϑ'), ('ϕ', 'ϗ'),
+  ('ϙ', 'ϙ'), ('ϛ', 'ϛ'), ('ϝ', 'ϝ'), ('ϟ', 'ϟ'), ('ϡ', 'ϡ'),
+  ('ϣ', 'ϣ'), ('ϥ', 'ϥ'), ('ϧ', 'ϧ'), ('ϩ', 'ϩ'), ('ϫ', 'ϫ'),
+  ('ϭ', 'ϭ'), ('ϯ', 'ϳ'), ('ϵ', 'ϵ'), ('ϸ', 'ϸ'), ('ϻ', 'ϻ'),
+  ('а', 'џ'), ('ѡ', 'ѡ'), ('ѣ', 'ѣ'), ('ѥ', 'ѥ'), ('ѧ', 'ѧ'),
+  ('ѩ', 'ѩ'), ('ѫ', 'ѫ'), ('ѭ', 'ѭ'), ('ѯ', 'ѯ'), ('ѱ', 'ѱ'),
+  ('ѳ', 'ѳ'), ('ѵ', 'ѵ'), ('ѷ', 'ѷ'), ('ѹ', 'ѹ'), ('ѻ', 'ѻ'),
+  ('ѽ', 'ѽ'), ('ѿ', 'ѿ'), ('ҁ', 'ҁ'), ('ҋ', 'ҋ'), ('ҍ', 'ҍ'),
+  ('ҏ', 'ҏ'), ('ґ', 'ґ'), ('ғ', 'ғ'), ('ҕ', 'ҕ'), ('җ', 'җ'),
+  ('ҙ', 'ҙ'), ('қ', 'қ'), ('ҝ', 'ҝ'), ('ҟ', 'ҟ'), ('ҡ', 'ҡ'),
+  ('ң', 'ң'), ('ҥ', 'ҥ'), ('ҧ', 'ҧ'), ('ҩ', 'ҩ'), ('ҫ', 'ҫ'),
+  ('ҭ', 'ҭ'), ('ү', 'ү'), ('ұ', 'ұ'), ('ҳ', 'ҳ'), ('ҵ', 'ҵ'),
+  ('ҷ', 'ҷ'), ('ҹ', 'ҹ'), ('һ', 'һ'), ('ҽ', 'ҽ'), ('ҿ', 'ҿ'),
+  ('ӂ', 'ӂ'), ('ӄ', 'ӄ'), ('ӆ', 'ӆ'), ('ӈ', 'ӈ'), ('ӊ', 'ӊ'),
+  ('ӌ', 'ӌ'), ('ӎ', 'ӏ'), ('ӑ', 'ӑ'), ('ӓ', 'ӓ'), ('ӕ', 'ӕ'),
+  ('ӗ', 'ӗ'), ('ә', 'ә'), ('ӛ', 'ӛ'), ('ӝ', 'ӝ'), ('ӟ', 'ӟ'),
+  ('ӡ', 'ӡ'), ('ӣ', 'ӣ'), ('ӥ', 'ӥ'), ('ӧ', 'ӧ'), ('ө', 'ө'),
+  ('ӫ', 'ӫ'), ('ӭ', 'ӭ'), ('ӯ', 'ӯ'), ('ӱ', 'ӱ'), ('ӳ', 'ӳ'),
+  ('ӵ', 'ӵ'), ('ӷ', 'ӷ'), ('ӹ', 'ӹ'), ('ӻ', 'ӻ'), ('ӽ', 'ӽ'),
+  ('ӿ', 'ӿ'), ('ԁ', 'ԁ'), ('ԃ', 'ԃ'), ('ԅ', 'ԅ'), ('ԇ', 'ԇ'),
+  ('ԉ', 'ԉ'), ('ԋ', 'ԋ'), ('ԍ', 'ԍ'), ('ԏ', 'ԏ'), ('ԑ', 'ԑ'),
+  ('ԓ', 'ԓ'), ('ԕ', 'ԕ'), ('ԗ', 'ԗ'), ('ԙ', 'ԙ'), ('ԛ', 'ԛ'),
+  ('ԝ', 'ԝ'), ('ԟ', 'ԟ'), ('ԡ', 'ԡ'), ('ԣ', 'ԣ'), ('ԥ', 'ԥ'),
+  ('ԧ', 'ԧ'), ('ԩ', 'ԩ'), ('ԫ', 'ԫ'), ('ԭ', 'ԭ'), ('ԯ', 'ԯ'),
+  ('ա', 'և'), ('ᏸ', 'ᏽ'), ('ᲀ', 'ᲈ'), ('ᵹ', 'ᵹ'),
+  ('ᵽ', 'ᵽ'), ('ḁ', 'ḁ'), ('ḃ', 'ḃ'), ('ḅ', 'ḅ'),
+  ('ḇ', 'ḇ'), ('ḉ', 'ḉ'), ('ḋ', 'ḋ'), ('ḍ', 'ḍ'),
+  ('ḏ', 'ḏ'), ('ḑ', 'ḑ'), ('ḓ', 'ḓ'), ('ḕ', 'ḕ'),
+  ('ḗ', 'ḗ'), ('ḙ', 'ḙ'), ('ḛ', 'ḛ'), ('ḝ', 'ḝ'),
+  ('ḟ', 'ḟ'), ('ḡ', 'ḡ'), ('ḣ', 'ḣ'), ('ḥ', 'ḥ'),
+  ('ḧ', 'ḧ'), ('ḩ', 'ḩ'), ('ḫ', 'ḫ'), ('ḭ', 'ḭ'),
+  ('ḯ', 'ḯ'), ('ḱ', 'ḱ'), ('ḳ', 'ḳ'), ('ḵ', 'ḵ'),
+  ('ḷ', 'ḷ'), ('ḹ', 'ḹ'), ('ḻ', 'ḻ'), ('ḽ', 'ḽ'),
+  ('ḿ', 'ḿ'), ('ṁ', 'ṁ'), ('ṃ', 'ṃ'), ('ṅ', 'ṅ'),
+  ('ṇ', 'ṇ'), ('ṉ', 'ṉ'), ('ṋ', 'ṋ'), ('ṍ', 'ṍ'),
+  ('ṏ', 'ṏ'), ('ṑ', 'ṑ'), ('ṓ', 'ṓ'), ('ṕ', 'ṕ'),
+  ('ṗ', 'ṗ'), ('ṙ', 'ṙ'), ('ṛ', 'ṛ'), ('ṝ', 'ṝ'),
+  ('ṟ', 'ṟ'), ('ṡ', 'ṡ'), ('ṣ', 'ṣ'), ('ṥ', 'ṥ'),
+  ('ṧ', 'ṧ'), ('ṩ', 'ṩ'), ('ṫ', 'ṫ'), ('ṭ', 'ṭ'),
+  ('ṯ', 'ṯ'), ('ṱ', 'ṱ'), ('ṳ', 'ṳ'), ('ṵ', 'ṵ'),
+  ('ṷ', 'ṷ'), ('ṹ', 'ṹ'), ('ṻ', 'ṻ'), ('ṽ', 'ṽ'),
+  ('ṿ', 'ṿ'), ('ẁ', 'ẁ'), ('ẃ', 'ẃ'), ('ẅ', 'ẅ'),
+  ('ẇ', 'ẇ'), ('ẉ', 'ẉ'), ('ẋ', 'ẋ'), ('ẍ', 'ẍ'),
+  ('ẏ', 'ẏ'), ('ẑ', 'ẑ'), ('ẓ', 'ẓ'), ('ẕ', 'ẛ'),
+  ('ạ', 'ạ'), ('ả', 'ả'), ('ấ', 'ấ'), ('ầ', 'ầ'),
+  ('ẩ', 'ẩ'), ('ẫ', 'ẫ'), ('ậ', 'ậ'), ('ắ', 'ắ'),
+  ('ằ', 'ằ'), ('ẳ', 'ẳ'), ('ẵ', 'ẵ'), ('ặ', 'ặ'),
+  ('ẹ', 'ẹ'), ('ẻ', 'ẻ'), ('ẽ', 'ẽ'), ('ế', 'ế'),
+  ('ề', 'ề'), ('ể', 'ể'), ('ễ', 'ễ'), ('ệ', 'ệ'),
+  ('ỉ', 'ỉ'), ('ị', 'ị'), ('ọ', 'ọ'), ('ỏ', 'ỏ'),
+  ('ố', 'ố'), ('ồ', 'ồ'), ('ổ', 'ổ'), ('ỗ', 'ỗ'),
+  ('ộ', 'ộ'), ('ớ', 'ớ'), ('ờ', 'ờ'), ('ở', 'ở'),
+  ('ỡ', 'ỡ'), ('ợ', 'ợ'), ('ụ', 'ụ'), ('ủ', 'ủ'),
+  ('ứ', 'ứ'), ('ừ', 'ừ'), ('ử', 'ử'), ('ữ', 'ữ'),
+  ('ự', 'ự'), ('ỳ', 'ỳ'), ('ỵ', 'ỵ'), ('ỷ', 'ỷ'),
+  ('ỹ', 'ỹ'), ('ỻ', 'ỻ'), ('ỽ', 'ỽ'), ('ỿ', 'ἇ'),
+  ('ἐ', 'ἕ'), ('ἠ', 'ἧ'), ('ἰ', 'ἷ'), ('ὀ', 'ὅ'),
+  ('ὐ', 'ὗ'), ('ὠ', 'ὧ'), ('ὰ', 'ώ'), ('ᾀ', 'ᾇ'),
+  ('ᾐ', 'ᾗ'), ('ᾠ', 'ᾧ'), ('ᾰ', 'ᾴ'), ('ᾶ', 'ᾷ'),
+  ('ι', 'ι'), ('ῂ', 'ῄ'), ('ῆ', 'ῇ'), ('ῐ', 'ΐ'),
+  ('ῖ', 'ῗ'), ('ῠ', 'ῧ'), ('ῲ', 'ῴ'), ('ῶ', 'ῷ'),
+  ('ⅎ', 'ⅎ'), ('ⅰ', 'ⅿ'), ('ↄ', 'ↄ'), ('ⓐ', 'ⓩ'),
+  ('ⰰ', 'ⱞ'), ('ⱡ', 'ⱡ'), ('ⱥ', 'ⱦ'), ('ⱨ', 'ⱨ'),
+  ('ⱪ', 'ⱪ'), ('ⱬ', 'ⱬ'), ('ⱳ', 'ⱳ'), ('ⱶ', 'ⱶ'),
+  ('ⲁ', 'ⲁ'), ('ⲃ', 'ⲃ'), ('ⲅ', 'ⲅ'), ('ⲇ', 'ⲇ'),
+  ('ⲉ', 'ⲉ'), ('ⲋ', 'ⲋ'), ('ⲍ', 'ⲍ'), ('ⲏ', 'ⲏ'),
+  ('ⲑ', 'ⲑ'), ('ⲓ', 'ⲓ'), ('ⲕ', 'ⲕ'), ('ⲗ', 'ⲗ'),
+  ('ⲙ', 'ⲙ'), ('ⲛ', 'ⲛ'), ('ⲝ', 'ⲝ'), ('ⲟ', 'ⲟ'),
+  ('ⲡ', 'ⲡ'), ('ⲣ', 'ⲣ'), ('ⲥ', 'ⲥ'), ('ⲧ', 'ⲧ'),
+  ('ⲩ', 'ⲩ'), ('ⲫ', 'ⲫ'), ('ⲭ', 'ⲭ'), ('ⲯ', 'ⲯ'),
+  ('ⲱ', 'ⲱ'), ('ⲳ', 'ⲳ'), ('ⲵ', 'ⲵ'), ('ⲷ', 'ⲷ'),
+  ('ⲹ', 'ⲹ'), ('ⲻ', 'ⲻ'), ('ⲽ', 'ⲽ'), ('ⲿ', 'ⲿ'),
+  ('ⳁ', 'ⳁ'), ('ⳃ', 'ⳃ'), ('ⳅ', 'ⳅ'), ('ⳇ', 'ⳇ'),
+  ('ⳉ', 'ⳉ'), ('ⳋ', 'ⳋ'), ('ⳍ', 'ⳍ'), ('ⳏ', 'ⳏ'),
+  ('ⳑ', 'ⳑ'), ('ⳓ', 'ⳓ'), ('ⳕ', 'ⳕ'), ('ⳗ', 'ⳗ'),
+  ('ⳙ', 'ⳙ'), ('ⳛ', 'ⳛ'), ('ⳝ', 'ⳝ'), ('ⳟ', 'ⳟ'),
+  ('ⳡ', 'ⳡ'), ('ⳣ', 'ⳣ'), ('ⳬ', 'ⳬ'), ('ⳮ', 'ⳮ'),
+  ('ⳳ', 'ⳳ'), ('ⴀ', 'ⴥ'), ('ⴧ', 'ⴧ'), ('ⴭ', 'ⴭ'),
+  ('ꙁ', 'ꙁ'), ('ꙃ', 'ꙃ'), ('ꙅ', 'ꙅ'), ('ꙇ', 'ꙇ'),
+  ('ꙉ', 'ꙉ'), ('ꙋ', 'ꙋ'), ('ꙍ', 'ꙍ'), ('ꙏ', 'ꙏ'),
+  ('ꙑ', 'ꙑ'), ('ꙓ', 'ꙓ'), ('ꙕ', 'ꙕ'), ('ꙗ', 'ꙗ'),
+  ('ꙙ', 'ꙙ'), ('ꙛ', 'ꙛ'), ('ꙝ', 'ꙝ'), ('ꙟ', 'ꙟ'),
+  ('ꙡ', 'ꙡ'), ('ꙣ', 'ꙣ'), ('ꙥ', 'ꙥ'), ('ꙧ', 'ꙧ'),
+  ('ꙩ', 'ꙩ'), ('ꙫ', 'ꙫ'), ('ꙭ', 'ꙭ'), ('ꚁ', 'ꚁ'),
+  ('ꚃ', 'ꚃ'), ('ꚅ', 'ꚅ'), ('ꚇ', 'ꚇ'), ('ꚉ', 'ꚉ'),
+  ('ꚋ', 'ꚋ'), ('ꚍ', 'ꚍ'), ('ꚏ', 'ꚏ'), ('ꚑ', 'ꚑ'),
+  ('ꚓ', 'ꚓ'), ('ꚕ', 'ꚕ'), ('ꚗ', 'ꚗ'), ('ꚙ', 'ꚙ'),
+  ('ꚛ', 'ꚛ'), ('ꜣ', 'ꜣ'), ('ꜥ', 'ꜥ'), ('ꜧ', 'ꜧ'),
+  ('ꜩ', 'ꜩ'), ('ꜫ', 'ꜫ'), ('ꜭ', 'ꜭ'), ('ꜯ', 'ꜯ'),
+  ('ꜳ', 'ꜳ'), ('ꜵ', 'ꜵ'), ('ꜷ', 'ꜷ'), ('ꜹ', 'ꜹ'),
+  ('ꜻ', 'ꜻ'), ('ꜽ', 'ꜽ'), ('ꜿ', 'ꜿ'), ('ꝁ', 'ꝁ'),
+  ('ꝃ', 'ꝃ'), ('ꝅ', 'ꝅ'), ('ꝇ', 'ꝇ'), ('ꝉ', 'ꝉ'),
+  ('ꝋ', 'ꝋ'), ('ꝍ', 'ꝍ'), ('ꝏ', 'ꝏ'), ('ꝑ', 'ꝑ'),
+  ('ꝓ', 'ꝓ'), ('ꝕ', 'ꝕ'), ('ꝗ', 'ꝗ'), ('ꝙ', 'ꝙ'),
+  ('ꝛ', 'ꝛ'), ('ꝝ', 'ꝝ'), ('ꝟ', 'ꝟ'), ('ꝡ', 'ꝡ'),
+  ('ꝣ', 'ꝣ'), ('ꝥ', 'ꝥ'), ('ꝧ', 'ꝧ'), ('ꝩ', 'ꝩ'),
+  ('ꝫ', 'ꝫ'), ('ꝭ', 'ꝭ'), ('ꝯ', 'ꝯ'), ('ꝺ', 'ꝺ'),
+  ('ꝼ', 'ꝼ'), ('ꝿ', 'ꝿ'), ('ꞁ', 'ꞁ'), ('ꞃ', 'ꞃ'),
+  ('ꞅ', 'ꞅ'), ('ꞇ', 'ꞇ'), ('ꞌ', 'ꞌ'), ('ꞑ', 'ꞑ'),
+  ('ꞓ', 'ꞓ'), ('ꞗ', 'ꞗ'), ('ꞙ', 'ꞙ'), ('ꞛ', 'ꞛ'),
+  ('ꞝ', 'ꞝ'), ('ꞟ', 'ꞟ'), ('ꞡ', 'ꞡ'), ('ꞣ', 'ꞣ'),
+  ('ꞥ', 'ꞥ'), ('ꞧ', 'ꞧ'), ('ꞩ', 'ꞩ'), ('ꞵ', 'ꞵ'),
+  ('ꞷ', 'ꞷ'), ('ꭓ', 'ꭓ'), ('ꭰ', 'ꮿ'), ('ff', 'st'),
+  ('ﬓ', 'ﬗ'), ('a', 'z'), ('𐐨', '𐑏'), ('𐓘', '𐓻'),
+  ('𐳀', '𐳲'), ('𑣀', '𑣟'), ('𞤢', '𞥃'),
+];
+
+pub const CHANGES_WHEN_UPPERCASED: &'static [(char, char)] = &[
+  ('a', 'z'), ('µ', 'µ'), ('ß', 'ö'), ('ø', 'ÿ'), ('ā', 'ā'),
+  ('ă', 'ă'), ('ą', 'ą'), ('ć', 'ć'), ('ĉ', 'ĉ'), ('ċ', 'ċ'),
+  ('č', 'č'), ('ď', 'ď'), ('đ', 'đ'), ('ē', 'ē'), ('ĕ', 'ĕ'),
+  ('ė', 'ė'), ('ę', 'ę'), ('ě', 'ě'), ('ĝ', 'ĝ'), ('ğ', 'ğ'),
+  ('ġ', 'ġ'), ('ģ', 'ģ'), ('ĥ', 'ĥ'), ('ħ', 'ħ'), ('ĩ', 'ĩ'),
+  ('ī', 'ī'), ('ĭ', 'ĭ'), ('į', 'į'), ('ı', 'ı'), ('ij', 'ij'),
+  ('ĵ', 'ĵ'), ('ķ', 'ķ'), ('ĺ', 'ĺ'), ('ļ', 'ļ'), ('ľ', 'ľ'),
+  ('ŀ', 'ŀ'), ('ł', 'ł'), ('ń', 'ń'), ('ņ', 'ņ'), ('ň', 'ʼn'),
+  ('ŋ', 'ŋ'), ('ō', 'ō'), ('ŏ', 'ŏ'), ('ő', 'ő'), ('œ', 'œ'),
+  ('ŕ', 'ŕ'), ('ŗ', 'ŗ'), ('ř', 'ř'), ('ś', 'ś'), ('ŝ', 'ŝ'),
+  ('ş', 'ş'), ('š', 'š'), ('ţ', 'ţ'), ('ť', 'ť'), ('ŧ', 'ŧ'),
+  ('ũ', 'ũ'), ('ū', 'ū'), ('ŭ', 'ŭ'), ('ů', 'ů'), ('ű', 'ű'),
+  ('ų', 'ų'), ('ŵ', 'ŵ'), ('ŷ', 'ŷ'), ('ź', 'ź'), ('ż', 'ż'),
+  ('ž', 'ƀ'), ('ƃ', 'ƃ'), ('ƅ', 'ƅ'), ('ƈ', 'ƈ'), ('ƌ', 'ƌ'),
+  ('ƒ', 'ƒ'), ('ƕ', 'ƕ'), ('ƙ', 'ƚ'), ('ƞ', 'ƞ'), ('ơ', 'ơ'),
+  ('ƣ', 'ƣ'), ('ƥ', 'ƥ'), ('ƨ', 'ƨ'), ('ƭ', 'ƭ'), ('ư', 'ư'),
+  ('ƴ', 'ƴ'), ('ƶ', 'ƶ'), ('ƹ', 'ƹ'), ('ƽ', 'ƽ'), ('ƿ', 'ƿ'),
+  ('Dž', 'dž'), ('Lj', 'lj'), ('Nj', 'nj'), ('ǎ', 'ǎ'), ('ǐ', 'ǐ'),
+  ('ǒ', 'ǒ'), ('ǔ', 'ǔ'), ('ǖ', 'ǖ'), ('ǘ', 'ǘ'), ('ǚ', 'ǚ'),
+  ('ǜ', 'ǝ'), ('ǟ', 'ǟ'), ('ǡ', 'ǡ'), ('ǣ', 'ǣ'), ('ǥ', 'ǥ'),
+  ('ǧ', 'ǧ'), ('ǩ', 'ǩ'), ('ǫ', 'ǫ'), ('ǭ', 'ǭ'), ('ǯ', 'ǰ'),
+  ('Dz', 'dz'), ('ǵ', 'ǵ'), ('ǹ', 'ǹ'), ('ǻ', 'ǻ'), ('ǽ', 'ǽ'),
+  ('ǿ', 'ǿ'), ('ȁ', 'ȁ'), ('ȃ', 'ȃ'), ('ȅ', 'ȅ'), ('ȇ', 'ȇ'),
+  ('ȉ', 'ȉ'), ('ȋ', 'ȋ'), ('ȍ', 'ȍ'), ('ȏ', 'ȏ'), ('ȑ', 'ȑ'),
+  ('ȓ', 'ȓ'), ('ȕ', 'ȕ'), ('ȗ', 'ȗ'), ('ș', 'ș'), ('ț', 'ț'),
+  ('ȝ', 'ȝ'), ('ȟ', 'ȟ'), ('ȣ', 'ȣ'), ('ȥ', 'ȥ'), ('ȧ', 'ȧ'),
+  ('ȩ', 'ȩ'), ('ȫ', 'ȫ'), ('ȭ', 'ȭ'), ('ȯ', 'ȯ'), ('ȱ', 'ȱ'),
+  ('ȳ', 'ȳ'), ('ȼ', 'ȼ'), ('ȿ', 'ɀ'), ('ɂ', 'ɂ'), ('ɇ', 'ɇ'),
+  ('ɉ', 'ɉ'), ('ɋ', 'ɋ'), ('ɍ', 'ɍ'), ('ɏ', 'ɔ'), ('ɖ', 'ɗ'),
+  ('ə', 'ə'), ('ɛ', 'ɜ'), ('ɠ', 'ɡ'), ('ɣ', 'ɣ'), ('ɥ', 'ɦ'),
+  ('ɨ', 'ɬ'), ('ɯ', 'ɯ'), ('ɱ', 'ɲ'), ('ɵ', 'ɵ'), ('ɽ', 'ɽ'),
+  ('ʀ', 'ʀ'), ('ʃ', 'ʃ'), ('ʇ', 'ʌ'), ('ʒ', 'ʒ'), ('ʝ', 'ʞ'),
+  ('ͅ', 'ͅ'), ('ͱ', 'ͱ'), ('ͳ', 'ͳ'), ('ͷ', 'ͷ'), ('ͻ', 'ͽ'),
+  ('ΐ', 'ΐ'), ('ά', 'ώ'), ('ϐ', 'ϑ'), ('ϕ', 'ϗ'), ('ϙ', 'ϙ'),
+  ('ϛ', 'ϛ'), ('ϝ', 'ϝ'), ('ϟ', 'ϟ'), ('ϡ', 'ϡ'), ('ϣ', 'ϣ'),
+  ('ϥ', 'ϥ'), ('ϧ', 'ϧ'), ('ϩ', 'ϩ'), ('ϫ', 'ϫ'), ('ϭ', 'ϭ'),
+  ('ϯ', 'ϳ'), ('ϵ', 'ϵ'), ('ϸ', 'ϸ'), ('ϻ', 'ϻ'), ('а', 'џ'),
+  ('ѡ', 'ѡ'), ('ѣ', 'ѣ'), ('ѥ', 'ѥ'), ('ѧ', 'ѧ'), ('ѩ', 'ѩ'),
+  ('ѫ', 'ѫ'), ('ѭ', 'ѭ'), ('ѯ', 'ѯ'), ('ѱ', 'ѱ'), ('ѳ', 'ѳ'),
+  ('ѵ', 'ѵ'), ('ѷ', 'ѷ'), ('ѹ', 'ѹ'), ('ѻ', 'ѻ'), ('ѽ', 'ѽ'),
+  ('ѿ', 'ѿ'), ('ҁ', 'ҁ'), ('ҋ', 'ҋ'), ('ҍ', 'ҍ'), ('ҏ', 'ҏ'),
+  ('ґ', 'ґ'), ('ғ', 'ғ'), ('ҕ', 'ҕ'), ('җ', 'җ'), ('ҙ', 'ҙ'),
+  ('қ', 'қ'), ('ҝ', 'ҝ'), ('ҟ', 'ҟ'), ('ҡ', 'ҡ'), ('ң', 'ң'),
+  ('ҥ', 'ҥ'), ('ҧ', 'ҧ'), ('ҩ', 'ҩ'), ('ҫ', 'ҫ'), ('ҭ', 'ҭ'),
+  ('ү', 'ү'), ('ұ', 'ұ'), ('ҳ', 'ҳ'), ('ҵ', 'ҵ'), ('ҷ', 'ҷ'),
+  ('ҹ', 'ҹ'), ('һ', 'һ'), ('ҽ', 'ҽ'), ('ҿ', 'ҿ'), ('ӂ', 'ӂ'),
+  ('ӄ', 'ӄ'), ('ӆ', 'ӆ'), ('ӈ', 'ӈ'), ('ӊ', 'ӊ'), ('ӌ', 'ӌ'),
+  ('ӎ', 'ӏ'), ('ӑ', 'ӑ'), ('ӓ', 'ӓ'), ('ӕ', 'ӕ'), ('ӗ', 'ӗ'),
+  ('ә', 'ә'), ('ӛ', 'ӛ'), ('ӝ', 'ӝ'), ('ӟ', 'ӟ'), ('ӡ', 'ӡ'),
+  ('ӣ', 'ӣ'), ('ӥ', 'ӥ'), ('ӧ', 'ӧ'), ('ө', 'ө'), ('ӫ', 'ӫ'),
+  ('ӭ', 'ӭ'), ('ӯ', 'ӯ'), ('ӱ', 'ӱ'), ('ӳ', 'ӳ'), ('ӵ', 'ӵ'),
+  ('ӷ', 'ӷ'), ('ӹ', 'ӹ'), ('ӻ', 'ӻ'), ('ӽ', 'ӽ'), ('ӿ', 'ӿ'),
+  ('ԁ', 'ԁ'), ('ԃ', 'ԃ'), ('ԅ', 'ԅ'), ('ԇ', 'ԇ'), ('ԉ', 'ԉ'),
+  ('ԋ', 'ԋ'), ('ԍ', 'ԍ'), ('ԏ', 'ԏ'), ('ԑ', 'ԑ'), ('ԓ', 'ԓ'),
+  ('ԕ', 'ԕ'), ('ԗ', 'ԗ'), ('ԙ', 'ԙ'), ('ԛ', 'ԛ'), ('ԝ', 'ԝ'),
+  ('ԟ', 'ԟ'), ('ԡ', 'ԡ'), ('ԣ', 'ԣ'), ('ԥ', 'ԥ'), ('ԧ', 'ԧ'),
+  ('ԩ', 'ԩ'), ('ԫ', 'ԫ'), ('ԭ', 'ԭ'), ('ԯ', 'ԯ'), ('ա', 'և'),
+  ('ᏸ', 'ᏽ'), ('ᲀ', 'ᲈ'), ('ᵹ', 'ᵹ'), ('ᵽ', 'ᵽ'),
+  ('ḁ', 'ḁ'), ('ḃ', 'ḃ'), ('ḅ', 'ḅ'), ('ḇ', 'ḇ'),
+  ('ḉ', 'ḉ'), ('ḋ', 'ḋ'), ('ḍ', 'ḍ'), ('ḏ', 'ḏ'),
+  ('ḑ', 'ḑ'), ('ḓ', 'ḓ'), ('ḕ', 'ḕ'), ('ḗ', 'ḗ'),
+  ('ḙ', 'ḙ'), ('ḛ', 'ḛ'), ('ḝ', 'ḝ'), ('ḟ', 'ḟ'),
+  ('ḡ', 'ḡ'), ('ḣ', 'ḣ'), ('ḥ', 'ḥ'), ('ḧ', 'ḧ'),
+  ('ḩ', 'ḩ'), ('ḫ', 'ḫ'), ('ḭ', 'ḭ'), ('ḯ', 'ḯ'),
+  ('ḱ', 'ḱ'), ('ḳ', 'ḳ'), ('ḵ', 'ḵ'), ('ḷ', 'ḷ'),
+  ('ḹ', 'ḹ'), ('ḻ', 'ḻ'), ('ḽ', 'ḽ'), ('ḿ', 'ḿ'),
+  ('ṁ', 'ṁ'), ('ṃ', 'ṃ'), ('ṅ', 'ṅ'), ('ṇ', 'ṇ'),
+  ('ṉ', 'ṉ'), ('ṋ', 'ṋ'), ('ṍ', 'ṍ'), ('ṏ', 'ṏ'),
+  ('ṑ', 'ṑ'), ('ṓ', 'ṓ'), ('ṕ', 'ṕ'), ('ṗ', 'ṗ'),
+  ('ṙ', 'ṙ'), ('ṛ', 'ṛ'), ('ṝ', 'ṝ'), ('ṟ', 'ṟ'),
+  ('ṡ', 'ṡ'), ('ṣ', 'ṣ'), ('ṥ', 'ṥ'), ('ṧ', 'ṧ'),
+  ('ṩ', 'ṩ'), ('ṫ', 'ṫ'), ('ṭ', 'ṭ'), ('ṯ', 'ṯ'),
+  ('ṱ', 'ṱ'), ('ṳ', 'ṳ'), ('ṵ', 'ṵ'), ('ṷ', 'ṷ'),
+  ('ṹ', 'ṹ'), ('ṻ', 'ṻ'), ('ṽ', 'ṽ'), ('ṿ', 'ṿ'),
+  ('ẁ', 'ẁ'), ('ẃ', 'ẃ'), ('ẅ', 'ẅ'), ('ẇ', 'ẇ'),
+  ('ẉ', 'ẉ'), ('ẋ', 'ẋ'), ('ẍ', 'ẍ'), ('ẏ', 'ẏ'),
+  ('ẑ', 'ẑ'), ('ẓ', 'ẓ'), ('ẕ', 'ẛ'), ('ạ', 'ạ'),
+  ('ả', 'ả'), ('ấ', 'ấ'), ('ầ', 'ầ'), ('ẩ', 'ẩ'),
+  ('ẫ', 'ẫ'), ('ậ', 'ậ'), ('ắ', 'ắ'), ('ằ', 'ằ'),
+  ('ẳ', 'ẳ'), ('ẵ', 'ẵ'), ('ặ', 'ặ'), ('ẹ', 'ẹ'),
+  ('ẻ', 'ẻ'), ('ẽ', 'ẽ'), ('ế', 'ế'), ('ề', 'ề'),
+  ('ể', 'ể'), ('ễ', 'ễ'), ('ệ', 'ệ'), ('ỉ', 'ỉ'),
+  ('ị', 'ị'), ('ọ', 'ọ'), ('ỏ', 'ỏ'), ('ố', 'ố'),
+  ('ồ', 'ồ'), ('ổ', 'ổ'), ('ỗ', 'ỗ'), ('ộ', 'ộ'),
+  ('ớ', 'ớ'), ('ờ', 'ờ'), ('ở', 'ở'), ('ỡ', 'ỡ'),
+  ('ợ', 'ợ'), ('ụ', 'ụ'), ('ủ', 'ủ'), ('ứ', 'ứ'),
+  ('ừ', 'ừ'), ('ử', 'ử'), ('ữ', 'ữ'), ('ự', 'ự'),
+  ('ỳ', 'ỳ'), ('ỵ', 'ỵ'), ('ỷ', 'ỷ'), ('ỹ', 'ỹ'),
+  ('ỻ', 'ỻ'), ('ỽ', 'ỽ'), ('ỿ', 'ἇ'), ('ἐ', 'ἕ'),
+  ('ἠ', 'ἧ'), ('ἰ', 'ἷ'), ('ὀ', 'ὅ'), ('ὐ', 'ὗ'),
+  ('ὠ', 'ὧ'), ('ὰ', 'ώ'), ('ᾀ', 'ᾴ'), ('ᾶ', 'ᾷ'),
+  ('ᾼ', 'ᾼ'), ('ι', 'ι'), ('ῂ', 'ῄ'), ('ῆ', 'ῇ'),
+  ('ῌ', 'ῌ'), ('ῐ', 'ΐ'), ('ῖ', 'ῗ'), ('ῠ', 'ῧ'),
+  ('ῲ', 'ῴ'), ('ῶ', 'ῷ'), ('ῼ', 'ῼ'), ('ⅎ', 'ⅎ'),
+  ('ⅰ', 'ⅿ'), ('ↄ', 'ↄ'), ('ⓐ', 'ⓩ'), ('ⰰ', 'ⱞ'),
+  ('ⱡ', 'ⱡ'), ('ⱥ', 'ⱦ'), ('ⱨ', 'ⱨ'), ('ⱪ', 'ⱪ'),
+  ('ⱬ', 'ⱬ'), ('ⱳ', 'ⱳ'), ('ⱶ', 'ⱶ'), ('ⲁ', 'ⲁ'),
+  ('ⲃ', 'ⲃ'), ('ⲅ', 'ⲅ'), ('ⲇ', 'ⲇ'), ('ⲉ', 'ⲉ'),
+  ('ⲋ', 'ⲋ'), ('ⲍ', 'ⲍ'), ('ⲏ', 'ⲏ'), ('ⲑ', 'ⲑ'),
+  ('ⲓ', 'ⲓ'), ('ⲕ', 'ⲕ'), ('ⲗ', 'ⲗ'), ('ⲙ', 'ⲙ'),
+  ('ⲛ', 'ⲛ'), ('ⲝ', 'ⲝ'), ('ⲟ', 'ⲟ'), ('ⲡ', 'ⲡ'),
+  ('ⲣ', 'ⲣ'), ('ⲥ', 'ⲥ'), ('ⲧ', 'ⲧ'), ('ⲩ', 'ⲩ'),
+  ('ⲫ', 'ⲫ'), ('ⲭ', 'ⲭ'), ('ⲯ', 'ⲯ'), ('ⲱ', 'ⲱ'),
+  ('ⲳ', 'ⲳ'), ('ⲵ', 'ⲵ'), ('ⲷ', 'ⲷ'), ('ⲹ', 'ⲹ'),
+  ('ⲻ', 'ⲻ'), ('ⲽ', 'ⲽ'), ('ⲿ', 'ⲿ'), ('ⳁ', 'ⳁ'),
+  ('ⳃ', 'ⳃ'), ('ⳅ', 'ⳅ'), ('ⳇ', 'ⳇ'), ('ⳉ', 'ⳉ'),
+  ('ⳋ', 'ⳋ'), ('ⳍ', 'ⳍ'), ('ⳏ', 'ⳏ'), ('ⳑ', 'ⳑ'),
+  ('ⳓ', 'ⳓ'), ('ⳕ', 'ⳕ'), ('ⳗ', 'ⳗ'), ('ⳙ', 'ⳙ'),
+  ('ⳛ', 'ⳛ'), ('ⳝ', 'ⳝ'), ('ⳟ', 'ⳟ'), ('ⳡ', 'ⳡ'),
+  ('ⳣ', 'ⳣ'), ('ⳬ', 'ⳬ'), ('ⳮ', 'ⳮ'), ('ⳳ', 'ⳳ'),
+  ('ⴀ', 'ⴥ'), ('ⴧ', 'ⴧ'), ('ⴭ', 'ⴭ'), ('ꙁ', 'ꙁ'),
+  ('ꙃ', 'ꙃ'), ('ꙅ', 'ꙅ'), ('ꙇ', 'ꙇ'), ('ꙉ', 'ꙉ'),
+  ('ꙋ', 'ꙋ'), ('ꙍ', 'ꙍ'), ('ꙏ', 'ꙏ'), ('ꙑ', 'ꙑ'),
+  ('ꙓ', 'ꙓ'), ('ꙕ', 'ꙕ'), ('ꙗ', 'ꙗ'), ('ꙙ', 'ꙙ'),
+  ('ꙛ', 'ꙛ'), ('ꙝ', 'ꙝ'), ('ꙟ', 'ꙟ'), ('ꙡ', 'ꙡ'),
+  ('ꙣ', 'ꙣ'), ('ꙥ', 'ꙥ'), ('ꙧ', 'ꙧ'), ('ꙩ', 'ꙩ'),
+  ('ꙫ', 'ꙫ'), ('ꙭ', 'ꙭ'), ('ꚁ', 'ꚁ'), ('ꚃ', 'ꚃ'),
+  ('ꚅ', 'ꚅ'), ('ꚇ', 'ꚇ'), ('ꚉ', 'ꚉ'), ('ꚋ', 'ꚋ'),
+  ('ꚍ', 'ꚍ'), ('ꚏ', 'ꚏ'), ('ꚑ', 'ꚑ'), ('ꚓ', 'ꚓ'),
+  ('ꚕ', 'ꚕ'), ('ꚗ', 'ꚗ'), ('ꚙ', 'ꚙ'), ('ꚛ', 'ꚛ'),
+  ('ꜣ', 'ꜣ'), ('ꜥ', 'ꜥ'), ('ꜧ', 'ꜧ'), ('ꜩ', 'ꜩ'),
+  ('ꜫ', 'ꜫ'), ('ꜭ', 'ꜭ'), ('ꜯ', 'ꜯ'), ('ꜳ', 'ꜳ'),
+  ('ꜵ', 'ꜵ'), ('ꜷ', 'ꜷ'), ('ꜹ', 'ꜹ'), ('ꜻ', 'ꜻ'),
+  ('ꜽ', 'ꜽ'), ('ꜿ', 'ꜿ'), ('ꝁ', 'ꝁ'), ('ꝃ', 'ꝃ'),
+  ('ꝅ', 'ꝅ'), ('ꝇ', 'ꝇ'), ('ꝉ', 'ꝉ'), ('ꝋ', 'ꝋ'),
+  ('ꝍ', 'ꝍ'), ('ꝏ', 'ꝏ'), ('ꝑ', 'ꝑ'), ('ꝓ', 'ꝓ'),
+  ('ꝕ', 'ꝕ'), ('ꝗ', 'ꝗ'), ('ꝙ', 'ꝙ'), ('ꝛ', 'ꝛ'),
+  ('ꝝ', 'ꝝ'), ('ꝟ', 'ꝟ'), ('ꝡ', 'ꝡ'), ('ꝣ', 'ꝣ'),
+  ('ꝥ', 'ꝥ'), ('ꝧ', 'ꝧ'), ('ꝩ', 'ꝩ'), ('ꝫ', 'ꝫ'),
+  ('ꝭ', 'ꝭ'), ('ꝯ', 'ꝯ'), ('ꝺ', 'ꝺ'), ('ꝼ', 'ꝼ'),
+  ('ꝿ', 'ꝿ'), ('ꞁ', 'ꞁ'), ('ꞃ', 'ꞃ'), ('ꞅ', 'ꞅ'),
+  ('ꞇ', 'ꞇ'), ('ꞌ', 'ꞌ'), ('ꞑ', 'ꞑ'), ('ꞓ', 'ꞓ'),
+  ('ꞗ', 'ꞗ'), ('ꞙ', 'ꞙ'), ('ꞛ', 'ꞛ'), ('ꞝ', 'ꞝ'),
+  ('ꞟ', 'ꞟ'), ('ꞡ', 'ꞡ'), ('ꞣ', 'ꞣ'), ('ꞥ', 'ꞥ'),
+  ('ꞧ', 'ꞧ'), ('ꞩ', 'ꞩ'), ('ꞵ', 'ꞵ'), ('ꞷ', 'ꞷ'),
+  ('ꭓ', 'ꭓ'), ('ꭰ', 'ꮿ'), ('ff', 'st'), ('ﬓ', 'ﬗ'),
+  ('a', 'z'), ('𐐨', '𐑏'), ('𐓘', '𐓻'), ('𐳀', '𐳲'),
+  ('𑣀', '𑣟'), ('𞤢', '𞥃'),
+];
+
+pub const DASH: &'static [(char, char)] = &[
+  ('-', '-'), ('֊', '֊'), ('־', '־'), ('᐀', '᐀'), ('᠆', '᠆'),
+  ('‐', '―'), ('⁓', '⁓'), ('⁻', '⁻'), ('₋', '₋'),
+  ('−', '−'), ('⸗', '⸗'), ('⸚', '⸚'), ('⸺', '⸻'),
+  ('⹀', '⹀'), ('〜', '〜'), ('〰', '〰'), ('゠', '゠'),
+  ('︱', '︲'), ('﹘', '﹘'), ('﹣', '﹣'), ('-', '-'),
+];
+
+pub const DEFAULT_IGNORABLE_CODE_POINT: &'static [(char, char)] = &[
+  ('\u{ad}', '\u{ad}'), ('͏', '͏'), ('\u{61c}', '\u{61c}'), ('ᅟ', 'ᅠ'),
+  ('឴', '឵'), ('᠋', '\u{180e}'), ('\u{200b}', '\u{200f}'),
+  ('\u{202a}', '\u{202e}'), ('\u{2060}', '\u{206f}'), ('ㅤ', 'ㅤ'),
+  ('︀', '️'), ('\u{feff}', '\u{feff}'), ('ᅠ', 'ᅠ'),
+  ('\u{fff0}', '\u{fff8}'), ('\u{1bca0}', '\u{1bca3}'),
+  ('\u{1d173}', '\u{1d17a}'), ('\u{e0000}', '\u{e0fff}'),
+];
+
+pub const DEPRECATED: &'static [(char, char)] = &[
+  ('ʼn', 'ʼn'), ('ٳ', 'ٳ'), ('ཷ', 'ཷ'), ('ཹ', 'ཹ'), ('ឣ', 'ឤ'),
+  ('\u{206a}', '\u{206f}'), ('〈', '〉'), ('\u{e0001}', '\u{e0001}'),
+];
+
+pub const DIACRITIC: &'static [(char, char)] = &[
+  ('^', '^'), ('`', '`'), ('¨', '¨'), ('¯', '¯'), ('´', '´'),
+  ('·', '¸'), ('ʰ', '͎'), ('͐', '͗'), ('͝', '͢'), ('ʹ', '͵'),
+  ('ͺ', 'ͺ'), ('΄', '΅'), ('҃', '҇'), ('ՙ', 'ՙ'), ('֑', '֡'),
+  ('֣', 'ֽ'), ('ֿ', 'ֿ'), ('ׁ', 'ׂ'), ('ׄ', 'ׄ'), ('ً', 'ْ'),
+  ('ٗ', '٘'), ('۟', '۠'), ('ۥ', 'ۦ'), ('۪', '۬'), ('ܰ', '݊'),
+  ('ަ', 'ް'), ('߫', 'ߵ'), ('࠘', '࠙'), ('ࣣ', 'ࣾ'), ('़', '़'),
+  ('्', '्'), ('॑', '॔'), ('ॱ', 'ॱ'), ('়', '়'),
+  ('্', '্'), ('਼', '਼'), ('੍', '੍'), ('઼', '઼'),
+  ('્', '્'), ('૽', '૿'), ('଼', '଼'), ('୍', '୍'),
+  ('்', '்'), ('్', '్'), ('಼', '಼'), ('್', '್'),
+  ('഻', '഼'), ('്', '്'), ('්', '්'), ('็', '์'),
+  ('๎', '๎'), ('່', '໌'), ('༘', '༙'), ('༵', '༵'),
+  ('༷', '༷'), ('༹', '༹'), ('༾', '༿'), ('ྂ', '྄'),
+  ('྆', '྇'), ('࿆', '࿆'), ('့', '့'), ('္', '်'),
+  ('ႇ', 'ႍ'), ('ႏ', 'ႏ'), ('ႚ', 'ႛ'), ('៉', '៓'),
+  ('៝', '៝'), ('᤹', '᤻'), ('᩵', '᩼'), ('᩿', '᩿'),
+  ('᪰', '᪽'), ('᬴', '᬴'), ('᭄', '᭄'), ('᭫', '᭳'),
+  ('᮪', '᮫'), ('ᰶ', '᰷'), ('ᱸ', 'ᱽ'), ('᳐', '᳨'),
+  ('᳭', '᳭'), ('᳴', '᳴'), ('᳷', '᳹'), ('ᴬ', 'ᵪ'),
+  ('᷄', '᷏'), ('᷵', '᷹'), ('᷽', '᷿'), ('᾽', '᾽'),
+  ('᾿', '῁'), ('῍', '῏'), ('῝', '῟'), ('῭', '`'),
+  ('´', '῾'), ('⳯', '⳱'), ('ⸯ', 'ⸯ'), ('〪', '〯'),
+  ('゙', '゜'), ('ー', 'ー'), ('꙯', '꙯'), ('꙼', '꙽'),
+  ('ꙿ', 'ꙿ'), ('ꚜ', 'ꚝ'), ('꛰', '꛱'), ('ꜗ', '꜡'),
+  ('ꞈ', 'ꞈ'), ('ꟸ', 'ꟹ'), ('꣄', '꣄'), ('꣠', '꣱'),
+  ('꤫', '꤮'), ('꥓', '꥓'), ('꦳', '꦳'), ('꧀', '꧀'),
+  ('ꧥ', 'ꧥ'), ('ꩻ', 'ꩽ'), ('꪿', 'ꫂ'), ('꫶', '꫶'),
+  ('꭛', 'ꭟ'), ('꯬', '꯭'), ('ﬞ', 'ﬞ'), ('︠', '︯'),
+  ('^', '^'), ('`', '`'), ('ー', 'ー'), ('゙', '゚'),
+  (' ̄', ' ̄'), ('𐋠', '𐋠'), ('𐫥', '𐫦'), ('𑂹', '𑂺'),
+  ('𑄳', '𑄴'), ('𑅳', '𑅳'), ('𑇀', '𑇀'), ('𑇊', '𑇌'),
+  ('𑈵', '𑈶'), ('𑋩', '𑋪'), ('𑌼', '𑌼'), ('𑍍', '𑍍'),
+  ('𑍦', '𑍬'), ('𑍰', '𑍴'), ('𑑂', '𑑂'), ('𑑆', '𑑆'),
+  ('𑓂', '𑓃'), ('𑖿', '𑗀'), ('𑘿', '𑘿'), ('𑚶', '𑚷'),
+  ('𑜫', '𑜫'), ('𑨴', '𑨴'), ('𑩇', '𑩇'), ('𑪙', '𑪙'),
+  ('𑰿', '𑰿'), ('𑵂', '𑵂'), ('𑵄', '𑵅'), ('𖫰', '𖫴'),
+  ('𖾏', '𖾟'), ('𝅧', '𝅩'), ('𝅭', '𝅲'), ('𝅻', '𝆂'),
+  ('𝆅', '𝆋'), ('𝆪', '𝆭'), ('𞣐', '𞣖'), ('𞥄', '𞥆'),
+  ('𞥈', '𞥊'),
+];
+
+pub const EXTENDER: &'static [(char, char)] = &[
+  ('·', '·'), ('ː', 'ˑ'), ('ـ', 'ـ'), ('ߺ', 'ߺ'), ('ๆ', 'ๆ'),
+  ('ໆ', 'ໆ'), ('᠊', '᠊'), ('ᡃ', 'ᡃ'), ('ᪧ', 'ᪧ'),
+  ('ᰶ', 'ᰶ'), ('ᱻ', 'ᱻ'), ('々', '々'), ('〱', '〵'),
+  ('ゝ', 'ゞ'), ('ー', 'ヾ'), ('ꀕ', 'ꀕ'), ('ꘌ', 'ꘌ'),
+  ('ꧏ', 'ꧏ'), ('ꧦ', 'ꧦ'), ('ꩰ', 'ꩰ'), ('ꫝ', 'ꫝ'),
+  ('ꫳ', 'ꫴ'), ('ー', 'ー'), ('𑍝', '𑍝'), ('𑗆', '𑗈'),
+  ('𑪘', '𑪘'), ('𖭂', '𖭃'), ('𖿠', '𖿡'), ('𞥄', '𞥆'),
+];
+
+pub const GRAPHEME_BASE: &'static [(char, char)] = &[
+  (' ', '~'), ('\u{a0}', '¬'), ('®', '˿'), ('Ͱ', 'ͷ'), ('ͺ', 'Ϳ'),
+  ('΄', 'Ί'), ('Ό', 'Ό'), ('Ύ', 'Ρ'), ('Σ', '҂'), ('Ҋ', 'ԯ'),
+  ('Ա', 'Ֆ'), ('ՙ', '՟'), ('ա', 'և'), ('։', '֊'), ('֍', '֏'),
+  ('־', '־'), ('׀', '׀'), ('׃', '׃'), ('׆', '׆'), ('א', 'ת'),
+  ('װ', '״'), ('؆', '؏'), ('؛', '؛'), ('؞', 'ي'), ('٠', 'ٯ'),
+  ('ٱ', 'ە'), ('۞', '۞'), ('ۥ', 'ۦ'), ('۩', '۩'), ('ۮ', '܍'),
+  ('ܐ', 'ܐ'), ('ܒ', 'ܯ'), ('ݍ', 'ޥ'), ('ޱ', 'ޱ'), ('߀', 'ߪ'),
+  ('ߴ', 'ߺ'), ('ࠀ', 'ࠕ'), ('ࠚ', 'ࠚ'), ('ࠤ', 'ࠤ'),
+  ('ࠨ', 'ࠨ'), ('࠰', '࠾'), ('ࡀ', 'ࡘ'), ('࡞', '࡞'),
+  ('ࡠ', 'ࡪ'), ('ࢠ', 'ࢴ'), ('ࢶ', 'ࢽ'), ('ः', 'ह'),
+  ('ऻ', 'ऻ'), ('ऽ', 'ी'), ('ॉ', 'ौ'), ('ॎ', 'ॐ'),
+  ('क़', 'ॡ'), ('।', 'ঀ'), ('ং', 'ঃ'), ('অ', 'ঌ'),
+  ('এ', 'ঐ'), ('ও', 'ন'), ('প', 'র'), ('ল', 'ল'),
+  ('শ', 'হ'), ('ঽ', 'ঽ'), ('ি', 'ী'), ('ে', 'ৈ'),
+  ('ো', 'ৌ'), ('ৎ', 'ৎ'), ('ড়', 'ঢ়'), ('য়', 'ৡ'),
+  ('০', '৽'), ('ਃ', 'ਃ'), ('ਅ', 'ਊ'), ('ਏ', 'ਐ'),
+  ('ਓ', 'ਨ'), ('ਪ', 'ਰ'), ('ਲ', 'ਲ਼'), ('ਵ', 'ਸ਼'),
+  ('ਸ', 'ਹ'), ('ਾ', 'ੀ'), ('ਖ਼', 'ੜ'), ('ਫ਼', 'ਫ਼'),
+  ('੦', '੯'), ('ੲ', 'ੴ'), ('ઃ', 'ઃ'), ('અ', 'ઍ'),
+  ('એ', 'ઑ'), ('ઓ', 'ન'), ('પ', 'ર'), ('લ', 'ળ'),
+  ('વ', 'હ'), ('ઽ', 'ી'), ('ૉ', 'ૉ'), ('ો', 'ૌ'),
+  ('ૐ', 'ૐ'), ('ૠ', 'ૡ'), ('૦', '૱'), ('ૹ', 'ૹ'),
+  ('ଂ', 'ଃ'), ('ଅ', 'ଌ'), ('ଏ', 'ଐ'), ('ଓ', 'ନ'),
+  ('ପ', 'ର'), ('ଲ', 'ଳ'), ('ଵ', 'ହ'), ('ଽ', 'ଽ'),
+  ('ୀ', 'ୀ'), ('େ', 'ୈ'), ('ୋ', 'ୌ'), ('ଡ଼', 'ଢ଼'),
+  ('ୟ', 'ୡ'), ('୦', '୷'), ('ஃ', 'ஃ'), ('அ', 'ஊ'),
+  ('எ', 'ஐ'), ('ஒ', 'க'), ('ங', 'ச'), ('ஜ', 'ஜ'),
+  ('ஞ', 'ட'), ('ண', 'த'), ('ந', 'ப'), ('ம', 'ஹ'),
+  ('ி', 'ி'), ('ு', 'ூ'), ('ெ', 'ை'), ('ொ', 'ௌ'),
+  ('ௐ', 'ௐ'), ('௦', '௺'), ('ఁ', 'ః'), ('అ', 'ఌ'),
+  ('ఎ', 'ఐ'), ('ఒ', 'న'), ('ప', 'హ'), ('ఽ', 'ఽ'),
+  ('ు', 'ౄ'), ('ౘ', 'ౚ'), ('ౠ', 'ౡ'), ('౦', '౯'),
+  ('౸', 'ಀ'), ('ಂ', 'ಃ'), ('ಅ', 'ಌ'), ('ಎ', 'ಐ'),
+  ('ಒ', 'ನ'), ('ಪ', 'ಳ'), ('ವ', 'ಹ'), ('ಽ', 'ಾ'),
+  ('ೀ', 'ು'), ('ೃ', 'ೄ'), ('ೇ', 'ೈ'), ('ೊ', 'ೋ'),
+  ('ೞ', 'ೞ'), ('ೠ', 'ೡ'), ('೦', '೯'), ('ೱ', 'ೲ'),
+  ('ം', 'ഃ'), ('അ', 'ഌ'), ('എ', 'ഐ'), ('ഒ', 'ഺ'),
+  ('ഽ', 'ഽ'), ('ി', 'ീ'), ('െ', 'ൈ'), ('ൊ', 'ൌ'),
+  ('ൎ', '൏'), ('ൔ', 'ൖ'), ('൘', 'ൡ'), ('൦', 'ൿ'),
+  ('ං', 'ඃ'), ('අ', 'ඖ'), ('ක', 'න'), ('ඳ', 'ර'),
+  ('ල', 'ල'), ('ව', 'ෆ'), ('ැ', 'ෑ'), ('ෘ', 'ෞ'),
+  ('෦', '෯'), ('ෲ', '෴'), ('ก', 'ะ'), ('า', 'ำ'),
+  ('฿', 'ๆ'), ('๏', '๛'), ('ກ', 'ຂ'), ('ຄ', 'ຄ'),
+  ('ງ', 'ຈ'), ('ຊ', 'ຊ'), ('ຍ', 'ຍ'), ('ດ', 'ທ'),
+  ('ນ', 'ຟ'), ('ມ', 'ຣ'), ('ລ', 'ລ'), ('ວ', 'ວ'),
+  ('ສ', 'ຫ'), ('ອ', 'ະ'), ('າ', 'ຳ'), ('ຽ', 'ຽ'),
+  ('ເ', 'ໄ'), ('ໆ', 'ໆ'), ('໐', '໙'), ('ໜ', 'ໟ'),
+  ('ༀ', '༗'), ('༚', '༴'), ('༶', '༶'), ('༸', '༸'),
+  ('༺', 'ཇ'), ('ཉ', 'ཬ'), ('ཿ', 'ཿ'), ('྅', '྅'),
+  ('ྈ', 'ྌ'), ('྾', '࿅'), ('࿇', '࿌'), ('࿎', '࿚'),
+  ('က', 'ာ'), ('ေ', 'ေ'), ('း', 'း'), ('ျ', 'ြ'),
+  ('ဿ', 'ၗ'), ('ၚ', 'ၝ'), ('ၡ', 'ၰ'), ('ၵ', 'ႁ'),
+  ('ႃ', 'ႄ'), ('ႇ', 'ႌ'), ('ႎ', 'ႜ'), ('႞', 'Ⴥ'),
+  ('Ⴧ', 'Ⴧ'), ('Ⴭ', 'Ⴭ'), ('ა', 'ቈ'), ('ቊ', 'ቍ'),
+  ('ቐ', 'ቖ'), ('ቘ', 'ቘ'), ('ቚ', 'ቝ'), ('በ', 'ኈ'),
+  ('ኊ', 'ኍ'), ('ነ', 'ኰ'), ('ኲ', 'ኵ'), ('ኸ', 'ኾ'),
+  ('ዀ', 'ዀ'), ('ዂ', 'ዅ'), ('ወ', 'ዖ'), ('ዘ', 'ጐ'),
+  ('ጒ', 'ጕ'), ('ጘ', 'ፚ'), ('፠', '፼'), ('ᎀ', '᎙'),
+  ('Ꭰ', 'Ᏽ'), ('ᏸ', 'ᏽ'), ('᐀', '᚜'), ('ᚠ', 'ᛸ'),
+  ('ᜀ', 'ᜌ'), ('ᜎ', 'ᜑ'), ('ᜠ', 'ᜱ'), ('᜵', '᜶'),
+  ('ᝀ', 'ᝑ'), ('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), ('ក', 'ឳ'),
+  ('ា', 'ា'), ('ើ', 'ៅ'), ('ះ', 'ៈ'), ('។', 'ៜ'),
+  ('០', '៩'), ('៰', '៹'), ('᠀', '᠊'), ('᠐', '᠙'),
+  ('ᠠ', 'ᡷ'), ('ᢀ', 'ᢄ'), ('ᢇ', 'ᢨ'), ('ᢪ', 'ᢪ'),
+  ('ᢰ', 'ᣵ'), ('ᤀ', 'ᤞ'), ('ᤣ', 'ᤦ'), ('ᤩ', 'ᤫ'),
+  ('ᤰ', 'ᤱ'), ('ᤳ', 'ᤸ'), ('᥀', '᥀'), ('᥄', 'ᥭ'),
+  ('ᥰ', 'ᥴ'), ('ᦀ', 'ᦫ'), ('ᦰ', 'ᧉ'), ('᧐', '᧚'),
+  ('᧞', 'ᨖ'), ('ᨙ', 'ᨚ'), ('᨞', 'ᩕ'), ('ᩗ', 'ᩗ'),
+  ('ᩡ', 'ᩡ'), ('ᩣ', 'ᩤ'), ('ᩭ', 'ᩲ'), ('᪀', '᪉'),
+  ('᪐', '᪙'), ('᪠', '᪭'), ('ᬄ', 'ᬳ'), ('ᬵ', 'ᬵ'),
+  ('ᬻ', 'ᬻ'), ('ᬽ', 'ᭁ'), ('ᭃ', 'ᭋ'), ('᭐', '᭪'),
+  ('᭴', '᭼'), ('ᮂ', 'ᮡ'), ('ᮦ', 'ᮧ'), ('᮪', '᮪'),
+  ('ᮮ', 'ᯥ'), ('ᯧ', 'ᯧ'), ('ᯪ', 'ᯬ'), ('ᯮ', 'ᯮ'),
+  ('᯲', '᯳'), ('᯼', 'ᰫ'), ('ᰴ', 'ᰵ'), ('᰻', '᱉'),
+  ('ᱍ', 'ᲈ'), ('᳀', '᳇'), ('᳓', '᳓'), ('᳡', '᳡'),
+  ('ᳩ', 'ᳬ'), ('ᳮ', 'ᳳ'), ('ᳵ', '᳷'), ('ᴀ', 'ᶿ'),
+  ('Ḁ', 'ἕ'), ('Ἐ', 'Ἕ'), ('ἠ', 'ὅ'), ('Ὀ', 'Ὅ'),
+  ('ὐ', 'ὗ'), ('Ὑ', 'Ὑ'), ('Ὓ', 'Ὓ'), ('Ὕ', 'Ὕ'),
+  ('Ὗ', 'ώ'), ('ᾀ', 'ᾴ'), ('ᾶ', 'ῄ'), ('ῆ', 'ΐ'),
+  ('ῖ', 'Ί'), ('῝', '`'), ('ῲ', 'ῴ'), ('ῶ', '῾'),
+  ('\u{2000}', '\u{200a}'), ('‐', '‧'), ('\u{202f}', '\u{205f}'),
+  ('⁰', 'ⁱ'), ('⁴', '₎'), ('ₐ', 'ₜ'), ('₠', '₿'),
+  ('℀', '↋'), ('←', '␦'), ('⑀', '⑊'), ('①', '⭳'),
+  ('⭶', '⮕'), ('⮘', '⮹'), ('⮽', '⯈'), ('⯊', '⯒'),
+  ('⯬', '⯯'), ('Ⰰ', 'Ⱞ'), ('ⰰ', 'ⱞ'), ('Ⱡ', 'ⳮ'),
+  ('Ⳳ', 'ⳳ'), ('⳹', 'ⴥ'), ('ⴧ', 'ⴧ'), ('ⴭ', 'ⴭ'),
+  ('ⴰ', 'ⵧ'), ('ⵯ', '⵰'), ('ⶀ', 'ⶖ'), ('ⶠ', 'ⶦ'),
+  ('ⶨ', 'ⶮ'), ('ⶰ', 'ⶶ'), ('ⶸ', 'ⶾ'), ('ⷀ', 'ⷆ'),
+  ('ⷈ', 'ⷎ'), ('ⷐ', 'ⷖ'), ('ⷘ', 'ⷞ'), ('⸀', '⹉'),
+  ('⺀', '⺙'), ('⺛', '⻳'), ('⼀', '⿕'), ('⿰', '⿻'),
+  ('\u{3000}', '〩'), ('〰', '〿'), ('ぁ', 'ゖ'), ('゛', 'ヿ'),
+  ('ㄅ', 'ㄮ'), ('ㄱ', 'ㆎ'), ('㆐', 'ㆺ'), ('㇀', '㇣'),
+  ('ㇰ', '㈞'), ('㈠', '㋾'), ('㌀', '䶵'), ('䷀', '鿪'),
+  ('ꀀ', 'ꒌ'), ('꒐', '꓆'), ('ꓐ', 'ꘫ'), ('Ꙁ', 'ꙮ'),
+  ('꙳', '꙳'), ('꙾', 'ꚝ'), ('ꚠ', 'ꛯ'), ('꛲', '꛷'),
+  ('꜀', 'Ɪ'), ('Ʞ', 'ꞷ'), ('ꟷ', 'ꠁ'), ('ꠃ', 'ꠅ'),
+  ('ꠇ', 'ꠊ'), ('ꠌ', 'ꠤ'), ('ꠧ', '꠫'), ('꠰', '꠹'),
+  ('ꡀ', '꡷'), ('ꢀ', 'ꣃ'), ('꣎', '꣙'), ('ꣲ', 'ꣽ'),
+  ('꤀', 'ꤥ'), ('꤮', 'ꥆ'), ('ꥒ', '꥓'), ('꥟', 'ꥼ'),
+  ('ꦃ', 'ꦲ'), ('ꦴ', 'ꦵ'), ('ꦺ', 'ꦻ'), ('ꦽ', '꧍'),
+  ('ꧏ', '꧙'), ('꧞', 'ꧤ'), ('ꧦ', 'ꧾ'), ('ꨀ', 'ꨨ'),
+  ('ꨯ', 'ꨰ'), ('ꨳ', 'ꨴ'), ('ꩀ', 'ꩂ'), ('ꩄ', 'ꩋ'),
+  ('ꩍ', 'ꩍ'), ('꩐', '꩙'), ('꩜', 'ꩻ'), ('ꩽ', 'ꪯ'),
+  ('ꪱ', 'ꪱ'), ('ꪵ', 'ꪶ'), ('ꪹ', 'ꪽ'), ('ꫀ', 'ꫀ'),
+  ('ꫂ', 'ꫂ'), ('ꫛ', 'ꫫ'), ('ꫮ', 'ꫵ'), ('ꬁ', 'ꬆ'),
+  ('ꬉ', 'ꬎ'), ('ꬑ', 'ꬖ'), ('ꬠ', 'ꬦ'), ('ꬨ', 'ꬮ'),
+  ('ꬰ', 'ꭥ'), ('ꭰ', 'ꯤ'), ('ꯦ', 'ꯧ'), ('ꯩ', '꯬'),
+  ('꯰', '꯹'), ('가', '힣'), ('ힰ', 'ퟆ'), ('ퟋ', 'ퟻ'),
+  ('豈', '舘'), ('並', '龎'), ('ff', 'st'), ('ﬓ', 'ﬗ'),
+  ('יִ', 'יִ'), ('ײַ', 'זּ'), ('טּ', 'לּ'), ('מּ', 'מּ'),
+  ('נּ', 'סּ'), ('ףּ', 'פּ'), ('צּ', '﯁'), ('ﯓ', '﴿'),
+  ('ﵐ', 'ﶏ'), ('ﶒ', 'ﷇ'), ('ﷰ', '﷽'), ('︐', '︙'),
+  ('︰', '﹒'), ('﹔', '﹦'), ('﹨', '﹫'), ('ﹰ', 'ﹴ'),
+  ('ﹶ', 'ﻼ'), ('!', 'ン'), ('ᅠ', 'ᄒ'), ('ᅡ', 'ᅦ'),
+  ('ᅧ', 'ᅬ'), ('ᅭ', 'ᅲ'), ('ᅳ', 'ᅵ'), ('¢', '₩'),
+  ('│', '○'), ('', '�'), ('𐀀', '𐀋'), ('𐀍', '𐀦'),
+  ('𐀨', '𐀺'), ('𐀼', '𐀽'), ('𐀿', '𐁍'), ('𐁐', '𐁝'),
+  ('𐂀', '𐃺'), ('𐄀', '𐄂'), ('𐄇', '𐄳'), ('𐄷', '𐆎'),
+  ('𐆐', '𐆛'), ('𐆠', '𐆠'), ('𐇐', '𐇼'), ('𐊀', '𐊜'),
+  ('𐊠', '𐋐'), ('𐋡', '𐋻'), ('𐌀', '𐌣'), ('𐌭', '𐍊'),
+  ('𐍐', '𐍵'), ('𐎀', '𐎝'), ('𐎟', '𐏃'), ('𐏈', '𐏕'),
+  ('𐐀', '𐒝'), ('𐒠', '𐒩'), ('𐒰', '𐓓'), ('𐓘', '𐓻'),
+  ('𐔀', '𐔧'), ('𐔰', '𐕣'), ('𐕯', '𐕯'), ('𐘀', '𐜶'),
+  ('𐝀', '𐝕'), ('𐝠', '𐝧'), ('𐠀', '𐠅'), ('𐠈', '𐠈'),
+  ('𐠊', '𐠵'), ('𐠷', '𐠸'), ('𐠼', '𐠼'), ('𐠿', '𐡕'),
+  ('𐡗', '𐢞'), ('𐢧', '𐢯'), ('𐣠', '𐣲'), ('𐣴', '𐣵'),
+  ('𐣻', '𐤛'), ('𐤟', '𐤹'), ('𐤿', '𐤿'), ('𐦀', '𐦷'),
+  ('𐦼', '𐧏'), ('𐧒', '𐨀'), ('𐨐', '𐨓'), ('𐨕', '𐨗'),
+  ('𐨙', '𐨳'), ('𐩀', '𐩇'), ('𐩐', '𐩘'), ('𐩠', '𐪟'),
+  ('𐫀', '𐫤'), ('𐫫', '𐫶'), ('𐬀', '𐬵'), ('𐬹', '𐭕'),
+  ('𐭘', '𐭲'), ('𐭸', '𐮑'), ('𐮙', '𐮜'), ('𐮩', '𐮯'),
+  ('𐰀', '𐱈'), ('𐲀', '𐲲'), ('𐳀', '𐳲'), ('𐳺', '𐳿'),
+  ('𐹠', '𐹾'), ('𑀀', '𑀀'), ('𑀂', '𑀷'), ('𑁇', '𑁍'),
+  ('𑁒', '𑁯'), ('𑂂', '𑂲'), ('𑂷', '𑂸'), ('𑂻', '𑂼'),
+  ('𑂾', '𑃁'), ('𑃐', '𑃨'), ('𑃰', '𑃹'), ('𑄃', '𑄦'),
+  ('𑄬', '𑄬'), ('𑄶', '𑅃'), ('𑅐', '𑅲'), ('𑅴', '𑅶'),
+  ('𑆂', '𑆵'), ('𑆿', '𑇉'), ('𑇍', '𑇍'), ('𑇐', '𑇟'),
+  ('𑇡', '𑇴'), ('𑈀', '𑈑'), ('𑈓', '𑈮'), ('𑈲', '𑈳'),
+  ('𑈵', '𑈵'), ('𑈸', '𑈽'), ('𑊀', '𑊆'), ('𑊈', '𑊈'),
+  ('𑊊', '𑊍'), ('𑊏', '𑊝'), ('𑊟', '𑊩'), ('𑊰', '𑋞'),
+  ('𑋠', '𑋢'), ('𑋰', '𑋹'), ('𑌂', '𑌃'), ('𑌅', '𑌌'),
+  ('𑌏', '𑌐'), ('𑌓', '𑌨'), ('𑌪', '𑌰'), ('𑌲', '𑌳'),
+  ('𑌵', '𑌹'), ('𑌽', '𑌽'), ('𑌿', '𑌿'), ('𑍁', '𑍄'),
+  ('𑍇', '𑍈'), ('𑍋', '𑍍'), ('𑍐', '𑍐'), ('𑍝', '𑍣'),
+  ('𑐀', '𑐷'), ('𑑀', '𑑁'), ('𑑅', '𑑅'), ('𑑇', '𑑙'),
+  ('𑑛', '𑑛'), ('𑑝', '𑑝'), ('𑒀', '𑒯'), ('𑒱', '𑒲'),
+  ('𑒹', '𑒹'), ('𑒻', '𑒼'), ('𑒾', '𑒾'), ('𑓁', '𑓁'),
+  ('𑓄', '𑓇'), ('𑓐', '𑓙'), ('𑖀', '𑖮'), ('𑖰', '𑖱'),
+  ('𑖸', '𑖻'), ('𑖾', '𑖾'), ('𑗁', '𑗛'), ('𑘀', '𑘲'),
+  ('𑘻', '𑘼'), ('𑘾', '𑘾'), ('𑙁', '𑙄'), ('𑙐', '𑙙'),
+  ('𑙠', '𑙬'), ('𑚀', '𑚪'), ('𑚬', '𑚬'), ('𑚮', '𑚯'),
+  ('𑚶', '𑚶'), ('𑛀', '𑛉'), ('𑜀', '𑜙'), ('𑜠', '𑜡'),
+  ('𑜦', '𑜦'), ('𑜰', '𑜿'), ('𑢠', '𑣲'), ('𑣿', '𑣿'),
+  ('𑨀', '𑨀'), ('𑨇', '𑨈'), ('𑨋', '𑨲'), ('𑨹', '𑨺'),
+  ('𑨿', '𑩆'), ('𑩐', '𑩐'), ('𑩗', '𑩘'), ('𑩜', '𑪃'),
+  ('𑪆', '𑪉'), ('𑪗', '𑪗'), ('𑪚', '𑪜'), ('𑪞', '𑪢'),
+  ('𑫀', '𑫸'), ('𑰀', '𑰈'), ('𑰊', '𑰯'), ('𑰾', '𑰾'),
+  ('𑱀', '𑱅'), ('𑱐', '𑱬'), ('𑱰', '𑲏'), ('𑲩', '𑲩'),
+  ('𑲱', '𑲱'), ('𑲴', '𑲴'), ('𑴀', '𑴆'), ('𑴈', '𑴉'),
+  ('𑴋', '𑴰'), ('𑵆', '𑵆'), ('𑵐', '𑵙'), ('𒀀', '𒎙'),
+  ('𒐀', '𒑮'), ('𒑰', '𒑴'), ('𒒀', '𒕃'), ('𓀀', '𓐮'),
+  ('𔐀', '𔙆'), ('𖠀', '𖨸'), ('𖩀', '𖩞'), ('𖩠', '𖩩'),
+  ('𖩮', '𖩯'), ('𖫐', '𖫭'), ('𖫵', '𖫵'), ('𖬀', '𖬯'),
+  ('𖬷', '𖭅'), ('𖭐', '𖭙'), ('𖭛', '𖭡'), ('𖭣', '𖭷'),
+  ('𖭽', '𖮏'), ('𖼀', '𖽄'), ('𖽐', '𖽾'), ('𖾓', '𖾟'),
+  ('𖿠', '𖿡'), ('𗀀', '𘟬'), ('𘠀', '𘫲'), ('𛀀', '𛄞'),
+  ('𛅰', '𛋻'), ('𛰀', '𛱪'), ('𛱰', '𛱼'), ('𛲀', '𛲈'),
+  ('𛲐', '𛲙'), ('𛲜', '𛲜'), ('𛲟', '𛲟'), ('𝀀', '𝃵'),
+  ('𝄀', '𝄦'), ('𝄩', '𝅘𝅥𝅲'), ('𝅦', '𝅦'), ('𝅪', '𝅭'),
+  ('𝆃', '𝆄'), ('𝆌', '𝆩'), ('𝆮', '𝇨'), ('𝈀', '𝉁'),
+  ('𝉅', '𝉅'), ('𝌀', '𝍖'), ('𝍠', '𝍱'), ('𝐀', '𝑔'),
+  ('𝑖', '𝒜'), ('𝒞', '𝒟'), ('𝒢', '𝒢'), ('𝒥', '𝒦'),
+  ('𝒩', '𝒬'), ('𝒮', '𝒹'), ('𝒻', '𝒻'), ('𝒽', '𝓃'),
+  ('𝓅', '𝔅'), ('𝔇', '𝔊'), ('𝔍', '𝔔'), ('𝔖', '𝔜'),
+  ('𝔞', '𝔹'), ('𝔻', '𝔾'), ('𝕀', '𝕄'), ('𝕆', '𝕆'),
+  ('𝕊', '𝕐'), ('𝕒', '𝚥'), ('𝚨', '𝟋'), ('𝟎', '𝧿'),
+  ('𝨷', '𝨺'), ('𝩭', '𝩴'), ('𝩶', '𝪃'), ('𝪅', '𝪋'),
+  ('𞠀', '𞣄'), ('𞣇', '𞣏'), ('𞤀', '𞥃'), ('𞥐', '𞥙'),
+  ('𞥞', '𞥟'), ('𞸀', '𞸃'), ('𞸅', '𞸟'), ('𞸡', '𞸢'),
+  ('𞸤', '𞸤'), ('𞸧', '𞸧'), ('𞸩', '𞸲'), ('𞸴', '𞸷'),
+  ('𞸹', '𞸹'), ('𞸻', '𞸻'), ('𞹂', '𞹂'), ('𞹇', '𞹇'),
+  ('𞹉', '𞹉'), ('𞹋', '𞹋'), ('𞹍', '𞹏'), ('𞹑', '𞹒'),
+  ('𞹔', '𞹔'), ('𞹗', '𞹗'), ('𞹙', '𞹙'), ('𞹛', '𞹛'),
+  ('𞹝', '𞹝'), ('𞹟', '𞹟'), ('𞹡', '𞹢'), ('𞹤', '𞹤'),
+  ('𞹧', '𞹪'), ('𞹬', '𞹲'), ('𞹴', '𞹷'), ('𞹹', '𞹼'),
+  ('𞹾', '𞹾'), ('𞺀', '𞺉'), ('𞺋', '𞺛'), ('𞺡', '𞺣'),
+  ('𞺥', '𞺩'), ('𞺫', '𞺻'), ('𞻰', '𞻱'), ('🀀', '🀫'),
+  ('🀰', '🂓'), ('🂠', '🂮'), ('🂱', '🂿'), ('🃁', '🃏'),
+  ('🃑', '🃵'), ('🄀', '🄌'), ('🄐', '🄮'), ('🄰', '🅫'),
+  ('🅰', '🆬'), ('🇦', '🈂'), ('🈐', '🈻'), ('🉀', '🉈'),
+  ('🉐', '🉑'), ('🉠', '🉥'), ('🌀', '🛔'), ('🛠', '🛬'),
+  ('🛰', '🛸'), ('🜀', '🝳'), ('🞀', '🟔'), ('🠀', '🠋'),
+  ('🠐', '🡇'), ('🡐', '🡙'), ('🡠', '🢇'), ('🢐', '🢭'),
+  ('🤀', '🤋'), ('🤐', '🤾'), ('🥀', '🥌'), ('🥐', '🥫'),
+  ('🦀', '🦗'), ('🧀', '🧀'), ('🧐', '🧦'), ('𠀀', '𪛖'),
+  ('𪜀', '𫜴'), ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'),
+  ('丽', '𪘀'),
+];
+
+pub const GRAPHEME_EXTEND: &'static [(char, char)] = &[
+  ('̀', 'ͯ'), ('҃', '҉'), ('֑', 'ֽ'), ('ֿ', 'ֿ'), ('ׁ', 'ׂ'),
+  ('ׄ', 'ׅ'), ('ׇ', 'ׇ'), ('ؐ', 'ؚ'), ('ً', 'ٟ'), ('ٰ', 'ٰ'),
+  ('ۖ', 'ۜ'), ('۟', 'ۤ'), ('ۧ', 'ۨ'), ('۪', 'ۭ'), ('ܑ', 'ܑ'),
+  ('ܰ', '݊'), ('ަ', 'ް'), ('߫', '߳'), ('ࠖ', '࠙'), ('ࠛ', 'ࠣ'),
+  ('ࠥ', 'ࠧ'), ('ࠩ', '࠭'), ('࡙', '࡛'), ('ࣔ', '࣡'),
+  ('ࣣ', 'ं'), ('ऺ', 'ऺ'), ('़', '़'), ('ु', 'ै'),
+  ('्', '्'), ('॑', 'ॗ'), ('ॢ', 'ॣ'), ('ঁ', 'ঁ'),
+  ('়', '়'), ('া', 'া'), ('ু', 'ৄ'), ('্', '্'),
+  ('ৗ', 'ৗ'), ('ৢ', 'ৣ'), ('ਁ', 'ਂ'), ('਼', '਼'),
+  ('ੁ', 'ੂ'), ('ੇ', 'ੈ'), ('ੋ', '੍'), ('ੑ', 'ੑ'),
+  ('ੰ', 'ੱ'), ('ੵ', 'ੵ'), ('ઁ', 'ં'), ('઼', '઼'),
+  ('ુ', 'ૅ'), ('ે', 'ૈ'), ('્', '્'), ('ૢ', 'ૣ'),
+  ('ૺ', '૿'), ('ଁ', 'ଁ'), ('଼', '଼'), ('ା', 'ି'),
+  ('ୁ', 'ୄ'), ('୍', '୍'), ('ୖ', 'ୗ'), ('ୢ', 'ୣ'),
+  ('ஂ', 'ஂ'), ('ா', 'ா'), ('ீ', 'ீ'), ('்', '்'),
+  ('ௗ', 'ௗ'), ('ఀ', 'ఀ'), ('ా', 'ీ'), ('ె', 'ై'),
+  ('ొ', '్'), ('ౕ', 'ౖ'), ('ౢ', 'ౣ'), ('ಁ', 'ಁ'),
+  ('಼', '಼'), ('ಿ', 'ಿ'), ('ೂ', 'ೂ'), ('ೆ', 'ೆ'),
+  ('ೌ', '್'), ('ೕ', 'ೖ'), ('ೢ', 'ೣ'), ('ഀ', 'ഁ'),
+  ('഻', '഼'), ('ാ', 'ാ'), ('ു', 'ൄ'), ('്', '്'),
+  ('ൗ', 'ൗ'), ('ൢ', 'ൣ'), ('්', '්'), ('ා', 'ා'),
+  ('ි', 'ු'), ('ූ', 'ූ'), ('ෟ', 'ෟ'), ('ั', 'ั'),
+  ('ิ', 'ฺ'), ('็', '๎'), ('ັ', 'ັ'), ('ິ', 'ູ'),
+  ('ົ', 'ຼ'), ('່', 'ໍ'), ('༘', '༙'), ('༵', '༵'),
+  ('༷', '༷'), ('༹', '༹'), ('ཱ', 'ཾ'), ('ྀ', '྄'),
+  ('྆', '྇'), ('ྍ', 'ྗ'), ('ྙ', 'ྼ'), ('࿆', '࿆'),
+  ('ိ', 'ူ'), ('ဲ', '့'), ('္', '်'), ('ွ', 'ှ'),
+  ('ၘ', 'ၙ'), ('ၞ', 'ၠ'), ('ၱ', 'ၴ'), ('ႂ', 'ႂ'),
+  ('ႅ', 'ႆ'), ('ႍ', 'ႍ'), ('ႝ', 'ႝ'), ('፝', '፟'),
+  ('ᜒ', '᜔'), ('ᜲ', '᜴'), ('ᝒ', 'ᝓ'), ('ᝲ', 'ᝳ'),
+  ('឴', '឵'), ('ិ', 'ួ'), ('ំ', 'ំ'), ('៉', '៓'),
+  ('៝', '៝'), ('᠋', '᠍'), ('ᢅ', 'ᢆ'), ('ᢩ', 'ᢩ'),
+  ('ᤠ', 'ᤢ'), ('ᤧ', 'ᤨ'), ('ᤲ', 'ᤲ'), ('᤹', '᤻'),
+  ('ᨗ', 'ᨘ'), ('ᨛ', 'ᨛ'), ('ᩖ', 'ᩖ'), ('ᩘ', 'ᩞ'),
+  ('᩠', '᩠'), ('ᩢ', 'ᩢ'), ('ᩥ', 'ᩬ'), ('ᩳ', '᩼'),
+  ('᩿', '᩿'), ('᪰', '᪾'), ('ᬀ', 'ᬃ'), ('᬴', '᬴'),
+  ('ᬶ', 'ᬺ'), ('ᬼ', 'ᬼ'), ('ᭂ', 'ᭂ'), ('᭫', '᭳'),
+  ('ᮀ', 'ᮁ'), ('ᮢ', 'ᮥ'), ('ᮨ', 'ᮩ'), ('᮫', 'ᮭ'),
+  ('᯦', '᯦'), ('ᯨ', 'ᯩ'), ('ᯭ', 'ᯭ'), ('ᯯ', 'ᯱ'),
+  ('ᰬ', 'ᰳ'), ('ᰶ', '᰷'), ('᳐', '᳒'), ('᳔', '᳠'),
+  ('᳢', '᳨'), ('᳭', '᳭'), ('᳴', '᳴'), ('᳸', '᳹'),
+  ('᷀', '᷹'), ('᷻', '᷿'), ('\u{200c}', '\u{200c}'), ('⃐', '⃰'),
+  ('⳯', '⳱'), ('⵿', '⵿'), ('ⷠ', 'ⷿ'), ('〪', '〯'),
+  ('゙', '゚'), ('꙯', '꙲'), ('ꙴ', '꙽'), ('ꚞ', 'ꚟ'),
+  ('꛰', '꛱'), ('ꠂ', 'ꠂ'), ('꠆', '꠆'), ('ꠋ', 'ꠋ'),
+  ('ꠥ', 'ꠦ'), ('꣄', 'ꣅ'), ('꣠', '꣱'), ('ꤦ', '꤭'),
+  ('ꥇ', 'ꥑ'), ('ꦀ', 'ꦂ'), ('꦳', '꦳'), ('ꦶ', 'ꦹ'),
+  ('ꦼ', 'ꦼ'), ('ꧥ', 'ꧥ'), ('ꨩ', 'ꨮ'), ('ꨱ', 'ꨲ'),
+  ('ꨵ', 'ꨶ'), ('ꩃ', 'ꩃ'), ('ꩌ', 'ꩌ'), ('ꩼ', 'ꩼ'),
+  ('ꪰ', 'ꪰ'), ('ꪲ', 'ꪴ'), ('ꪷ', 'ꪸ'), ('ꪾ', '꪿'),
+  ('꫁', '꫁'), ('ꫬ', 'ꫭ'), ('꫶', '꫶'), ('ꯥ', 'ꯥ'),
+  ('ꯨ', 'ꯨ'), ('꯭', '꯭'), ('ﬞ', 'ﬞ'), ('︀', '️'),
+  ('︠', '︯'), ('゙', '゚'), ('𐇽', '𐇽'), ('𐋠', '𐋠'),
+  ('𐍶', '𐍺'), ('𐨁', '𐨃'), ('𐨅', '𐨆'), ('𐨌', '𐨏'),
+  ('𐨸', '𐨺'), ('𐨿', '𐨿'), ('𐫥', '𐫦'), ('𑀁', '𑀁'),
+  ('𑀸', '𑁆'), ('𑁿', '𑂁'), ('𑂳', '𑂶'), ('𑂹', '𑂺'),
+  ('𑄀', '𑄂'), ('𑄧', '𑄫'), ('𑄭', '𑄴'), ('𑅳', '𑅳'),
+  ('𑆀', '𑆁'), ('𑆶', '𑆾'), ('𑇊', '𑇌'), ('𑈯', '𑈱'),
+  ('𑈴', '𑈴'), ('𑈶', '𑈷'), ('𑈾', '𑈾'), ('𑋟', '𑋟'),
+  ('𑋣', '𑋪'), ('𑌀', '𑌁'), ('𑌼', '𑌼'), ('𑌾', '𑌾'),
+  ('𑍀', '𑍀'), ('𑍗', '𑍗'), ('𑍦', '𑍬'), ('𑍰', '𑍴'),
+  ('𑐸', '𑐿'), ('𑑂', '𑑄'), ('𑑆', '𑑆'), ('𑒰', '𑒰'),
+  ('𑒳', '𑒸'), ('𑒺', '𑒺'), ('𑒽', '𑒽'), ('𑒿', '𑓀'),
+  ('𑓂', '𑓃'), ('𑖯', '𑖯'), ('𑖲', '𑖵'), ('𑖼', '𑖽'),
+  ('𑖿', '𑗀'), ('𑗜', '𑗝'), ('𑘳', '𑘺'), ('𑘽', '𑘽'),
+  ('𑘿', '𑙀'), ('𑚫', '𑚫'), ('𑚭', '𑚭'), ('𑚰', '𑚵'),
+  ('𑚷', '𑚷'), ('𑜝', '𑜟'), ('𑜢', '𑜥'), ('𑜧', '𑜫'),
+  ('𑨁', '𑨆'), ('𑨉', '𑨊'), ('𑨳', '𑨸'), ('𑨻', '𑨾'),
+  ('𑩇', '𑩇'), ('𑩑', '𑩖'), ('𑩙', '𑩛'), ('𑪊', '𑪖'),
+  ('𑪘', '𑪙'), ('𑰰', '𑰶'), ('𑰸', '𑰽'), ('𑰿', '𑰿'),
+  ('𑲒', '𑲧'), ('𑲪', '𑲰'), ('𑲲', '𑲳'), ('𑲵', '𑲶'),
+  ('𑴱', '𑴶'), ('𑴺', '𑴺'), ('𑴼', '𑴽'), ('𑴿', '𑵅'),
+  ('𑵇', '𑵇'), ('𖫰', '𖫴'), ('𖬰', '𖬶'), ('𖾏', '𖾒'),
+  ('𛲝', '𛲞'), ('𝅥', '𝅥'), ('𝅧', '𝅩'), ('𝅮', '𝅲'),
+  ('𝅻', '𝆂'), ('𝆅', '𝆋'), ('𝆪', '𝆭'), ('𝉂', '𝉄'),
+  ('𝨀', '𝨶'), ('𝨻', '𝩬'), ('𝩵', '𝩵'), ('𝪄', '𝪄'),
+  ('𝪛', '𝪟'), ('𝪡', '𝪯'), ('𞀀', '𞀆'), ('𞀈', '𞀘'),
+  ('𞀛', '𞀡'), ('𞀣', '𞀤'), ('𞀦', '𞀪'), ('𞣐', '𞣖'),
+  ('𞥄', '𞥊'), ('\u{e0020}', '\u{e007f}'), ('󠄀', '󠇯'),
+];
+
+pub const GRAPHEME_LINK: &'static [(char, char)] = &[
+  ('्', '्'), ('্', '্'), ('੍', '੍'), ('્', '્'),
+  ('୍', '୍'), ('்', '்'), ('్', '్'), ('್', '್'),
+  ('഻', '഼'), ('്', '്'), ('්', '්'), ('ฺ', 'ฺ'),
+  ('྄', '྄'), ('္', '်'), ('᜔', '᜔'), ('᜴', '᜴'),
+  ('្', '្'), ('᩠', '᩠'), ('᭄', '᭄'), ('᮪', '᮫'),
+  ('᯲', '᯳'), ('⵿', '⵿'), ('꠆', '꠆'), ('꣄', '꣄'),
+  ('꥓', '꥓'), ('꧀', '꧀'), ('꫶', '꫶'), ('꯭', '꯭'),
+  ('𐨿', '𐨿'), ('𑁆', '𑁆'), ('𑁿', '𑁿'), ('𑂹', '𑂹'),
+  ('𑄳', '𑄴'), ('𑇀', '𑇀'), ('𑈵', '𑈵'), ('𑋪', '𑋪'),
+  ('𑍍', '𑍍'), ('𑑂', '𑑂'), ('𑓂', '𑓂'), ('𑖿', '𑖿'),
+  ('𑘿', '𑘿'), ('𑚶', '𑚶'), ('𑜫', '𑜫'), ('𑨴', '𑨴'),
+  ('𑩇', '𑩇'), ('𑪙', '𑪙'), ('𑰿', '𑰿'), ('𑵄', '𑵅'),
+];
+
+pub const HEX_DIGIT: &'static [(char, char)] = &[
+  ('0', '9'), ('A', 'F'), ('a', 'f'), ('0', '9'), ('A', 'F'),
+  ('a', 'f'),
+];
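(Not part of the vendored file: a minimal, self-contained sketch of how a sorted table of inclusive (start, end) char ranges, such as HEX_DIGIT above, can be queried. It assumes only that the generated tables are sorted by start and non-overlapping, which is how they are laid out here; the local HEX_DIGIT copy below is truncated to the ASCII entries shown above, and table_contains is a hypothetical helper name.)

    // Membership test over a sorted, non-overlapping table of inclusive
    // char ranges, using a binary search keyed on the probe character.
    const HEX_DIGIT: &'static [(char, char)] = &[
        ('0', '9'), ('A', 'F'), ('a', 'f'),
    ];

    fn table_contains(table: &[(char, char)], c: char) -> bool {
        table
            .binary_search_by(|&(start, end)| {
                if c < start {
                    // This range lies entirely after `c`.
                    std::cmp::Ordering::Greater
                } else if c > end {
                    // This range lies entirely before `c`.
                    std::cmp::Ordering::Less
                } else {
                    // `c` falls inside the inclusive range.
                    std::cmp::Ordering::Equal
                }
            })
            .is_ok()
    }

    fn main() {
        assert!(table_contains(HEX_DIGIT, 'b'));
        assert!(!table_contains(HEX_DIGIT, 'g'));
        println!("membership checks passed");
    }
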
+
+pub const HYPHEN: &'static [(char, char)] = &[
+  ('-', '-'), ('\u{ad}', '\u{ad}'), ('֊', '֊'), ('᠆', '᠆'),
+  ('‐', '‑'), ('⸗', '⸗'), ('・', '・'), ('﹣', '﹣'),
+  ('-', '-'), ('・', '・'),
+];
+
+pub const IDS_BINARY_OPERATOR: &'static [(char, char)] = &[
+  ('⿰', '⿱'), ('⿴', '⿻'),
+];
+
+pub const IDS_TRINARY_OPERATOR: &'static [(char, char)] = &[
+  ('⿲', '⿳'),
+];
+
+pub const ID_CONTINUE: &'static [(char, char)] = &[
+  ('0', '9'), ('A', 'Z'), ('_', '_'), ('a', 'z'), ('ª', 'ª'), ('µ', 'µ'),
+  ('·', '·'), ('º', 'º'), ('À', 'Ö'), ('Ø', 'ö'), ('ø', 'ˁ'),
+  ('ˆ', 'ˑ'), ('ˠ', 'ˤ'), ('ˬ', 'ˬ'), ('ˮ', 'ˮ'), ('̀', 'ʹ'),
+  ('Ͷ', 'ͷ'), ('ͺ', 'ͽ'), ('Ϳ', 'Ϳ'), ('Ά', 'Ί'), ('Ό', 'Ό'),
+  ('Ύ', 'Ρ'), ('Σ', 'ϵ'), ('Ϸ', 'ҁ'), ('҃', '҇'), ('Ҋ', 'ԯ'),
+  ('Ա', 'Ֆ'), ('ՙ', 'ՙ'), ('ա', 'և'), ('֑', 'ֽ'), ('ֿ', 'ֿ'),
+  ('ׁ', 'ׂ'), ('ׄ', 'ׅ'), ('ׇ', 'ׇ'), ('א', 'ת'), ('װ', 'ײ'),
+  ('ؐ', 'ؚ'), ('ؠ', '٩'), ('ٮ', 'ۓ'), ('ە', 'ۜ'), ('۟', 'ۨ'),
+  ('۪', 'ۼ'), ('ۿ', 'ۿ'), ('ܐ', '݊'), ('ݍ', 'ޱ'), ('߀', 'ߵ'),
+  ('ߺ', 'ߺ'), ('ࠀ', '࠭'), ('ࡀ', '࡛'), ('ࡠ', 'ࡪ'),
+  ('ࢠ', 'ࢴ'), ('ࢶ', 'ࢽ'), ('ࣔ', '࣡'), ('ࣣ', 'ॣ'),
+  ('०', '९'), ('ॱ', 'ঃ'), ('অ', 'ঌ'), ('এ', 'ঐ'),
+  ('ও', 'ন'), ('প', 'র'), ('ল', 'ল'), ('শ', 'হ'),
+  ('়', 'ৄ'), ('ে', 'ৈ'), ('ো', 'ৎ'), ('ৗ', 'ৗ'),
+  ('ড়', 'ঢ়'), ('য়', 'ৣ'), ('০', 'ৱ'), ('ৼ', 'ৼ'),
+  ('ਁ', 'ਃ'), ('ਅ', 'ਊ'), ('ਏ', 'ਐ'), ('ਓ', 'ਨ'),
+  ('ਪ', 'ਰ'), ('ਲ', 'ਲ਼'), ('ਵ', 'ਸ਼'), ('ਸ', 'ਹ'),
+  ('਼', '਼'), ('ਾ', 'ੂ'), ('ੇ', 'ੈ'), ('ੋ', '੍'),
+  ('ੑ', 'ੑ'), ('ਖ਼', 'ੜ'), ('ਫ਼', 'ਫ਼'), ('੦', 'ੵ'),
+  ('ઁ', 'ઃ'), ('અ', 'ઍ'), ('એ', 'ઑ'), ('ઓ', 'ન'),
+  ('પ', 'ર'), ('લ', 'ળ'), ('વ', 'હ'), ('઼', 'ૅ'),
+  ('ે', 'ૉ'), ('ો', '્'), ('ૐ', 'ૐ'), ('ૠ', 'ૣ'),
+  ('૦', '૯'), ('ૹ', '૿'), ('ଁ', 'ଃ'), ('ଅ', 'ଌ'),
+  ('ଏ', 'ଐ'), ('ଓ', 'ନ'), ('ପ', 'ର'), ('ଲ', 'ଳ'),
+  ('ଵ', 'ହ'), ('଼', 'ୄ'), ('େ', 'ୈ'), ('ୋ', '୍'),
+  ('ୖ', 'ୗ'), ('ଡ଼', 'ଢ଼'), ('ୟ', 'ୣ'), ('୦', '୯'),
+  ('ୱ', 'ୱ'), ('ஂ', 'ஃ'), ('அ', 'ஊ'), ('எ', 'ஐ'),
+  ('ஒ', 'க'), ('ங', 'ச'), ('ஜ', 'ஜ'), ('ஞ', 'ட'),
+  ('ண', 'த'), ('ந', 'ப'), ('ம', 'ஹ'), ('ா', 'ூ'),
+  ('ெ', 'ை'), ('ொ', '்'), ('ௐ', 'ௐ'), ('ௗ', 'ௗ'),
+  ('௦', '௯'), ('ఀ', 'ః'), ('అ', 'ఌ'), ('ఎ', 'ఐ'),
+  ('ఒ', 'న'), ('ప', 'హ'), ('ఽ', 'ౄ'), ('ె', 'ై'),
+  ('ొ', '్'), ('ౕ', 'ౖ'), ('ౘ', 'ౚ'), ('ౠ', 'ౣ'),
+  ('౦', '౯'), ('ಀ', 'ಃ'), ('ಅ', 'ಌ'), ('ಎ', 'ಐ'),
+  ('ಒ', 'ನ'), ('ಪ', 'ಳ'), ('ವ', 'ಹ'), ('಼', 'ೄ'),
+  ('ೆ', 'ೈ'), ('ೊ', '್'), ('ೕ', 'ೖ'), ('ೞ', 'ೞ'),
+  ('ೠ', 'ೣ'), ('೦', '೯'), ('ೱ', 'ೲ'), ('ഀ', 'ഃ'),
+  ('അ', 'ഌ'), ('എ', 'ഐ'), ('ഒ', 'ൄ'), ('െ', 'ൈ'),
+  ('ൊ', 'ൎ'), ('ൔ', 'ൗ'), ('ൟ', 'ൣ'), ('൦', '൯'),
+  ('ൺ', 'ൿ'), ('ං', 'ඃ'), ('අ', 'ඖ'), ('ක', 'න'),
+  ('ඳ', 'ර'), ('ල', 'ල'), ('ව', 'ෆ'), ('්', '්'),
+  ('ා', 'ු'), ('ූ', 'ූ'), ('ෘ', 'ෟ'), ('෦', '෯'),
+  ('ෲ', 'ෳ'), ('ก', 'ฺ'), ('เ', '๎'), ('๐', '๙'),
+  ('ກ', 'ຂ'), ('ຄ', 'ຄ'), ('ງ', 'ຈ'), ('ຊ', 'ຊ'),
+  ('ຍ', 'ຍ'), ('ດ', 'ທ'), ('ນ', 'ຟ'), ('ມ', 'ຣ'),
+  ('ລ', 'ລ'), ('ວ', 'ວ'), ('ສ', 'ຫ'), ('ອ', 'ູ'),
+  ('ົ', 'ຽ'), ('ເ', 'ໄ'), ('ໆ', 'ໆ'), ('່', 'ໍ'),
+  ('໐', '໙'), ('ໜ', 'ໟ'), ('ༀ', 'ༀ'), ('༘', '༙'),
+  ('༠', '༩'), ('༵', '༵'), ('༷', '༷'), ('༹', '༹'),
+  ('༾', 'ཇ'), ('ཉ', 'ཬ'), ('ཱ', '྄'), ('྆', 'ྗ'),
+  ('ྙ', 'ྼ'), ('࿆', '࿆'), ('က', '၉'), ('ၐ', 'ႝ'),
+  ('Ⴀ', 'Ⴥ'), ('Ⴧ', 'Ⴧ'), ('Ⴭ', 'Ⴭ'), ('ა', 'ჺ'),
+  ('ჼ', 'ቈ'), ('ቊ', 'ቍ'), ('ቐ', 'ቖ'), ('ቘ', 'ቘ'),
+  ('ቚ', 'ቝ'), ('በ', 'ኈ'), ('ኊ', 'ኍ'), ('ነ', 'ኰ'),
+  ('ኲ', 'ኵ'), ('ኸ', 'ኾ'), ('ዀ', 'ዀ'), ('ዂ', 'ዅ'),
+  ('ወ', 'ዖ'), ('ዘ', 'ጐ'), ('ጒ', 'ጕ'), ('ጘ', 'ፚ'),
+  ('፝', '፟'), ('፩', '፱'), ('ᎀ', 'ᎏ'), ('Ꭰ', 'Ᏽ'),
+  ('ᏸ', 'ᏽ'), ('ᐁ', 'ᙬ'), ('ᙯ', 'ᙿ'), ('ᚁ', 'ᚚ'),
+  ('ᚠ', 'ᛪ'), ('ᛮ', 'ᛸ'), ('ᜀ', 'ᜌ'), ('ᜎ', '᜔'),
+  ('ᜠ', '᜴'), ('ᝀ', 'ᝓ'), ('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'),
+  ('ᝲ', 'ᝳ'), ('ក', '៓'), ('ៗ', 'ៗ'), ('ៜ', '៝'),
+  ('០', '៩'), ('᠋', '᠍'), ('᠐', '᠙'), ('ᠠ', 'ᡷ'),
+  ('ᢀ', 'ᢪ'), ('ᢰ', 'ᣵ'), ('ᤀ', 'ᤞ'), ('ᤠ', 'ᤫ'),
+  ('ᤰ', '᤻'), ('᥆', 'ᥭ'), ('ᥰ', 'ᥴ'), ('ᦀ', 'ᦫ'),
+  ('ᦰ', 'ᧉ'), ('᧐', '᧚'), ('ᨀ', 'ᨛ'), ('ᨠ', 'ᩞ'),
+  ('᩠', '᩼'), ('᩿', '᪉'), ('᪐', '᪙'), ('ᪧ', 'ᪧ'),
+  ('᪰', '᪽'), ('ᬀ', 'ᭋ'), ('᭐', '᭙'), ('᭫', '᭳'),
+  ('ᮀ', '᯳'), ('ᰀ', '᰷'), ('᱀', '᱉'), ('ᱍ', 'ᱽ'),
+  ('ᲀ', 'ᲈ'), ('᳐', '᳒'), ('᳔', '᳹'), ('ᴀ', '᷹'),
+  ('᷻', 'ἕ'), ('Ἐ', 'Ἕ'), ('ἠ', 'ὅ'), ('Ὀ', 'Ὅ'),
+  ('ὐ', 'ὗ'), ('Ὑ', 'Ὑ'), ('Ὓ', 'Ὓ'), ('Ὕ', 'Ὕ'),
+  ('Ὗ', 'ώ'), ('ᾀ', 'ᾴ'), ('ᾶ', 'ᾼ'), ('ι', 'ι'),
+  ('ῂ', 'ῄ'), ('ῆ', 'ῌ'), ('ῐ', 'ΐ'), ('ῖ', 'Ί'),
+  ('ῠ', 'Ῥ'), ('ῲ', 'ῴ'), ('ῶ', 'ῼ'), ('‿', '⁀'),
+  ('⁔', '⁔'), ('ⁱ', 'ⁱ'), ('ⁿ', 'ⁿ'), ('ₐ', 'ₜ'),
+  ('⃐', '⃜'), ('⃡', '⃡'), ('⃥', '⃰'), ('ℂ', 'ℂ'),
+  ('ℇ', 'ℇ'), ('ℊ', 'ℓ'), ('ℕ', 'ℕ'), ('℘', 'ℝ'),
+  ('ℤ', 'ℤ'), ('Ω', 'Ω'), ('ℨ', 'ℨ'), ('K', 'ℹ'),
+  ('ℼ', 'ℿ'), ('ⅅ', 'ⅉ'), ('ⅎ', 'ⅎ'), ('Ⅰ', 'ↈ'),
+  ('Ⰰ', 'Ⱞ'), ('ⰰ', 'ⱞ'), ('Ⱡ', 'ⳤ'), ('Ⳬ', 'ⳳ'),
+  ('ⴀ', 'ⴥ'), ('ⴧ', 'ⴧ'), ('ⴭ', 'ⴭ'), ('ⴰ', 'ⵧ'),
+  ('ⵯ', 'ⵯ'), ('⵿', 'ⶖ'), ('ⶠ', 'ⶦ'), ('ⶨ', 'ⶮ'),
+  ('ⶰ', 'ⶶ'), ('ⶸ', 'ⶾ'), ('ⷀ', 'ⷆ'), ('ⷈ', 'ⷎ'),
+  ('ⷐ', 'ⷖ'), ('ⷘ', 'ⷞ'), ('ⷠ', 'ⷿ'), ('々', '〇'),
+  ('〡', '〯'), ('〱', '〵'), ('〸', '〼'), ('ぁ', 'ゖ'),
+  ('゙', 'ゟ'), ('ァ', 'ヺ'), ('ー', 'ヿ'), ('ㄅ', 'ㄮ'),
+  ('ㄱ', 'ㆎ'), ('ㆠ', 'ㆺ'), ('ㇰ', 'ㇿ'), ('㐀', '䶵'),
+  ('一', '鿪'), ('ꀀ', 'ꒌ'), ('ꓐ', 'ꓽ'), ('ꔀ', 'ꘌ'),
+  ('ꘐ', 'ꘫ'), ('Ꙁ', '꙯'), ('ꙴ', '꙽'), ('ꙿ', '꛱'),
+  ('ꜗ', 'ꜟ'), ('Ꜣ', 'ꞈ'), ('Ꞌ', 'Ɪ'), ('Ʞ', 'ꞷ'),
+  ('ꟷ', 'ꠧ'), ('ꡀ', 'ꡳ'), ('ꢀ', 'ꣅ'), ('꣐', '꣙'),
+  ('꣠', 'ꣷ'), ('ꣻ', 'ꣻ'), ('ꣽ', 'ꣽ'), ('꤀', '꤭'),
+  ('ꤰ', '꥓'), ('ꥠ', 'ꥼ'), ('ꦀ', '꧀'), ('ꧏ', '꧙'),
+  ('ꧠ', 'ꧾ'), ('ꨀ', 'ꨶ'), ('ꩀ', 'ꩍ'), ('꩐', '꩙'),
+  ('ꩠ', 'ꩶ'), ('ꩺ', 'ꫂ'), ('ꫛ', 'ꫝ'), ('ꫠ', 'ꫯ'),
+  ('ꫲ', '꫶'), ('ꬁ', 'ꬆ'), ('ꬉ', 'ꬎ'), ('ꬑ', 'ꬖ'),
+  ('ꬠ', 'ꬦ'), ('ꬨ', 'ꬮ'), ('ꬰ', 'ꭚ'), ('ꭜ', 'ꭥ'),
+  ('ꭰ', 'ꯪ'), ('꯬', '꯭'), ('꯰', '꯹'), ('가', '힣'),
+  ('ힰ', 'ퟆ'), ('ퟋ', 'ퟻ'), ('豈', '舘'), ('並', '龎'),
+  ('ff', 'st'), ('ﬓ', 'ﬗ'), ('יִ', 'ﬨ'), ('שׁ', 'זּ'),
+  ('טּ', 'לּ'), ('מּ', 'מּ'), ('נּ', 'סּ'), ('ףּ', 'פּ'),
+  ('צּ', 'ﮱ'), ('ﯓ', 'ﴽ'), ('ﵐ', 'ﶏ'), ('ﶒ', 'ﷇ'),
+  ('ﷰ', 'ﷻ'), ('︀', '️'), ('︠', '︯'), ('︳', '︴'),
+  ('﹍', '﹏'), ('ﹰ', 'ﹴ'), ('ﹶ', 'ﻼ'), ('0', '9'),
+  ('A', 'Z'), ('_', '_'), ('a', 'z'), ('ヲ', 'ᄒ'),
+  ('ᅡ', 'ᅦ'), ('ᅧ', 'ᅬ'), ('ᅭ', 'ᅲ'), ('ᅳ', 'ᅵ'),
+  ('𐀀', '𐀋'), ('𐀍', '𐀦'), ('𐀨', '𐀺'), ('𐀼', '𐀽'),
+  ('𐀿', '𐁍'), ('𐁐', '𐁝'), ('𐂀', '𐃺'), ('𐅀', '𐅴'),
+  ('𐇽', '𐇽'), ('𐊀', '𐊜'), ('𐊠', '𐋐'), ('𐋠', '𐋠'),
+  ('𐌀', '𐌟'), ('𐌭', '𐍊'), ('𐍐', '𐍺'), ('𐎀', '𐎝'),
+  ('𐎠', '𐏃'), ('𐏈', '𐏏'), ('𐏑', '𐏕'), ('𐐀', '𐒝'),
+  ('𐒠', '𐒩'), ('𐒰', '𐓓'), ('𐓘', '𐓻'), ('𐔀', '𐔧'),
+  ('𐔰', '𐕣'), ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧'),
+  ('𐠀', '𐠅'), ('𐠈', '𐠈'), ('𐠊', '𐠵'), ('𐠷', '𐠸'),
+  ('𐠼', '𐠼'), ('𐠿', '𐡕'), ('𐡠', '𐡶'), ('𐢀', '𐢞'),
+  ('𐣠', '𐣲'), ('𐣴', '𐣵'), ('𐤀', '𐤕'), ('𐤠', '𐤹'),
+  ('𐦀', '𐦷'), ('𐦾', '𐦿'), ('𐨀', '𐨃'), ('𐨅', '𐨆'),
+  ('𐨌', '𐨓'), ('𐨕', '𐨗'), ('𐨙', '𐨳'), ('𐨸', '𐨺'),
+  ('𐨿', '𐨿'), ('𐩠', '𐩼'), ('𐪀', '𐪜'), ('𐫀', '𐫇'),
+  ('𐫉', '𐫦'), ('𐬀', '𐬵'), ('𐭀', '𐭕'), ('𐭠', '𐭲'),
+  ('𐮀', '𐮑'), ('𐰀', '𐱈'), ('𐲀', '𐲲'), ('𐳀', '𐳲'),
+  ('𑀀', '𑁆'), ('𑁦', '𑁯'), ('𑁿', '𑂺'), ('𑃐', '𑃨'),
+  ('𑃰', '𑃹'), ('𑄀', '𑄴'), ('𑄶', '𑄿'), ('𑅐', '𑅳'),
+  ('𑅶', '𑅶'), ('𑆀', '𑇄'), ('𑇊', '𑇌'), ('𑇐', '𑇚'),
+  ('𑇜', '𑇜'), ('𑈀', '𑈑'), ('𑈓', '𑈷'), ('𑈾', '𑈾'),
+  ('𑊀', '𑊆'), ('𑊈', '𑊈'), ('𑊊', '𑊍'), ('𑊏', '𑊝'),
+  ('𑊟', '𑊨'), ('𑊰', '𑋪'), ('𑋰', '𑋹'), ('𑌀', '𑌃'),
+  ('𑌅', '𑌌'), ('𑌏', '𑌐'), ('𑌓', '𑌨'), ('𑌪', '𑌰'),
+  ('𑌲', '𑌳'), ('𑌵', '𑌹'), ('𑌼', '𑍄'), ('𑍇', '𑍈'),
+  ('𑍋', '𑍍'), ('𑍐', '𑍐'), ('𑍗', '𑍗'), ('𑍝', '𑍣'),
+  ('𑍦', '𑍬'), ('𑍰', '𑍴'), ('𑐀', '𑑊'), ('𑑐', '𑑙'),
+  ('𑒀', '𑓅'), ('𑓇', '𑓇'), ('𑓐', '𑓙'), ('𑖀', '𑖵'),
+  ('𑖸', '𑗀'), ('𑗘', '𑗝'), ('𑘀', '𑙀'), ('𑙄', '𑙄'),
+  ('𑙐', '𑙙'), ('𑚀', '𑚷'), ('𑛀', '𑛉'), ('𑜀', '𑜙'),
+  ('𑜝', '𑜫'), ('𑜰', '𑜹'), ('𑢠', '𑣩'), ('𑣿', '𑣿'),
+  ('𑨀', '𑨾'), ('𑩇', '𑩇'), ('𑩐', '𑪃'), ('𑪆', '𑪙'),
+  ('𑫀', '𑫸'), ('𑰀', '𑰈'), ('𑰊', '𑰶'), ('𑰸', '𑱀'),
+  ('𑱐', '𑱙'), ('𑱲', '𑲏'), ('𑲒', '𑲧'), ('𑲩', '𑲶'),
+  ('𑴀', '𑴆'), ('𑴈', '𑴉'), ('𑴋', '𑴶'), ('𑴺', '𑴺'),
+  ('𑴼', '𑴽'), ('𑴿', '𑵇'), ('𑵐', '𑵙'), ('𒀀', '𒎙'),
+  ('𒐀', '𒑮'), ('𒒀', '𒕃'), ('𓀀', '𓐮'), ('𔐀', '𔙆'),
+  ('𖠀', '𖨸'), ('𖩀', '𖩞'), ('𖩠', '𖩩'), ('𖫐', '𖫭'),
+  ('𖫰', '𖫴'), ('𖬀', '𖬶'), ('𖭀', '𖭃'), ('𖭐', '𖭙'),
+  ('𖭣', '𖭷'), ('𖭽', '𖮏'), ('𖼀', '𖽄'), ('𖽐', '𖽾'),
+  ('𖾏', '𖾟'), ('𖿠', '𖿡'), ('𗀀', '𘟬'), ('𘠀', '𘫲'),
+  ('𛀀', '𛄞'), ('𛅰', '𛋻'), ('𛰀', '𛱪'), ('𛱰', '𛱼'),
+  ('𛲀', '𛲈'), ('𛲐', '𛲙'), ('𛲝', '𛲞'), ('𝅥', '𝅩'),
+  ('𝅭', '𝅲'), ('𝅻', '𝆂'), ('𝆅', '𝆋'), ('𝆪', '𝆭'),
+  ('𝉂', '𝉄'), ('𝐀', '𝑔'), ('𝑖', '𝒜'), ('𝒞', '𝒟'),
+  ('𝒢', '𝒢'), ('𝒥', '𝒦'), ('𝒩', '𝒬'), ('𝒮', '𝒹'),
+  ('𝒻', '𝒻'), ('𝒽', '𝓃'), ('𝓅', '𝔅'), ('𝔇', '𝔊'),
+  ('𝔍', '𝔔'), ('𝔖', '𝔜'), ('𝔞', '𝔹'), ('𝔻', '𝔾'),
+  ('𝕀', '𝕄'), ('𝕆', '𝕆'), ('𝕊', '𝕐'), ('𝕒', '𝚥'),
+  ('𝚨', '𝛀'), ('𝛂', '𝛚'), ('𝛜', '𝛺'), ('𝛼', '𝜔'),
+  ('𝜖', '𝜴'), ('𝜶', '𝝎'), ('𝝐', '𝝮'), ('𝝰', '𝞈'),
+  ('𝞊', '𝞨'), ('𝞪', '𝟂'), ('𝟄', '𝟋'), ('𝟎', '𝟿'),
+  ('𝨀', '𝨶'), ('𝨻', '𝩬'), ('𝩵', '𝩵'), ('𝪄', '𝪄'),
+  ('𝪛', '𝪟'), ('𝪡', '𝪯'), ('𞀀', '𞀆'), ('𞀈', '𞀘'),
+  ('𞀛', '𞀡'), ('𞀣', '𞀤'), ('𞀦', '𞀪'), ('𞠀', '𞣄'),
+  ('𞣐', '𞣖'), ('𞤀', '𞥊'), ('𞥐', '𞥙'), ('𞸀', '𞸃'),
+  ('𞸅', '𞸟'), ('𞸡', '𞸢'), ('𞸤', '𞸤'), ('𞸧', '𞸧'),
+  ('𞸩', '𞸲'), ('𞸴', '𞸷'), ('𞸹', '𞸹'), ('𞸻', '𞸻'),
+  ('𞹂', '𞹂'), ('𞹇', '𞹇'), ('𞹉', '𞹉'), ('𞹋', '𞹋'),
+  ('𞹍', '𞹏'), ('𞹑', '𞹒'), ('𞹔', '𞹔'), ('𞹗', '𞹗'),
+  ('𞹙', '𞹙'), ('𞹛', '𞹛'), ('𞹝', '𞹝'), ('𞹟', '𞹟'),
+  ('𞹡', '𞹢'), ('𞹤', '𞹤'), ('𞹧', '𞹪'), ('𞹬', '𞹲'),
+  ('𞹴', '𞹷'), ('𞹹', '𞹼'), ('𞹾', '𞹾'), ('𞺀', '𞺉'),
+  ('𞺋', '𞺛'), ('𞺡', '𞺣'), ('𞺥', '𞺩'), ('𞺫', '𞺻'),
+  ('𠀀', '𪛖'), ('𪜀', '𫜴'), ('𫝀', '𫠝'), ('𫠠', '𬺡'),
+  ('𬺰', '𮯠'), ('丽', '𪘀'), ('󠄀', '󠇯'),
+];
+
+pub const ID_START: &'static [(char, char)] = &[
+  ('A', 'Z'), ('a', 'z'), ('ª', 'ª'), ('µ', 'µ'), ('º', 'º'),
+  ('À', 'Ö'), ('Ø', 'ö'), ('ø', 'ˁ'), ('ˆ', 'ˑ'), ('ˠ', 'ˤ'),
+  ('ˬ', 'ˬ'), ('ˮ', 'ˮ'), ('Ͱ', 'ʹ'), ('Ͷ', 'ͷ'), ('ͺ', 'ͽ'),
+  ('Ϳ', 'Ϳ'), ('Ά', 'Ά'), ('Έ', 'Ί'), ('Ό', 'Ό'), ('Ύ', 'Ρ'),
+  ('Σ', 'ϵ'), ('Ϸ', 'ҁ'), ('Ҋ', 'ԯ'), ('Ա', 'Ֆ'), ('ՙ', 'ՙ'),
+  ('ա', 'և'), ('א', 'ת'), ('װ', 'ײ'), ('ؠ', 'ي'), ('ٮ', 'ٯ'),
+  ('ٱ', 'ۓ'), ('ە', 'ە'), ('ۥ', 'ۦ'), ('ۮ', 'ۯ'), ('ۺ', 'ۼ'),
+  ('ۿ', 'ۿ'), ('ܐ', 'ܐ'), ('ܒ', 'ܯ'), ('ݍ', 'ޥ'), ('ޱ', 'ޱ'),
+  ('ߊ', 'ߪ'), ('ߴ', 'ߵ'), ('ߺ', 'ߺ'), ('ࠀ', 'ࠕ'), ('ࠚ', 'ࠚ'),
+  ('ࠤ', 'ࠤ'), ('ࠨ', 'ࠨ'), ('ࡀ', 'ࡘ'), ('ࡠ', 'ࡪ'),
+  ('ࢠ', 'ࢴ'), ('ࢶ', 'ࢽ'), ('ऄ', 'ह'), ('ऽ', 'ऽ'),
+  ('ॐ', 'ॐ'), ('क़', 'ॡ'), ('ॱ', 'ঀ'), ('অ', 'ঌ'),
+  ('এ', 'ঐ'), ('ও', 'ন'), ('প', 'র'), ('ল', 'ল'),
+  ('শ', 'হ'), ('ঽ', 'ঽ'), ('ৎ', 'ৎ'), ('ড়', 'ঢ়'),
+  ('য়', 'ৡ'), ('ৰ', 'ৱ'), ('ৼ', 'ৼ'), ('ਅ', 'ਊ'),
+  ('ਏ', 'ਐ'), ('ਓ', 'ਨ'), ('ਪ', 'ਰ'), ('ਲ', 'ਲ਼'),
+  ('ਵ', 'ਸ਼'), ('ਸ', 'ਹ'), ('ਖ਼', 'ੜ'), ('ਫ਼', 'ਫ਼'),
+  ('ੲ', 'ੴ'), ('અ', 'ઍ'), ('એ', 'ઑ'), ('ઓ', 'ન'),
+  ('પ', 'ર'), ('લ', 'ળ'), ('વ', 'હ'), ('ઽ', 'ઽ'),
+  ('ૐ', 'ૐ'), ('ૠ', 'ૡ'), ('ૹ', 'ૹ'), ('ଅ', 'ଌ'),
+  ('ଏ', 'ଐ'), ('ଓ', 'ନ'), ('ପ', 'ର'), ('ଲ', 'ଳ'),
+  ('ଵ', 'ହ'), ('ଽ', 'ଽ'), ('ଡ଼', 'ଢ଼'), ('ୟ', 'ୡ'),
+  ('ୱ', 'ୱ'), ('ஃ', 'ஃ'), ('அ', 'ஊ'), ('எ', 'ஐ'),
+  ('ஒ', 'க'), ('ங', 'ச'), ('ஜ', 'ஜ'), ('ஞ', 'ட'),
+  ('ண', 'த'), ('ந', 'ப'), ('ம', 'ஹ'), ('ௐ', 'ௐ'),
+  ('అ', 'ఌ'), ('ఎ', 'ఐ'), ('ఒ', 'న'), ('ప', 'హ'),
+  ('ఽ', 'ఽ'), ('ౘ', 'ౚ'), ('ౠ', 'ౡ'), ('ಀ', 'ಀ'),
+  ('ಅ', 'ಌ'), ('ಎ', 'ಐ'), ('ಒ', 'ನ'), ('ಪ', 'ಳ'),
+  ('ವ', 'ಹ'), ('ಽ', 'ಽ'), ('ೞ', 'ೞ'), ('ೠ', 'ೡ'),
+  ('ೱ', 'ೲ'), ('അ', 'ഌ'), ('എ', 'ഐ'), ('ഒ', 'ഺ'),
+  ('ഽ', 'ഽ'), ('ൎ', 'ൎ'), ('ൔ', 'ൖ'), ('ൟ', 'ൡ'),
+  ('ൺ', 'ൿ'), ('අ', 'ඖ'), ('ක', 'න'), ('ඳ', 'ර'),
+  ('ල', 'ල'), ('ව', 'ෆ'), ('ก', 'ะ'), ('า', 'ำ'),
+  ('เ', 'ๆ'), ('ກ', 'ຂ'), ('ຄ', 'ຄ'), ('ງ', 'ຈ'),
+  ('ຊ', 'ຊ'), ('ຍ', 'ຍ'), ('ດ', 'ທ'), ('ນ', 'ຟ'),
+  ('ມ', 'ຣ'), ('ລ', 'ລ'), ('ວ', 'ວ'), ('ສ', 'ຫ'),
+  ('ອ', 'ະ'), ('າ', 'ຳ'), ('ຽ', 'ຽ'), ('ເ', 'ໄ'),
+  ('ໆ', 'ໆ'), ('ໜ', 'ໟ'), ('ༀ', 'ༀ'), ('ཀ', 'ཇ'),
+  ('ཉ', 'ཬ'), ('ྈ', 'ྌ'), ('က', 'ဪ'), ('ဿ', 'ဿ'),
+  ('ၐ', 'ၕ'), ('ၚ', 'ၝ'), ('ၡ', 'ၡ'), ('ၥ', 'ၦ'),
+  ('ၮ', 'ၰ'), ('ၵ', 'ႁ'), ('ႎ', 'ႎ'), ('Ⴀ', 'Ⴥ'),
+  ('Ⴧ', 'Ⴧ'), ('Ⴭ', 'Ⴭ'), ('ა', 'ჺ'), ('ჼ', 'ቈ'),
+  ('ቊ', 'ቍ'), ('ቐ', 'ቖ'), ('ቘ', 'ቘ'), ('ቚ', 'ቝ'),
+  ('በ', 'ኈ'), ('ኊ', 'ኍ'), ('ነ', 'ኰ'), ('ኲ', 'ኵ'),
+  ('ኸ', 'ኾ'), ('ዀ', 'ዀ'), ('ዂ', 'ዅ'), ('ወ', 'ዖ'),
+  ('ዘ', 'ጐ'), ('ጒ', 'ጕ'), ('ጘ', 'ፚ'), ('ᎀ', 'ᎏ'),
+  ('Ꭰ', 'Ᏽ'), ('ᏸ', 'ᏽ'), ('ᐁ', 'ᙬ'), ('ᙯ', 'ᙿ'),
+  ('ᚁ', 'ᚚ'), ('ᚠ', 'ᛪ'), ('ᛮ', 'ᛸ'), ('ᜀ', 'ᜌ'),
+  ('ᜎ', 'ᜑ'), ('ᜠ', 'ᜱ'), ('ᝀ', 'ᝑ'), ('ᝠ', 'ᝬ'),
+  ('ᝮ', 'ᝰ'), ('ក', 'ឳ'), ('ៗ', 'ៗ'), ('ៜ', 'ៜ'),
+  ('ᠠ', 'ᡷ'), ('ᢀ', 'ᢨ'), ('ᢪ', 'ᢪ'), ('ᢰ', 'ᣵ'),
+  ('ᤀ', 'ᤞ'), ('ᥐ', 'ᥭ'), ('ᥰ', 'ᥴ'), ('ᦀ', 'ᦫ'),
+  ('ᦰ', 'ᧉ'), ('ᨀ', 'ᨖ'), ('ᨠ', 'ᩔ'), ('ᪧ', 'ᪧ'),
+  ('ᬅ', 'ᬳ'), ('ᭅ', 'ᭋ'), ('ᮃ', 'ᮠ'), ('ᮮ', 'ᮯ'),
+  ('ᮺ', 'ᯥ'), ('ᰀ', 'ᰣ'), ('ᱍ', 'ᱏ'), ('ᱚ', 'ᱽ'),
+  ('ᲀ', 'ᲈ'), ('ᳩ', 'ᳬ'), ('ᳮ', 'ᳱ'), ('ᳵ', 'ᳶ'),
+  ('ᴀ', 'ᶿ'), ('Ḁ', 'ἕ'), ('Ἐ', 'Ἕ'), ('ἠ', 'ὅ'),
+  ('Ὀ', 'Ὅ'), ('ὐ', 'ὗ'), ('Ὑ', 'Ὑ'), ('Ὓ', 'Ὓ'),
+  ('Ὕ', 'Ὕ'), ('Ὗ', 'ώ'), ('ᾀ', 'ᾴ'), ('ᾶ', 'ᾼ'),
+  ('ι', 'ι'), ('ῂ', 'ῄ'), ('ῆ', 'ῌ'), ('ῐ', 'ΐ'),
+  ('ῖ', 'Ί'), ('ῠ', 'Ῥ'), ('ῲ', 'ῴ'), ('ῶ', 'ῼ'),
+  ('ⁱ', 'ⁱ'), ('ⁿ', 'ⁿ'), ('ₐ', 'ₜ'), ('ℂ', 'ℂ'),
+  ('ℇ', 'ℇ'), ('ℊ', 'ℓ'), ('ℕ', 'ℕ'), ('℘', 'ℝ'),
+  ('ℤ', 'ℤ'), ('Ω', 'Ω'), ('ℨ', 'ℨ'), ('K', 'ℹ'),
+  ('ℼ', 'ℿ'), ('ⅅ', 'ⅉ'), ('ⅎ', 'ⅎ'), ('Ⅰ', 'ↈ'),
+  ('Ⰰ', 'Ⱞ'), ('ⰰ', 'ⱞ'), ('Ⱡ', 'ⳤ'), ('Ⳬ', 'ⳮ'),
+  ('Ⳳ', 'ⳳ'), ('ⴀ', 'ⴥ'), ('ⴧ', 'ⴧ'), ('ⴭ', 'ⴭ'),
+  ('ⴰ', 'ⵧ'), ('ⵯ', 'ⵯ'), ('ⶀ', 'ⶖ'), ('ⶠ', 'ⶦ'),
+  ('ⶨ', 'ⶮ'), ('ⶰ', 'ⶶ'), ('ⶸ', 'ⶾ'), ('ⷀ', 'ⷆ'),
+  ('ⷈ', 'ⷎ'), ('ⷐ', 'ⷖ'), ('ⷘ', 'ⷞ'), ('々', '〇'),
+  ('〡', '〩'), ('〱', '〵'), ('〸', '〼'), ('ぁ', 'ゖ'),
+  ('゛', 'ゟ'), ('ァ', 'ヺ'), ('ー', 'ヿ'), ('ㄅ', 'ㄮ'),
+  ('ㄱ', 'ㆎ'), ('ㆠ', 'ㆺ'), ('ㇰ', 'ㇿ'), ('㐀', '䶵'),
+  ('一', '鿪'), ('ꀀ', 'ꒌ'), ('ꓐ', 'ꓽ'), ('ꔀ', 'ꘌ'),
+  ('ꘐ', 'ꘟ'), ('ꘪ', 'ꘫ'), ('Ꙁ', 'ꙮ'), ('ꙿ', 'ꚝ'),
+  ('ꚠ', 'ꛯ'), ('ꜗ', 'ꜟ'), ('Ꜣ', 'ꞈ'), ('Ꞌ', 'Ɪ'),
+  ('Ʞ', 'ꞷ'), ('ꟷ', 'ꠁ'), ('ꠃ', 'ꠅ'), ('ꠇ', 'ꠊ'),
+  ('ꠌ', 'ꠢ'), ('ꡀ', 'ꡳ'), ('ꢂ', 'ꢳ'), ('ꣲ', 'ꣷ'),
+  ('ꣻ', 'ꣻ'), ('ꣽ', 'ꣽ'), ('ꤊ', 'ꤥ'), ('ꤰ', 'ꥆ'),
+  ('ꥠ', 'ꥼ'), ('ꦄ', 'ꦲ'), ('ꧏ', 'ꧏ'), ('ꧠ', 'ꧤ'),
+  ('ꧦ', 'ꧯ'), ('ꧺ', 'ꧾ'), ('ꨀ', 'ꨨ'), ('ꩀ', 'ꩂ'),
+  ('ꩄ', 'ꩋ'), ('ꩠ', 'ꩶ'), ('ꩺ', 'ꩺ'), ('ꩾ', 'ꪯ'),
+  ('ꪱ', 'ꪱ'), ('ꪵ', 'ꪶ'), ('ꪹ', 'ꪽ'), ('ꫀ', 'ꫀ'),
+  ('ꫂ', 'ꫂ'), ('ꫛ', 'ꫝ'), ('ꫠ', 'ꫪ'), ('ꫲ', 'ꫴ'),
+  ('ꬁ', 'ꬆ'), ('ꬉ', 'ꬎ'), ('ꬑ', 'ꬖ'), ('ꬠ', 'ꬦ'),
+  ('ꬨ', 'ꬮ'), ('ꬰ', 'ꭚ'), ('ꭜ', 'ꭥ'), ('ꭰ', 'ꯢ'),
+  ('가', '힣'), ('ힰ', 'ퟆ'), ('ퟋ', 'ퟻ'), ('豈', '舘'),
+  ('並', '龎'), ('ff', 'st'), ('ﬓ', 'ﬗ'), ('יִ', 'יִ'),
+  ('ײַ', 'ﬨ'), ('שׁ', 'זּ'), ('טּ', 'לּ'), ('מּ', 'מּ'),
+  ('נּ', 'סּ'), ('ףּ', 'פּ'), ('צּ', 'ﮱ'), ('ﯓ', 'ﴽ'),
+  ('ﵐ', 'ﶏ'), ('ﶒ', 'ﷇ'), ('ﷰ', 'ﷻ'), ('ﹰ', 'ﹴ'),
+  ('ﹶ', 'ﻼ'), ('A', 'Z'), ('a', 'z'), ('ヲ', 'ᄒ'),
+  ('ᅡ', 'ᅦ'), ('ᅧ', 'ᅬ'), ('ᅭ', 'ᅲ'), ('ᅳ', 'ᅵ'),
+  ('𐀀', '𐀋'), ('𐀍', '𐀦'), ('𐀨', '𐀺'), ('𐀼', '𐀽'),
+  ('𐀿', '𐁍'), ('𐁐', '𐁝'), ('𐂀', '𐃺'), ('𐅀', '𐅴'),
+  ('𐊀', '𐊜'), ('𐊠', '𐋐'), ('𐌀', '𐌟'), ('𐌭', '𐍊'),
+  ('𐍐', '𐍵'), ('𐎀', '𐎝'), ('𐎠', '𐏃'), ('𐏈', '𐏏'),
+  ('𐏑', '𐏕'), ('𐐀', '𐒝'), ('𐒰', '𐓓'), ('𐓘', '𐓻'),
+  ('𐔀', '𐔧'), ('𐔰', '𐕣'), ('𐘀', '𐜶'), ('𐝀', '𐝕'),
+  ('𐝠', '𐝧'), ('𐠀', '𐠅'), ('𐠈', '𐠈'), ('𐠊', '𐠵'),
+  ('𐠷', '𐠸'), ('𐠼', '𐠼'), ('𐠿', '𐡕'), ('𐡠', '𐡶'),
+  ('𐢀', '𐢞'), ('𐣠', '𐣲'), ('𐣴', '𐣵'), ('𐤀', '𐤕'),
+  ('𐤠', '𐤹'), ('𐦀', '𐦷'), ('𐦾', '𐦿'), ('𐨀', '𐨀'),
+  ('𐨐', '𐨓'), ('𐨕', '𐨗'), ('𐨙', '𐨳'), ('𐩠', '𐩼'),
+  ('𐪀', '𐪜'), ('𐫀', '𐫇'), ('𐫉', '𐫤'), ('𐬀', '𐬵'),
+  ('𐭀', '𐭕'), ('𐭠', '𐭲'), ('𐮀', '𐮑'), ('𐰀', '𐱈'),
+  ('𐲀', '𐲲'), ('𐳀', '𐳲'), ('𑀃', '𑀷'), ('𑂃', '𑂯'),
+  ('𑃐', '𑃨'), ('𑄃', '𑄦'), ('𑅐', '𑅲'), ('𑅶', '𑅶'),
+  ('𑆃', '𑆲'), ('𑇁', '𑇄'), ('𑇚', '𑇚'), ('𑇜', '𑇜'),
+  ('𑈀', '𑈑'), ('𑈓', '𑈫'), ('𑊀', '𑊆'), ('𑊈', '𑊈'),
+  ('𑊊', '𑊍'), ('𑊏', '𑊝'), ('𑊟', '𑊨'), ('𑊰', '𑋞'),
+  ('𑌅', '𑌌'), ('𑌏', '𑌐'), ('𑌓', '𑌨'), ('𑌪', '𑌰'),
+  ('𑌲', '𑌳'), ('𑌵', '𑌹'), ('𑌽', '𑌽'), ('𑍐', '𑍐'),
+  ('𑍝', '𑍡'), ('𑐀', '𑐴'), ('𑑇', '𑑊'), ('𑒀', '𑒯'),
+  ('𑓄', '𑓅'), ('𑓇', '𑓇'), ('𑖀', '𑖮'), ('𑗘', '𑗛'),
+  ('𑘀', '𑘯'), ('𑙄', '𑙄'), ('𑚀', '𑚪'), ('𑜀', '𑜙'),
+  ('𑢠', '𑣟'), ('𑣿', '𑣿'), ('𑨀', '𑨀'), ('𑨋', '𑨲'),
+  ('𑨺', '𑨺'), ('𑩐', '𑩐'), ('𑩜', '𑪃'), ('𑪆', '𑪉'),
+  ('𑫀', '𑫸'), ('𑰀', '𑰈'), ('𑰊', '𑰮'), ('𑱀', '𑱀'),
+  ('𑱲', '𑲏'), ('𑴀', '𑴆'), ('𑴈', '𑴉'), ('𑴋', '𑴰'),
+  ('𑵆', '𑵆'), ('𒀀', '𒎙'), ('𒐀', '𒑮'), ('𒒀', '𒕃'),
+  ('𓀀', '𓐮'), ('𔐀', '𔙆'), ('𖠀', '𖨸'), ('𖩀', '𖩞'),
+  ('𖫐', '𖫭'), ('𖬀', '𖬯'), ('𖭀', '𖭃'), ('𖭣', '𖭷'),
+  ('𖭽', '𖮏'), ('𖼀', '𖽄'), ('𖽐', '𖽐'), ('𖾓', '𖾟'),
+  ('𖿠', '𖿡'), ('𗀀', '𘟬'), ('𘠀', '𘫲'), ('𛀀', '𛄞'),
+  ('𛅰', '𛋻'), ('𛰀', '𛱪'), ('𛱰', '𛱼'), ('𛲀', '𛲈'),
+  ('𛲐', '𛲙'), ('𝐀', '𝑔'), ('𝑖', '𝒜'), ('𝒞', '𝒟'),
+  ('𝒢', '𝒢'), ('𝒥', '𝒦'), ('𝒩', '𝒬'), ('𝒮', '𝒹'),
+  ('𝒻', '𝒻'), ('𝒽', '𝓃'), ('𝓅', '𝔅'), ('𝔇', '𝔊'),
+  ('𝔍', '𝔔'), ('𝔖', '𝔜'), ('𝔞', '𝔹'), ('𝔻', '𝔾'),
+  ('𝕀', '𝕄'), ('𝕆', '𝕆'), ('𝕊', '𝕐'), ('𝕒', '𝚥'),
+  ('𝚨', '𝛀'), ('𝛂', '𝛚'), ('𝛜', '𝛺'), ('𝛼', '𝜔'),
+  ('𝜖', '𝜴'), ('𝜶', '𝝎'), ('𝝐', '𝝮'), ('𝝰', '𝞈'),
+  ('𝞊', '𝞨'), ('𝞪', '𝟂'), ('𝟄', '𝟋'), ('𞠀', '𞣄'),
+  ('𞤀', '𞥃'), ('𞸀', '𞸃'), ('𞸅', '𞸟'), ('𞸡', '𞸢'),
+  ('𞸤', '𞸤'), ('𞸧', '𞸧'), ('𞸩', '𞸲'), ('𞸴', '𞸷'),
+  ('𞸹', '𞸹'), ('𞸻', '𞸻'), ('𞹂', '𞹂'), ('𞹇', '𞹇'),
+  ('𞹉', '𞹉'), ('𞹋', '𞹋'), ('𞹍', '𞹏'), ('𞹑', '𞹒'),
+  ('𞹔', '𞹔'), ('𞹗', '𞹗'), ('𞹙', '𞹙'), ('𞹛', '𞹛'),
+  ('𞹝', '𞹝'), ('𞹟', '𞹟'), ('𞹡', '𞹢'), ('𞹤', '𞹤'),
+  ('𞹧', '𞹪'), ('𞹬', '𞹲'), ('𞹴', '𞹷'), ('𞹹', '𞹼'),
+  ('𞹾', '𞹾'), ('𞺀', '𞺉'), ('𞺋', '𞺛'), ('𞺡', '𞺣'),
+  ('𞺥', '𞺩'), ('𞺫', '𞺻'), ('𠀀', '𪛖'), ('𪜀', '𫜴'),
+  ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), ('丽', '𪘀'),
+];
+
+pub const IDEOGRAPHIC: &'static [(char, char)] = &[
+  ('〆', '〇'), ('〡', '〩'), ('〸', '〺'), ('㐀', '䶵'),
+  ('一', '鿪'), ('豈', '舘'), ('並', '龎'), ('𗀀', '𘟬'),
+  ('𘠀', '𘫲'), ('𛅰', '𛋻'), ('𠀀', '𪛖'), ('𪜀', '𫜴'),
+  ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), ('丽', '𪘀'),
+];
+
+pub const JOIN_CONTROL: &'static [(char, char)] = &[
+  ('\u{200c}', '\u{200d}'),
+];
+
+pub const LOGICAL_ORDER_EXCEPTION: &'static [(char, char)] = &[
+  ('เ', 'ไ'), ('ເ', 'ໄ'), ('ᦵ', 'ᦷ'), ('ᦺ', 'ᦺ'),
+  ('ꪵ', 'ꪶ'), ('ꪹ', 'ꪹ'), ('ꪻ', 'ꪼ'),
+];
+
+pub const LOWERCASE: &'static [(char, char)] = &[
+  ('a', 'z'), ('ª', 'ª'), ('µ', 'µ'), ('º', 'º'), ('ß', 'ö'),
+  ('ø', 'ÿ'), ('ā', 'ā'), ('ă', 'ă'), ('ą', 'ą'), ('ć', 'ć'),
+  ('ĉ', 'ĉ'), ('ċ', 'ċ'), ('č', 'č'), ('ď', 'ď'), ('đ', 'đ'),
+  ('ē', 'ē'), ('ĕ', 'ĕ'), ('ė', 'ė'), ('ę', 'ę'), ('ě', 'ě'),
+  ('ĝ', 'ĝ'), ('ğ', 'ğ'), ('ġ', 'ġ'), ('ģ', 'ģ'), ('ĥ', 'ĥ'),
+  ('ħ', 'ħ'), ('ĩ', 'ĩ'), ('ī', 'ī'), ('ĭ', 'ĭ'), ('į', 'į'),
+  ('ı', 'ı'), ('ij', 'ij'), ('ĵ', 'ĵ'), ('ķ', 'ĸ'), ('ĺ', 'ĺ'),
+  ('ļ', 'ļ'), ('ľ', 'ľ'), ('ŀ', 'ŀ'), ('ł', 'ł'), ('ń', 'ń'),
+  ('ņ', 'ņ'), ('ň', 'ʼn'), ('ŋ', 'ŋ'), ('ō', 'ō'), ('ŏ', 'ŏ'),
+  ('ő', 'ő'), ('œ', 'œ'), ('ŕ', 'ŕ'), ('ŗ', 'ŗ'), ('ř', 'ř'),
+  ('ś', 'ś'), ('ŝ', 'ŝ'), ('ş', 'ş'), ('š', 'š'), ('ţ', 'ţ'),
+  ('ť', 'ť'), ('ŧ', 'ŧ'), ('ũ', 'ũ'), ('ū', 'ū'), ('ŭ', 'ŭ'),
+  ('ů', 'ů'), ('ű', 'ű'), ('ų', 'ų'), ('ŵ', 'ŵ'), ('ŷ', 'ŷ'),
+  ('ź', 'ź'), ('ż', 'ż'), ('ž', 'ƀ'), ('ƃ', 'ƃ'), ('ƅ', 'ƅ'),
+  ('ƈ', 'ƈ'), ('ƌ', 'ƍ'), ('ƒ', 'ƒ'), ('ƕ', 'ƕ'), ('ƙ', 'ƛ'),
+  ('ƞ', 'ƞ'), ('ơ', 'ơ'), ('ƣ', 'ƣ'), ('ƥ', 'ƥ'), ('ƨ', 'ƨ'),
+  ('ƪ', 'ƫ'), ('ƭ', 'ƭ'), ('ư', 'ư'), ('ƴ', 'ƴ'), ('ƶ', 'ƶ'),
+  ('ƹ', 'ƺ'), ('ƽ', 'ƿ'), ('dž', 'dž'), ('lj', 'lj'), ('nj', 'nj'),
+  ('ǎ', 'ǎ'), ('ǐ', 'ǐ'), ('ǒ', 'ǒ'), ('ǔ', 'ǔ'), ('ǖ', 'ǖ'),
+  ('ǘ', 'ǘ'), ('ǚ', 'ǚ'), ('ǜ', 'ǝ'), ('ǟ', 'ǟ'), ('ǡ', 'ǡ'),
+  ('ǣ', 'ǣ'), ('ǥ', 'ǥ'), ('ǧ', 'ǧ'), ('ǩ', 'ǩ'), ('ǫ', 'ǫ'),
+  ('ǭ', 'ǭ'), ('ǯ', 'ǰ'), ('dz', 'dz'), ('ǵ', 'ǵ'), ('ǹ', 'ǹ'),
+  ('ǻ', 'ǻ'), ('ǽ', 'ǽ'), ('ǿ', 'ǿ'), ('ȁ', 'ȁ'), ('ȃ', 'ȃ'),
+  ('ȅ', 'ȅ'), ('ȇ', 'ȇ'), ('ȉ', 'ȉ'), ('ȋ', 'ȋ'), ('ȍ', 'ȍ'),
+  ('ȏ', 'ȏ'), ('ȑ', 'ȑ'), ('ȓ', 'ȓ'), ('ȕ', 'ȕ'), ('ȗ', 'ȗ'),
+  ('ș', 'ș'), ('ț', 'ț'), ('ȝ', 'ȝ'), ('ȟ', 'ȟ'), ('ȡ', 'ȡ'),
+  ('ȣ', 'ȣ'), ('ȥ', 'ȥ'), ('ȧ', 'ȧ'), ('ȩ', 'ȩ'), ('ȫ', 'ȫ'),
+  ('ȭ', 'ȭ'), ('ȯ', 'ȯ'), ('ȱ', 'ȱ'), ('ȳ', 'ȹ'), ('ȼ', 'ȼ'),
+  ('ȿ', 'ɀ'), ('ɂ', 'ɂ'), ('ɇ', 'ɇ'), ('ɉ', 'ɉ'), ('ɋ', 'ɋ'),
+  ('ɍ', 'ɍ'), ('ɏ', 'ʓ'), ('ʕ', 'ʸ'), ('ˀ', 'ˁ'), ('ˠ', 'ˤ'),
+  ('ͅ', 'ͅ'), ('ͱ', 'ͱ'), ('ͳ', 'ͳ'), ('ͷ', 'ͷ'), ('ͺ', 'ͽ'),
+  ('ΐ', 'ΐ'), ('ά', 'ώ'), ('ϐ', 'ϑ'), ('ϕ', 'ϗ'), ('ϙ', 'ϙ'),
+  ('ϛ', 'ϛ'), ('ϝ', 'ϝ'), ('ϟ', 'ϟ'), ('ϡ', 'ϡ'), ('ϣ', 'ϣ'),
+  ('ϥ', 'ϥ'), ('ϧ', 'ϧ'), ('ϩ', 'ϩ'), ('ϫ', 'ϫ'), ('ϭ', 'ϭ'),
+  ('ϯ', 'ϳ'), ('ϵ', 'ϵ'), ('ϸ', 'ϸ'), ('ϻ', 'ϼ'), ('а', 'џ'),
+  ('ѡ', 'ѡ'), ('ѣ', 'ѣ'), ('ѥ', 'ѥ'), ('ѧ', 'ѧ'), ('ѩ', 'ѩ'),
+  ('ѫ', 'ѫ'), ('ѭ', 'ѭ'), ('ѯ', 'ѯ'), ('ѱ', 'ѱ'), ('ѳ', 'ѳ'),
+  ('ѵ', 'ѵ'), ('ѷ', 'ѷ'), ('ѹ', 'ѹ'), ('ѻ', 'ѻ'), ('ѽ', 'ѽ'),
+  ('ѿ', 'ѿ'), ('ҁ', 'ҁ'), ('ҋ', 'ҋ'), ('ҍ', 'ҍ'), ('ҏ', 'ҏ'),
+  ('ґ', 'ґ'), ('ғ', 'ғ'), ('ҕ', 'ҕ'), ('җ', 'җ'), ('ҙ', 'ҙ'),
+  ('қ', 'қ'), ('ҝ', 'ҝ'), ('ҟ', 'ҟ'), ('ҡ', 'ҡ'), ('ң', 'ң'),
+  ('ҥ', 'ҥ'), ('ҧ', 'ҧ'), ('ҩ', 'ҩ'), ('ҫ', 'ҫ'), ('ҭ', 'ҭ'),
+  ('ү', 'ү'), ('ұ', 'ұ'), ('ҳ', 'ҳ'), ('ҵ', 'ҵ'), ('ҷ', 'ҷ'),
+  ('ҹ', 'ҹ'), ('һ', 'һ'), ('ҽ', 'ҽ'), ('ҿ', 'ҿ'), ('ӂ', 'ӂ'),
+  ('ӄ', 'ӄ'), ('ӆ', 'ӆ'), ('ӈ', 'ӈ'), ('ӊ', 'ӊ'), ('ӌ', 'ӌ'),
+  ('ӎ', 'ӏ'), ('ӑ', 'ӑ'), ('ӓ', 'ӓ'), ('ӕ', 'ӕ'), ('ӗ', 'ӗ'),
+  ('ә', 'ә'), ('ӛ', 'ӛ'), ('ӝ', 'ӝ'), ('ӟ', 'ӟ'), ('ӡ', 'ӡ'),
+  ('ӣ', 'ӣ'), ('ӥ', 'ӥ'), ('ӧ', 'ӧ'), ('ө', 'ө'), ('ӫ', 'ӫ'),
+  ('ӭ', 'ӭ'), ('ӯ', 'ӯ'), ('ӱ', 'ӱ'), ('ӳ', 'ӳ'), ('ӵ', 'ӵ'),
+  ('ӷ', 'ӷ'), ('ӹ', 'ӹ'), ('ӻ', 'ӻ'), ('ӽ', 'ӽ'), ('ӿ', 'ӿ'),
+  ('ԁ', 'ԁ'), ('ԃ', 'ԃ'), ('ԅ', 'ԅ'), ('ԇ', 'ԇ'), ('ԉ', 'ԉ'),
+  ('ԋ', 'ԋ'), ('ԍ', 'ԍ'), ('ԏ', 'ԏ'), ('ԑ', 'ԑ'), ('ԓ', 'ԓ'),
+  ('ԕ', 'ԕ'), ('ԗ', 'ԗ'), ('ԙ', 'ԙ'), ('ԛ', 'ԛ'), ('ԝ', 'ԝ'),
+  ('ԟ', 'ԟ'), ('ԡ', 'ԡ'), ('ԣ', 'ԣ'), ('ԥ', 'ԥ'), ('ԧ', 'ԧ'),
+  ('ԩ', 'ԩ'), ('ԫ', 'ԫ'), ('ԭ', 'ԭ'), ('ԯ', 'ԯ'), ('ա', 'և'),
+  ('ᏸ', 'ᏽ'), ('ᲀ', 'ᲈ'), ('ᴀ', 'ᶿ'), ('ḁ', 'ḁ'),
+  ('ḃ', 'ḃ'), ('ḅ', 'ḅ'), ('ḇ', 'ḇ'), ('ḉ', 'ḉ'),
+  ('ḋ', 'ḋ'), ('ḍ', 'ḍ'), ('ḏ', 'ḏ'), ('ḑ', 'ḑ'),
+  ('ḓ', 'ḓ'), ('ḕ', 'ḕ'), ('ḗ', 'ḗ'), ('ḙ', 'ḙ'),
+  ('ḛ', 'ḛ'), ('ḝ', 'ḝ'), ('ḟ', 'ḟ'), ('ḡ', 'ḡ'),
+  ('ḣ', 'ḣ'), ('ḥ', 'ḥ'), ('ḧ', 'ḧ'), ('ḩ', 'ḩ'),
+  ('ḫ', 'ḫ'), ('ḭ', 'ḭ'), ('ḯ', 'ḯ'), ('ḱ', 'ḱ'),
+  ('ḳ', 'ḳ'), ('ḵ', 'ḵ'), ('ḷ', 'ḷ'), ('ḹ', 'ḹ'),
+  ('ḻ', 'ḻ'), ('ḽ', 'ḽ'), ('ḿ', 'ḿ'), ('ṁ', 'ṁ'),
+  ('ṃ', 'ṃ'), ('ṅ', 'ṅ'), ('ṇ', 'ṇ'), ('ṉ', 'ṉ'),
+  ('ṋ', 'ṋ'), ('ṍ', 'ṍ'), ('ṏ', 'ṏ'), ('ṑ', 'ṑ'),
+  ('ṓ', 'ṓ'), ('ṕ', 'ṕ'), ('ṗ', 'ṗ'), ('ṙ', 'ṙ'),
+  ('ṛ', 'ṛ'), ('ṝ', 'ṝ'), ('ṟ', 'ṟ'), ('ṡ', 'ṡ'),
+  ('ṣ', 'ṣ'), ('ṥ', 'ṥ'), ('ṧ', 'ṧ'), ('ṩ', 'ṩ'),
+  ('ṫ', 'ṫ'), ('ṭ', 'ṭ'), ('ṯ', 'ṯ'), ('ṱ', 'ṱ'),
+  ('ṳ', 'ṳ'), ('ṵ', 'ṵ'), ('ṷ', 'ṷ'), ('ṹ', 'ṹ'),
+  ('ṻ', 'ṻ'), ('ṽ', 'ṽ'), ('ṿ', 'ṿ'), ('ẁ', 'ẁ'),
+  ('ẃ', 'ẃ'), ('ẅ', 'ẅ'), ('ẇ', 'ẇ'), ('ẉ', 'ẉ'),
+  ('ẋ', 'ẋ'), ('ẍ', 'ẍ'), ('ẏ', 'ẏ'), ('ẑ', 'ẑ'),
+  ('ẓ', 'ẓ'), ('ẕ', 'ẝ'), ('ẟ', 'ẟ'), ('ạ', 'ạ'),
+  ('ả', 'ả'), ('ấ', 'ấ'), ('ầ', 'ầ'), ('ẩ', 'ẩ'),
+  ('ẫ', 'ẫ'), ('ậ', 'ậ'), ('ắ', 'ắ'), ('ằ', 'ằ'),
+  ('ẳ', 'ẳ'), ('ẵ', 'ẵ'), ('ặ', 'ặ'), ('ẹ', 'ẹ'),
+  ('ẻ', 'ẻ'), ('ẽ', 'ẽ'), ('ế', 'ế'), ('ề', 'ề'),
+  ('ể', 'ể'), ('ễ', 'ễ'), ('ệ', 'ệ'), ('ỉ', 'ỉ'),
+  ('ị', 'ị'), ('ọ', 'ọ'), ('ỏ', 'ỏ'), ('ố', 'ố'),
+  ('ồ', 'ồ'), ('ổ', 'ổ'), ('ỗ', 'ỗ'), ('ộ', 'ộ'),
+  ('ớ', 'ớ'), ('ờ', 'ờ'), ('ở', 'ở'), ('ỡ', 'ỡ'),
+  ('ợ', 'ợ'), ('ụ', 'ụ'), ('ủ', 'ủ'), ('ứ', 'ứ'),
+  ('ừ', 'ừ'), ('ử', 'ử'), ('ữ', 'ữ'), ('ự', 'ự'),
+  ('ỳ', 'ỳ'), ('ỵ', 'ỵ'), ('ỷ', 'ỷ'), ('ỹ', 'ỹ'),
+  ('ỻ', 'ỻ'), ('ỽ', 'ỽ'), ('ỿ', 'ἇ'), ('ἐ', 'ἕ'),
+  ('ἠ', 'ἧ'), ('ἰ', 'ἷ'), ('ὀ', 'ὅ'), ('ὐ', 'ὗ'),
+  ('ὠ', 'ὧ'), ('ὰ', 'ώ'), ('ᾀ', 'ᾇ'), ('ᾐ', 'ᾗ'),
+  ('ᾠ', 'ᾧ'), ('ᾰ', 'ᾴ'), ('ᾶ', 'ᾷ'), ('ι', 'ι'),
+  ('ῂ', 'ῄ'), ('ῆ', 'ῇ'), ('ῐ', 'ΐ'), ('ῖ', 'ῗ'),
+  ('ῠ', 'ῧ'), ('ῲ', 'ῴ'), ('ῶ', 'ῷ'), ('ⁱ', 'ⁱ'),
+  ('ⁿ', 'ⁿ'), ('ₐ', 'ₜ'), ('ℊ', 'ℊ'), ('ℎ', 'ℏ'),
+  ('ℓ', 'ℓ'), ('ℯ', 'ℯ'), ('ℴ', 'ℴ'), ('ℹ', 'ℹ'),
+  ('ℼ', 'ℽ'), ('ⅆ', 'ⅉ'), ('ⅎ', 'ⅎ'), ('ⅰ', 'ⅿ'),
+  ('ↄ', 'ↄ'), ('ⓐ', 'ⓩ'), ('ⰰ', 'ⱞ'), ('ⱡ', 'ⱡ'),
+  ('ⱥ', 'ⱦ'), ('ⱨ', 'ⱨ'), ('ⱪ', 'ⱪ'), ('ⱬ', 'ⱬ'),
+  ('ⱱ', 'ⱱ'), ('ⱳ', 'ⱴ'), ('ⱶ', 'ⱽ'), ('ⲁ', 'ⲁ'),
+  ('ⲃ', 'ⲃ'), ('ⲅ', 'ⲅ'), ('ⲇ', 'ⲇ'), ('ⲉ', 'ⲉ'),
+  ('ⲋ', 'ⲋ'), ('ⲍ', 'ⲍ'), ('ⲏ', 'ⲏ'), ('ⲑ', 'ⲑ'),
+  ('ⲓ', 'ⲓ'), ('ⲕ', 'ⲕ'), ('ⲗ', 'ⲗ'), ('ⲙ', 'ⲙ'),
+  ('ⲛ', 'ⲛ'), ('ⲝ', 'ⲝ'), ('ⲟ', 'ⲟ'), ('ⲡ', 'ⲡ'),
+  ('ⲣ', 'ⲣ'), ('ⲥ', 'ⲥ'), ('ⲧ', 'ⲧ'), ('ⲩ', 'ⲩ'),
+  ('ⲫ', 'ⲫ'), ('ⲭ', 'ⲭ'), ('ⲯ', 'ⲯ'), ('ⲱ', 'ⲱ'),
+  ('ⲳ', 'ⲳ'), ('ⲵ', 'ⲵ'), ('ⲷ', 'ⲷ'), ('ⲹ', 'ⲹ'),
+  ('ⲻ', 'ⲻ'), ('ⲽ', 'ⲽ'), ('ⲿ', 'ⲿ'), ('ⳁ', 'ⳁ'),
+  ('ⳃ', 'ⳃ'), ('ⳅ', 'ⳅ'), ('ⳇ', 'ⳇ'), ('ⳉ', 'ⳉ'),
+  ('ⳋ', 'ⳋ'), ('ⳍ', 'ⳍ'), ('ⳏ', 'ⳏ'), ('ⳑ', 'ⳑ'),
+  ('ⳓ', 'ⳓ'), ('ⳕ', 'ⳕ'), ('ⳗ', 'ⳗ'), ('ⳙ', 'ⳙ'),
+  ('ⳛ', 'ⳛ'), ('ⳝ', 'ⳝ'), ('ⳟ', 'ⳟ'), ('ⳡ', 'ⳡ'),
+  ('ⳣ', 'ⳤ'), ('ⳬ', 'ⳬ'), ('ⳮ', 'ⳮ'), ('ⳳ', 'ⳳ'),
+  ('ⴀ', 'ⴥ'), ('ⴧ', 'ⴧ'), ('ⴭ', 'ⴭ'), ('ꙁ', 'ꙁ'),
+  ('ꙃ', 'ꙃ'), ('ꙅ', 'ꙅ'), ('ꙇ', 'ꙇ'), ('ꙉ', 'ꙉ'),
+  ('ꙋ', 'ꙋ'), ('ꙍ', 'ꙍ'), ('ꙏ', 'ꙏ'), ('ꙑ', 'ꙑ'),
+  ('ꙓ', 'ꙓ'), ('ꙕ', 'ꙕ'), ('ꙗ', 'ꙗ'), ('ꙙ', 'ꙙ'),
+  ('ꙛ', 'ꙛ'), ('ꙝ', 'ꙝ'), ('ꙟ', 'ꙟ'), ('ꙡ', 'ꙡ'),
+  ('ꙣ', 'ꙣ'), ('ꙥ', 'ꙥ'), ('ꙧ', 'ꙧ'), ('ꙩ', 'ꙩ'),
+  ('ꙫ', 'ꙫ'), ('ꙭ', 'ꙭ'), ('ꚁ', 'ꚁ'), ('ꚃ', 'ꚃ'),
+  ('ꚅ', 'ꚅ'), ('ꚇ', 'ꚇ'), ('ꚉ', 'ꚉ'), ('ꚋ', 'ꚋ'),
+  ('ꚍ', 'ꚍ'), ('ꚏ', 'ꚏ'), ('ꚑ', 'ꚑ'), ('ꚓ', 'ꚓ'),
+  ('ꚕ', 'ꚕ'), ('ꚗ', 'ꚗ'), ('ꚙ', 'ꚙ'), ('ꚛ', 'ꚝ'),
+  ('ꜣ', 'ꜣ'), ('ꜥ', 'ꜥ'), ('ꜧ', 'ꜧ'), ('ꜩ', 'ꜩ'),
+  ('ꜫ', 'ꜫ'), ('ꜭ', 'ꜭ'), ('ꜯ', 'ꜱ'), ('ꜳ', 'ꜳ'),
+  ('ꜵ', 'ꜵ'), ('ꜷ', 'ꜷ'), ('ꜹ', 'ꜹ'), ('ꜻ', 'ꜻ'),
+  ('ꜽ', 'ꜽ'), ('ꜿ', 'ꜿ'), ('ꝁ', 'ꝁ'), ('ꝃ', 'ꝃ'),
+  ('ꝅ', 'ꝅ'), ('ꝇ', 'ꝇ'), ('ꝉ', 'ꝉ'), ('ꝋ', 'ꝋ'),
+  ('ꝍ', 'ꝍ'), ('ꝏ', 'ꝏ'), ('ꝑ', 'ꝑ'), ('ꝓ', 'ꝓ'),
+  ('ꝕ', 'ꝕ'), ('ꝗ', 'ꝗ'), ('ꝙ', 'ꝙ'), ('ꝛ', 'ꝛ'),
+  ('ꝝ', 'ꝝ'), ('ꝟ', 'ꝟ'), ('ꝡ', 'ꝡ'), ('ꝣ', 'ꝣ'),
+  ('ꝥ', 'ꝥ'), ('ꝧ', 'ꝧ'), ('ꝩ', 'ꝩ'), ('ꝫ', 'ꝫ'),
+  ('ꝭ', 'ꝭ'), ('ꝯ', 'ꝸ'), ('ꝺ', 'ꝺ'), ('ꝼ', 'ꝼ'),
+  ('ꝿ', 'ꝿ'), ('ꞁ', 'ꞁ'), ('ꞃ', 'ꞃ'), ('ꞅ', 'ꞅ'),
+  ('ꞇ', 'ꞇ'), ('ꞌ', 'ꞌ'), ('ꞎ', 'ꞎ'), ('ꞑ', 'ꞑ'),
+  ('ꞓ', 'ꞕ'), ('ꞗ', 'ꞗ'), ('ꞙ', 'ꞙ'), ('ꞛ', 'ꞛ'),
+  ('ꞝ', 'ꞝ'), ('ꞟ', 'ꞟ'), ('ꞡ', 'ꞡ'), ('ꞣ', 'ꞣ'),
+  ('ꞥ', 'ꞥ'), ('ꞧ', 'ꞧ'), ('ꞩ', 'ꞩ'), ('ꞵ', 'ꞵ'),
+  ('ꞷ', 'ꞷ'), ('ꟸ', 'ꟺ'), ('ꬰ', 'ꭚ'), ('ꭜ', 'ꭥ'),
+  ('ꭰ', 'ꮿ'), ('ff', 'st'), ('ﬓ', 'ﬗ'), ('a', 'z'),
+  ('𐐨', '𐑏'), ('𐓘', '𐓻'), ('𐳀', '𐳲'), ('𑣀', '𑣟'),
+  ('𝐚', '𝐳'), ('𝑎', '𝑔'), ('𝑖', '𝑧'), ('𝒂', '𝒛'),
+  ('𝒶', '𝒹'), ('𝒻', '𝒻'), ('𝒽', '𝓃'), ('𝓅', '𝓏'),
+  ('𝓪', '𝔃'), ('𝔞', '𝔷'), ('𝕒', '𝕫'), ('𝖆', '𝖟'),
+  ('𝖺', '𝗓'), ('𝗮', '𝘇'), ('𝘢', '𝘻'), ('𝙖', '𝙯'),
+  ('𝚊', '𝚥'), ('𝛂', '𝛚'), ('𝛜', '𝛡'), ('𝛼', '𝜔'),
+  ('𝜖', '𝜛'), ('𝜶', '𝝎'), ('𝝐', '𝝕'), ('𝝰', '𝞈'),
+  ('𝞊', '𝞏'), ('𝞪', '𝟂'), ('𝟄', '𝟉'), ('𝟋', '𝟋'),
+  ('𞤢', '𞥃'),
+];
+
+pub const MATH: &'static [(char, char)] = &[
+  ('+', '+'), ('<', '>'), ('^', '^'), ('|', '|'), ('~', '~'), ('¬', '¬'),
+  ('±', '±'), ('×', '×'), ('÷', '÷'), ('ϐ', 'ϒ'), ('ϕ', 'ϕ'),
+  ('ϰ', 'ϱ'), ('ϴ', '϶'), ('؆', '؈'), ('‖', '‖'), ('′', '‴'),
+  ('⁀', '⁀'), ('⁄', '⁄'), ('⁒', '⁒'), ('\u{2061}', '\u{2064}'),
+  ('⁺', '⁾'), ('₊', '₎'), ('⃐', '⃜'), ('⃡', '⃡'),
+  ('⃥', '⃦'), ('⃫', '⃯'), ('ℂ', 'ℂ'), ('ℇ', 'ℇ'),
+  ('ℊ', 'ℓ'), ('ℕ', 'ℕ'), ('℘', 'ℝ'), ('ℤ', 'ℤ'),
+  ('ℨ', '℩'), ('ℬ', 'ℭ'), ('ℯ', 'ℱ'), ('ℳ', 'ℸ'),
+  ('ℼ', 'ⅉ'), ('⅋', '⅋'), ('←', '↧'), ('↩', '↮'),
+  ('↰', '↱'), ('↶', '↷'), ('↼', '⇛'), ('⇝', '⇝'),
+  ('⇤', '⇥'), ('⇴', '⋿'), ('⌈', '⌋'), ('⌠', '⌡'),
+  ('⍼', '⍼'), ('⎛', '⎵'), ('⎷', '⎷'), ('⏐', '⏐'),
+  ('⏜', '⏢'), ('■', '□'), ('▮', '▷'), ('▼', '◁'),
+  ('◆', '◇'), ('◊', '○'), ('●', '◓'), ('◢', '◢'),
+  ('◤', '◤'), ('◧', '◬'), ('◸', '◿'), ('★', '☆'),
+  ('♀', '♀'), ('♂', '♂'), ('♠', '♣'), ('♭', '♯'),
+  ('⟀', '⟿'), ('⤀', '⫿'), ('⬰', '⭄'), ('⭇', '⭌'),
+  ('﬩', '﬩'), ('﹡', '﹦'), ('﹨', '﹨'), ('+', '+'),
+  ('<', '>'), ('\', '\'), ('^', '^'), ('|', '|'),
+  ('~', '~'), ('¬', '¬'), ('←', '↓'), ('𝐀', '𝑔'),
+  ('𝑖', '𝒜'), ('𝒞', '𝒟'), ('𝒢', '𝒢'), ('𝒥', '𝒦'),
+  ('𝒩', '𝒬'), ('𝒮', '𝒹'), ('𝒻', '𝒻'), ('𝒽', '𝓃'),
+  ('𝓅', '𝔅'), ('𝔇', '𝔊'), ('𝔍', '𝔔'), ('𝔖', '𝔜'),
+  ('𝔞', '𝔹'), ('𝔻', '𝔾'), ('𝕀', '𝕄'), ('𝕆', '𝕆'),
+  ('𝕊', '𝕐'), ('𝕒', '𝚥'), ('𝚨', '𝟋'), ('𝟎', '𝟿'),
+  ('𞸀', '𞸃'), ('𞸅', '𞸟'), ('𞸡', '𞸢'), ('𞸤', '𞸤'),
+  ('𞸧', '𞸧'), ('𞸩', '𞸲'), ('𞸴', '𞸷'), ('𞸹', '𞸹'),
+  ('𞸻', '𞸻'), ('𞹂', '𞹂'), ('𞹇', '𞹇'), ('𞹉', '𞹉'),
+  ('𞹋', '𞹋'), ('𞹍', '𞹏'), ('𞹑', '𞹒'), ('𞹔', '𞹔'),
+  ('𞹗', '𞹗'), ('𞹙', '𞹙'), ('𞹛', '𞹛'), ('𞹝', '𞹝'),
+  ('𞹟', '𞹟'), ('𞹡', '𞹢'), ('𞹤', '𞹤'), ('𞹧', '𞹪'),
+  ('𞹬', '𞹲'), ('𞹴', '𞹷'), ('𞹹', '𞹼'), ('𞹾', '𞹾'),
+  ('𞺀', '𞺉'), ('𞺋', '𞺛'), ('𞺡', '𞺣'), ('𞺥', '𞺩'),
+  ('𞺫', '𞺻'), ('𞻰', '𞻱'),
+];
+
+pub const NONCHARACTER_CODE_POINT: &'static [(char, char)] = &[
+  ('\u{fdd0}', '\u{fdef}'), ('\u{fffe}', '\u{ffff}'),
+  ('\u{1fffe}', '\u{1ffff}'), ('\u{2fffe}', '\u{2ffff}'),
+  ('\u{3fffe}', '\u{3ffff}'), ('\u{4fffe}', '\u{4ffff}'),
+  ('\u{5fffe}', '\u{5ffff}'), ('\u{6fffe}', '\u{6ffff}'),
+  ('\u{7fffe}', '\u{7ffff}'), ('\u{8fffe}', '\u{8ffff}'),
+  ('\u{9fffe}', '\u{9ffff}'), ('\u{afffe}', '\u{affff}'),
+  ('\u{bfffe}', '\u{bffff}'), ('\u{cfffe}', '\u{cffff}'),
+  ('\u{dfffe}', '\u{dffff}'), ('\u{efffe}', '\u{effff}'),
+  ('\u{ffffe}', '\u{fffff}'), ('\u{10fffe}', '\u{10ffff}'),
+];
+
+pub const OTHER_ALPHABETIC: &'static [(char, char)] = &[
+  ('ͅ', 'ͅ'), ('ְ', 'ֽ'), ('ֿ', 'ֿ'), ('ׁ', 'ׂ'), ('ׄ', 'ׅ'),
+  ('ׇ', 'ׇ'), ('ؐ', 'ؚ'), ('ً', 'ٗ'), ('ٙ', 'ٟ'), ('ٰ', 'ٰ'),
+  ('ۖ', 'ۜ'), ('ۡ', 'ۤ'), ('ۧ', 'ۨ'), ('ۭ', 'ۭ'), ('ܑ', 'ܑ'),
+  ('ܰ', 'ܿ'), ('ަ', 'ް'), ('ࠖ', 'ࠗ'), ('ࠛ', 'ࠣ'), ('ࠥ', 'ࠧ'),
+  ('ࠩ', 'ࠬ'), ('ࣔ', 'ࣟ'), ('ࣣ', 'ࣩ'), ('ࣰ', 'ः'),
+  ('ऺ', 'ऻ'), ('ा', 'ौ'), ('ॎ', 'ॏ'), ('ॕ', 'ॗ'),
+  ('ॢ', 'ॣ'), ('ঁ', 'ঃ'), ('া', 'ৄ'), ('ে', 'ৈ'),
+  ('ো', 'ৌ'), ('ৗ', 'ৗ'), ('ৢ', 'ৣ'), ('ਁ', 'ਃ'),
+  ('ਾ', 'ੂ'), ('ੇ', 'ੈ'), ('ੋ', 'ੌ'), ('ੑ', 'ੑ'),
+  ('ੰ', 'ੱ'), ('ੵ', 'ੵ'), ('ઁ', 'ઃ'), ('ા', 'ૅ'),
+  ('ે', 'ૉ'), ('ો', 'ૌ'), ('ૢ', 'ૣ'), ('ૺ', 'ૼ'),
+  ('ଁ', 'ଃ'), ('ା', 'ୄ'), ('େ', 'ୈ'), ('ୋ', 'ୌ'),
+  ('ୖ', 'ୗ'), ('ୢ', 'ୣ'), ('ஂ', 'ஂ'), ('ா', 'ூ'),
+  ('ெ', 'ை'), ('ொ', 'ௌ'), ('ௗ', 'ௗ'), ('ఀ', 'ః'),
+  ('ా', 'ౄ'), ('ె', 'ై'), ('ొ', 'ౌ'), ('ౕ', 'ౖ'),
+  ('ౢ', 'ౣ'), ('ಁ', 'ಃ'), ('ಾ', 'ೄ'), ('ೆ', 'ೈ'),
+  ('ೊ', 'ೌ'), ('ೕ', 'ೖ'), ('ೢ', 'ೣ'), ('ഀ', 'ഃ'),
+  ('ാ', 'ൄ'), ('െ', 'ൈ'), ('ൊ', 'ൌ'), ('ൗ', 'ൗ'),
+  ('ൢ', 'ൣ'), ('ං', 'ඃ'), ('ා', 'ු'), ('ූ', 'ූ'),
+  ('ෘ', 'ෟ'), ('ෲ', 'ෳ'), ('ั', 'ั'), ('ิ', 'ฺ'),
+  ('ํ', 'ํ'), ('ັ', 'ັ'), ('ິ', 'ູ'), ('ົ', 'ຼ'),
+  ('ໍ', 'ໍ'), ('ཱ', 'ཱྀ'), ('ྍ', 'ྗ'), ('ྙ', 'ྼ'),
+  ('ါ', 'ံ'), ('း', 'း'), ('ျ', 'ှ'), ('ၖ', 'ၙ'),
+  ('ၞ', 'ၠ'), ('ၢ', 'ၢ'), ('ၧ', 'ၨ'), ('ၱ', 'ၴ'),
+  ('ႂ', 'ႆ'), ('ႜ', 'ႝ'), ('፟', '፟'), ('ᜒ', 'ᜓ'),
+  ('ᜲ', 'ᜳ'), ('ᝒ', 'ᝓ'), ('ᝲ', 'ᝳ'), ('ា', 'ៈ'),
+  ('ᢅ', 'ᢆ'), ('ᢩ', 'ᢩ'), ('ᤠ', 'ᤫ'), ('ᤰ', 'ᤸ'),
+  ('ᨗ', 'ᨛ'), ('ᩕ', 'ᩞ'), ('ᩡ', 'ᩴ'), ('ᬀ', 'ᬄ'),
+  ('ᬵ', 'ᭃ'), ('ᮀ', 'ᮂ'), ('ᮡ', 'ᮩ'), ('ᮬ', 'ᮭ'),
+  ('ᯧ', 'ᯱ'), ('ᰤ', 'ᰵ'), ('ᳲ', 'ᳳ'), ('ᷧ', 'ᷴ'),
+  ('Ⓐ', 'ⓩ'), ('ⷠ', 'ⷿ'), ('ꙴ', 'ꙻ'), ('ꚞ', 'ꚟ'),
+  ('ꠣ', 'ꠧ'), ('ꢀ', 'ꢁ'), ('ꢴ', 'ꣃ'), ('ꣅ', 'ꣅ'),
+  ('ꤦ', 'ꤪ'), ('ꥇ', 'ꥒ'), ('ꦀ', 'ꦃ'), ('ꦴ', 'ꦿ'),
+  ('ꨩ', 'ꨶ'), ('ꩃ', 'ꩃ'), ('ꩌ', 'ꩍ'), ('ꪰ', 'ꪰ'),
+  ('ꪲ', 'ꪴ'), ('ꪷ', 'ꪸ'), ('ꪾ', 'ꪾ'), ('ꫫ', 'ꫯ'),
+  ('ꫵ', 'ꫵ'), ('ꯣ', 'ꯪ'), ('ﬞ', 'ﬞ'), ('𐍶', '𐍺'),
+  ('𐨁', '𐨃'), ('𐨅', '𐨆'), ('𐨌', '𐨏'), ('𑀀', '𑀂'),
+  ('𑀸', '𑁅'), ('𑂂', '𑂂'), ('𑂰', '𑂸'), ('𑄀', '𑄂'),
+  ('𑄧', '𑄲'), ('𑆀', '𑆂'), ('𑆳', '𑆿'), ('𑈬', '𑈴'),
+  ('𑈷', '𑈷'), ('𑈾', '𑈾'), ('𑋟', '𑋨'), ('𑌀', '𑌃'),
+  ('𑌾', '𑍄'), ('𑍇', '𑍈'), ('𑍋', '𑍌'), ('𑍗', '𑍗'),
+  ('𑍢', '𑍣'), ('𑐵', '𑑁'), ('𑑃', '𑑅'), ('𑒰', '𑓁'),
+  ('𑖯', '𑖵'), ('𑖸', '𑖾'), ('𑗜', '𑗝'), ('𑘰', '𑘾'),
+  ('𑙀', '𑙀'), ('𑚫', '𑚵'), ('𑜝', '𑜪'), ('𑨁', '𑨊'),
+  ('𑨵', '𑨹'), ('𑨻', '𑨾'), ('𑩑', '𑩛'), ('𑪊', '𑪗'),
+  ('𑰯', '𑰶'), ('𑰸', '𑰾'), ('𑲒', '𑲧'), ('𑲩', '𑲶'),
+  ('𑴱', '𑴶'), ('𑴺', '𑴺'), ('𑴼', '𑴽'), ('𑴿', '𑵁'),
+  ('𑵃', '𑵃'), ('𑵇', '𑵇'), ('𖬰', '𖬶'), ('𖽑', '𖽾'),
+  ('𛲞', '𛲞'), ('𞀀', '𞀆'), ('𞀈', '𞀘'), ('𞀛', '𞀡'),
+  ('𞀣', '𞀤'), ('𞀦', '𞀪'), ('𞥇', '𞥇'), ('🄰', '🅉'),
+  ('🅐', '🅩'), ('🅰', '🆉'),
+];
+
+pub const OTHER_DEFAULT_IGNORABLE_CODE_POINT: &'static [(char, char)] = &[
+  ('͏', '͏'), ('ᅟ', 'ᅠ'), ('឴', '឵'), ('\u{2065}', '\u{2065}'),
+  ('ㅤ', 'ㅤ'), ('ᅠ', 'ᅠ'), ('\u{fff0}', '\u{fff8}'),
+  ('\u{e0000}', '\u{e0000}'), ('\u{e0002}', '\u{e001f}'),
+  ('\u{e0080}', '\u{e00ff}'), ('\u{e01f0}', '\u{e0fff}'),
+];
+
+pub const OTHER_GRAPHEME_EXTEND: &'static [(char, char)] = &[
+  ('া', 'া'), ('ৗ', 'ৗ'), ('ା', 'ା'), ('ୗ', 'ୗ'),
+  ('ா', 'ா'), ('ௗ', 'ௗ'), ('ೂ', 'ೂ'), ('ೕ', 'ೖ'),
+  ('ാ', 'ാ'), ('ൗ', 'ൗ'), ('ා', 'ා'), ('ෟ', 'ෟ'),
+  ('\u{200c}', '\u{200c}'), ('〮', '〯'), ('゙', '゚'), ('𑌾', '𑌾'),
+  ('𑍗', '𑍗'), ('𑒰', '𑒰'), ('𑒽', '𑒽'), ('𑖯', '𑖯'),
+  ('𝅥', '𝅥'), ('𝅮', '𝅲'), ('\u{e0020}', '\u{e007f}'),
+];
+
+pub const OTHER_ID_CONTINUE: &'static [(char, char)] = &[
+  ('·', '·'), ('·', '·'), ('፩', '፱'), ('᧚', '᧚'),
+];
+
+pub const OTHER_ID_START: &'static [(char, char)] = &[
+  ('ᢅ', 'ᢆ'), ('℘', '℘'), ('℮', '℮'), ('゛', '゜'),
+];
+
+pub const OTHER_LOWERCASE: &'static [(char, char)] = &[
+  ('ª', 'ª'), ('º', 'º'), ('ʰ', 'ʸ'), ('ˀ', 'ˁ'), ('ˠ', 'ˤ'),
+  ('ͅ', 'ͅ'), ('ͺ', 'ͺ'), ('ᴬ', 'ᵪ'), ('ᵸ', 'ᵸ'), ('ᶛ', 'ᶿ'),
+  ('ⁱ', 'ⁱ'), ('ⁿ', 'ⁿ'), ('ₐ', 'ₜ'), ('ⅰ', 'ⅿ'),
+  ('ⓐ', 'ⓩ'), ('ⱼ', 'ⱽ'), ('ꚜ', 'ꚝ'), ('ꝰ', 'ꝰ'),
+  ('ꟸ', 'ꟹ'), ('ꭜ', 'ꭟ'),
+];
+
+pub const OTHER_MATH: &'static [(char, char)] = &[
+  ('^', '^'), ('ϐ', 'ϒ'), ('ϕ', 'ϕ'), ('ϰ', 'ϱ'), ('ϴ', 'ϵ'),
+  ('‖', '‖'), ('′', '‴'), ('⁀', '⁀'), ('\u{2061}', '\u{2064}'),
+  ('⁽', '⁾'), ('₍', '₎'), ('⃐', '⃜'), ('⃡', '⃡'),
+  ('⃥', '⃦'), ('⃫', '⃯'), ('ℂ', 'ℂ'), ('ℇ', 'ℇ'),
+  ('ℊ', 'ℓ'), ('ℕ', 'ℕ'), ('ℙ', 'ℝ'), ('ℤ', 'ℤ'),
+  ('ℨ', '℩'), ('ℬ', 'ℭ'), ('ℯ', 'ℱ'), ('ℳ', 'ℸ'),
+  ('ℼ', 'ℿ'), ('ⅅ', 'ⅉ'), ('↕', '↙'), ('↜', '↟'),
+  ('↡', '↢'), ('↤', '↥'), ('↧', '↧'), ('↩', '↭'),
+  ('↰', '↱'), ('↶', '↷'), ('↼', '⇍'), ('⇐', '⇑'),
+  ('⇓', '⇓'), ('⇕', '⇛'), ('⇝', '⇝'), ('⇤', '⇥'),
+  ('⌈', '⌋'), ('⎴', '⎵'), ('⎷', '⎷'), ('⏐', '⏐'),
+  ('⏢', '⏢'), ('■', '□'), ('▮', '▶'), ('▼', '◀'),
+  ('◆', '◇'), ('◊', '○'), ('●', '◓'), ('◢', '◢'),
+  ('◤', '◤'), ('◧', '◬'), ('★', '☆'), ('♀', '♀'),
+  ('♂', '♂'), ('♠', '♣'), ('♭', '♮'), ('⟅', '⟆'),
+  ('⟦', '⟯'), ('⦃', '⦘'), ('⧘', '⧛'), ('⧼', '⧽'),
+  ('﹡', '﹡'), ('﹣', '﹣'), ('﹨', '﹨'), ('\', '\'),
+  ('^', '^'), ('𝐀', '𝑔'), ('𝑖', '𝒜'), ('𝒞', '𝒟'),
+  ('𝒢', '𝒢'), ('𝒥', '𝒦'), ('𝒩', '𝒬'), ('𝒮', '𝒹'),
+  ('𝒻', '𝒻'), ('𝒽', '𝓃'), ('𝓅', '𝔅'), ('𝔇', '𝔊'),
+  ('𝔍', '𝔔'), ('𝔖', '𝔜'), ('𝔞', '𝔹'), ('𝔻', '𝔾'),
+  ('𝕀', '𝕄'), ('𝕆', '𝕆'), ('𝕊', '𝕐'), ('𝕒', '𝚥'),
+  ('𝚨', '𝛀'), ('𝛂', '𝛚'), ('𝛜', '𝛺'), ('𝛼', '𝜔'),
+  ('𝜖', '𝜴'), ('𝜶', '𝝎'), ('𝝐', '𝝮'), ('𝝰', '𝞈'),
+  ('𝞊', '𝞨'), ('𝞪', '𝟂'), ('𝟄', '𝟋'), ('𝟎', '𝟿'),
+  ('𞸀', '𞸃'), ('𞸅', '𞸟'), ('𞸡', '𞸢'), ('𞸤', '𞸤'),
+  ('𞸧', '𞸧'), ('𞸩', '𞸲'), ('𞸴', '𞸷'), ('𞸹', '𞸹'),
+  ('𞸻', '𞸻'), ('𞹂', '𞹂'), ('𞹇', '𞹇'), ('𞹉', '𞹉'),
+  ('𞹋', '𞹋'), ('𞹍', '𞹏'), ('𞹑', '𞹒'), ('𞹔', '𞹔'),
+  ('𞹗', '𞹗'), ('𞹙', '𞹙'), ('𞹛', '𞹛'), ('𞹝', '𞹝'),
+  ('𞹟', '𞹟'), ('𞹡', '𞹢'), ('𞹤', '𞹤'), ('𞹧', '𞹪'),
+  ('𞹬', '𞹲'), ('𞹴', '𞹷'), ('𞹹', '𞹼'), ('𞹾', '𞹾'),
+  ('𞺀', '𞺉'), ('𞺋', '𞺛'), ('𞺡', '𞺣'), ('𞺥', '𞺩'),
+  ('𞺫', '𞺻'),
+];
+
+pub const OTHER_UPPERCASE: &'static [(char, char)] = &[
+  ('Ⅰ', 'Ⅿ'), ('Ⓐ', 'Ⓩ'), ('🄰', '🅉'), ('🅐', '🅩'),
+  ('🅰', '🆉'),
+];
+
+pub const PATTERN_SYNTAX: &'static [(char, char)] = &[
+  ('!', '/'), (':', '@'), ('[', '^'), ('`', '`'), ('{', '~'), ('¡', '§'),
+  ('©', '©'), ('«', '¬'), ('®', '®'), ('°', '±'), ('¶', '¶'),
+  ('»', '»'), ('¿', '¿'), ('×', '×'), ('÷', '÷'), ('‐', '‧'),
+  ('‰', '‾'), ('⁁', '⁓'), ('⁕', '⁞'), ('←', '\u{245f}'),
+  ('─', '❵'), ('➔', '\u{2bff}'), ('⸀', '\u{2e7f}'), ('、', '〃'),
+  ('〈', '〠'), ('〰', '〰'), ('﴾', '﴿'), ('﹅', '﹆'),
+];
+
+pub const PATTERN_WHITE_SPACE: &'static [(char, char)] = &[
+  ('\t', '\r'), (' ', ' '), ('\u{85}', '\u{85}'), ('\u{200e}', '\u{200f}'),
+  ('\u{2028}', '\u{2029}'),
+];
+
+pub const PREPENDED_CONCATENATION_MARK: &'static [(char, char)] = &[
+  ('\u{600}', '\u{605}'), ('\u{6dd}', '\u{6dd}'), ('\u{70f}', '\u{70f}'),
+  ('\u{8e2}', '\u{8e2}'), ('\u{110bd}', '\u{110bd}'),
+];
+
+pub const QUOTATION_MARK: &'static [(char, char)] = &[
+  ('\"', '\"'), ('\'', '\''), ('«', '«'), ('»', '»'), ('‘', '‟'),
+  ('‹', '›'), ('⹂', '⹂'), ('「', '』'), ('〝', '〟'),
+  ('﹁', '﹄'), ('"', '"'), (''', '''), ('「', '」'),
+];
+
+pub const RADICAL: &'static [(char, char)] = &[
+  ('⺀', '⺙'), ('⺛', '⻳'), ('⼀', '⿕'),
+];
+
+pub const REGIONAL_INDICATOR: &'static [(char, char)] = &[
+  ('🇦', '🇿'),
+];
+
+pub const SENTENCE_TERMINAL: &'static [(char, char)] = &[
+  ('!', '!'), ('.', '.'), ('?', '?'), ('։', '։'), ('؟', '؟'),
+  ('۔', '۔'), ('܀', '܂'), ('߹', '߹'), ('।', '॥'), ('၊', '။'),
+  ('።', '።'), ('፧', '፨'), ('᙮', '᙮'), ('᜵', '᜶'),
+  ('᠃', '᠃'), ('᠉', '᠉'), ('᥄', '᥅'), ('᪨', '᪫'),
+  ('᭚', '᭛'), ('᭞', '᭟'), ('᰻', '᰼'), ('᱾', '᱿'),
+  ('‼', '‽'), ('⁇', '⁉'), ('⸮', '⸮'), ('⸼', '⸼'),
+  ('。', '。'), ('꓿', '꓿'), ('꘎', '꘏'), ('꛳', '꛳'),
+  ('꛷', '꛷'), ('꡶', '꡷'), ('꣎', '꣏'), ('꤯', '꤯'),
+  ('꧈', '꧉'), ('꩝', '꩟'), ('꫰', '꫱'), ('꯫', '꯫'),
+  ('﹒', '﹒'), ('﹖', '﹗'), ('!', '!'), ('.', '.'),
+  ('?', '?'), ('。', '。'), ('𐩖', '𐩗'), ('𑁇', '𑁈'),
+  ('𑂾', '𑃁'), ('𑅁', '𑅃'), ('𑇅', '𑇆'), ('𑇍', '𑇍'),
+  ('𑇞', '𑇟'), ('𑈸', '𑈹'), ('𑈻', '𑈼'), ('𑊩', '𑊩'),
+  ('𑑋', '𑑌'), ('𑗂', '𑗃'), ('𑗉', '𑗗'), ('𑙁', '𑙂'),
+  ('𑜼', '𑜾'), ('𑩂', '𑩃'), ('𑪛', '𑪜'), ('𑱁', '𑱂'),
+  ('𖩮', '𖩯'), ('𖫵', '𖫵'), ('𖬷', '𖬸'), ('𖭄', '𖭄'),
+  ('𛲟', '𛲟'), ('𝪈', '𝪈'),
+];
+
+pub const SOFT_DOTTED: &'static [(char, char)] = &[
+  ('i', 'j'), ('į', 'į'), ('ɉ', 'ɉ'), ('ɨ', 'ɨ'), ('ʝ', 'ʝ'),
+  ('ʲ', 'ʲ'), ('ϳ', 'ϳ'), ('і', 'і'), ('ј', 'ј'), ('ᵢ', 'ᵢ'),
+  ('ᶖ', 'ᶖ'), ('ᶤ', 'ᶤ'), ('ᶨ', 'ᶨ'), ('ḭ', 'ḭ'),
+  ('ị', 'ị'), ('ⁱ', 'ⁱ'), ('ⅈ', 'ⅉ'), ('ⱼ', 'ⱼ'),
+  ('𝐢', '𝐣'), ('𝑖', '𝑗'), ('𝒊', '𝒋'), ('𝒾', '𝒿'),
+  ('𝓲', '𝓳'), ('𝔦', '𝔧'), ('𝕚', '𝕛'), ('𝖎', '𝖏'),
+  ('𝗂', '𝗃'), ('𝗶', '𝗷'), ('𝘪', '𝘫'), ('𝙞', '𝙟'),
+  ('𝚒', '𝚓'),
+];
+
+pub const TERMINAL_PUNCTUATION: &'static [(char, char)] = &[
+  ('!', '!'), (',', ','), ('.', '.'), (':', ';'), ('?', '?'), (';', ';'),
+  ('·', '·'), ('։', '։'), ('׃', '׃'), ('،', '،'), ('؛', '؛'),
+  ('؟', '؟'), ('۔', '۔'), ('܀', '܊'), ('܌', '܌'), ('߸', '߹'),
+  ('࠰', '࠾'), ('࡞', '࡞'), ('।', '॥'), ('๚', '๛'),
+  ('༈', '༈'), ('།', '༒'), ('၊', '။'), ('፡', '፨'),
+  ('᙭', '᙮'), ('᛫', '᛭'), ('᜵', '᜶'), ('។', '៖'),
+  ('៚', '៚'), ('᠂', '᠅'), ('᠈', '᠉'), ('᥄', '᥅'),
+  ('᪨', '᪫'), ('᭚', '᭛'), ('᭝', '᭟'), ('᰻', '᰿'),
+  ('᱾', '᱿'), ('‼', '‽'), ('⁇', '⁉'), ('⸮', '⸮'),
+  ('⸼', '⸼'), ('⹁', '⹁'), ('、', '。'), ('꓾', '꓿'),
+  ('꘍', '꘏'), ('꛳', '꛷'), ('꡶', '꡷'), ('꣎', '꣏'),
+  ('꤯', '꤯'), ('꧇', '꧉'), ('꩝', '꩟'), ('꫟', '꫟'),
+  ('꫰', '꫱'), ('꯫', '꯫'), ('﹐', '﹒'), ('﹔', '﹗'),
+  ('!', '!'), (',', ','), ('.', '.'), (':', ';'),
+  ('?', '?'), ('。', '。'), ('、', '、'), ('𐎟', '𐎟'),
+  ('𐏐', '𐏐'), ('𐡗', '𐡗'), ('𐤟', '𐤟'), ('𐩖', '𐩗'),
+  ('𐫰', '𐫵'), ('𐬺', '𐬿'), ('𐮙', '𐮜'), ('𑁇', '𑁍'),
+  ('𑂾', '𑃁'), ('𑅁', '𑅃'), ('𑇅', '𑇆'), ('𑇍', '𑇍'),
+  ('𑇞', '𑇟'), ('𑈸', '𑈼'), ('𑊩', '𑊩'), ('𑑋', '𑑍'),
+  ('𑑛', '𑑛'), ('𑗂', '𑗅'), ('𑗉', '𑗗'), ('𑙁', '𑙂'),
+  ('𑜼', '𑜾'), ('𑩂', '𑩃'), ('𑪛', '𑪜'), ('𑪡', '𑪢'),
+  ('𑱁', '𑱃'), ('𑱱', '𑱱'), ('𒑰', '𒑴'), ('𖩮', '𖩯'),
+  ('𖫵', '𖫵'), ('𖬷', '𖬹'), ('𖭄', '𖭄'), ('𛲟', '𛲟'),
+  ('𝪇', '𝪊'),
+];
+
+pub const UNIFIED_IDEOGRAPH: &'static [(char, char)] = &[
+  ('㐀', '䶵'), ('一', '鿪'), ('﨎', '﨏'), ('﨑', '﨑'),
+  ('﨓', '﨔'), ('﨟', '﨟'), ('﨡', '﨡'), ('﨣', '﨤'),
+  ('﨧', '﨩'), ('𠀀', '𪛖'), ('𪜀', '𫜴'), ('𫝀', '𫠝'),
+  ('𫠠', '𬺡'), ('𬺰', '𮯠'),
+];
+
+pub const UPPERCASE: &'static [(char, char)] = &[
+  ('A', 'Z'), ('À', 'Ö'), ('Ø', 'Þ'), ('Ā', 'Ā'), ('Ă', 'Ă'),
+  ('Ą', 'Ą'), ('Ć', 'Ć'), ('Ĉ', 'Ĉ'), ('Ċ', 'Ċ'), ('Č', 'Č'),
+  ('Ď', 'Ď'), ('Đ', 'Đ'), ('Ē', 'Ē'), ('Ĕ', 'Ĕ'), ('Ė', 'Ė'),
+  ('Ę', 'Ę'), ('Ě', 'Ě'), ('Ĝ', 'Ĝ'), ('Ğ', 'Ğ'), ('Ġ', 'Ġ'),
+  ('Ģ', 'Ģ'), ('Ĥ', 'Ĥ'), ('Ħ', 'Ħ'), ('Ĩ', 'Ĩ'), ('Ī', 'Ī'),
+  ('Ĭ', 'Ĭ'), ('Į', 'Į'), ('İ', 'İ'), ('IJ', 'IJ'), ('Ĵ', 'Ĵ'),
+  ('Ķ', 'Ķ'), ('Ĺ', 'Ĺ'), ('Ļ', 'Ļ'), ('Ľ', 'Ľ'), ('Ŀ', 'Ŀ'),
+  ('Ł', 'Ł'), ('Ń', 'Ń'), ('Ņ', 'Ņ'), ('Ň', 'Ň'), ('Ŋ', 'Ŋ'),
+  ('Ō', 'Ō'), ('Ŏ', 'Ŏ'), ('Ő', 'Ő'), ('Œ', 'Œ'), ('Ŕ', 'Ŕ'),
+  ('Ŗ', 'Ŗ'), ('Ř', 'Ř'), ('Ś', 'Ś'), ('Ŝ', 'Ŝ'), ('Ş', 'Ş'),
+  ('Š', 'Š'), ('Ţ', 'Ţ'), ('Ť', 'Ť'), ('Ŧ', 'Ŧ'), ('Ũ', 'Ũ'),
+  ('Ū', 'Ū'), ('Ŭ', 'Ŭ'), ('Ů', 'Ů'), ('Ű', 'Ű'), ('Ų', 'Ų'),
+  ('Ŵ', 'Ŵ'), ('Ŷ', 'Ŷ'), ('Ÿ', 'Ź'), ('Ż', 'Ż'), ('Ž', 'Ž'),
+  ('Ɓ', 'Ƃ'), ('Ƅ', 'Ƅ'), ('Ɔ', 'Ƈ'), ('Ɖ', 'Ƌ'), ('Ǝ', 'Ƒ'),
+  ('Ɠ', 'Ɣ'), ('Ɩ', 'Ƙ'), ('Ɯ', 'Ɲ'), ('Ɵ', 'Ơ'), ('Ƣ', 'Ƣ'),
+  ('Ƥ', 'Ƥ'), ('Ʀ', 'Ƨ'), ('Ʃ', 'Ʃ'), ('Ƭ', 'Ƭ'), ('Ʈ', 'Ư'),
+  ('Ʊ', 'Ƴ'), ('Ƶ', 'Ƶ'), ('Ʒ', 'Ƹ'), ('Ƽ', 'Ƽ'), ('DŽ', 'DŽ'),
+  ('LJ', 'LJ'), ('NJ', 'NJ'), ('Ǎ', 'Ǎ'), ('Ǐ', 'Ǐ'), ('Ǒ', 'Ǒ'),
+  ('Ǔ', 'Ǔ'), ('Ǖ', 'Ǖ'), ('Ǘ', 'Ǘ'), ('Ǚ', 'Ǚ'), ('Ǜ', 'Ǜ'),
+  ('Ǟ', 'Ǟ'), ('Ǡ', 'Ǡ'), ('Ǣ', 'Ǣ'), ('Ǥ', 'Ǥ'), ('Ǧ', 'Ǧ'),
+  ('Ǩ', 'Ǩ'), ('Ǫ', 'Ǫ'), ('Ǭ', 'Ǭ'), ('Ǯ', 'Ǯ'), ('DZ', 'DZ'),
+  ('Ǵ', 'Ǵ'), ('Ƕ', 'Ǹ'), ('Ǻ', 'Ǻ'), ('Ǽ', 'Ǽ'), ('Ǿ', 'Ǿ'),
+  ('Ȁ', 'Ȁ'), ('Ȃ', 'Ȃ'), ('Ȅ', 'Ȅ'), ('Ȇ', 'Ȇ'), ('Ȉ', 'Ȉ'),
+  ('Ȋ', 'Ȋ'), ('Ȍ', 'Ȍ'), ('Ȏ', 'Ȏ'), ('Ȑ', 'Ȑ'), ('Ȓ', 'Ȓ'),
+  ('Ȕ', 'Ȕ'), ('Ȗ', 'Ȗ'), ('Ș', 'Ș'), ('Ț', 'Ț'), ('Ȝ', 'Ȝ'),
+  ('Ȟ', 'Ȟ'), ('Ƞ', 'Ƞ'), ('Ȣ', 'Ȣ'), ('Ȥ', 'Ȥ'), ('Ȧ', 'Ȧ'),
+  ('Ȩ', 'Ȩ'), ('Ȫ', 'Ȫ'), ('Ȭ', 'Ȭ'), ('Ȯ', 'Ȯ'), ('Ȱ', 'Ȱ'),
+  ('Ȳ', 'Ȳ'), ('Ⱥ', 'Ȼ'), ('Ƚ', 'Ⱦ'), ('Ɂ', 'Ɂ'), ('Ƀ', 'Ɇ'),
+  ('Ɉ', 'Ɉ'), ('Ɋ', 'Ɋ'), ('Ɍ', 'Ɍ'), ('Ɏ', 'Ɏ'), ('Ͱ', 'Ͱ'),
+  ('Ͳ', 'Ͳ'), ('Ͷ', 'Ͷ'), ('Ϳ', 'Ϳ'), ('Ά', 'Ά'), ('Έ', 'Ί'),
+  ('Ό', 'Ό'), ('Ύ', 'Ώ'), ('Α', 'Ρ'), ('Σ', 'Ϋ'), ('Ϗ', 'Ϗ'),
+  ('ϒ', 'ϔ'), ('Ϙ', 'Ϙ'), ('Ϛ', 'Ϛ'), ('Ϝ', 'Ϝ'), ('Ϟ', 'Ϟ'),
+  ('Ϡ', 'Ϡ'), ('Ϣ', 'Ϣ'), ('Ϥ', 'Ϥ'), ('Ϧ', 'Ϧ'), ('Ϩ', 'Ϩ'),
+  ('Ϫ', 'Ϫ'), ('Ϭ', 'Ϭ'), ('Ϯ', 'Ϯ'), ('ϴ', 'ϴ'), ('Ϸ', 'Ϸ'),
+  ('Ϲ', 'Ϻ'), ('Ͻ', 'Я'), ('Ѡ', 'Ѡ'), ('Ѣ', 'Ѣ'), ('Ѥ', 'Ѥ'),
+  ('Ѧ', 'Ѧ'), ('Ѩ', 'Ѩ'), ('Ѫ', 'Ѫ'), ('Ѭ', 'Ѭ'), ('Ѯ', 'Ѯ'),
+  ('Ѱ', 'Ѱ'), ('Ѳ', 'Ѳ'), ('Ѵ', 'Ѵ'), ('Ѷ', 'Ѷ'), ('Ѹ', 'Ѹ'),
+  ('Ѻ', 'Ѻ'), ('Ѽ', 'Ѽ'), ('Ѿ', 'Ѿ'), ('Ҁ', 'Ҁ'), ('Ҋ', 'Ҋ'),
+  ('Ҍ', 'Ҍ'), ('Ҏ', 'Ҏ'), ('Ґ', 'Ґ'), ('Ғ', 'Ғ'), ('Ҕ', 'Ҕ'),
+  ('Җ', 'Җ'), ('Ҙ', 'Ҙ'), ('Қ', 'Қ'), ('Ҝ', 'Ҝ'), ('Ҟ', 'Ҟ'),
+  ('Ҡ', 'Ҡ'), ('Ң', 'Ң'), ('Ҥ', 'Ҥ'), ('Ҧ', 'Ҧ'), ('Ҩ', 'Ҩ'),
+  ('Ҫ', 'Ҫ'), ('Ҭ', 'Ҭ'), ('Ү', 'Ү'), ('Ұ', 'Ұ'), ('Ҳ', 'Ҳ'),
+  ('Ҵ', 'Ҵ'), ('Ҷ', 'Ҷ'), ('Ҹ', 'Ҹ'), ('Һ', 'Һ'), ('Ҽ', 'Ҽ'),
+  ('Ҿ', 'Ҿ'), ('Ӏ', 'Ӂ'), ('Ӄ', 'Ӄ'), ('Ӆ', 'Ӆ'), ('Ӈ', 'Ӈ'),
+  ('Ӊ', 'Ӊ'), ('Ӌ', 'Ӌ'), ('Ӎ', 'Ӎ'), ('Ӑ', 'Ӑ'), ('Ӓ', 'Ӓ'),
+  ('Ӕ', 'Ӕ'), ('Ӗ', 'Ӗ'), ('Ә', 'Ә'), ('Ӛ', 'Ӛ'), ('Ӝ', 'Ӝ'),
+  ('Ӟ', 'Ӟ'), ('Ӡ', 'Ӡ'), ('Ӣ', 'Ӣ'), ('Ӥ', 'Ӥ'), ('Ӧ', 'Ӧ'),
+  ('Ө', 'Ө'), ('Ӫ', 'Ӫ'), ('Ӭ', 'Ӭ'), ('Ӯ', 'Ӯ'), ('Ӱ', 'Ӱ'),
+  ('Ӳ', 'Ӳ'), ('Ӵ', 'Ӵ'), ('Ӷ', 'Ӷ'), ('Ӹ', 'Ӹ'), ('Ӻ', 'Ӻ'),
+  ('Ӽ', 'Ӽ'), ('Ӿ', 'Ӿ'), ('Ԁ', 'Ԁ'), ('Ԃ', 'Ԃ'), ('Ԅ', 'Ԅ'),
+  ('Ԇ', 'Ԇ'), ('Ԉ', 'Ԉ'), ('Ԋ', 'Ԋ'), ('Ԍ', 'Ԍ'), ('Ԏ', 'Ԏ'),
+  ('Ԑ', 'Ԑ'), ('Ԓ', 'Ԓ'), ('Ԕ', 'Ԕ'), ('Ԗ', 'Ԗ'), ('Ԙ', 'Ԙ'),
+  ('Ԛ', 'Ԛ'), ('Ԝ', 'Ԝ'), ('Ԟ', 'Ԟ'), ('Ԡ', 'Ԡ'), ('Ԣ', 'Ԣ'),
+  ('Ԥ', 'Ԥ'), ('Ԧ', 'Ԧ'), ('Ԩ', 'Ԩ'), ('Ԫ', 'Ԫ'), ('Ԭ', 'Ԭ'),
+  ('Ԯ', 'Ԯ'), ('Ա', 'Ֆ'), ('Ⴀ', 'Ⴥ'), ('Ⴧ', 'Ⴧ'), ('Ⴭ', 'Ⴭ'),
+  ('Ꭰ', 'Ᏽ'), ('Ḁ', 'Ḁ'), ('Ḃ', 'Ḃ'), ('Ḅ', 'Ḅ'),
+  ('Ḇ', 'Ḇ'), ('Ḉ', 'Ḉ'), ('Ḋ', 'Ḋ'), ('Ḍ', 'Ḍ'),
+  ('Ḏ', 'Ḏ'), ('Ḑ', 'Ḑ'), ('Ḓ', 'Ḓ'), ('Ḕ', 'Ḕ'),
+  ('Ḗ', 'Ḗ'), ('Ḙ', 'Ḙ'), ('Ḛ', 'Ḛ'), ('Ḝ', 'Ḝ'),
+  ('Ḟ', 'Ḟ'), ('Ḡ', 'Ḡ'), ('Ḣ', 'Ḣ'), ('Ḥ', 'Ḥ'),
+  ('Ḧ', 'Ḧ'), ('Ḩ', 'Ḩ'), ('Ḫ', 'Ḫ'), ('Ḭ', 'Ḭ'),
+  ('Ḯ', 'Ḯ'), ('Ḱ', 'Ḱ'), ('Ḳ', 'Ḳ'), ('Ḵ', 'Ḵ'),
+  ('Ḷ', 'Ḷ'), ('Ḹ', 'Ḹ'), ('Ḻ', 'Ḻ'), ('Ḽ', 'Ḽ'),
+  ('Ḿ', 'Ḿ'), ('Ṁ', 'Ṁ'), ('Ṃ', 'Ṃ'), ('Ṅ', 'Ṅ'),
+  ('Ṇ', 'Ṇ'), ('Ṉ', 'Ṉ'), ('Ṋ', 'Ṋ'), ('Ṍ', 'Ṍ'),
+  ('Ṏ', 'Ṏ'), ('Ṑ', 'Ṑ'), ('Ṓ', 'Ṓ'), ('Ṕ', 'Ṕ'),
+  ('Ṗ', 'Ṗ'), ('Ṙ', 'Ṙ'), ('Ṛ', 'Ṛ'), ('Ṝ', 'Ṝ'),
+  ('Ṟ', 'Ṟ'), ('Ṡ', 'Ṡ'), ('Ṣ', 'Ṣ'), ('Ṥ', 'Ṥ'),
+  ('Ṧ', 'Ṧ'), ('Ṩ', 'Ṩ'), ('Ṫ', 'Ṫ'), ('Ṭ', 'Ṭ'),
+  ('Ṯ', 'Ṯ'), ('Ṱ', 'Ṱ'), ('Ṳ', 'Ṳ'), ('Ṵ', 'Ṵ'),
+  ('Ṷ', 'Ṷ'), ('Ṹ', 'Ṹ'), ('Ṻ', 'Ṻ'), ('Ṽ', 'Ṽ'),
+  ('Ṿ', 'Ṿ'), ('Ẁ', 'Ẁ'), ('Ẃ', 'Ẃ'), ('Ẅ', 'Ẅ'),
+  ('Ẇ', 'Ẇ'), ('Ẉ', 'Ẉ'), ('Ẋ', 'Ẋ'), ('Ẍ', 'Ẍ'),
+  ('Ẏ', 'Ẏ'), ('Ẑ', 'Ẑ'), ('Ẓ', 'Ẓ'), ('Ẕ', 'Ẕ'),
+  ('ẞ', 'ẞ'), ('Ạ', 'Ạ'), ('Ả', 'Ả'), ('Ấ', 'Ấ'),
+  ('Ầ', 'Ầ'), ('Ẩ', 'Ẩ'), ('Ẫ', 'Ẫ'), ('Ậ', 'Ậ'),
+  ('Ắ', 'Ắ'), ('Ằ', 'Ằ'), ('Ẳ', 'Ẳ'), ('Ẵ', 'Ẵ'),
+  ('Ặ', 'Ặ'), ('Ẹ', 'Ẹ'), ('Ẻ', 'Ẻ'), ('Ẽ', 'Ẽ'),
+  ('Ế', 'Ế'), ('Ề', 'Ề'), ('Ể', 'Ể'), ('Ễ', 'Ễ'),
+  ('Ệ', 'Ệ'), ('Ỉ', 'Ỉ'), ('Ị', 'Ị'), ('Ọ', 'Ọ'),
+  ('Ỏ', 'Ỏ'), ('Ố', 'Ố'), ('Ồ', 'Ồ'), ('Ổ', 'Ổ'),
+  ('Ỗ', 'Ỗ'), ('Ộ', 'Ộ'), ('Ớ', 'Ớ'), ('Ờ', 'Ờ'),
+  ('Ở', 'Ở'), ('Ỡ', 'Ỡ'), ('Ợ', 'Ợ'), ('Ụ', 'Ụ'),
+  ('Ủ', 'Ủ'), ('Ứ', 'Ứ'), ('Ừ', 'Ừ'), ('Ử', 'Ử'),
+  ('Ữ', 'Ữ'), ('Ự', 'Ự'), ('Ỳ', 'Ỳ'), ('Ỵ', 'Ỵ'),
+  ('Ỷ', 'Ỷ'), ('Ỹ', 'Ỹ'), ('Ỻ', 'Ỻ'), ('Ỽ', 'Ỽ'),
+  ('Ỿ', 'Ỿ'), ('Ἀ', 'Ἇ'), ('Ἐ', 'Ἕ'), ('Ἠ', 'Ἧ'),
+  ('Ἰ', 'Ἷ'), ('Ὀ', 'Ὅ'), ('Ὑ', 'Ὑ'), ('Ὓ', 'Ὓ'),
+  ('Ὕ', 'Ὕ'), ('Ὗ', 'Ὗ'), ('Ὠ', 'Ὧ'), ('Ᾰ', 'Ά'),
+  ('Ὲ', 'Ή'), ('Ῐ', 'Ί'), ('Ῠ', 'Ῥ'), ('Ὸ', 'Ώ'),
+  ('ℂ', 'ℂ'), ('ℇ', 'ℇ'), ('ℋ', 'ℍ'), ('ℐ', 'ℒ'),
+  ('ℕ', 'ℕ'), ('ℙ', 'ℝ'), ('ℤ', 'ℤ'), ('Ω', 'Ω'),
+  ('ℨ', 'ℨ'), ('K', 'ℭ'), ('ℰ', 'ℳ'), ('ℾ', 'ℿ'),
+  ('ⅅ', 'ⅅ'), ('Ⅰ', 'Ⅿ'), ('Ↄ', 'Ↄ'), ('Ⓐ', 'Ⓩ'),
+  ('Ⰰ', 'Ⱞ'), ('Ⱡ', 'Ⱡ'), ('Ɫ', 'Ɽ'), ('Ⱨ', 'Ⱨ'),
+  ('Ⱪ', 'Ⱪ'), ('Ⱬ', 'Ⱬ'), ('Ɑ', 'Ɒ'), ('Ⱳ', 'Ⱳ'),
+  ('Ⱶ', 'Ⱶ'), ('Ȿ', 'Ⲁ'), ('Ⲃ', 'Ⲃ'), ('Ⲅ', 'Ⲅ'),
+  ('Ⲇ', 'Ⲇ'), ('Ⲉ', 'Ⲉ'), ('Ⲋ', 'Ⲋ'), ('Ⲍ', 'Ⲍ'),
+  ('Ⲏ', 'Ⲏ'), ('Ⲑ', 'Ⲑ'), ('Ⲓ', 'Ⲓ'), ('Ⲕ', 'Ⲕ'),
+  ('Ⲗ', 'Ⲗ'), ('Ⲙ', 'Ⲙ'), ('Ⲛ', 'Ⲛ'), ('Ⲝ', 'Ⲝ'),
+  ('Ⲟ', 'Ⲟ'), ('Ⲡ', 'Ⲡ'), ('Ⲣ', 'Ⲣ'), ('Ⲥ', 'Ⲥ'),
+  ('Ⲧ', 'Ⲧ'), ('Ⲩ', 'Ⲩ'), ('Ⲫ', 'Ⲫ'), ('Ⲭ', 'Ⲭ'),
+  ('Ⲯ', 'Ⲯ'), ('Ⲱ', 'Ⲱ'), ('Ⲳ', 'Ⲳ'), ('Ⲵ', 'Ⲵ'),
+  ('Ⲷ', 'Ⲷ'), ('Ⲹ', 'Ⲹ'), ('Ⲻ', 'Ⲻ'), ('Ⲽ', 'Ⲽ'),
+  ('Ⲿ', 'Ⲿ'), ('Ⳁ', 'Ⳁ'), ('Ⳃ', 'Ⳃ'), ('Ⳅ', 'Ⳅ'),
+  ('Ⳇ', 'Ⳇ'), ('Ⳉ', 'Ⳉ'), ('Ⳋ', 'Ⳋ'), ('Ⳍ', 'Ⳍ'),
+  ('Ⳏ', 'Ⳏ'), ('Ⳑ', 'Ⳑ'), ('Ⳓ', 'Ⳓ'), ('Ⳕ', 'Ⳕ'),
+  ('Ⳗ', 'Ⳗ'), ('Ⳙ', 'Ⳙ'), ('Ⳛ', 'Ⳛ'), ('Ⳝ', 'Ⳝ'),
+  ('Ⳟ', 'Ⳟ'), ('Ⳡ', 'Ⳡ'), ('Ⳣ', 'Ⳣ'), ('Ⳬ', 'Ⳬ'),
+  ('Ⳮ', 'Ⳮ'), ('Ⳳ', 'Ⳳ'), ('Ꙁ', 'Ꙁ'), ('Ꙃ', 'Ꙃ'),
+  ('Ꙅ', 'Ꙅ'), ('Ꙇ', 'Ꙇ'), ('Ꙉ', 'Ꙉ'), ('Ꙋ', 'Ꙋ'),
+  ('Ꙍ', 'Ꙍ'), ('Ꙏ', 'Ꙏ'), ('Ꙑ', 'Ꙑ'), ('Ꙓ', 'Ꙓ'),
+  ('Ꙕ', 'Ꙕ'), ('Ꙗ', 'Ꙗ'), ('Ꙙ', 'Ꙙ'), ('Ꙛ', 'Ꙛ'),
+  ('Ꙝ', 'Ꙝ'), ('Ꙟ', 'Ꙟ'), ('Ꙡ', 'Ꙡ'), ('Ꙣ', 'Ꙣ'),
+  ('Ꙥ', 'Ꙥ'), ('Ꙧ', 'Ꙧ'), ('Ꙩ', 'Ꙩ'), ('Ꙫ', 'Ꙫ'),
+  ('Ꙭ', 'Ꙭ'), ('Ꚁ', 'Ꚁ'), ('Ꚃ', 'Ꚃ'), ('Ꚅ', 'Ꚅ'),
+  ('Ꚇ', 'Ꚇ'), ('Ꚉ', 'Ꚉ'), ('Ꚋ', 'Ꚋ'), ('Ꚍ', 'Ꚍ'),
+  ('Ꚏ', 'Ꚏ'), ('Ꚑ', 'Ꚑ'), ('Ꚓ', 'Ꚓ'), ('Ꚕ', 'Ꚕ'),
+  ('Ꚗ', 'Ꚗ'), ('Ꚙ', 'Ꚙ'), ('Ꚛ', 'Ꚛ'), ('Ꜣ', 'Ꜣ'),
+  ('Ꜥ', 'Ꜥ'), ('Ꜧ', 'Ꜧ'), ('Ꜩ', 'Ꜩ'), ('Ꜫ', 'Ꜫ'),
+  ('Ꜭ', 'Ꜭ'), ('Ꜯ', 'Ꜯ'), ('Ꜳ', 'Ꜳ'), ('Ꜵ', 'Ꜵ'),
+  ('Ꜷ', 'Ꜷ'), ('Ꜹ', 'Ꜹ'), ('Ꜻ', 'Ꜻ'), ('Ꜽ', 'Ꜽ'),
+  ('Ꜿ', 'Ꜿ'), ('Ꝁ', 'Ꝁ'), ('Ꝃ', 'Ꝃ'), ('Ꝅ', 'Ꝅ'),
+  ('Ꝇ', 'Ꝇ'), ('Ꝉ', 'Ꝉ'), ('Ꝋ', 'Ꝋ'), ('Ꝍ', 'Ꝍ'),
+  ('Ꝏ', 'Ꝏ'), ('Ꝑ', 'Ꝑ'), ('Ꝓ', 'Ꝓ'), ('Ꝕ', 'Ꝕ'),
+  ('Ꝗ', 'Ꝗ'), ('Ꝙ', 'Ꝙ'), ('Ꝛ', 'Ꝛ'), ('Ꝝ', 'Ꝝ'),
+  ('Ꝟ', 'Ꝟ'), ('Ꝡ', 'Ꝡ'), ('Ꝣ', 'Ꝣ'), ('Ꝥ', 'Ꝥ'),
+  ('Ꝧ', 'Ꝧ'), ('Ꝩ', 'Ꝩ'), ('Ꝫ', 'Ꝫ'), ('Ꝭ', 'Ꝭ'),
+  ('Ꝯ', 'Ꝯ'), ('Ꝺ', 'Ꝺ'), ('Ꝼ', 'Ꝼ'), ('Ᵹ', 'Ꝿ'),
+  ('Ꞁ', 'Ꞁ'), ('Ꞃ', 'Ꞃ'), ('Ꞅ', 'Ꞅ'), ('Ꞇ', 'Ꞇ'),
+  ('Ꞌ', 'Ꞌ'), ('Ɥ', 'Ɥ'), ('Ꞑ', 'Ꞑ'), ('Ꞓ', 'Ꞓ'),
+  ('Ꞗ', 'Ꞗ'), ('Ꞙ', 'Ꞙ'), ('Ꞛ', 'Ꞛ'), ('Ꞝ', 'Ꞝ'),
+  ('Ꞟ', 'Ꞟ'), ('Ꞡ', 'Ꞡ'), ('Ꞣ', 'Ꞣ'), ('Ꞥ', 'Ꞥ'),
+  ('Ꞧ', 'Ꞧ'), ('Ꞩ', 'Ꞩ'), ('Ɦ', 'Ɪ'), ('Ʞ', 'Ꞵ'),
+  ('Ꞷ', 'Ꞷ'), ('A', 'Z'), ('𐐀', '𐐧'), ('𐒰', '𐓓'),
+  ('𐲀', '𐲲'), ('𑢠', '𑢿'), ('𝐀', '𝐙'), ('𝐴', '𝑍'),
+  ('𝑨', '𝒁'), ('𝒜', '𝒜'), ('𝒞', '𝒟'), ('𝒢', '𝒢'),
+  ('𝒥', '𝒦'), ('𝒩', '𝒬'), ('𝒮', '𝒵'), ('𝓐', '𝓩'),
+  ('𝔄', '𝔅'), ('𝔇', '𝔊'), ('𝔍', '𝔔'), ('𝔖', '𝔜'),
+  ('𝔸', '𝔹'), ('𝔻', '𝔾'), ('𝕀', '𝕄'), ('𝕆', '𝕆'),
+  ('𝕊', '𝕐'), ('𝕬', '𝖅'), ('𝖠', '𝖹'), ('𝗔', '𝗭'),
+  ('𝘈', '𝘡'), ('𝘼', '𝙕'), ('𝙰', '𝚉'), ('𝚨', '𝛀'),
+  ('𝛢', '𝛺'), ('𝜜', '𝜴'), ('𝝖', '𝝮'), ('𝞐', '𝞨'),
+  ('𝟊', '𝟊'), ('𞤀', '𞤡'), ('🄰', '🅉'), ('🅐', '🅩'),
+  ('🅰', '🆉'),
+];
+
+pub const VARIATION_SELECTOR: &'static [(char, char)] = &[
+  ('᠋', '᠍'), ('︀', '️'), ('󠄀', '󠇯'),
+];
+
+pub const WHITE_SPACE: &'static [(char, char)] = &[
+  ('\t', '\r'), (' ', ' '), ('\u{85}', '\u{85}'), ('\u{a0}', '\u{a0}'),
+  ('\u{1680}', '\u{1680}'), ('\u{2000}', '\u{200a}'),
+  ('\u{2028}', '\u{2029}'), ('\u{202f}', '\u{202f}'),
+  ('\u{205f}', '\u{205f}'), ('\u{3000}', '\u{3000}'),
+];
+
+pub const XID_CONTINUE: &'static [(char, char)] = &[
+  ('0', '9'), ('A', 'Z'), ('_', '_'), ('a', 'z'), ('ª', 'ª'), ('µ', 'µ'),
+  ('·', '·'), ('º', 'º'), ('À', 'Ö'), ('Ø', 'ö'), ('ø', 'ˁ'),
+  ('ˆ', 'ˑ'), ('ˠ', 'ˤ'), ('ˬ', 'ˬ'), ('ˮ', 'ˮ'), ('̀', 'ʹ'),
+  ('Ͷ', 'ͷ'), ('ͻ', 'ͽ'), ('Ϳ', 'Ϳ'), ('Ά', 'Ί'), ('Ό', 'Ό'),
+  ('Ύ', 'Ρ'), ('Σ', 'ϵ'), ('Ϸ', 'ҁ'), ('҃', '҇'), ('Ҋ', 'ԯ'),
+  ('Ա', 'Ֆ'), ('ՙ', 'ՙ'), ('ա', 'և'), ('֑', 'ֽ'), ('ֿ', 'ֿ'),
+  ('ׁ', 'ׂ'), ('ׄ', 'ׅ'), ('ׇ', 'ׇ'), ('א', 'ת'), ('װ', 'ײ'),
+  ('ؐ', 'ؚ'), ('ؠ', '٩'), ('ٮ', 'ۓ'), ('ە', 'ۜ'), ('۟', 'ۨ'),
+  ('۪', 'ۼ'), ('ۿ', 'ۿ'), ('ܐ', '݊'), ('ݍ', 'ޱ'), ('߀', 'ߵ'),
+  ('ߺ', 'ߺ'), ('ࠀ', '࠭'), ('ࡀ', '࡛'), ('ࡠ', 'ࡪ'),
+  ('ࢠ', 'ࢴ'), ('ࢶ', 'ࢽ'), ('ࣔ', '࣡'), ('ࣣ', 'ॣ'),
+  ('०', '९'), ('ॱ', 'ঃ'), ('অ', 'ঌ'), ('এ', 'ঐ'),
+  ('ও', 'ন'), ('প', 'র'), ('ল', 'ল'), ('শ', 'হ'),
+  ('়', 'ৄ'), ('ে', 'ৈ'), ('ো', 'ৎ'), ('ৗ', 'ৗ'),
+  ('ড়', 'ঢ়'), ('য়', 'ৣ'), ('০', 'ৱ'), ('ৼ', 'ৼ'),
+  ('ਁ', 'ਃ'), ('ਅ', 'ਊ'), ('ਏ', 'ਐ'), ('ਓ', 'ਨ'),
+  ('ਪ', 'ਰ'), ('ਲ', 'ਲ਼'), ('ਵ', 'ਸ਼'), ('ਸ', 'ਹ'),
+  ('਼', '਼'), ('ਾ', 'ੂ'), ('ੇ', 'ੈ'), ('ੋ', '੍'),
+  ('ੑ', 'ੑ'), ('ਖ਼', 'ੜ'), ('ਫ਼', 'ਫ਼'), ('੦', 'ੵ'),
+  ('ઁ', 'ઃ'), ('અ', 'ઍ'), ('એ', 'ઑ'), ('ઓ', 'ન'),
+  ('પ', 'ર'), ('લ', 'ળ'), ('વ', 'હ'), ('઼', 'ૅ'),
+  ('ે', 'ૉ'), ('ો', '્'), ('ૐ', 'ૐ'), ('ૠ', 'ૣ'),
+  ('૦', '૯'), ('ૹ', '૿'), ('ଁ', 'ଃ'), ('ଅ', 'ଌ'),
+  ('ଏ', 'ଐ'), ('ଓ', 'ନ'), ('ପ', 'ର'), ('ଲ', 'ଳ'),
+  ('ଵ', 'ହ'), ('଼', 'ୄ'), ('େ', 'ୈ'), ('ୋ', '୍'),
+  ('ୖ', 'ୗ'), ('ଡ଼', 'ଢ଼'), ('ୟ', 'ୣ'), ('୦', '୯'),
+  ('ୱ', 'ୱ'), ('ஂ', 'ஃ'), ('அ', 'ஊ'), ('எ', 'ஐ'),
+  ('ஒ', 'க'), ('ங', 'ச'), ('ஜ', 'ஜ'), ('ஞ', 'ட'),
+  ('ண', 'த'), ('ந', 'ப'), ('ம', 'ஹ'), ('ா', 'ூ'),
+  ('ெ', 'ை'), ('ொ', '்'), ('ௐ', 'ௐ'), ('ௗ', 'ௗ'),
+  ('௦', '௯'), ('ఀ', 'ః'), ('అ', 'ఌ'), ('ఎ', 'ఐ'),
+  ('ఒ', 'న'), ('ప', 'హ'), ('ఽ', 'ౄ'), ('ె', 'ై'),
+  ('ొ', '్'), ('ౕ', 'ౖ'), ('ౘ', 'ౚ'), ('ౠ', 'ౣ'),
+  ('౦', '౯'), ('ಀ', 'ಃ'), ('ಅ', 'ಌ'), ('ಎ', 'ಐ'),
+  ('ಒ', 'ನ'), ('ಪ', 'ಳ'), ('ವ', 'ಹ'), ('಼', 'ೄ'),
+  ('ೆ', 'ೈ'), ('ೊ', '್'), ('ೕ', 'ೖ'), ('ೞ', 'ೞ'),
+  ('ೠ', 'ೣ'), ('೦', '೯'), ('ೱ', 'ೲ'), ('ഀ', 'ഃ'),
+  ('അ', 'ഌ'), ('എ', 'ഐ'), ('ഒ', 'ൄ'), ('െ', 'ൈ'),
+  ('ൊ', 'ൎ'), ('ൔ', 'ൗ'), ('ൟ', 'ൣ'), ('൦', '൯'),
+  ('ൺ', 'ൿ'), ('ං', 'ඃ'), ('අ', 'ඖ'), ('ක', 'න'),
+  ('ඳ', 'ර'), ('ල', 'ල'), ('ව', 'ෆ'), ('්', '්'),
+  ('ා', 'ු'), ('ූ', 'ූ'), ('ෘ', 'ෟ'), ('෦', '෯'),
+  ('ෲ', 'ෳ'), ('ก', 'ฺ'), ('เ', '๎'), ('๐', '๙'),
+  ('ກ', 'ຂ'), ('ຄ', 'ຄ'), ('ງ', 'ຈ'), ('ຊ', 'ຊ'),
+  ('ຍ', 'ຍ'), ('ດ', 'ທ'), ('ນ', 'ຟ'), ('ມ', 'ຣ'),
+  ('ລ', 'ລ'), ('ວ', 'ວ'), ('ສ', 'ຫ'), ('ອ', 'ູ'),
+  ('ົ', 'ຽ'), ('ເ', 'ໄ'), ('ໆ', 'ໆ'), ('່', 'ໍ'),
+  ('໐', '໙'), ('ໜ', 'ໟ'), ('ༀ', 'ༀ'), ('༘', '༙'),
+  ('༠', '༩'), ('༵', '༵'), ('༷', '༷'), ('༹', '༹'),
+  ('༾', 'ཇ'), ('ཉ', 'ཬ'), ('ཱ', '྄'), ('྆', 'ྗ'),
+  ('ྙ', 'ྼ'), ('࿆', '࿆'), ('က', '၉'), ('ၐ', 'ႝ'),
+  ('Ⴀ', 'Ⴥ'), ('Ⴧ', 'Ⴧ'), ('Ⴭ', 'Ⴭ'), ('ა', 'ჺ'),
+  ('ჼ', 'ቈ'), ('ቊ', 'ቍ'), ('ቐ', 'ቖ'), ('ቘ', 'ቘ'),
+  ('ቚ', 'ቝ'), ('በ', 'ኈ'), ('ኊ', 'ኍ'), ('ነ', 'ኰ'),
+  ('ኲ', 'ኵ'), ('ኸ', 'ኾ'), ('ዀ', 'ዀ'), ('ዂ', 'ዅ'),
+  ('ወ', 'ዖ'), ('ዘ', 'ጐ'), ('ጒ', 'ጕ'), ('ጘ', 'ፚ'),
+  ('፝', '፟'), ('፩', '፱'), ('ᎀ', 'ᎏ'), ('Ꭰ', 'Ᏽ'),
+  ('ᏸ', 'ᏽ'), ('ᐁ', 'ᙬ'), ('ᙯ', 'ᙿ'), ('ᚁ', 'ᚚ'),
+  ('ᚠ', 'ᛪ'), ('ᛮ', 'ᛸ'), ('ᜀ', 'ᜌ'), ('ᜎ', '᜔'),
+  ('ᜠ', '᜴'), ('ᝀ', 'ᝓ'), ('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'),
+  ('ᝲ', 'ᝳ'), ('ក', '៓'), ('ៗ', 'ៗ'), ('ៜ', '៝'),
+  ('០', '៩'), ('᠋', '᠍'), ('᠐', '᠙'), ('ᠠ', 'ᡷ'),
+  ('ᢀ', 'ᢪ'), ('ᢰ', 'ᣵ'), ('ᤀ', 'ᤞ'), ('ᤠ', 'ᤫ'),
+  ('ᤰ', '᤻'), ('᥆', 'ᥭ'), ('ᥰ', 'ᥴ'), ('ᦀ', 'ᦫ'),
+  ('ᦰ', 'ᧉ'), ('᧐', '᧚'), ('ᨀ', 'ᨛ'), ('ᨠ', 'ᩞ'),
+  ('᩠', '᩼'), ('᩿', '᪉'), ('᪐', '᪙'), ('ᪧ', 'ᪧ'),
+  ('᪰', '᪽'), ('ᬀ', 'ᭋ'), ('᭐', '᭙'), ('᭫', '᭳'),
+  ('ᮀ', '᯳'), ('ᰀ', '᰷'), ('᱀', '᱉'), ('ᱍ', 'ᱽ'),
+  ('ᲀ', 'ᲈ'), ('᳐', '᳒'), ('᳔', '᳹'), ('ᴀ', '᷹'),
+  ('᷻', 'ἕ'), ('Ἐ', 'Ἕ'), ('ἠ', 'ὅ'), ('Ὀ', 'Ὅ'),
+  ('ὐ', 'ὗ'), ('Ὑ', 'Ὑ'), ('Ὓ', 'Ὓ'), ('Ὕ', 'Ὕ'),
+  ('Ὗ', 'ώ'), ('ᾀ', 'ᾴ'), ('ᾶ', 'ᾼ'), ('ι', 'ι'),
+  ('ῂ', 'ῄ'), ('ῆ', 'ῌ'), ('ῐ', 'ΐ'), ('ῖ', 'Ί'),
+  ('ῠ', 'Ῥ'), ('ῲ', 'ῴ'), ('ῶ', 'ῼ'), ('‿', '⁀'),
+  ('⁔', '⁔'), ('ⁱ', 'ⁱ'), ('ⁿ', 'ⁿ'), ('ₐ', 'ₜ'),
+  ('⃐', '⃜'), ('⃡', '⃡'), ('⃥', '⃰'), ('ℂ', 'ℂ'),
+  ('ℇ', 'ℇ'), ('ℊ', 'ℓ'), ('ℕ', 'ℕ'), ('℘', 'ℝ'),
+  ('ℤ', 'ℤ'), ('Ω', 'Ω'), ('ℨ', 'ℨ'), ('K', 'ℹ'),
+  ('ℼ', 'ℿ'), ('ⅅ', 'ⅉ'), ('ⅎ', 'ⅎ'), ('Ⅰ', 'ↈ'),
+  ('Ⰰ', 'Ⱞ'), ('ⰰ', 'ⱞ'), ('Ⱡ', 'ⳤ'), ('Ⳬ', 'ⳳ'),
+  ('ⴀ', 'ⴥ'), ('ⴧ', 'ⴧ'), ('ⴭ', 'ⴭ'), ('ⴰ', 'ⵧ'),
+  ('ⵯ', 'ⵯ'), ('⵿', 'ⶖ'), ('ⶠ', 'ⶦ'), ('ⶨ', 'ⶮ'),
+  ('ⶰ', 'ⶶ'), ('ⶸ', 'ⶾ'), ('ⷀ', 'ⷆ'), ('ⷈ', 'ⷎ'),
+  ('ⷐ', 'ⷖ'), ('ⷘ', 'ⷞ'), ('ⷠ', 'ⷿ'), ('々', '〇'),
+  ('〡', '〯'), ('〱', '〵'), ('〸', '〼'), ('ぁ', 'ゖ'),
+  ('゙', '゚'), ('ゝ', 'ゟ'), ('ァ', 'ヺ'), ('ー', 'ヿ'),
+  ('ㄅ', 'ㄮ'), ('ㄱ', 'ㆎ'), ('ㆠ', 'ㆺ'), ('ㇰ', 'ㇿ'),
+  ('㐀', '䶵'), ('一', '鿪'), ('ꀀ', 'ꒌ'), ('ꓐ', 'ꓽ'),
+  ('ꔀ', 'ꘌ'), ('ꘐ', 'ꘫ'), ('Ꙁ', '꙯'), ('ꙴ', '꙽'),
+  ('ꙿ', '꛱'), ('ꜗ', 'ꜟ'), ('Ꜣ', 'ꞈ'), ('Ꞌ', 'Ɪ'),
+  ('Ʞ', 'ꞷ'), ('ꟷ', 'ꠧ'), ('ꡀ', 'ꡳ'), ('ꢀ', 'ꣅ'),
+  ('꣐', '꣙'), ('꣠', 'ꣷ'), ('ꣻ', 'ꣻ'), ('ꣽ', 'ꣽ'),
+  ('꤀', '꤭'), ('ꤰ', '꥓'), ('ꥠ', 'ꥼ'), ('ꦀ', '꧀'),
+  ('ꧏ', '꧙'), ('ꧠ', 'ꧾ'), ('ꨀ', 'ꨶ'), ('ꩀ', 'ꩍ'),
+  ('꩐', '꩙'), ('ꩠ', 'ꩶ'), ('ꩺ', 'ꫂ'), ('ꫛ', 'ꫝ'),
+  ('ꫠ', 'ꫯ'), ('ꫲ', '꫶'), ('ꬁ', 'ꬆ'), ('ꬉ', 'ꬎ'),
+  ('ꬑ', 'ꬖ'), ('ꬠ', 'ꬦ'), ('ꬨ', 'ꬮ'), ('ꬰ', 'ꭚ'),
+  ('ꭜ', 'ꭥ'), ('ꭰ', 'ꯪ'), ('꯬', '꯭'), ('꯰', '꯹'),
+  ('가', '힣'), ('ힰ', 'ퟆ'), ('ퟋ', 'ퟻ'), ('豈', '舘'),
+  ('並', '龎'), ('ff', 'st'), ('ﬓ', 'ﬗ'), ('יִ', 'ﬨ'),
+  ('שׁ', 'זּ'), ('טּ', 'לּ'), ('מּ', 'מּ'), ('נּ', 'סּ'),
+  ('ףּ', 'פּ'), ('צּ', 'ﮱ'), ('ﯓ', 'ﱝ'), ('ﱤ', 'ﴽ'),
+  ('ﵐ', 'ﶏ'), ('ﶒ', 'ﷇ'), ('ﷰ', 'ﷹ'), ('︀', '️'),
+  ('︠', '︯'), ('︳', '︴'), ('﹍', '﹏'), ('ﹱ', 'ﹱ'),
+  ('ﹳ', 'ﹳ'), ('ﹷ', 'ﹷ'), ('ﹹ', 'ﹹ'), ('ﹻ', 'ﹻ'),
+  ('ﹽ', 'ﹽ'), ('ﹿ', 'ﻼ'), ('0', '9'), ('A', 'Z'),
+  ('_', '_'), ('a', 'z'), ('ヲ', 'ᄒ'), ('ᅡ', 'ᅦ'),
+  ('ᅧ', 'ᅬ'), ('ᅭ', 'ᅲ'), ('ᅳ', 'ᅵ'), ('𐀀', '𐀋'),
+  ('𐀍', '𐀦'), ('𐀨', '𐀺'), ('𐀼', '𐀽'), ('𐀿', '𐁍'),
+  ('𐁐', '𐁝'), ('𐂀', '𐃺'), ('𐅀', '𐅴'), ('𐇽', '𐇽'),
+  ('𐊀', '𐊜'), ('𐊠', '𐋐'), ('𐋠', '𐋠'), ('𐌀', '𐌟'),
+  ('𐌭', '𐍊'), ('𐍐', '𐍺'), ('𐎀', '𐎝'), ('𐎠', '𐏃'),
+  ('𐏈', '𐏏'), ('𐏑', '𐏕'), ('𐐀', '𐒝'), ('𐒠', '𐒩'),
+  ('𐒰', '𐓓'), ('𐓘', '𐓻'), ('𐔀', '𐔧'), ('𐔰', '𐕣'),
+  ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧'), ('𐠀', '𐠅'),
+  ('𐠈', '𐠈'), ('𐠊', '𐠵'), ('𐠷', '𐠸'), ('𐠼', '𐠼'),
+  ('𐠿', '𐡕'), ('𐡠', '𐡶'), ('𐢀', '𐢞'), ('𐣠', '𐣲'),
+  ('𐣴', '𐣵'), ('𐤀', '𐤕'), ('𐤠', '𐤹'), ('𐦀', '𐦷'),
+  ('𐦾', '𐦿'), ('𐨀', '𐨃'), ('𐨅', '𐨆'), ('𐨌', '𐨓'),
+  ('𐨕', '𐨗'), ('𐨙', '𐨳'), ('𐨸', '𐨺'), ('𐨿', '𐨿'),
+  ('𐩠', '𐩼'), ('𐪀', '𐪜'), ('𐫀', '𐫇'), ('𐫉', '𐫦'),
+  ('𐬀', '𐬵'), ('𐭀', '𐭕'), ('𐭠', '𐭲'), ('𐮀', '𐮑'),
+  ('𐰀', '𐱈'), ('𐲀', '𐲲'), ('𐳀', '𐳲'), ('𑀀', '𑁆'),
+  ('𑁦', '𑁯'), ('𑁿', '𑂺'), ('𑃐', '𑃨'), ('𑃰', '𑃹'),
+  ('𑄀', '𑄴'), ('𑄶', '𑄿'), ('𑅐', '𑅳'), ('𑅶', '𑅶'),
+  ('𑆀', '𑇄'), ('𑇊', '𑇌'), ('𑇐', '𑇚'), ('𑇜', '𑇜'),
+  ('𑈀', '𑈑'), ('𑈓', '𑈷'), ('𑈾', '𑈾'), ('𑊀', '𑊆'),
+  ('𑊈', '𑊈'), ('𑊊', '𑊍'), ('𑊏', '𑊝'), ('𑊟', '𑊨'),
+  ('𑊰', '𑋪'), ('𑋰', '𑋹'), ('𑌀', '𑌃'), ('𑌅', '𑌌'),
+  ('𑌏', '𑌐'), ('𑌓', '𑌨'), ('𑌪', '𑌰'), ('𑌲', '𑌳'),
+  ('𑌵', '𑌹'), ('𑌼', '𑍄'), ('𑍇', '𑍈'), ('𑍋', '𑍍'),
+  ('𑍐', '𑍐'), ('𑍗', '𑍗'), ('𑍝', '𑍣'), ('𑍦', '𑍬'),
+  ('𑍰', '𑍴'), ('𑐀', '𑑊'), ('𑑐', '𑑙'), ('𑒀', '𑓅'),
+  ('𑓇', '𑓇'), ('𑓐', '𑓙'), ('𑖀', '𑖵'), ('𑖸', '𑗀'),
+  ('𑗘', '𑗝'), ('𑘀', '𑙀'), ('𑙄', '𑙄'), ('𑙐', '𑙙'),
+  ('𑚀', '𑚷'), ('𑛀', '𑛉'), ('𑜀', '𑜙'), ('𑜝', '𑜫'),
+  ('𑜰', '𑜹'), ('𑢠', '𑣩'), ('𑣿', '𑣿'), ('𑨀', '𑨾'),
+  ('𑩇', '𑩇'), ('𑩐', '𑪃'), ('𑪆', '𑪙'), ('𑫀', '𑫸'),
+  ('𑰀', '𑰈'), ('𑰊', '𑰶'), ('𑰸', '𑱀'), ('𑱐', '𑱙'),
+  ('𑱲', '𑲏'), ('𑲒', '𑲧'), ('𑲩', '𑲶'), ('𑴀', '𑴆'),
+  ('𑴈', '𑴉'), ('𑴋', '𑴶'), ('𑴺', '𑴺'), ('𑴼', '𑴽'),
+  ('𑴿', '𑵇'), ('𑵐', '𑵙'), ('𒀀', '𒎙'), ('𒐀', '𒑮'),
+  ('𒒀', '𒕃'), ('𓀀', '𓐮'), ('𔐀', '𔙆'), ('𖠀', '𖨸'),
+  ('𖩀', '𖩞'), ('𖩠', '𖩩'), ('𖫐', '𖫭'), ('𖫰', '𖫴'),
+  ('𖬀', '𖬶'), ('𖭀', '𖭃'), ('𖭐', '𖭙'), ('𖭣', '𖭷'),
+  ('𖭽', '𖮏'), ('𖼀', '𖽄'), ('𖽐', '𖽾'), ('𖾏', '𖾟'),
+  ('𖿠', '𖿡'), ('𗀀', '𘟬'), ('𘠀', '𘫲'), ('𛀀', '𛄞'),
+  ('𛅰', '𛋻'), ('𛰀', '𛱪'), ('𛱰', '𛱼'), ('𛲀', '𛲈'),
+  ('𛲐', '𛲙'), ('𛲝', '𛲞'), ('𝅥', '𝅩'), ('𝅭', '𝅲'),
+  ('𝅻', '𝆂'), ('𝆅', '𝆋'), ('𝆪', '𝆭'), ('𝉂', '𝉄'),
+  ('𝐀', '𝑔'), ('𝑖', '𝒜'), ('𝒞', '𝒟'), ('𝒢', '𝒢'),
+  ('𝒥', '𝒦'), ('𝒩', '𝒬'), ('𝒮', '𝒹'), ('𝒻', '𝒻'),
+  ('𝒽', '𝓃'), ('𝓅', '𝔅'), ('𝔇', '𝔊'), ('𝔍', '𝔔'),
+  ('𝔖', '𝔜'), ('𝔞', '𝔹'), ('𝔻', '𝔾'), ('𝕀', '𝕄'),
+  ('𝕆', '𝕆'), ('𝕊', '𝕐'), ('𝕒', '𝚥'), ('𝚨', '𝛀'),
+  ('𝛂', '𝛚'), ('𝛜', '𝛺'), ('𝛼', '𝜔'), ('𝜖', '𝜴'),
+  ('𝜶', '𝝎'), ('𝝐', '𝝮'), ('𝝰', '𝞈'), ('𝞊', '𝞨'),
+  ('𝞪', '𝟂'), ('𝟄', '𝟋'), ('𝟎', '𝟿'), ('𝨀', '𝨶'),
+  ('𝨻', '𝩬'), ('𝩵', '𝩵'), ('𝪄', '𝪄'), ('𝪛', '𝪟'),
+  ('𝪡', '𝪯'), ('𞀀', '𞀆'), ('𞀈', '𞀘'), ('𞀛', '𞀡'),
+  ('𞀣', '𞀤'), ('𞀦', '𞀪'), ('𞠀', '𞣄'), ('𞣐', '𞣖'),
+  ('𞤀', '𞥊'), ('𞥐', '𞥙'), ('𞸀', '𞸃'), ('𞸅', '𞸟'),
+  ('𞸡', '𞸢'), ('𞸤', '𞸤'), ('𞸧', '𞸧'), ('𞸩', '𞸲'),
+  ('𞸴', '𞸷'), ('𞸹', '𞸹'), ('𞸻', '𞸻'), ('𞹂', '𞹂'),
+  ('𞹇', '𞹇'), ('𞹉', '𞹉'), ('𞹋', '𞹋'), ('𞹍', '𞹏'),
+  ('𞹑', '𞹒'), ('𞹔', '𞹔'), ('𞹗', '𞹗'), ('𞹙', '𞹙'),
+  ('𞹛', '𞹛'), ('𞹝', '𞹝'), ('𞹟', '𞹟'), ('𞹡', '𞹢'),
+  ('𞹤', '𞹤'), ('𞹧', '𞹪'), ('𞹬', '𞹲'), ('𞹴', '𞹷'),
+  ('𞹹', '𞹼'), ('𞹾', '𞹾'), ('𞺀', '𞺉'), ('𞺋', '𞺛'),
+  ('𞺡', '𞺣'), ('𞺥', '𞺩'), ('𞺫', '𞺻'), ('𠀀', '𪛖'),
+  ('𪜀', '𫜴'), ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'),
+  ('丽', '𪘀'), ('󠄀', '󠇯'),
+];
+
+pub const XID_START: &'static [(char, char)] = &[
+  ('A', 'Z'), ('a', 'z'), ('ª', 'ª'), ('µ', 'µ'), ('º', 'º'),
+  ('À', 'Ö'), ('Ø', 'ö'), ('ø', 'ˁ'), ('ˆ', 'ˑ'), ('ˠ', 'ˤ'),
+  ('ˬ', 'ˬ'), ('ˮ', 'ˮ'), ('Ͱ', 'ʹ'), ('Ͷ', 'ͷ'), ('ͻ', 'ͽ'),
+  ('Ϳ', 'Ϳ'), ('Ά', 'Ά'), ('Έ', 'Ί'), ('Ό', 'Ό'), ('Ύ', 'Ρ'),
+  ('Σ', 'ϵ'), ('Ϸ', 'ҁ'), ('Ҋ', 'ԯ'), ('Ա', 'Ֆ'), ('ՙ', 'ՙ'),
+  ('ա', 'և'), ('א', 'ת'), ('װ', 'ײ'), ('ؠ', 'ي'), ('ٮ', 'ٯ'),
+  ('ٱ', 'ۓ'), ('ە', 'ە'), ('ۥ', 'ۦ'), ('ۮ', 'ۯ'), ('ۺ', 'ۼ'),
+  ('ۿ', 'ۿ'), ('ܐ', 'ܐ'), ('ܒ', 'ܯ'), ('ݍ', 'ޥ'), ('ޱ', 'ޱ'),
+  ('ߊ', 'ߪ'), ('ߴ', 'ߵ'), ('ߺ', 'ߺ'), ('ࠀ', 'ࠕ'), ('ࠚ', 'ࠚ'),
+  ('ࠤ', 'ࠤ'), ('ࠨ', 'ࠨ'), ('ࡀ', 'ࡘ'), ('ࡠ', 'ࡪ'),
+  ('ࢠ', 'ࢴ'), ('ࢶ', 'ࢽ'), ('ऄ', 'ह'), ('ऽ', 'ऽ'),
+  ('ॐ', 'ॐ'), ('क़', 'ॡ'), ('ॱ', 'ঀ'), ('অ', 'ঌ'),
+  ('এ', 'ঐ'), ('ও', 'ন'), ('প', 'র'), ('ল', 'ল'),
+  ('শ', 'হ'), ('ঽ', 'ঽ'), ('ৎ', 'ৎ'), ('ড়', 'ঢ়'),
+  ('য়', 'ৡ'), ('ৰ', 'ৱ'), ('ৼ', 'ৼ'), ('ਅ', 'ਊ'),
+  ('ਏ', 'ਐ'), ('ਓ', 'ਨ'), ('ਪ', 'ਰ'), ('ਲ', 'ਲ਼'),
+  ('ਵ', 'ਸ਼'), ('ਸ', 'ਹ'), ('ਖ਼', 'ੜ'), ('ਫ਼', 'ਫ਼'),
+  ('ੲ', 'ੴ'), ('અ', 'ઍ'), ('એ', 'ઑ'), ('ઓ', 'ન'),
+  ('પ', 'ર'), ('લ', 'ળ'), ('વ', 'હ'), ('ઽ', 'ઽ'),
+  ('ૐ', 'ૐ'), ('ૠ', 'ૡ'), ('ૹ', 'ૹ'), ('ଅ', 'ଌ'),
+  ('ଏ', 'ଐ'), ('ଓ', 'ନ'), ('ପ', 'ର'), ('ଲ', 'ଳ'),
+  ('ଵ', 'ହ'), ('ଽ', 'ଽ'), ('ଡ଼', 'ଢ଼'), ('ୟ', 'ୡ'),
+  ('ୱ', 'ୱ'), ('ஃ', 'ஃ'), ('அ', 'ஊ'), ('எ', 'ஐ'),
+  ('ஒ', 'க'), ('ங', 'ச'), ('ஜ', 'ஜ'), ('ஞ', 'ட'),
+  ('ண', 'த'), ('ந', 'ப'), ('ம', 'ஹ'), ('ௐ', 'ௐ'),
+  ('అ', 'ఌ'), ('ఎ', 'ఐ'), ('ఒ', 'న'), ('ప', 'హ'),
+  ('ఽ', 'ఽ'), ('ౘ', 'ౚ'), ('ౠ', 'ౡ'), ('ಀ', 'ಀ'),
+  ('ಅ', 'ಌ'), ('ಎ', 'ಐ'), ('ಒ', 'ನ'), ('ಪ', 'ಳ'),
+  ('ವ', 'ಹ'), ('ಽ', 'ಽ'), ('ೞ', 'ೞ'), ('ೠ', 'ೡ'),
+  ('ೱ', 'ೲ'), ('അ', 'ഌ'), ('എ', 'ഐ'), ('ഒ', 'ഺ'),
+  ('ഽ', 'ഽ'), ('ൎ', 'ൎ'), ('ൔ', 'ൖ'), ('ൟ', 'ൡ'),
+  ('ൺ', 'ൿ'), ('අ', 'ඖ'), ('ක', 'න'), ('ඳ', 'ර'),
+  ('ල', 'ල'), ('ව', 'ෆ'), ('ก', 'ะ'), ('า', 'า'),
+  ('เ', 'ๆ'), ('ກ', 'ຂ'), ('ຄ', 'ຄ'), ('ງ', 'ຈ'),
+  ('ຊ', 'ຊ'), ('ຍ', 'ຍ'), ('ດ', 'ທ'), ('ນ', 'ຟ'),
+  ('ມ', 'ຣ'), ('ລ', 'ລ'), ('ວ', 'ວ'), ('ສ', 'ຫ'),
+  ('ອ', 'ະ'), ('າ', 'າ'), ('ຽ', 'ຽ'), ('ເ', 'ໄ'),
+  ('ໆ', 'ໆ'), ('ໜ', 'ໟ'), ('ༀ', 'ༀ'), ('ཀ', 'ཇ'),
+  ('ཉ', 'ཬ'), ('ྈ', 'ྌ'), ('က', 'ဪ'), ('ဿ', 'ဿ'),
+  ('ၐ', 'ၕ'), ('ၚ', 'ၝ'), ('ၡ', 'ၡ'), ('ၥ', 'ၦ'),
+  ('ၮ', 'ၰ'), ('ၵ', 'ႁ'), ('ႎ', 'ႎ'), ('Ⴀ', 'Ⴥ'),
+  ('Ⴧ', 'Ⴧ'), ('Ⴭ', 'Ⴭ'), ('ა', 'ჺ'), ('ჼ', 'ቈ'),
+  ('ቊ', 'ቍ'), ('ቐ', 'ቖ'), ('ቘ', 'ቘ'), ('ቚ', 'ቝ'),
+  ('በ', 'ኈ'), ('ኊ', 'ኍ'), ('ነ', 'ኰ'), ('ኲ', 'ኵ'),
+  ('ኸ', 'ኾ'), ('ዀ', 'ዀ'), ('ዂ', 'ዅ'), ('ወ', 'ዖ'),
+  ('ዘ', 'ጐ'), ('ጒ', 'ጕ'), ('ጘ', 'ፚ'), ('ᎀ', 'ᎏ'),
+  ('Ꭰ', 'Ᏽ'), ('ᏸ', 'ᏽ'), ('ᐁ', 'ᙬ'), ('ᙯ', 'ᙿ'),
+  ('ᚁ', 'ᚚ'), ('ᚠ', 'ᛪ'), ('ᛮ', 'ᛸ'), ('ᜀ', 'ᜌ'),
+  ('ᜎ', 'ᜑ'), ('ᜠ', 'ᜱ'), ('ᝀ', 'ᝑ'), ('ᝠ', 'ᝬ'),
+  ('ᝮ', 'ᝰ'), ('ក', 'ឳ'), ('ៗ', 'ៗ'), ('ៜ', 'ៜ'),
+  ('ᠠ', 'ᡷ'), ('ᢀ', 'ᢨ'), ('ᢪ', 'ᢪ'), ('ᢰ', 'ᣵ'),
+  ('ᤀ', 'ᤞ'), ('ᥐ', 'ᥭ'), ('ᥰ', 'ᥴ'), ('ᦀ', 'ᦫ'),
+  ('ᦰ', 'ᧉ'), ('ᨀ', 'ᨖ'), ('ᨠ', 'ᩔ'), ('ᪧ', 'ᪧ'),
+  ('ᬅ', 'ᬳ'), ('ᭅ', 'ᭋ'), ('ᮃ', 'ᮠ'), ('ᮮ', 'ᮯ'),
+  ('ᮺ', 'ᯥ'), ('ᰀ', 'ᰣ'), ('ᱍ', 'ᱏ'), ('ᱚ', 'ᱽ'),
+  ('ᲀ', 'ᲈ'), ('ᳩ', 'ᳬ'), ('ᳮ', 'ᳱ'), ('ᳵ', 'ᳶ'),
+  ('ᴀ', 'ᶿ'), ('Ḁ', 'ἕ'), ('Ἐ', 'Ἕ'), ('ἠ', 'ὅ'),
+  ('Ὀ', 'Ὅ'), ('ὐ', 'ὗ'), ('Ὑ', 'Ὑ'), ('Ὓ', 'Ὓ'),
+  ('Ὕ', 'Ὕ'), ('Ὗ', 'ώ'), ('ᾀ', 'ᾴ'), ('ᾶ', 'ᾼ'),
+  ('ι', 'ι'), ('ῂ', 'ῄ'), ('ῆ', 'ῌ'), ('ῐ', 'ΐ'),
+  ('ῖ', 'Ί'), ('ῠ', 'Ῥ'), ('ῲ', 'ῴ'), ('ῶ', 'ῼ'),
+  ('ⁱ', 'ⁱ'), ('ⁿ', 'ⁿ'), ('ₐ', 'ₜ'), ('ℂ', 'ℂ'),
+  ('ℇ', 'ℇ'), ('ℊ', 'ℓ'), ('ℕ', 'ℕ'), ('℘', 'ℝ'),
+  ('ℤ', 'ℤ'), ('Ω', 'Ω'), ('ℨ', 'ℨ'), ('K', 'ℹ'),
+  ('ℼ', 'ℿ'), ('ⅅ', 'ⅉ'), ('ⅎ', 'ⅎ'), ('Ⅰ', 'ↈ'),
+  ('Ⰰ', 'Ⱞ'), ('ⰰ', 'ⱞ'), ('Ⱡ', 'ⳤ'), ('Ⳬ', 'ⳮ'),
+  ('Ⳳ', 'ⳳ'), ('ⴀ', 'ⴥ'), ('ⴧ', 'ⴧ'), ('ⴭ', 'ⴭ'),
+  ('ⴰ', 'ⵧ'), ('ⵯ', 'ⵯ'), ('ⶀ', 'ⶖ'), ('ⶠ', 'ⶦ'),
+  ('ⶨ', 'ⶮ'), ('ⶰ', 'ⶶ'), ('ⶸ', 'ⶾ'), ('ⷀ', 'ⷆ'),
+  ('ⷈ', 'ⷎ'), ('ⷐ', 'ⷖ'), ('ⷘ', 'ⷞ'), ('々', '〇'),
+  ('〡', '〩'), ('〱', '〵'), ('〸', '〼'), ('ぁ', 'ゖ'),
+  ('ゝ', 'ゟ'), ('ァ', 'ヺ'), ('ー', 'ヿ'), ('ㄅ', 'ㄮ'),
+  ('ㄱ', 'ㆎ'), ('ㆠ', 'ㆺ'), ('ㇰ', 'ㇿ'), ('㐀', '䶵'),
+  ('一', '鿪'), ('ꀀ', 'ꒌ'), ('ꓐ', 'ꓽ'), ('ꔀ', 'ꘌ'),
+  ('ꘐ', 'ꘟ'), ('ꘪ', 'ꘫ'), ('Ꙁ', 'ꙮ'), ('ꙿ', 'ꚝ'),
+  ('ꚠ', 'ꛯ'), ('ꜗ', 'ꜟ'), ('Ꜣ', 'ꞈ'), ('Ꞌ', 'Ɪ'),
+  ('Ʞ', 'ꞷ'), ('ꟷ', 'ꠁ'), ('ꠃ', 'ꠅ'), ('ꠇ', 'ꠊ'),
+  ('ꠌ', 'ꠢ'), ('ꡀ', 'ꡳ'), ('ꢂ', 'ꢳ'), ('ꣲ', 'ꣷ'),
+  ('ꣻ', 'ꣻ'), ('ꣽ', 'ꣽ'), ('ꤊ', 'ꤥ'), ('ꤰ', 'ꥆ'),
+  ('ꥠ', 'ꥼ'), ('ꦄ', 'ꦲ'), ('ꧏ', 'ꧏ'), ('ꧠ', 'ꧤ'),
+  ('ꧦ', 'ꧯ'), ('ꧺ', 'ꧾ'), ('ꨀ', 'ꨨ'), ('ꩀ', 'ꩂ'),
+  ('ꩄ', 'ꩋ'), ('ꩠ', 'ꩶ'), ('ꩺ', 'ꩺ'), ('ꩾ', 'ꪯ'),
+  ('ꪱ', 'ꪱ'), ('ꪵ', 'ꪶ'), ('ꪹ', 'ꪽ'), ('ꫀ', 'ꫀ'),
+  ('ꫂ', 'ꫂ'), ('ꫛ', 'ꫝ'), ('ꫠ', 'ꫪ'), ('ꫲ', 'ꫴ'),
+  ('ꬁ', 'ꬆ'), ('ꬉ', 'ꬎ'), ('ꬑ', 'ꬖ'), ('ꬠ', 'ꬦ'),
+  ('ꬨ', 'ꬮ'), ('ꬰ', 'ꭚ'), ('ꭜ', 'ꭥ'), ('ꭰ', 'ꯢ'),
+  ('가', '힣'), ('ힰ', 'ퟆ'), ('ퟋ', 'ퟻ'), ('豈', '舘'),
+  ('並', '龎'), ('ff', 'st'), ('ﬓ', 'ﬗ'), ('יִ', 'יִ'),
+  ('ײַ', 'ﬨ'), ('שׁ', 'זּ'), ('טּ', 'לּ'), ('מּ', 'מּ'),
+  ('נּ', 'סּ'), ('ףּ', 'פּ'), ('צּ', 'ﮱ'), ('ﯓ', 'ﱝ'),
+  ('ﱤ', 'ﴽ'), ('ﵐ', 'ﶏ'), ('ﶒ', 'ﷇ'), ('ﷰ', 'ﷹ'),
+  ('ﹱ', 'ﹱ'), ('ﹳ', 'ﹳ'), ('ﹷ', 'ﹷ'), ('ﹹ', 'ﹹ'),
+  ('ﹻ', 'ﹻ'), ('ﹽ', 'ﹽ'), ('ﹿ', 'ﻼ'), ('A', 'Z'),
+  ('a', 'z'), ('ヲ', 'ン'), ('ᅠ', 'ᄒ'), ('ᅡ', 'ᅦ'),
+  ('ᅧ', 'ᅬ'), ('ᅭ', 'ᅲ'), ('ᅳ', 'ᅵ'), ('𐀀', '𐀋'),
+  ('𐀍', '𐀦'), ('𐀨', '𐀺'), ('𐀼', '𐀽'), ('𐀿', '𐁍'),
+  ('𐁐', '𐁝'), ('𐂀', '𐃺'), ('𐅀', '𐅴'), ('𐊀', '𐊜'),
+  ('𐊠', '𐋐'), ('𐌀', '𐌟'), ('𐌭', '𐍊'), ('𐍐', '𐍵'),
+  ('𐎀', '𐎝'), ('𐎠', '𐏃'), ('𐏈', '𐏏'), ('𐏑', '𐏕'),
+  ('𐐀', '𐒝'), ('𐒰', '𐓓'), ('𐓘', '𐓻'), ('𐔀', '𐔧'),
+  ('𐔰', '𐕣'), ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧'),
+  ('𐠀', '𐠅'), ('𐠈', '𐠈'), ('𐠊', '𐠵'), ('𐠷', '𐠸'),
+  ('𐠼', '𐠼'), ('𐠿', '𐡕'), ('𐡠', '𐡶'), ('𐢀', '𐢞'),
+  ('𐣠', '𐣲'), ('𐣴', '𐣵'), ('𐤀', '𐤕'), ('𐤠', '𐤹'),
+  ('𐦀', '𐦷'), ('𐦾', '𐦿'), ('𐨀', '𐨀'), ('𐨐', '𐨓'),
+  ('𐨕', '𐨗'), ('𐨙', '𐨳'), ('𐩠', '𐩼'), ('𐪀', '𐪜'),
+  ('𐫀', '𐫇'), ('𐫉', '𐫤'), ('𐬀', '𐬵'), ('𐭀', '𐭕'),
+  ('𐭠', '𐭲'), ('𐮀', '𐮑'), ('𐰀', '𐱈'), ('𐲀', '𐲲'),
+  ('𐳀', '𐳲'), ('𑀃', '𑀷'), ('𑂃', '𑂯'), ('𑃐', '𑃨'),
+  ('𑄃', '𑄦'), ('𑅐', '𑅲'), ('𑅶', '𑅶'), ('𑆃', '𑆲'),
+  ('𑇁', '𑇄'), ('𑇚', '𑇚'), ('𑇜', '𑇜'), ('𑈀', '𑈑'),
+  ('𑈓', '𑈫'), ('𑊀', '𑊆'), ('𑊈', '𑊈'), ('𑊊', '𑊍'),
+  ('𑊏', '𑊝'), ('𑊟', '𑊨'), ('𑊰', '𑋞'), ('𑌅', '𑌌'),
+  ('𑌏', '𑌐'), ('𑌓', '𑌨'), ('𑌪', '𑌰'), ('𑌲', '𑌳'),
+  ('𑌵', '𑌹'), ('𑌽', '𑌽'), ('𑍐', '𑍐'), ('𑍝', '𑍡'),
+  ('𑐀', '𑐴'), ('𑑇', '𑑊'), ('𑒀', '𑒯'), ('𑓄', '𑓅'),
+  ('𑓇', '𑓇'), ('𑖀', '𑖮'), ('𑗘', '𑗛'), ('𑘀', '𑘯'),
+  ('𑙄', '𑙄'), ('𑚀', '𑚪'), ('𑜀', '𑜙'), ('𑢠', '𑣟'),
+  ('𑣿', '𑣿'), ('𑨀', '𑨀'), ('𑨋', '𑨲'), ('𑨺', '𑨺'),
+  ('𑩐', '𑩐'), ('𑩜', '𑪃'), ('𑪆', '𑪉'), ('𑫀', '𑫸'),
+  ('𑰀', '𑰈'), ('𑰊', '𑰮'), ('𑱀', '𑱀'), ('𑱲', '𑲏'),
+  ('𑴀', '𑴆'), ('𑴈', '𑴉'), ('𑴋', '𑴰'), ('𑵆', '𑵆'),
+  ('𒀀', '𒎙'), ('𒐀', '𒑮'), ('𒒀', '𒕃'), ('𓀀', '𓐮'),
+  ('𔐀', '𔙆'), ('𖠀', '𖨸'), ('𖩀', '𖩞'), ('𖫐', '𖫭'),
+  ('𖬀', '𖬯'), ('𖭀', '𖭃'), ('𖭣', '𖭷'), ('𖭽', '𖮏'),
+  ('𖼀', '𖽄'), ('𖽐', '𖽐'), ('𖾓', '𖾟'), ('𖿠', '𖿡'),
+  ('𗀀', '𘟬'), ('𘠀', '𘫲'), ('𛀀', '𛄞'), ('𛅰', '𛋻'),
+  ('𛰀', '𛱪'), ('𛱰', '𛱼'), ('𛲀', '𛲈'), ('𛲐', '𛲙'),
+  ('𝐀', '𝑔'), ('𝑖', '𝒜'), ('𝒞', '𝒟'), ('𝒢', '𝒢'),
+  ('𝒥', '𝒦'), ('𝒩', '𝒬'), ('𝒮', '𝒹'), ('𝒻', '𝒻'),
+  ('𝒽', '𝓃'), ('𝓅', '𝔅'), ('𝔇', '𝔊'), ('𝔍', '𝔔'),
+  ('𝔖', '𝔜'), ('𝔞', '𝔹'), ('𝔻', '𝔾'), ('𝕀', '𝕄'),
+  ('𝕆', '𝕆'), ('𝕊', '𝕐'), ('𝕒', '𝚥'), ('𝚨', '𝛀'),
+  ('𝛂', '𝛚'), ('𝛜', '𝛺'), ('𝛼', '𝜔'), ('𝜖', '𝜴'),
+  ('𝜶', '𝝎'), ('𝝐', '𝝮'), ('𝝰', '𝞈'), ('𝞊', '𝞨'),
+  ('𝞪', '𝟂'), ('𝟄', '𝟋'), ('𞠀', '𞣄'), ('𞤀', '𞥃'),
+  ('𞸀', '𞸃'), ('𞸅', '𞸟'), ('𞸡', '𞸢'), ('𞸤', '𞸤'),
+  ('𞸧', '𞸧'), ('𞸩', '𞸲'), ('𞸴', '𞸷'), ('𞸹', '𞸹'),
+  ('𞸻', '𞸻'), ('𞹂', '𞹂'), ('𞹇', '𞹇'), ('𞹉', '𞹉'),
+  ('𞹋', '𞹋'), ('𞹍', '𞹏'), ('𞹑', '𞹒'), ('𞹔', '𞹔'),
+  ('𞹗', '𞹗'), ('𞹙', '𞹙'), ('𞹛', '𞹛'), ('𞹝', '𞹝'),
+  ('𞹟', '𞹟'), ('𞹡', '𞹢'), ('𞹤', '𞹤'), ('𞹧', '𞹪'),
+  ('𞹬', '𞹲'), ('𞹴', '𞹷'), ('𞹹', '𞹼'), ('𞹾', '𞹾'),
+  ('𞺀', '𞺉'), ('𞺋', '𞺛'), ('𞺡', '𞺣'), ('𞺥', '𞺩'),
+  ('𞺫', '𞺻'), ('𠀀', '𪛖'), ('𪜀', '𫜴'), ('𫝀', '𫠝'),
+  ('𫠠', '𬺡'), ('𬺰', '𮯠'), ('丽', '𪘀'),
+];
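
Each generated table above is a sorted list of non-overlapping, inclusive (char, char) ranges, so codepoint membership can be answered with a binary search instead of a linear scan. The following standalone sketch illustrates that lookup; it is not part of the vendored crate, and the `contains` helper and the truncated WHITE_SPACE excerpt are illustrative assumptions only.

// Minimal sketch: membership test over a sorted table of inclusive ranges.
// Not the crate's actual API; regex-syntax's own lookup may differ in detail.
fn contains(table: &[(char, char)], c: char) -> bool {
    table
        .binary_search_by(|&(start, end)| {
            // Order each range relative to the sought codepoint.
            if end < c {
                std::cmp::Ordering::Less
            } else if start > c {
                std::cmp::Ordering::Greater
            } else {
                std::cmp::Ordering::Equal
            }
        })
        .is_ok()
}

fn main() {
    // Truncated excerpt of the generated WHITE_SPACE table above.
    const WHITE_SPACE_EXCERPT: &[(char, char)] = &[
        ('\t', '\r'), (' ', ' '), ('\u{85}', '\u{85}'), ('\u{a0}', '\u{a0}'),
    ];
    assert!(contains(WHITE_SPACE_EXCERPT, ' '));
    assert!(!contains(WHITE_SPACE_EXCERPT, 'x'));
}
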
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-syntax/src/unicode_tables/property_names.rs
@@ -0,0 +1,145 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+//  ucd-generate property-names tmp/ucd-10.0.0/
+//
+// ucd-generate is available on crates.io.
+
+pub const PROPERTY_NAMES: &'static [(&'static str, &'static str)] = &[
+  ("age", "Age"), ("ahex", "ASCII_Hex_Digit"), ("alpha", "Alphabetic"),
+  ("alphabetic", "Alphabetic"), ("asciihexdigit", "ASCII_Hex_Digit"),
+  ("bc", "Bidi_Class"), ("bidic", "Bidi_Control"),
+  ("bidiclass", "Bidi_Class"), ("bidicontrol", "Bidi_Control"),
+  ("bidim", "Bidi_Mirrored"), ("bidimirrored", "Bidi_Mirrored"),
+  ("bidimirroringglyph", "Bidi_Mirroring_Glyph"),
+  ("bidipairedbracket", "Bidi_Paired_Bracket"),
+  ("bidipairedbrackettype", "Bidi_Paired_Bracket_Type"), ("blk", "Block"),
+  ("block", "Block"), ("bmg", "Bidi_Mirroring_Glyph"),
+  ("bpb", "Bidi_Paired_Bracket"), ("bpt", "Bidi_Paired_Bracket_Type"),
+  ("canonicalcombiningclass", "Canonical_Combining_Class"),
+  ("cased", "Cased"), ("casefolding", "Case_Folding"),
+  ("caseignorable", "Case_Ignorable"), ("ccc", "Canonical_Combining_Class"),
+  ("ce", "Composition_Exclusion"), ("cf", "Case_Folding"),
+  ("changeswhencasefolded", "Changes_When_Casefolded"),
+  ("changeswhencasemapped", "Changes_When_Casemapped"),
+  ("changeswhenlowercased", "Changes_When_Lowercased"),
+  ("changeswhennfkccasefolded", "Changes_When_NFKC_Casefolded"),
+  ("changeswhentitlecased", "Changes_When_Titlecased"),
+  ("changeswhenuppercased", "Changes_When_Uppercased"),
+  ("ci", "Case_Ignorable"), ("cjkaccountingnumeric", "kAccountingNumeric"),
+  ("cjkcompatibilityvariant", "kCompatibilityVariant"),
+  ("cjkiicore", "kIICore"), ("cjkirggsource", "kIRG_GSource"),
+  ("cjkirghsource", "kIRG_HSource"), ("cjkirgjsource", "kIRG_JSource"),
+  ("cjkirgkpsource", "kIRG_KPSource"), ("cjkirgksource", "kIRG_KSource"),
+  ("cjkirgmsource", "kIRG_MSource"), ("cjkirgtsource", "kIRG_TSource"),
+  ("cjkirgusource", "kIRG_USource"), ("cjkirgvsource", "kIRG_VSource"),
+  ("cjkothernumeric", "kOtherNumeric"),
+  ("cjkprimarynumeric", "kPrimaryNumeric"), ("cjkrsunicode", "kRSUnicode"),
+  ("compex", "Full_Composition_Exclusion"),
+  ("compositionexclusion", "Composition_Exclusion"),
+  ("cwcf", "Changes_When_Casefolded"), ("cwcm", "Changes_When_Casemapped"),
+  ("cwkcf", "Changes_When_NFKC_Casefolded"),
+  ("cwl", "Changes_When_Lowercased"), ("cwt", "Changes_When_Titlecased"),
+  ("cwu", "Changes_When_Uppercased"), ("dash", "Dash"),
+  ("decompositionmapping", "Decomposition_Mapping"),
+  ("decompositiontype", "Decomposition_Type"),
+  ("defaultignorablecodepoint", "Default_Ignorable_Code_Point"),
+  ("dep", "Deprecated"), ("deprecated", "Deprecated"),
+  ("di", "Default_Ignorable_Code_Point"), ("dia", "Diacritic"),
+  ("diacritic", "Diacritic"), ("dm", "Decomposition_Mapping"),
+  ("dt", "Decomposition_Type"), ("ea", "East_Asian_Width"),
+  ("eastasianwidth", "East_Asian_Width"), ("expandsonnfc", "Expands_On_NFC"),
+  ("expandsonnfd", "Expands_On_NFD"), ("expandsonnfkc", "Expands_On_NFKC"),
+  ("expandsonnfkd", "Expands_On_NFKD"), ("ext", "Extender"),
+  ("extender", "Extender"), ("fcnfkc", "FC_NFKC_Closure"),
+  ("fcnfkcclosure", "FC_NFKC_Closure"),
+  ("fullcompositionexclusion", "Full_Composition_Exclusion"),
+  ("gc", "General_Category"), ("gcb", "Grapheme_Cluster_Break"),
+  ("generalcategory", "General_Category"), ("graphemebase", "Grapheme_Base"),
+  ("graphemeclusterbreak", "Grapheme_Cluster_Break"),
+  ("graphemeextend", "Grapheme_Extend"), ("graphemelink", "Grapheme_Link"),
+  ("grbase", "Grapheme_Base"), ("grext", "Grapheme_Extend"),
+  ("grlink", "Grapheme_Link"), ("hangulsyllabletype", "Hangul_Syllable_Type"),
+  ("hex", "Hex_Digit"), ("hexdigit", "Hex_Digit"),
+  ("hst", "Hangul_Syllable_Type"), ("hyphen", "Hyphen"),
+  ("idc", "ID_Continue"), ("idcontinue", "ID_Continue"),
+  ("ideo", "Ideographic"), ("ideographic", "Ideographic"),
+  ("ids", "ID_Start"), ("idsb", "IDS_Binary_Operator"),
+  ("idsbinaryoperator", "IDS_Binary_Operator"),
+  ("idst", "IDS_Trinary_Operator"), ("idstart", "ID_Start"),
+  ("idstrinaryoperator", "IDS_Trinary_Operator"),
+  ("indicpositionalcategory", "Indic_Positional_Category"),
+  ("indicsyllabiccategory", "Indic_Syllabic_Category"),
+  ("inpc", "Indic_Positional_Category"), ("insc", "Indic_Syllabic_Category"),
+  ("isc", "ISO_Comment"), ("jamoshortname", "Jamo_Short_Name"),
+  ("jg", "Joining_Group"), ("joinc", "Join_Control"),
+  ("joincontrol", "Join_Control"), ("joininggroup", "Joining_Group"),
+  ("joiningtype", "Joining_Type"), ("jsn", "Jamo_Short_Name"),
+  ("jt", "Joining_Type"), ("kaccountingnumeric", "kAccountingNumeric"),
+  ("kcompatibilityvariant", "kCompatibilityVariant"), ("kiicore", "kIICore"),
+  ("kirggsource", "kIRG_GSource"), ("kirghsource", "kIRG_HSource"),
+  ("kirgjsource", "kIRG_JSource"), ("kirgkpsource", "kIRG_KPSource"),
+  ("kirgksource", "kIRG_KSource"), ("kirgmsource", "kIRG_MSource"),
+  ("kirgtsource", "kIRG_TSource"), ("kirgusource", "kIRG_USource"),
+  ("kirgvsource", "kIRG_VSource"), ("kothernumeric", "kOtherNumeric"),
+  ("kprimarynumeric", "kPrimaryNumeric"), ("krsunicode", "kRSUnicode"),
+  ("lb", "Line_Break"), ("lc", "Lowercase_Mapping"),
+  ("linebreak", "Line_Break"), ("loe", "Logical_Order_Exception"),
+  ("logicalorderexception", "Logical_Order_Exception"),
+  ("lower", "Lowercase"), ("lowercase", "Lowercase"),
+  ("lowercasemapping", "Lowercase_Mapping"), ("math", "Math"), ("na", "Name"),
+  ("na1", "Unicode_1_Name"), ("name", "Name"), ("namealias", "Name_Alias"),
+  ("nchar", "Noncharacter_Code_Point"), ("nfcqc", "NFC_Quick_Check"),
+  ("nfcquickcheck", "NFC_Quick_Check"), ("nfdqc", "NFD_Quick_Check"),
+  ("nfdquickcheck", "NFD_Quick_Check"), ("nfkccasefold", "NFKC_Casefold"),
+  ("nfkccf", "NFKC_Casefold"), ("nfkcqc", "NFKC_Quick_Check"),
+  ("nfkcquickcheck", "NFKC_Quick_Check"), ("nfkdqc", "NFKD_Quick_Check"),
+  ("nfkdquickcheck", "NFKD_Quick_Check"),
+  ("noncharactercodepoint", "Noncharacter_Code_Point"),
+  ("nt", "Numeric_Type"), ("numerictype", "Numeric_Type"),
+  ("numericvalue", "Numeric_Value"), ("nv", "Numeric_Value"),
+  ("oalpha", "Other_Alphabetic"), ("ocomment", "ISO_Comment"),
+  ("odi", "Other_Default_Ignorable_Code_Point"),
+  ("ogrext", "Other_Grapheme_Extend"), ("oidc", "Other_ID_Continue"),
+  ("oids", "Other_ID_Start"), ("olower", "Other_Lowercase"),
+  ("omath", "Other_Math"), ("otheralphabetic", "Other_Alphabetic"),
+  ("otherdefaultignorablecodepoint", "Other_Default_Ignorable_Code_Point"),
+  ("othergraphemeextend", "Other_Grapheme_Extend"),
+  ("otheridcontinue", "Other_ID_Continue"),
+  ("otheridstart", "Other_ID_Start"), ("otherlowercase", "Other_Lowercase"),
+  ("othermath", "Other_Math"), ("otheruppercase", "Other_Uppercase"),
+  ("oupper", "Other_Uppercase"), ("patsyn", "Pattern_Syntax"),
+  ("patternsyntax", "Pattern_Syntax"),
+  ("patternwhitespace", "Pattern_White_Space"),
+  ("patws", "Pattern_White_Space"), ("pcm", "Prepended_Concatenation_Mark"),
+  ("prependedconcatenationmark", "Prepended_Concatenation_Mark"),
+  ("qmark", "Quotation_Mark"), ("quotationmark", "Quotation_Mark"),
+  ("radical", "Radical"), ("regionalindicator", "Regional_Indicator"),
+  ("ri", "Regional_Indicator"), ("sb", "Sentence_Break"), ("sc", "Script"),
+  ("scf", "Simple_Case_Folding"), ("script", "Script"),
+  ("scriptextensions", "Script_Extensions"), ("scx", "Script_Extensions"),
+  ("sd", "Soft_Dotted"), ("sentencebreak", "Sentence_Break"),
+  ("sentenceterminal", "Sentence_Terminal"), ("sfc", "Simple_Case_Folding"),
+  ("simplecasefolding", "Simple_Case_Folding"),
+  ("simplelowercasemapping", "Simple_Lowercase_Mapping"),
+  ("simpletitlecasemapping", "Simple_Titlecase_Mapping"),
+  ("simpleuppercasemapping", "Simple_Uppercase_Mapping"),
+  ("slc", "Simple_Lowercase_Mapping"), ("softdotted", "Soft_Dotted"),
+  ("space", "White_Space"), ("stc", "Simple_Titlecase_Mapping"),
+  ("sterm", "Sentence_Terminal"), ("suc", "Simple_Uppercase_Mapping"),
+  ("tc", "Titlecase_Mapping"), ("term", "Terminal_Punctuation"),
+  ("terminalpunctuation", "Terminal_Punctuation"),
+  ("titlecasemapping", "Titlecase_Mapping"), ("uc", "Uppercase_Mapping"),
+  ("uideo", "Unified_Ideograph"), ("unicode1name", "Unicode_1_Name"),
+  ("unicoderadicalstroke", "kRSUnicode"),
+  ("unifiedideograph", "Unified_Ideograph"), ("upper", "Uppercase"),
+  ("uppercase", "Uppercase"), ("uppercasemapping", "Uppercase_Mapping"),
+  ("urs", "kRSUnicode"), ("variationselector", "Variation_Selector"),
+  ("verticalorientation", "Vertical_Orientation"),
+  ("vo", "Vertical_Orientation"), ("vs", "Variation_Selector"),
+  ("wb", "Word_Break"), ("whitespace", "White_Space"),
+  ("wordbreak", "Word_Break"), ("wspace", "White_Space"),
+  ("xidc", "XID_Continue"), ("xidcontinue", "XID_Continue"),
+  ("xids", "XID_Start"), ("xidstart", "XID_Start"),
+  ("xonfc", "Expands_On_NFC"), ("xonfd", "Expands_On_NFD"),
+  ("xonfkc", "Expands_On_NFKC"), ("xonfkd", "Expands_On_NFKD"),
+];
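
The PROPERTY_NAMES table pairs loosely normalized aliases (lowercased, with separators dropped) with canonical Unicode property names, sorted by the normalized key. A lookup over such a table would plausibly normalize the query the same way and then binary-search the keys; the `canonical_property_name` helper and the truncated table excerpt below are hypothetical illustrations, not the crate's actual API.

// Sketch of a loose-match lookup over a sorted (alias, canonical) table.
fn canonical_property_name(
    table: &[(&'static str, &'static str)],
    query: &str,
) -> Option<&'static str> {
    // Assumed normalization: drop '_', '-', ' ' and lowercase the rest,
    // mirroring how the generated keys appear above.
    let normalized: String = query
        .chars()
        .filter(|&c| !matches!(c, '_' | '-' | ' '))
        .flat_map(char::to_lowercase)
        .collect();
    table
        .binary_search_by(|probe| probe.0.cmp(normalized.as_str()))
        .ok()
        .map(|i| table[i].1)
}

fn main() {
    // Truncated excerpt of the generated PROPERTY_NAMES table above.
    const PROPERTY_NAMES_EXCERPT: &[(&str, &str)] = &[
        ("ahex", "ASCII_Hex_Digit"),
        ("alpha", "Alphabetic"),
        ("whitespace", "White_Space"),
    ];
    assert_eq!(
        canonical_property_name(PROPERTY_NAMES_EXCERPT, "White_Space"),
        Some("White_Space")
    );
}
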
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-syntax/src/unicode_tables/property_values.rs
@@ -0,0 +1,277 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+//  ucd-generate property-values tmp/ucd-10.0.0 --include gc,script,scx,age
+//
+// ucd-generate is available on crates.io.
+
+pub const PROPERTY_VALUES: &'static [(&'static str, &'static [(&'static str, &'static str)])] = &[
+  ("Age", &[("1.1", "V1_1"), ("10.0", "V10_0"), ("2.0", "V2_0"),
+  ("2.1", "V2_1"), ("3.0", "V3_0"), ("3.1", "V3_1"), ("3.2", "V3_2"),
+  ("4.0", "V4_0"), ("4.1", "V4_1"), ("5.0", "V5_0"), ("5.1", "V5_1"),
+  ("5.2", "V5_2"), ("6.0", "V6_0"), ("6.1", "V6_1"), ("6.2", "V6_2"),
+  ("6.3", "V6_3"), ("7.0", "V7_0"), ("8.0", "V8_0"), ("9.0", "V9_0"),
+  ("na", "Unassigned"), ("unassigned", "Unassigned"), ("v100", "V10_0"),
+  ("v11", "V1_1"), ("v20", "V2_0"), ("v21", "V2_1"), ("v30", "V3_0"),
+  ("v31", "V3_1"), ("v32", "V3_2"), ("v40", "V4_0"), ("v41", "V4_1"),
+  ("v50", "V5_0"), ("v51", "V5_1"), ("v52", "V5_2"), ("v60", "V6_0"),
+  ("v61", "V6_1"), ("v62", "V6_2"), ("v63", "V6_3"), ("v70", "V7_0"),
+  ("v80", "V8_0"), ("v90", "V9_0"), ]),
+
+  ("General_Category", &[("c", "Other"), ("casedletter", "Cased_Letter"),
+  ("cc", "Control"), ("cf", "Format"),
+  ("closepunctuation", "Close_Punctuation"), ("cn", "Unassigned"),
+  ("cntrl", "Control"), ("co", "Private_Use"), ("combiningmark", "Mark"),
+  ("connectorpunctuation", "Connector_Punctuation"), ("control", "Control"),
+  ("cs", "Surrogate"), ("currencysymbol", "Currency_Symbol"),
+  ("dashpunctuation", "Dash_Punctuation"),
+  ("decimalnumber", "Decimal_Number"), ("digit", "Decimal_Number"),
+  ("enclosingmark", "Enclosing_Mark"),
+  ("finalpunctuation", "Final_Punctuation"), ("format", "Format"),
+  ("initialpunctuation", "Initial_Punctuation"), ("l", "Letter"),
+  ("lc", "Cased_Letter"), ("letter", "Letter"),
+  ("letternumber", "Letter_Number"), ("lineseparator", "Line_Separator"),
+  ("ll", "Lowercase_Letter"), ("lm", "Modifier_Letter"),
+  ("lo", "Other_Letter"), ("lowercaseletter", "Lowercase_Letter"),
+  ("lt", "Titlecase_Letter"), ("lu", "Uppercase_Letter"), ("m", "Mark"),
+  ("mark", "Mark"), ("mathsymbol", "Math_Symbol"), ("mc", "Spacing_Mark"),
+  ("me", "Enclosing_Mark"), ("mn", "Nonspacing_Mark"),
+  ("modifierletter", "Modifier_Letter"),
+  ("modifiersymbol", "Modifier_Symbol"), ("n", "Number"),
+  ("nd", "Decimal_Number"), ("nl", "Letter_Number"), ("no", "Other_Number"),
+  ("nonspacingmark", "Nonspacing_Mark"), ("number", "Number"),
+  ("openpunctuation", "Open_Punctuation"), ("other", "Other"),
+  ("otherletter", "Other_Letter"), ("othernumber", "Other_Number"),
+  ("otherpunctuation", "Other_Punctuation"), ("othersymbol", "Other_Symbol"),
+  ("p", "Punctuation"), ("paragraphseparator", "Paragraph_Separator"),
+  ("pc", "Connector_Punctuation"), ("pd", "Dash_Punctuation"),
+  ("pe", "Close_Punctuation"), ("pf", "Final_Punctuation"),
+  ("pi", "Initial_Punctuation"), ("po", "Other_Punctuation"),
+  ("privateuse", "Private_Use"), ("ps", "Open_Punctuation"),
+  ("punct", "Punctuation"), ("punctuation", "Punctuation"), ("s", "Symbol"),
+  ("sc", "Currency_Symbol"), ("separator", "Separator"),
+  ("sk", "Modifier_Symbol"), ("sm", "Math_Symbol"), ("so", "Other_Symbol"),
+  ("spaceseparator", "Space_Separator"), ("spacingmark", "Spacing_Mark"),
+  ("surrogate", "Surrogate"), ("symbol", "Symbol"),
+  ("titlecaseletter", "Titlecase_Letter"), ("unassigned", "Unassigned"),
+  ("uppercaseletter", "Uppercase_Letter"), ("z", "Separator"),
+  ("zl", "Line_Separator"), ("zp", "Paragraph_Separator"),
+  ("zs", "Space_Separator"), ]),
+
+  ("Script", &[("adlam", "Adlam"), ("adlm", "Adlam"),
+  ("aghb", "Caucasian_Albanian"), ("ahom", "Ahom"),
+  ("anatolianhieroglyphs", "Anatolian_Hieroglyphs"), ("arab", "Arabic"),
+  ("arabic", "Arabic"), ("armenian", "Armenian"),
+  ("armi", "Imperial_Aramaic"), ("armn", "Armenian"), ("avestan", "Avestan"),
+  ("avst", "Avestan"), ("bali", "Balinese"), ("balinese", "Balinese"),
+  ("bamu", "Bamum"), ("bamum", "Bamum"), ("bass", "Bassa_Vah"),
+  ("bassavah", "Bassa_Vah"), ("batak", "Batak"), ("batk", "Batak"),
+  ("beng", "Bengali"), ("bengali", "Bengali"), ("bhaiksuki", "Bhaiksuki"),
+  ("bhks", "Bhaiksuki"), ("bopo", "Bopomofo"), ("bopomofo", "Bopomofo"),
+  ("brah", "Brahmi"), ("brahmi", "Brahmi"), ("brai", "Braille"),
+  ("braille", "Braille"), ("bugi", "Buginese"), ("buginese", "Buginese"),
+  ("buhd", "Buhid"), ("buhid", "Buhid"), ("cakm", "Chakma"),
+  ("canadianaboriginal", "Canadian_Aboriginal"),
+  ("cans", "Canadian_Aboriginal"), ("cari", "Carian"), ("carian", "Carian"),
+  ("caucasianalbanian", "Caucasian_Albanian"), ("chakma", "Chakma"),
+  ("cham", "Cham"), ("cher", "Cherokee"), ("cherokee", "Cherokee"),
+  ("common", "Common"), ("copt", "Coptic"), ("coptic", "Coptic"),
+  ("cprt", "Cypriot"), ("cuneiform", "Cuneiform"), ("cypriot", "Cypriot"),
+  ("cyrillic", "Cyrillic"), ("cyrl", "Cyrillic"), ("deseret", "Deseret"),
+  ("deva", "Devanagari"), ("devanagari", "Devanagari"), ("dsrt", "Deseret"),
+  ("dupl", "Duployan"), ("duployan", "Duployan"),
+  ("egyp", "Egyptian_Hieroglyphs"),
+  ("egyptianhieroglyphs", "Egyptian_Hieroglyphs"), ("elba", "Elbasan"),
+  ("elbasan", "Elbasan"), ("ethi", "Ethiopic"), ("ethiopic", "Ethiopic"),
+  ("geor", "Georgian"), ("georgian", "Georgian"), ("glag", "Glagolitic"),
+  ("glagolitic", "Glagolitic"), ("gonm", "Masaram_Gondi"), ("goth", "Gothic"),
+  ("gothic", "Gothic"), ("gran", "Grantha"), ("grantha", "Grantha"),
+  ("greek", "Greek"), ("grek", "Greek"), ("gujarati", "Gujarati"),
+  ("gujr", "Gujarati"), ("gurmukhi", "Gurmukhi"), ("guru", "Gurmukhi"),
+  ("han", "Han"), ("hang", "Hangul"), ("hangul", "Hangul"), ("hani", "Han"),
+  ("hano", "Hanunoo"), ("hanunoo", "Hanunoo"), ("hatr", "Hatran"),
+  ("hatran", "Hatran"), ("hebr", "Hebrew"), ("hebrew", "Hebrew"),
+  ("hira", "Hiragana"), ("hiragana", "Hiragana"),
+  ("hluw", "Anatolian_Hieroglyphs"), ("hmng", "Pahawh_Hmong"),
+  ("hrkt", "Katakana_Or_Hiragana"), ("hung", "Old_Hungarian"),
+  ("imperialaramaic", "Imperial_Aramaic"), ("inherited", "Inherited"),
+  ("inscriptionalpahlavi", "Inscriptional_Pahlavi"),
+  ("inscriptionalparthian", "Inscriptional_Parthian"), ("ital", "Old_Italic"),
+  ("java", "Javanese"), ("javanese", "Javanese"), ("kaithi", "Kaithi"),
+  ("kali", "Kayah_Li"), ("kana", "Katakana"), ("kannada", "Kannada"),
+  ("katakana", "Katakana"), ("katakanaorhiragana", "Katakana_Or_Hiragana"),
+  ("kayahli", "Kayah_Li"), ("khar", "Kharoshthi"),
+  ("kharoshthi", "Kharoshthi"), ("khmer", "Khmer"), ("khmr", "Khmer"),
+  ("khoj", "Khojki"), ("khojki", "Khojki"), ("khudawadi", "Khudawadi"),
+  ("knda", "Kannada"), ("kthi", "Kaithi"), ("lana", "Tai_Tham"),
+  ("lao", "Lao"), ("laoo", "Lao"), ("latin", "Latin"), ("latn", "Latin"),
+  ("lepc", "Lepcha"), ("lepcha", "Lepcha"), ("limb", "Limbu"),
+  ("limbu", "Limbu"), ("lina", "Linear_A"), ("linb", "Linear_B"),
+  ("lineara", "Linear_A"), ("linearb", "Linear_B"), ("lisu", "Lisu"),
+  ("lyci", "Lycian"), ("lycian", "Lycian"), ("lydi", "Lydian"),
+  ("lydian", "Lydian"), ("mahajani", "Mahajani"), ("mahj", "Mahajani"),
+  ("malayalam", "Malayalam"), ("mand", "Mandaic"), ("mandaic", "Mandaic"),
+  ("mani", "Manichaean"), ("manichaean", "Manichaean"), ("marc", "Marchen"),
+  ("marchen", "Marchen"), ("masaramgondi", "Masaram_Gondi"),
+  ("meeteimayek", "Meetei_Mayek"), ("mend", "Mende_Kikakui"),
+  ("mendekikakui", "Mende_Kikakui"), ("merc", "Meroitic_Cursive"),
+  ("mero", "Meroitic_Hieroglyphs"), ("meroiticcursive", "Meroitic_Cursive"),
+  ("meroitichieroglyphs", "Meroitic_Hieroglyphs"), ("miao", "Miao"),
+  ("mlym", "Malayalam"), ("modi", "Modi"), ("mong", "Mongolian"),
+  ("mongolian", "Mongolian"), ("mro", "Mro"), ("mroo", "Mro"),
+  ("mtei", "Meetei_Mayek"), ("mult", "Multani"), ("multani", "Multani"),
+  ("myanmar", "Myanmar"), ("mymr", "Myanmar"), ("nabataean", "Nabataean"),
+  ("narb", "Old_North_Arabian"), ("nbat", "Nabataean"), ("newa", "Newa"),
+  ("newtailue", "New_Tai_Lue"), ("nko", "Nko"), ("nkoo", "Nko"),
+  ("nshu", "Nushu"), ("nushu", "Nushu"), ("ogam", "Ogham"),
+  ("ogham", "Ogham"), ("olchiki", "Ol_Chiki"), ("olck", "Ol_Chiki"),
+  ("oldhungarian", "Old_Hungarian"), ("olditalic", "Old_Italic"),
+  ("oldnortharabian", "Old_North_Arabian"), ("oldpermic", "Old_Permic"),
+  ("oldpersian", "Old_Persian"), ("oldsoutharabian", "Old_South_Arabian"),
+  ("oldturkic", "Old_Turkic"), ("oriya", "Oriya"), ("orkh", "Old_Turkic"),
+  ("orya", "Oriya"), ("osage", "Osage"), ("osge", "Osage"),
+  ("osma", "Osmanya"), ("osmanya", "Osmanya"),
+  ("pahawhhmong", "Pahawh_Hmong"), ("palm", "Palmyrene"),
+  ("palmyrene", "Palmyrene"), ("pauc", "Pau_Cin_Hau"),
+  ("paucinhau", "Pau_Cin_Hau"), ("perm", "Old_Permic"), ("phag", "Phags_Pa"),
+  ("phagspa", "Phags_Pa"), ("phli", "Inscriptional_Pahlavi"),
+  ("phlp", "Psalter_Pahlavi"), ("phnx", "Phoenician"),
+  ("phoenician", "Phoenician"), ("plrd", "Miao"),
+  ("prti", "Inscriptional_Parthian"), ("psalterpahlavi", "Psalter_Pahlavi"),
+  ("qaac", "Coptic"), ("qaai", "Inherited"), ("rejang", "Rejang"),
+  ("rjng", "Rejang"), ("runic", "Runic"), ("runr", "Runic"),
+  ("samaritan", "Samaritan"), ("samr", "Samaritan"),
+  ("sarb", "Old_South_Arabian"), ("saur", "Saurashtra"),
+  ("saurashtra", "Saurashtra"), ("sgnw", "SignWriting"),
+  ("sharada", "Sharada"), ("shavian", "Shavian"), ("shaw", "Shavian"),
+  ("shrd", "Sharada"), ("sidd", "Siddham"), ("siddham", "Siddham"),
+  ("signwriting", "SignWriting"), ("sind", "Khudawadi"), ("sinh", "Sinhala"),
+  ("sinhala", "Sinhala"), ("sora", "Sora_Sompeng"),
+  ("sorasompeng", "Sora_Sompeng"), ("soyo", "Soyombo"),
+  ("soyombo", "Soyombo"), ("sund", "Sundanese"), ("sundanese", "Sundanese"),
+  ("sylo", "Syloti_Nagri"), ("sylotinagri", "Syloti_Nagri"),
+  ("syrc", "Syriac"), ("syriac", "Syriac"), ("tagalog", "Tagalog"),
+  ("tagb", "Tagbanwa"), ("tagbanwa", "Tagbanwa"), ("taile", "Tai_Le"),
+  ("taitham", "Tai_Tham"), ("taiviet", "Tai_Viet"), ("takr", "Takri"),
+  ("takri", "Takri"), ("tale", "Tai_Le"), ("talu", "New_Tai_Lue"),
+  ("tamil", "Tamil"), ("taml", "Tamil"), ("tang", "Tangut"),
+  ("tangut", "Tangut"), ("tavt", "Tai_Viet"), ("telu", "Telugu"),
+  ("telugu", "Telugu"), ("tfng", "Tifinagh"), ("tglg", "Tagalog"),
+  ("thaa", "Thaana"), ("thaana", "Thaana"), ("thai", "Thai"),
+  ("tibetan", "Tibetan"), ("tibt", "Tibetan"), ("tifinagh", "Tifinagh"),
+  ("tirh", "Tirhuta"), ("tirhuta", "Tirhuta"), ("ugar", "Ugaritic"),
+  ("ugaritic", "Ugaritic"), ("unknown", "Unknown"), ("vai", "Vai"),
+  ("vaii", "Vai"), ("wara", "Warang_Citi"), ("warangciti", "Warang_Citi"),
+  ("xpeo", "Old_Persian"), ("xsux", "Cuneiform"), ("yi", "Yi"),
+  ("yiii", "Yi"), ("zanabazarsquare", "Zanabazar_Square"),
+  ("zanb", "Zanabazar_Square"), ("zinh", "Inherited"), ("zyyy", "Common"),
+  ("zzzz", "Unknown"), ]),
+
+  ("Script_Extensions", &[("adlam", "Adlam"), ("adlm", "Adlam"),
+  ("aghb", "Caucasian_Albanian"), ("ahom", "Ahom"),
+  ("anatolianhieroglyphs", "Anatolian_Hieroglyphs"), ("arab", "Arabic"),
+  ("arabic", "Arabic"), ("armenian", "Armenian"),
+  ("armi", "Imperial_Aramaic"), ("armn", "Armenian"), ("avestan", "Avestan"),
+  ("avst", "Avestan"), ("bali", "Balinese"), ("balinese", "Balinese"),
+  ("bamu", "Bamum"), ("bamum", "Bamum"), ("bass", "Bassa_Vah"),
+  ("bassavah", "Bassa_Vah"), ("batak", "Batak"), ("batk", "Batak"),
+  ("beng", "Bengali"), ("bengali", "Bengali"), ("bhaiksuki", "Bhaiksuki"),
+  ("bhks", "Bhaiksuki"), ("bopo", "Bopomofo"), ("bopomofo", "Bopomofo"),
+  ("brah", "Brahmi"), ("brahmi", "Brahmi"), ("brai", "Braille"),
+  ("braille", "Braille"), ("bugi", "Buginese"), ("buginese", "Buginese"),
+  ("buhd", "Buhid"), ("buhid", "Buhid"), ("cakm", "Chakma"),
+  ("canadianaboriginal", "Canadian_Aboriginal"),
+  ("cans", "Canadian_Aboriginal"), ("cari", "Carian"), ("carian", "Carian"),
+  ("caucasianalbanian", "Caucasian_Albanian"), ("chakma", "Chakma"),
+  ("cham", "Cham"), ("cher", "Cherokee"), ("cherokee", "Cherokee"),
+  ("common", "Common"), ("copt", "Coptic"), ("coptic", "Coptic"),
+  ("cprt", "Cypriot"), ("cuneiform", "Cuneiform"), ("cypriot", "Cypriot"),
+  ("cyrillic", "Cyrillic"), ("cyrl", "Cyrillic"), ("deseret", "Deseret"),
+  ("deva", "Devanagari"), ("devanagari", "Devanagari"), ("dsrt", "Deseret"),
+  ("dupl", "Duployan"), ("duployan", "Duployan"),
+  ("egyp", "Egyptian_Hieroglyphs"),
+  ("egyptianhieroglyphs", "Egyptian_Hieroglyphs"), ("elba", "Elbasan"),
+  ("elbasan", "Elbasan"), ("ethi", "Ethiopic"), ("ethiopic", "Ethiopic"),
+  ("geor", "Georgian"), ("georgian", "Georgian"), ("glag", "Glagolitic"),
+  ("glagolitic", "Glagolitic"), ("gonm", "Masaram_Gondi"), ("goth", "Gothic"),
+  ("gothic", "Gothic"), ("gran", "Grantha"), ("grantha", "Grantha"),
+  ("greek", "Greek"), ("grek", "Greek"), ("gujarati", "Gujarati"),
+  ("gujr", "Gujarati"), ("gurmukhi", "Gurmukhi"), ("guru", "Gurmukhi"),
+  ("han", "Han"), ("hang", "Hangul"), ("hangul", "Hangul"), ("hani", "Han"),
+  ("hano", "Hanunoo"), ("hanunoo", "Hanunoo"), ("hatr", "Hatran"),
+  ("hatran", "Hatran"), ("hebr", "Hebrew"), ("hebrew", "Hebrew"),
+  ("hira", "Hiragana"), ("hiragana", "Hiragana"),
+  ("hluw", "Anatolian_Hieroglyphs"), ("hmng", "Pahawh_Hmong"),
+  ("hrkt", "Katakana_Or_Hiragana"), ("hung", "Old_Hungarian"),
+  ("imperialaramaic", "Imperial_Aramaic"), ("inherited", "Inherited"),
+  ("inscriptionalpahlavi", "Inscriptional_Pahlavi"),
+  ("inscriptionalparthian", "Inscriptional_Parthian"), ("ital", "Old_Italic"),
+  ("java", "Javanese"), ("javanese", "Javanese"), ("kaithi", "Kaithi"),
+  ("kali", "Kayah_Li"), ("kana", "Katakana"), ("kannada", "Kannada"),
+  ("katakana", "Katakana"), ("katakanaorhiragana", "Katakana_Or_Hiragana"),
+  ("kayahli", "Kayah_Li"), ("khar", "Kharoshthi"),
+  ("kharoshthi", "Kharoshthi"), ("khmer", "Khmer"), ("khmr", "Khmer"),
+  ("khoj", "Khojki"), ("khojki", "Khojki"), ("khudawadi", "Khudawadi"),
+  ("knda", "Kannada"), ("kthi", "Kaithi"), ("lana", "Tai_Tham"),
+  ("lao", "Lao"), ("laoo", "Lao"), ("latin", "Latin"), ("latn", "Latin"),
+  ("lepc", "Lepcha"), ("lepcha", "Lepcha"), ("limb", "Limbu"),
+  ("limbu", "Limbu"), ("lina", "Linear_A"), ("linb", "Linear_B"),
+  ("lineara", "Linear_A"), ("linearb", "Linear_B"), ("lisu", "Lisu"),
+  ("lyci", "Lycian"), ("lycian", "Lycian"), ("lydi", "Lydian"),
+  ("lydian", "Lydian"), ("mahajani", "Mahajani"), ("mahj", "Mahajani"),
+  ("malayalam", "Malayalam"), ("mand", "Mandaic"), ("mandaic", "Mandaic"),
+  ("mani", "Manichaean"), ("manichaean", "Manichaean"), ("marc", "Marchen"),
+  ("marchen", "Marchen"), ("masaramgondi", "Masaram_Gondi"),
+  ("meeteimayek", "Meetei_Mayek"), ("mend", "Mende_Kikakui"),
+  ("mendekikakui", "Mende_Kikakui"), ("merc", "Meroitic_Cursive"),
+  ("mero", "Meroitic_Hieroglyphs"), ("meroiticcursive", "Meroitic_Cursive"),
+  ("meroitichieroglyphs", "Meroitic_Hieroglyphs"), ("miao", "Miao"),
+  ("mlym", "Malayalam"), ("modi", "Modi"), ("mong", "Mongolian"),
+  ("mongolian", "Mongolian"), ("mro", "Mro"), ("mroo", "Mro"),
+  ("mtei", "Meetei_Mayek"), ("mult", "Multani"), ("multani", "Multani"),
+  ("myanmar", "Myanmar"), ("mymr", "Myanmar"), ("nabataean", "Nabataean"),
+  ("narb", "Old_North_Arabian"), ("nbat", "Nabataean"), ("newa", "Newa"),
+  ("newtailue", "New_Tai_Lue"), ("nko", "Nko"), ("nkoo", "Nko"),
+  ("nshu", "Nushu"), ("nushu", "Nushu"), ("ogam", "Ogham"),
+  ("ogham", "Ogham"), ("olchiki", "Ol_Chiki"), ("olck", "Ol_Chiki"),
+  ("oldhungarian", "Old_Hungarian"), ("olditalic", "Old_Italic"),
+  ("oldnortharabian", "Old_North_Arabian"), ("oldpermic", "Old_Permic"),
+  ("oldpersian", "Old_Persian"), ("oldsoutharabian", "Old_South_Arabian"),
+  ("oldturkic", "Old_Turkic"), ("oriya", "Oriya"), ("orkh", "Old_Turkic"),
+  ("orya", "Oriya"), ("osage", "Osage"), ("osge", "Osage"),
+  ("osma", "Osmanya"), ("osmanya", "Osmanya"),
+  ("pahawhhmong", "Pahawh_Hmong"), ("palm", "Palmyrene"),
+  ("palmyrene", "Palmyrene"), ("pauc", "Pau_Cin_Hau"),
+  ("paucinhau", "Pau_Cin_Hau"), ("perm", "Old_Permic"), ("phag", "Phags_Pa"),
+  ("phagspa", "Phags_Pa"), ("phli", "Inscriptional_Pahlavi"),
+  ("phlp", "Psalter_Pahlavi"), ("phnx", "Phoenician"),
+  ("phoenician", "Phoenician"), ("plrd", "Miao"),
+  ("prti", "Inscriptional_Parthian"), ("psalterpahlavi", "Psalter_Pahlavi"),
+  ("qaac", "Coptic"), ("qaai", "Inherited"), ("rejang", "Rejang"),
+  ("rjng", "Rejang"), ("runic", "Runic"), ("runr", "Runic"),
+  ("samaritan", "Samaritan"), ("samr", "Samaritan"),
+  ("sarb", "Old_South_Arabian"), ("saur", "Saurashtra"),
+  ("saurashtra", "Saurashtra"), ("sgnw", "SignWriting"),
+  ("sharada", "Sharada"), ("shavian", "Shavian"), ("shaw", "Shavian"),
+  ("shrd", "Sharada"), ("sidd", "Siddham"), ("siddham", "Siddham"),
+  ("signwriting", "SignWriting"), ("sind", "Khudawadi"), ("sinh", "Sinhala"),
+  ("sinhala", "Sinhala"), ("sora", "Sora_Sompeng"),
+  ("sorasompeng", "Sora_Sompeng"), ("soyo", "Soyombo"),
+  ("soyombo", "Soyombo"), ("sund", "Sundanese"), ("sundanese", "Sundanese"),
+  ("sylo", "Syloti_Nagri"), ("sylotinagri", "Syloti_Nagri"),
+  ("syrc", "Syriac"), ("syriac", "Syriac"), ("tagalog", "Tagalog"),
+  ("tagb", "Tagbanwa"), ("tagbanwa", "Tagbanwa"), ("taile", "Tai_Le"),
+  ("taitham", "Tai_Tham"), ("taiviet", "Tai_Viet"), ("takr", "Takri"),
+  ("takri", "Takri"), ("tale", "Tai_Le"), ("talu", "New_Tai_Lue"),
+  ("tamil", "Tamil"), ("taml", "Tamil"), ("tang", "Tangut"),
+  ("tangut", "Tangut"), ("tavt", "Tai_Viet"), ("telu", "Telugu"),
+  ("telugu", "Telugu"), ("tfng", "Tifinagh"), ("tglg", "Tagalog"),
+  ("thaa", "Thaana"), ("thaana", "Thaana"), ("thai", "Thai"),
+  ("tibetan", "Tibetan"), ("tibt", "Tibetan"), ("tifinagh", "Tifinagh"),
+  ("tirh", "Tirhuta"), ("tirhuta", "Tirhuta"), ("ugar", "Ugaritic"),
+  ("ugaritic", "Ugaritic"), ("unknown", "Unknown"), ("vai", "Vai"),
+  ("vaii", "Vai"), ("wara", "Warang_Citi"), ("warangciti", "Warang_Citi"),
+  ("xpeo", "Old_Persian"), ("xsux", "Cuneiform"), ("yi", "Yi"),
+  ("yiii", "Yi"), ("zanabazarsquare", "Zanabazar_Square"),
+  ("zanb", "Zanabazar_Square"), ("zinh", "Inherited"), ("zyyy", "Common"),
+  ("zzzz", "Unknown"), ]),
+];
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-syntax/src/unicode_tables/script.rs
@@ -0,0 +1,765 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+//  ucd-generate script tmp/ucd-10.0.0/ --chars
+//
+// ucd-generate is available on crates.io.
+
+pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
+  ("Adlam", ADLAM), ("Ahom", AHOM),
+  ("Anatolian_Hieroglyphs", ANATOLIAN_HIEROGLYPHS), ("Arabic", ARABIC),
+  ("Armenian", ARMENIAN), ("Avestan", AVESTAN), ("Balinese", BALINESE),
+  ("Bamum", BAMUM), ("Bassa_Vah", BASSA_VAH), ("Batak", BATAK),
+  ("Bengali", BENGALI), ("Bhaiksuki", BHAIKSUKI), ("Bopomofo", BOPOMOFO),
+  ("Brahmi", BRAHMI), ("Braille", BRAILLE), ("Buginese", BUGINESE),
+  ("Buhid", BUHID), ("Canadian_Aboriginal", CANADIAN_ABORIGINAL),
+  ("Carian", CARIAN), ("Caucasian_Albanian", CAUCASIAN_ALBANIAN),
+  ("Chakma", CHAKMA), ("Cham", CHAM), ("Cherokee", CHEROKEE),
+  ("Common", COMMON), ("Coptic", COPTIC), ("Cuneiform", CUNEIFORM),
+  ("Cypriot", CYPRIOT), ("Cyrillic", CYRILLIC), ("Deseret", DESERET),
+  ("Devanagari", DEVANAGARI), ("Duployan", DUPLOYAN),
+  ("Egyptian_Hieroglyphs", EGYPTIAN_HIEROGLYPHS), ("Elbasan", ELBASAN),
+  ("Ethiopic", ETHIOPIC), ("Georgian", GEORGIAN), ("Glagolitic", GLAGOLITIC),
+  ("Gothic", GOTHIC), ("Grantha", GRANTHA), ("Greek", GREEK),
+  ("Gujarati", GUJARATI), ("Gurmukhi", GURMUKHI), ("Han", HAN),
+  ("Hangul", HANGUL), ("Hanunoo", HANUNOO), ("Hatran", HATRAN),
+  ("Hebrew", HEBREW), ("Hiragana", HIRAGANA),
+  ("Imperial_Aramaic", IMPERIAL_ARAMAIC), ("Inherited", INHERITED),
+  ("Inscriptional_Pahlavi", INSCRIPTIONAL_PAHLAVI),
+  ("Inscriptional_Parthian", INSCRIPTIONAL_PARTHIAN), ("Javanese", JAVANESE),
+  ("Kaithi", KAITHI), ("Kannada", KANNADA), ("Katakana", KATAKANA),
+  ("Kayah_Li", KAYAH_LI), ("Kharoshthi", KHAROSHTHI), ("Khmer", KHMER),
+  ("Khojki", KHOJKI), ("Khudawadi", KHUDAWADI), ("Lao", LAO),
+  ("Latin", LATIN), ("Lepcha", LEPCHA), ("Limbu", LIMBU),
+  ("Linear_A", LINEAR_A), ("Linear_B", LINEAR_B), ("Lisu", LISU),
+  ("Lycian", LYCIAN), ("Lydian", LYDIAN), ("Mahajani", MAHAJANI),
+  ("Malayalam", MALAYALAM), ("Mandaic", MANDAIC), ("Manichaean", MANICHAEAN),
+  ("Marchen", MARCHEN), ("Masaram_Gondi", MASARAM_GONDI),
+  ("Meetei_Mayek", MEETEI_MAYEK), ("Mende_Kikakui", MENDE_KIKAKUI),
+  ("Meroitic_Cursive", MEROITIC_CURSIVE),
+  ("Meroitic_Hieroglyphs", MEROITIC_HIEROGLYPHS), ("Miao", MIAO),
+  ("Modi", MODI), ("Mongolian", MONGOLIAN), ("Mro", MRO),
+  ("Multani", MULTANI), ("Myanmar", MYANMAR), ("Nabataean", NABATAEAN),
+  ("New_Tai_Lue", NEW_TAI_LUE), ("Newa", NEWA), ("Nko", NKO),
+  ("Nushu", NUSHU), ("Ogham", OGHAM), ("Ol_Chiki", OL_CHIKI),
+  ("Old_Hungarian", OLD_HUNGARIAN), ("Old_Italic", OLD_ITALIC),
+  ("Old_North_Arabian", OLD_NORTH_ARABIAN), ("Old_Permic", OLD_PERMIC),
+  ("Old_Persian", OLD_PERSIAN), ("Old_South_Arabian", OLD_SOUTH_ARABIAN),
+  ("Old_Turkic", OLD_TURKIC), ("Oriya", ORIYA), ("Osage", OSAGE),
+  ("Osmanya", OSMANYA), ("Pahawh_Hmong", PAHAWH_HMONG),
+  ("Palmyrene", PALMYRENE), ("Pau_Cin_Hau", PAU_CIN_HAU),
+  ("Phags_Pa", PHAGS_PA), ("Phoenician", PHOENICIAN),
+  ("Psalter_Pahlavi", PSALTER_PAHLAVI), ("Rejang", REJANG), ("Runic", RUNIC),
+  ("Samaritan", SAMARITAN), ("Saurashtra", SAURASHTRA), ("Sharada", SHARADA),
+  ("Shavian", SHAVIAN), ("Siddham", SIDDHAM), ("SignWriting", SIGNWRITING),
+  ("Sinhala", SINHALA), ("Sora_Sompeng", SORA_SOMPENG), ("Soyombo", SOYOMBO),
+  ("Sundanese", SUNDANESE), ("Syloti_Nagri", SYLOTI_NAGRI),
+  ("Syriac", SYRIAC), ("Tagalog", TAGALOG), ("Tagbanwa", TAGBANWA),
+  ("Tai_Le", TAI_LE), ("Tai_Tham", TAI_THAM), ("Tai_Viet", TAI_VIET),
+  ("Takri", TAKRI), ("Tamil", TAMIL), ("Tangut", TANGUT), ("Telugu", TELUGU),
+  ("Thaana", THAANA), ("Thai", THAI), ("Tibetan", TIBETAN),
+  ("Tifinagh", TIFINAGH), ("Tirhuta", TIRHUTA), ("Ugaritic", UGARITIC),
+  ("Vai", VAI), ("Warang_Citi", WARANG_CITI), ("Yi", YI),
+  ("Zanabazar_Square", ZANABAZAR_SQUARE),
+];
+
+pub const ADLAM: &'static [(char, char)] = &[
+  ('𞤀', '𞥊'), ('𞥐', '𞥙'), ('𞥞', '𞥟'),
+];
+
+pub const AHOM: &'static [(char, char)] = &[
+  ('𑜀', '𑜙'), ('𑜝', '𑜫'), ('𑜰', '𑜿'),
+];
+
+pub const ANATOLIAN_HIEROGLYPHS: &'static [(char, char)] = &[
+  ('𔐀', '𔙆'),
+];
+
+pub const ARABIC: &'static [(char, char)] = &[
+  ('\u{600}', '\u{604}'), ('؆', '؋'), ('؍', 'ؚ'), ('\u{61c}', '\u{61c}'),
+  ('؞', '؞'), ('ؠ', 'ؿ'), ('ف', 'ي'), ('ٖ', 'ٯ'), ('ٱ', 'ۜ'),
+  ('۞', 'ۿ'), ('ݐ', 'ݿ'), ('ࢠ', 'ࢴ'), ('ࢶ', 'ࢽ'), ('ࣔ', '࣡'),
+  ('ࣣ', 'ࣿ'), ('ﭐ', '﯁'), ('ﯓ', 'ﴽ'), ('ﵐ', 'ﶏ'),
+  ('ﶒ', 'ﷇ'), ('ﷰ', '﷽'), ('ﹰ', 'ﹴ'), ('ﹶ', 'ﻼ'),
+  ('𐹠', '𐹾'), ('𞸀', '𞸃'), ('𞸅', '𞸟'), ('𞸡', '𞸢'),
+  ('𞸤', '𞸤'), ('𞸧', '𞸧'), ('𞸩', '𞸲'), ('𞸴', '𞸷'),
+  ('𞸹', '𞸹'), ('𞸻', '𞸻'), ('𞹂', '𞹂'), ('𞹇', '𞹇'),
+  ('𞹉', '𞹉'), ('𞹋', '𞹋'), ('𞹍', '𞹏'), ('𞹑', '𞹒'),
+  ('𞹔', '𞹔'), ('𞹗', '𞹗'), ('𞹙', '𞹙'), ('𞹛', '𞹛'),
+  ('𞹝', '𞹝'), ('𞹟', '𞹟'), ('𞹡', '𞹢'), ('𞹤', '𞹤'),
+  ('𞹧', '𞹪'), ('𞹬', '𞹲'), ('𞹴', '𞹷'), ('𞹹', '𞹼'),
+  ('𞹾', '𞹾'), ('𞺀', '𞺉'), ('𞺋', '𞺛'), ('𞺡', '𞺣'),
+  ('𞺥', '𞺩'), ('𞺫', '𞺻'), ('𞻰', '𞻱'),
+];
+
+pub const ARMENIAN: &'static [(char, char)] = &[
+  ('Ա', 'Ֆ'), ('ՙ', '՟'), ('ա', 'և'), ('֊', '֊'), ('֍', '֏'),
+  ('ﬓ', 'ﬗ'),
+];
+
+pub const AVESTAN: &'static [(char, char)] = &[
+  ('𐬀', '𐬵'), ('𐬹', '𐬿'),
+];
+
+pub const BALINESE: &'static [(char, char)] = &[
+  ('ᬀ', 'ᭋ'), ('᭐', '᭼'),
+];
+
+pub const BAMUM: &'static [(char, char)] = &[
+  ('ꚠ', '꛷'), ('𖠀', '𖨸'),
+];
+
+pub const BASSA_VAH: &'static [(char, char)] = &[
+  ('𖫐', '𖫭'), ('𖫰', '𖫵'),
+];
+
+pub const BATAK: &'static [(char, char)] = &[
+  ('ᯀ', '᯳'), ('᯼', '᯿'),
+];
+
+pub const BENGALI: &'static [(char, char)] = &[
+  ('ঀ', 'ঃ'), ('অ', 'ঌ'), ('এ', 'ঐ'), ('ও', 'ন'),
+  ('প', 'র'), ('ল', 'ল'), ('শ', 'হ'), ('়', 'ৄ'),
+  ('ে', 'ৈ'), ('ো', 'ৎ'), ('ৗ', 'ৗ'), ('ড়', 'ঢ়'),
+  ('য়', 'ৣ'), ('০', '৽'),
+];
+
+pub const BHAIKSUKI: &'static [(char, char)] = &[
+  ('𑰀', '𑰈'), ('𑰊', '𑰶'), ('𑰸', '𑱅'), ('𑱐', '𑱬'),
+];
+
+pub const BOPOMOFO: &'static [(char, char)] = &[
+  ('˪', '˫'), ('ㄅ', 'ㄮ'), ('ㆠ', 'ㆺ'),
+];
+
+pub const BRAHMI: &'static [(char, char)] = &[
+  ('𑀀', '𑁍'), ('𑁒', '𑁯'), ('𑁿', '𑁿'),
+];
+
+pub const BRAILLE: &'static [(char, char)] = &[
+  ('⠀', '⣿'),
+];
+
+pub const BUGINESE: &'static [(char, char)] = &[
+  ('ᨀ', 'ᨛ'), ('᨞', '᨟'),
+];
+
+pub const BUHID: &'static [(char, char)] = &[
+  ('ᝀ', 'ᝓ'),
+];
+
+pub const CANADIAN_ABORIGINAL: &'static [(char, char)] = &[
+  ('᐀', 'ᙿ'), ('ᢰ', 'ᣵ'),
+];
+
+pub const CARIAN: &'static [(char, char)] = &[
+  ('𐊠', '𐋐'),
+];
+
+pub const CAUCASIAN_ALBANIAN: &'static [(char, char)] = &[
+  ('𐔰', '𐕣'), ('𐕯', '𐕯'),
+];
+
+pub const CHAKMA: &'static [(char, char)] = &[
+  ('𑄀', '𑄴'), ('𑄶', '𑅃'),
+];
+
+pub const CHAM: &'static [(char, char)] = &[
+  ('ꨀ', 'ꨶ'), ('ꩀ', 'ꩍ'), ('꩐', '꩙'), ('꩜', '꩟'),
+];
+
+pub const CHEROKEE: &'static [(char, char)] = &[
+  ('Ꭰ', 'Ᏽ'), ('ᏸ', 'ᏽ'), ('ꭰ', 'ꮿ'),
+];
+
+pub const COMMON: &'static [(char, char)] = &[
+  ('\u{0}', '@'), ('[', '`'), ('{', '©'), ('«', '¹'), ('»', '¿'),
+  ('×', '×'), ('÷', '÷'), ('ʹ', '˟'), ('˥', '˩'), ('ˬ', '˿'),
+  ('ʹ', 'ʹ'), (';', ';'), ('΅', '΅'), ('·', '·'), ('։', '։'),
+  ('\u{605}', '\u{605}'), ('،', '،'), ('؛', '؛'), ('؟', '؟'),
+  ('ـ', 'ـ'), ('\u{6dd}', '\u{6dd}'), ('\u{8e2}', '\u{8e2}'),
+  ('।', '॥'), ('฿', '฿'), ('࿕', '࿘'), ('჻', '჻'),
+  ('᛫', '᛭'), ('᜵', '᜶'), ('᠂', '᠃'), ('᠅', '᠅'),
+  ('᳓', '᳓'), ('᳡', '᳡'), ('ᳩ', 'ᳬ'), ('ᳮ', 'ᳳ'),
+  ('ᳵ', '᳷'), ('\u{2000}', '\u{200b}'), ('\u{200e}', '\u{2064}'),
+  ('\u{2066}', '⁰'), ('⁴', '⁾'), ('₀', '₎'), ('₠', '₿'),
+  ('℀', '℥'), ('℧', '℩'), ('ℬ', 'ℱ'), ('ℳ', '⅍'),
+  ('⅏', '⅟'), ('↉', '↋'), ('←', '␦'), ('⑀', '⑊'),
+  ('①', '⟿'), ('⤀', '⭳'), ('⭶', '⮕'), ('⮘', '⮹'),
+  ('⮽', '⯈'), ('⯊', '⯒'), ('⯬', '⯯'), ('⸀', '⹉'),
+  ('⿰', '⿻'), ('\u{3000}', '〄'), ('〆', '〆'), ('〈', '〠'),
+  ('〰', '〷'), ('〼', '〿'), ('゛', '゜'), ('゠', '゠'),
+  ('・', 'ー'), ('㆐', '㆟'), ('㇀', '㇣'), ('㈠', '㉟'),
+  ('㉿', '㋏'), ('㍘', '㏿'), ('䷀', '䷿'), ('꜀', '꜡'),
+  ('ꞈ', '꞊'), ('꠰', '꠹'), ('꤮', '꤮'), ('ꧏ', 'ꧏ'),
+  ('꭛', '꭛'), ('﴾', '﴿'), ('︐', '︙'), ('︰', '﹒'),
+  ('﹔', '﹦'), ('﹨', '﹫'), ('\u{feff}', '\u{feff}'), ('!', '@'),
+  ('[', '`'), ('{', '・'), ('ー', 'ー'), ('゙', '゚'),
+  ('¢', '₩'), ('│', '○'), ('\u{fff9}', '�'), ('𐄀', '𐄂'),
+  ('𐄇', '𐄳'), ('𐄷', '𐄿'), ('𐆐', '𐆛'), ('𐇐', '𐇼'),
+  ('𐋡', '𐋻'), ('\u{1bca0}', '\u{1bca3}'), ('𝀀', '𝃵'),
+  ('𝄀', '𝄦'), ('𝄩', '𝅦'), ('𝅪', '\u{1d17a}'), ('𝆃', '𝆄'),
+  ('𝆌', '𝆩'), ('𝆮', '𝇨'), ('𝌀', '𝍖'), ('𝍠', '𝍱'),
+  ('𝐀', '𝑔'), ('𝑖', '𝒜'), ('𝒞', '𝒟'), ('𝒢', '𝒢'),
+  ('𝒥', '𝒦'), ('𝒩', '𝒬'), ('𝒮', '𝒹'), ('𝒻', '𝒻'),
+  ('𝒽', '𝓃'), ('𝓅', '𝔅'), ('𝔇', '𝔊'), ('𝔍', '𝔔'),
+  ('𝔖', '𝔜'), ('𝔞', '𝔹'), ('𝔻', '𝔾'), ('𝕀', '𝕄'),
+  ('𝕆', '𝕆'), ('𝕊', '𝕐'), ('𝕒', '𝚥'), ('𝚨', '𝟋'),
+  ('𝟎', '𝟿'), ('🀀', '🀫'), ('🀰', '🂓'), ('🂠', '🂮'),
+  ('🂱', '🂿'), ('🃁', '🃏'), ('🃑', '🃵'), ('🄀', '🄌'),
+  ('🄐', '🄮'), ('🄰', '🅫'), ('🅰', '🆬'), ('🇦', '🇿'),
+  ('🈁', '🈂'), ('🈐', '🈻'), ('🉀', '🉈'), ('🉐', '🉑'),
+  ('🉠', '🉥'), ('🌀', '🛔'), ('🛠', '🛬'), ('🛰', '🛸'),
+  ('🜀', '🝳'), ('🞀', '🟔'), ('🠀', '🠋'), ('🠐', '🡇'),
+  ('🡐', '🡙'), ('🡠', '🢇'), ('🢐', '🢭'), ('🤀', '🤋'),
+  ('🤐', '🤾'), ('🥀', '🥌'), ('🥐', '🥫'), ('🦀', '🦗'),
+  ('🧀', '🧀'), ('🧐', '🧦'), ('\u{e0001}', '\u{e0001}'),
+  ('\u{e0020}', '\u{e007f}'),
+];
+
+pub const COPTIC: &'static [(char, char)] = &[
+  ('Ϣ', 'ϯ'), ('Ⲁ', 'ⳳ'), ('⳹', '⳿'),
+];
+
+pub const CUNEIFORM: &'static [(char, char)] = &[
+  ('𒀀', '𒎙'), ('𒐀', '𒑮'), ('𒑰', '𒑴'), ('𒒀', '𒕃'),
+];
+
+pub const CYPRIOT: &'static [(char, char)] = &[
+  ('𐠀', '𐠅'), ('𐠈', '𐠈'), ('𐠊', '𐠵'), ('𐠷', '𐠸'),
+  ('𐠼', '𐠼'), ('𐠿', '𐠿'),
+];
+
+pub const CYRILLIC: &'static [(char, char)] = &[
+  ('Ѐ', '҄'), ('҇', 'ԯ'), ('ᲀ', 'ᲈ'), ('ᴫ', 'ᴫ'), ('ᵸ', 'ᵸ'),
+  ('ⷠ', 'ⷿ'), ('Ꙁ', 'ꚟ'), ('︮', '︯'),
+];
+
+pub const DESERET: &'static [(char, char)] = &[
+  ('𐐀', '𐑏'),
+];
+
+pub const DEVANAGARI: &'static [(char, char)] = &[
+  ('ऀ', 'ॐ'), ('॓', 'ॣ'), ('०', 'ॿ'), ('꣠', 'ꣽ'),
+];
+
+pub const DUPLOYAN: &'static [(char, char)] = &[
+  ('𛰀', '𛱪'), ('𛱰', '𛱼'), ('𛲀', '𛲈'), ('𛲐', '𛲙'),
+  ('𛲜', '𛲟'),
+];
+
+pub const EGYPTIAN_HIEROGLYPHS: &'static [(char, char)] = &[
+  ('𓀀', '𓐮'),
+];
+
+pub const ELBASAN: &'static [(char, char)] = &[
+  ('𐔀', '𐔧'),
+];
+
+pub const ETHIOPIC: &'static [(char, char)] = &[
+  ('ሀ', 'ቈ'), ('ቊ', 'ቍ'), ('ቐ', 'ቖ'), ('ቘ', 'ቘ'),
+  ('ቚ', 'ቝ'), ('በ', 'ኈ'), ('ኊ', 'ኍ'), ('ነ', 'ኰ'),
+  ('ኲ', 'ኵ'), ('ኸ', 'ኾ'), ('ዀ', 'ዀ'), ('ዂ', 'ዅ'),
+  ('ወ', 'ዖ'), ('ዘ', 'ጐ'), ('ጒ', 'ጕ'), ('ጘ', 'ፚ'),
+  ('፝', '፼'), ('ᎀ', '᎙'), ('ⶀ', 'ⶖ'), ('ⶠ', 'ⶦ'),
+  ('ⶨ', 'ⶮ'), ('ⶰ', 'ⶶ'), ('ⶸ', 'ⶾ'), ('ⷀ', 'ⷆ'),
+  ('ⷈ', 'ⷎ'), ('ⷐ', 'ⷖ'), ('ⷘ', 'ⷞ'), ('ꬁ', 'ꬆ'),
+  ('ꬉ', 'ꬎ'), ('ꬑ', 'ꬖ'), ('ꬠ', 'ꬦ'), ('ꬨ', 'ꬮ'),
+];
+
+pub const GEORGIAN: &'static [(char, char)] = &[
+  ('Ⴀ', 'Ⴥ'), ('Ⴧ', 'Ⴧ'), ('Ⴭ', 'Ⴭ'), ('ა', 'ჺ'),
+  ('ჼ', 'ჿ'), ('ⴀ', 'ⴥ'), ('ⴧ', 'ⴧ'), ('ⴭ', 'ⴭ'),
+];
+
+pub const GLAGOLITIC: &'static [(char, char)] = &[
+  ('Ⰰ', 'Ⱞ'), ('ⰰ', 'ⱞ'), ('𞀀', '𞀆'), ('𞀈', '𞀘'),
+  ('𞀛', '𞀡'), ('𞀣', '𞀤'), ('𞀦', '𞀪'),
+];
+
+pub const GOTHIC: &'static [(char, char)] = &[
+  ('𐌰', '𐍊'),
+];
+
+pub const GRANTHA: &'static [(char, char)] = &[
+  ('𑌀', '𑌃'), ('𑌅', '𑌌'), ('𑌏', '𑌐'), ('𑌓', '𑌨'),
+  ('𑌪', '𑌰'), ('𑌲', '𑌳'), ('𑌵', '𑌹'), ('𑌼', '𑍄'),
+  ('𑍇', '𑍈'), ('𑍋', '𑍍'), ('𑍐', '𑍐'), ('𑍗', '𑍗'),
+  ('𑍝', '𑍣'), ('𑍦', '𑍬'), ('𑍰', '𑍴'),
+];
+
+pub const GREEK: &'static [(char, char)] = &[
+  ('Ͱ', 'ͳ'), ('͵', 'ͷ'), ('ͺ', 'ͽ'), ('Ϳ', 'Ϳ'), ('΄', '΄'),
+  ('Ά', 'Ά'), ('Έ', 'Ί'), ('Ό', 'Ό'), ('Ύ', 'Ρ'), ('Σ', 'ϡ'),
+  ('ϰ', 'Ͽ'), ('ᴦ', 'ᴪ'), ('ᵝ', 'ᵡ'), ('ᵦ', 'ᵪ'),
+  ('ᶿ', 'ᶿ'), ('ἀ', 'ἕ'), ('Ἐ', 'Ἕ'), ('ἠ', 'ὅ'),
+  ('Ὀ', 'Ὅ'), ('ὐ', 'ὗ'), ('Ὑ', 'Ὑ'), ('Ὓ', 'Ὓ'),
+  ('Ὕ', 'Ὕ'), ('Ὗ', 'ώ'), ('ᾀ', 'ᾴ'), ('ᾶ', 'ῄ'),
+  ('ῆ', 'ΐ'), ('ῖ', 'Ί'), ('῝', '`'), ('ῲ', 'ῴ'),
+  ('ῶ', '῾'), ('Ω', 'Ω'), ('ꭥ', 'ꭥ'), ('𐅀', '𐆎'),
+  ('𐆠', '𐆠'), ('𝈀', '𝉅'),
+];
+
+pub const GUJARATI: &'static [(char, char)] = &[
+  ('ઁ', 'ઃ'), ('અ', 'ઍ'), ('એ', 'ઑ'), ('ઓ', 'ન'),
+  ('પ', 'ર'), ('લ', 'ળ'), ('વ', 'હ'), ('઼', 'ૅ'),
+  ('ે', 'ૉ'), ('ો', '્'), ('ૐ', 'ૐ'), ('ૠ', 'ૣ'),
+  ('૦', '૱'), ('ૹ', '૿'),
+];
+
+pub const GURMUKHI: &'static [(char, char)] = &[
+  ('ਁ', 'ਃ'), ('ਅ', 'ਊ'), ('ਏ', 'ਐ'), ('ਓ', 'ਨ'),
+  ('ਪ', 'ਰ'), ('ਲ', 'ਲ਼'), ('ਵ', 'ਸ਼'), ('ਸ', 'ਹ'),
+  ('਼', '਼'), ('ਾ', 'ੂ'), ('ੇ', 'ੈ'), ('ੋ', '੍'),
+  ('ੑ', 'ੑ'), ('ਖ਼', 'ੜ'), ('ਫ਼', 'ਫ਼'), ('੦', 'ੵ'),
+];
+
+pub const HAN: &'static [(char, char)] = &[
+  ('⺀', '⺙'), ('⺛', '⻳'), ('⼀', '⿕'), ('々', '々'),
+  ('〇', '〇'), ('〡', '〩'), ('〸', '〻'), ('㐀', '䶵'),
+  ('一', '鿪'), ('豈', '舘'), ('並', '龎'), ('𠀀', '𪛖'),
+  ('𪜀', '𫜴'), ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'),
+  ('丽', '𪘀'),
+];
+
+pub const HANGUL: &'static [(char, char)] = &[
+  ('ᄀ', 'ᇿ'), ('〮', '〯'), ('ㄱ', 'ㆎ'), ('㈀', '㈞'),
+  ('㉠', '㉾'), ('ꥠ', 'ꥼ'), ('가', '힣'), ('ힰ', 'ퟆ'),
+  ('ퟋ', 'ퟻ'), ('ᅠ', 'ᄒ'), ('ᅡ', 'ᅦ'), ('ᅧ', 'ᅬ'),
+  ('ᅭ', 'ᅲ'), ('ᅳ', 'ᅵ'),
+];
+
+pub const HANUNOO: &'static [(char, char)] = &[
+  ('ᜠ', '᜴'),
+];
+
+pub const HATRAN: &'static [(char, char)] = &[
+  ('𐣠', '𐣲'), ('𐣴', '𐣵'), ('𐣻', '𐣿'),
+];
+
+pub const HEBREW: &'static [(char, char)] = &[
+  ('֑', 'ׇ'), ('א', 'ת'), ('װ', '״'), ('יִ', 'זּ'), ('טּ', 'לּ'),
+  ('מּ', 'מּ'), ('נּ', 'סּ'), ('ףּ', 'פּ'), ('צּ', 'ﭏ'),
+];
+
+pub const HIRAGANA: &'static [(char, char)] = &[
+  ('ぁ', 'ゖ'), ('ゝ', 'ゟ'), ('𛀁', '𛄞'), ('🈀', '🈀'),
+];
+
+pub const IMPERIAL_ARAMAIC: &'static [(char, char)] = &[
+  ('𐡀', '𐡕'), ('𐡗', '𐡟'),
+];
+
+pub const INHERITED: &'static [(char, char)] = &[
+  ('̀', 'ͯ'), ('҅', '҆'), ('ً', 'ٕ'), ('ٰ', 'ٰ'), ('॑', '॒'),
+  ('᪰', '᪾'), ('᳐', '᳒'), ('᳔', '᳠'), ('᳢', '᳨'),
+  ('᳭', '᳭'), ('᳴', '᳴'), ('᳸', '᳹'), ('᷀', '᷹'),
+  ('᷻', '᷿'), ('\u{200c}', '\u{200d}'), ('⃐', '⃰'), ('〪', '〭'),
+  ('゙', '゚'), ('︀', '️'), ('︠', '︭'), ('𐇽', '𐇽'),
+  ('𐋠', '𐋠'), ('𝅧', '𝅩'), ('𝅻', '𝆂'), ('𝆅', '𝆋'),
+  ('𝆪', '𝆭'), ('󠄀', '󠇯'),
+];
+
+pub const INSCRIPTIONAL_PAHLAVI: &'static [(char, char)] = &[
+  ('𐭠', '𐭲'), ('𐭸', '𐭿'),
+];
+
+pub const INSCRIPTIONAL_PARTHIAN: &'static [(char, char)] = &[
+  ('𐭀', '𐭕'), ('𐭘', '𐭟'),
+];
+
+pub const JAVANESE: &'static [(char, char)] = &[
+  ('ꦀ', '꧍'), ('꧐', '꧙'), ('꧞', '꧟'),
+];
+
+pub const KAITHI: &'static [(char, char)] = &[
+  ('𑂀', '𑃁'),
+];
+
+pub const KANNADA: &'static [(char, char)] = &[
+  ('ಀ', 'ಃ'), ('ಅ', 'ಌ'), ('ಎ', 'ಐ'), ('ಒ', 'ನ'),
+  ('ಪ', 'ಳ'), ('ವ', 'ಹ'), ('಼', 'ೄ'), ('ೆ', 'ೈ'),
+  ('ೊ', '್'), ('ೕ', 'ೖ'), ('ೞ', 'ೞ'), ('ೠ', 'ೣ'),
+  ('೦', '೯'), ('ೱ', 'ೲ'),
+];
+
+pub const KATAKANA: &'static [(char, char)] = &[
+  ('ァ', 'ヺ'), ('ヽ', 'ヿ'), ('ㇰ', 'ㇿ'), ('㋐', '㋾'),
+  ('㌀', '㍗'), ('ヲ', 'ッ'), ('ア', 'ン'), ('𛀀', '𛀀'),
+];
+
+pub const KAYAH_LI: &'static [(char, char)] = &[
+  ('꤀', '꤭'), ('꤯', '꤯'),
+];
+
+pub const KHAROSHTHI: &'static [(char, char)] = &[
+  ('𐨀', '𐨃'), ('𐨅', '𐨆'), ('𐨌', '𐨓'), ('𐨕', '𐨗'),
+  ('𐨙', '𐨳'), ('𐨸', '𐨺'), ('𐨿', '𐩇'), ('𐩐', '𐩘'),
+];
+
+pub const KHMER: &'static [(char, char)] = &[
+  ('ក', '៝'), ('០', '៩'), ('៰', '៹'), ('᧠', '᧿'),
+];
+
+pub const KHOJKI: &'static [(char, char)] = &[
+  ('𑈀', '𑈑'), ('𑈓', '𑈾'),
+];
+
+pub const KHUDAWADI: &'static [(char, char)] = &[
+  ('𑊰', '𑋪'), ('𑋰', '𑋹'),
+];
+
+pub const LAO: &'static [(char, char)] = &[
+  ('ກ', 'ຂ'), ('ຄ', 'ຄ'), ('ງ', 'ຈ'), ('ຊ', 'ຊ'),
+  ('ຍ', 'ຍ'), ('ດ', 'ທ'), ('ນ', 'ຟ'), ('ມ', 'ຣ'),
+  ('ລ', 'ລ'), ('ວ', 'ວ'), ('ສ', 'ຫ'), ('ອ', 'ູ'),
+  ('ົ', 'ຽ'), ('ເ', 'ໄ'), ('ໆ', 'ໆ'), ('່', 'ໍ'),
+  ('໐', '໙'), ('ໜ', 'ໟ'),
+];
+
+pub const LATIN: &'static [(char, char)] = &[
+  ('A', 'Z'), ('a', 'z'), ('ª', 'ª'), ('º', 'º'), ('À', 'Ö'),
+  ('Ø', 'ö'), ('ø', 'ʸ'), ('ˠ', 'ˤ'), ('ᴀ', 'ᴥ'), ('ᴬ', 'ᵜ'),
+  ('ᵢ', 'ᵥ'), ('ᵫ', 'ᵷ'), ('ᵹ', 'ᶾ'), ('Ḁ', 'ỿ'),
+  ('ⁱ', 'ⁱ'), ('ⁿ', 'ⁿ'), ('ₐ', 'ₜ'), ('K', 'Å'),
+  ('Ⅎ', 'Ⅎ'), ('ⅎ', 'ⅎ'), ('Ⅰ', 'ↈ'), ('Ⱡ', 'Ɀ'),
+  ('Ꜣ', 'ꞇ'), ('Ꞌ', 'Ɪ'), ('Ʞ', 'ꞷ'), ('ꟷ', 'ꟿ'),
+  ('ꬰ', 'ꭚ'), ('ꭜ', 'ꭤ'), ('ff', 'st'), ('A', 'Z'),
+  ('a', 'z'),
+];
+
+pub const LEPCHA: &'static [(char, char)] = &[
+  ('ᰀ', '᰷'), ('᰻', '᱉'), ('ᱍ', 'ᱏ'),
+];
+
+pub const LIMBU: &'static [(char, char)] = &[
+  ('ᤀ', 'ᤞ'), ('ᤠ', 'ᤫ'), ('ᤰ', '᤻'), ('᥀', '᥀'),
+  ('᥄', '᥏'),
+];
+
+pub const LINEAR_A: &'static [(char, char)] = &[
+  ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧'),
+];
+
+pub const LINEAR_B: &'static [(char, char)] = &[
+  ('𐀀', '𐀋'), ('𐀍', '𐀦'), ('𐀨', '𐀺'), ('𐀼', '𐀽'),
+  ('𐀿', '𐁍'), ('𐁐', '𐁝'), ('𐂀', '𐃺'),
+];
+
+pub const LISU: &'static [(char, char)] = &[
+  ('ꓐ', '꓿'),
+];
+
+pub const LYCIAN: &'static [(char, char)] = &[
+  ('𐊀', '𐊜'),
+];
+
+pub const LYDIAN: &'static [(char, char)] = &[
+  ('𐤠', '𐤹'), ('𐤿', '𐤿'),
+];
+
+pub const MAHAJANI: &'static [(char, char)] = &[
+  ('𑅐', '𑅶'),
+];
+
+pub const MALAYALAM: &'static [(char, char)] = &[
+  ('ഀ', 'ഃ'), ('അ', 'ഌ'), ('എ', 'ഐ'), ('ഒ', 'ൄ'),
+  ('െ', 'ൈ'), ('ൊ', '൏'), ('ൔ', 'ൣ'), ('൦', 'ൿ'),
+];
+
+pub const MANDAIC: &'static [(char, char)] = &[
+  ('ࡀ', '࡛'), ('࡞', '࡞'),
+];
+
+pub const MANICHAEAN: &'static [(char, char)] = &[
+  ('𐫀', '𐫦'), ('𐫫', '𐫶'),
+];
+
+pub const MARCHEN: &'static [(char, char)] = &[
+  ('𑱰', '𑲏'), ('𑲒', '𑲧'), ('𑲩', '𑲶'),
+];
+
+pub const MASARAM_GONDI: &'static [(char, char)] = &[
+  ('𑴀', '𑴆'), ('𑴈', '𑴉'), ('𑴋', '𑴶'), ('𑴺', '𑴺'),
+  ('𑴼', '𑴽'), ('𑴿', '𑵇'), ('𑵐', '𑵙'),
+];
+
+pub const MEETEI_MAYEK: &'static [(char, char)] = &[
+  ('ꫠ', '꫶'), ('ꯀ', '꯭'), ('꯰', '꯹'),
+];
+
+pub const MENDE_KIKAKUI: &'static [(char, char)] = &[
+  ('𞠀', '𞣄'), ('𞣇', '𞣖'),
+];
+
+pub const MEROITIC_CURSIVE: &'static [(char, char)] = &[
+  ('𐦠', '𐦷'), ('𐦼', '𐧏'), ('𐧒', '𐧿'),
+];
+
+pub const MEROITIC_HIEROGLYPHS: &'static [(char, char)] = &[
+  ('𐦀', '𐦟'),
+];
+
+pub const MIAO: &'static [(char, char)] = &[
+  ('𖼀', '𖽄'), ('𖽐', '𖽾'), ('𖾏', '𖾟'),
+];
+
+pub const MODI: &'static [(char, char)] = &[
+  ('𑘀', '𑙄'), ('𑙐', '𑙙'),
+];
+
+pub const MONGOLIAN: &'static [(char, char)] = &[
+  ('᠀', '᠁'), ('᠄', '᠄'), ('᠆', '\u{180e}'), ('᠐', '᠙'),
+  ('ᠠ', 'ᡷ'), ('ᢀ', 'ᢪ'), ('𑙠', '𑙬'),
+];
+
+pub const MRO: &'static [(char, char)] = &[
+  ('𖩀', '𖩞'), ('𖩠', '𖩩'), ('𖩮', '𖩯'),
+];
+
+pub const MULTANI: &'static [(char, char)] = &[
+  ('𑊀', '𑊆'), ('𑊈', '𑊈'), ('𑊊', '𑊍'), ('𑊏', '𑊝'),
+  ('𑊟', '𑊩'),
+];
+
+pub const MYANMAR: &'static [(char, char)] = &[
+  ('က', '႟'), ('ꧠ', 'ꧾ'), ('ꩠ', 'ꩿ'),
+];
+
+pub const NABATAEAN: &'static [(char, char)] = &[
+  ('𐢀', '𐢞'), ('𐢧', '𐢯'),
+];
+
+pub const NEW_TAI_LUE: &'static [(char, char)] = &[
+  ('ᦀ', 'ᦫ'), ('ᦰ', 'ᧉ'), ('᧐', '᧚'), ('᧞', '᧟'),
+];
+
+pub const NEWA: &'static [(char, char)] = &[
+  ('𑐀', '𑑙'), ('𑑛', '𑑛'), ('𑑝', '𑑝'),
+];
+
+pub const NKO: &'static [(char, char)] = &[
+  ('߀', 'ߺ'),
+];
+
+pub const NUSHU: &'static [(char, char)] = &[
+  ('𖿡', '𖿡'), ('𛅰', '𛋻'),
+];
+
+pub const OGHAM: &'static [(char, char)] = &[
+  ('\u{1680}', '᚜'),
+];
+
+pub const OL_CHIKI: &'static [(char, char)] = &[
+  ('᱐', '᱿'),
+];
+
+pub const OLD_HUNGARIAN: &'static [(char, char)] = &[
+  ('𐲀', '𐲲'), ('𐳀', '𐳲'), ('𐳺', '𐳿'),
+];
+
+pub const OLD_ITALIC: &'static [(char, char)] = &[
+  ('𐌀', '𐌣'), ('𐌭', '𐌯'),
+];
+
+pub const OLD_NORTH_ARABIAN: &'static [(char, char)] = &[
+  ('𐪀', '𐪟'),
+];
+
+pub const OLD_PERMIC: &'static [(char, char)] = &[
+  ('𐍐', '𐍺'),
+];
+
+pub const OLD_PERSIAN: &'static [(char, char)] = &[
+  ('𐎠', '𐏃'), ('𐏈', '𐏕'),
+];
+
+pub const OLD_SOUTH_ARABIAN: &'static [(char, char)] = &[
+  ('𐩠', '𐩿'),
+];
+
+pub const OLD_TURKIC: &'static [(char, char)] = &[
+  ('𐰀', '𐱈'),
+];
+
+pub const ORIYA: &'static [(char, char)] = &[
+  ('ଁ', 'ଃ'), ('ଅ', 'ଌ'), ('ଏ', 'ଐ'), ('ଓ', 'ନ'),
+  ('ପ', 'ର'), ('ଲ', 'ଳ'), ('ଵ', 'ହ'), ('଼', 'ୄ'),
+  ('େ', 'ୈ'), ('ୋ', '୍'), ('ୖ', 'ୗ'), ('ଡ଼', 'ଢ଼'),
+  ('ୟ', 'ୣ'), ('୦', '୷'),
+];
+
+pub const OSAGE: &'static [(char, char)] = &[
+  ('𐒰', '𐓓'), ('𐓘', '𐓻'),
+];
+
+pub const OSMANYA: &'static [(char, char)] = &[
+  ('𐒀', '𐒝'), ('𐒠', '𐒩'),
+];
+
+pub const PAHAWH_HMONG: &'static [(char, char)] = &[
+  ('𖬀', '𖭅'), ('𖭐', '𖭙'), ('𖭛', '𖭡'), ('𖭣', '𖭷'),
+  ('𖭽', '𖮏'),
+];
+
+pub const PALMYRENE: &'static [(char, char)] = &[
+  ('𐡠', '𐡿'),
+];
+
+pub const PAU_CIN_HAU: &'static [(char, char)] = &[
+  ('𑫀', '𑫸'),
+];
+
+pub const PHAGS_PA: &'static [(char, char)] = &[
+  ('ꡀ', '꡷'),
+];
+
+pub const PHOENICIAN: &'static [(char, char)] = &[
+  ('𐤀', '𐤛'), ('𐤟', '𐤟'),
+];
+
+pub const PSALTER_PAHLAVI: &'static [(char, char)] = &[
+  ('𐮀', '𐮑'), ('𐮙', '𐮜'), ('𐮩', '𐮯'),
+];
+
+pub const REJANG: &'static [(char, char)] = &[
+  ('ꤰ', '꥓'), ('꥟', '꥟'),
+];
+
+pub const RUNIC: &'static [(char, char)] = &[
+  ('ᚠ', 'ᛪ'), ('ᛮ', 'ᛸ'),
+];
+
+pub const SAMARITAN: &'static [(char, char)] = &[
+  ('ࠀ', '࠭'), ('࠰', '࠾'),
+];
+
+pub const SAURASHTRA: &'static [(char, char)] = &[
+  ('ꢀ', 'ꣅ'), ('꣎', '꣙'),
+];
+
+pub const SHARADA: &'static [(char, char)] = &[
+  ('𑆀', '𑇍'), ('𑇐', '𑇟'),
+];
+
+pub const SHAVIAN: &'static [(char, char)] = &[
+  ('𐑐', '𐑿'),
+];
+
+pub const SIDDHAM: &'static [(char, char)] = &[
+  ('𑖀', '𑖵'), ('𑖸', '𑗝'),
+];
+
+pub const SIGNWRITING: &'static [(char, char)] = &[
+  ('𝠀', '𝪋'), ('𝪛', '𝪟'), ('𝪡', '𝪯'),
+];
+
+pub const SINHALA: &'static [(char, char)] = &[
+  ('ං', 'ඃ'), ('අ', 'ඖ'), ('ක', 'න'), ('ඳ', 'ර'),
+  ('ල', 'ල'), ('ව', 'ෆ'), ('්', '්'), ('ා', 'ු'),
+  ('ූ', 'ූ'), ('ෘ', 'ෟ'), ('෦', '෯'), ('ෲ', '෴'),
+  ('𑇡', '𑇴'),
+];
+
+pub const SORA_SOMPENG: &'static [(char, char)] = &[
+  ('𑃐', '𑃨'), ('𑃰', '𑃹'),
+];
+
+pub const SOYOMBO: &'static [(char, char)] = &[
+  ('𑩐', '𑪃'), ('𑪆', '𑪜'), ('𑪞', '𑪢'),
+];
+
+pub const SUNDANESE: &'static [(char, char)] = &[
+  ('ᮀ', 'ᮿ'), ('᳀', '᳇'),
+];
+
+pub const SYLOTI_NAGRI: &'static [(char, char)] = &[
+  ('ꠀ', '꠫'),
+];
+
+pub const SYRIAC: &'static [(char, char)] = &[
+  ('܀', '܍'), ('\u{70f}', '݊'), ('ݍ', 'ݏ'), ('ࡠ', 'ࡪ'),
+];
+
+pub const TAGALOG: &'static [(char, char)] = &[
+  ('ᜀ', 'ᜌ'), ('ᜎ', '᜔'),
+];
+
+pub const TAGBANWA: &'static [(char, char)] = &[
+  ('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), ('ᝲ', 'ᝳ'),
+];
+
+pub const TAI_LE: &'static [(char, char)] = &[
+  ('ᥐ', 'ᥭ'), ('ᥰ', 'ᥴ'),
+];
+
+pub const TAI_THAM: &'static [(char, char)] = &[
+  ('ᨠ', 'ᩞ'), ('᩠', '᩼'), ('᩿', '᪉'), ('᪐', '᪙'),
+  ('᪠', '᪭'),
+];
+
+pub const TAI_VIET: &'static [(char, char)] = &[
+  ('ꪀ', 'ꫂ'), ('ꫛ', '꫟'),
+];
+
+pub const TAKRI: &'static [(char, char)] = &[
+  ('𑚀', '𑚷'), ('𑛀', '𑛉'),
+];
+
+pub const TAMIL: &'static [(char, char)] = &[
+  ('ஂ', 'ஃ'), ('அ', 'ஊ'), ('எ', 'ஐ'), ('ஒ', 'க'),
+  ('ங', 'ச'), ('ஜ', 'ஜ'), ('ஞ', 'ட'), ('ண', 'த'),
+  ('ந', 'ப'), ('ம', 'ஹ'), ('ா', 'ூ'), ('ெ', 'ை'),
+  ('ொ', '்'), ('ௐ', 'ௐ'), ('ௗ', 'ௗ'), ('௦', '௺'),
+];
+
+pub const TANGUT: &'static [(char, char)] = &[
+  ('𖿠', '𖿠'), ('𗀀', '𘟬'), ('𘠀', '𘫲'),
+];
+
+pub const TELUGU: &'static [(char, char)] = &[
+  ('ఀ', 'ః'), ('అ', 'ఌ'), ('ఎ', 'ఐ'), ('ఒ', 'న'),
+  ('ప', 'హ'), ('ఽ', 'ౄ'), ('ె', 'ై'), ('ొ', '్'),
+  ('ౕ', 'ౖ'), ('ౘ', 'ౚ'), ('ౠ', 'ౣ'), ('౦', '౯'),
+  ('౸', '౿'),
+];
+
+pub const THAANA: &'static [(char, char)] = &[
+  ('ހ', 'ޱ'),
+];
+
+pub const THAI: &'static [(char, char)] = &[
+  ('ก', 'ฺ'), ('เ', '๛'),
+];
+
+pub const TIBETAN: &'static [(char, char)] = &[
+  ('ༀ', 'ཇ'), ('ཉ', 'ཬ'), ('ཱ', 'ྗ'), ('ྙ', 'ྼ'),
+  ('྾', '࿌'), ('࿎', '࿔'), ('࿙', '࿚'),
+];
+
+pub const TIFINAGH: &'static [(char, char)] = &[
+  ('ⴰ', 'ⵧ'), ('ⵯ', '⵰'), ('⵿', '⵿'),
+];
+
+pub const TIRHUTA: &'static [(char, char)] = &[
+  ('𑒀', '𑓇'), ('𑓐', '𑓙'),
+];
+
+pub const UGARITIC: &'static [(char, char)] = &[
+  ('𐎀', '𐎝'), ('𐎟', '𐎟'),
+];
+
+pub const VAI: &'static [(char, char)] = &[
+  ('ꔀ', 'ꘫ'),
+];
+
+pub const WARANG_CITI: &'static [(char, char)] = &[
+  ('𑢠', '𑣲'), ('𑣿', '𑣿'),
+];
+
+pub const YI: &'static [(char, char)] = &[
+  ('ꀀ', 'ꒌ'), ('꒐', '꓆'),
+];
+
+pub const ZANABAZAR_SQUARE: &'static [(char, char)] = &[
+  ('𑨀', '𑩇'),
+];
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex-syntax/src/unicode_tables/script_extension.rs
@@ -0,0 +1,785 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+//  ucd-generate script-extension tmp/ucd-10.0.0/ --chars
+//
+// ucd-generate is available on crates.io.
+
+pub const BY_NAME: &'static [(&'static str, &'static [(char, char)])] = &[
+  ("Adlam", ADLAM), ("Ahom", AHOM),
+  ("Anatolian_Hieroglyphs", ANATOLIAN_HIEROGLYPHS), ("Arabic", ARABIC),
+  ("Armenian", ARMENIAN), ("Avestan", AVESTAN), ("Balinese", BALINESE),
+  ("Bamum", BAMUM), ("Bassa_Vah", BASSA_VAH), ("Batak", BATAK),
+  ("Bengali", BENGALI), ("Bhaiksuki", BHAIKSUKI), ("Bopomofo", BOPOMOFO),
+  ("Brahmi", BRAHMI), ("Braille", BRAILLE), ("Buginese", BUGINESE),
+  ("Buhid", BUHID), ("Canadian_Aboriginal", CANADIAN_ABORIGINAL),
+  ("Carian", CARIAN), ("Caucasian_Albanian", CAUCASIAN_ALBANIAN),
+  ("Chakma", CHAKMA), ("Cham", CHAM), ("Cherokee", CHEROKEE),
+  ("Common", COMMON), ("Coptic", COPTIC), ("Cuneiform", CUNEIFORM),
+  ("Cypriot", CYPRIOT), ("Cyrillic", CYRILLIC), ("Deseret", DESERET),
+  ("Devanagari", DEVANAGARI), ("Duployan", DUPLOYAN),
+  ("Egyptian_Hieroglyphs", EGYPTIAN_HIEROGLYPHS), ("Elbasan", ELBASAN),
+  ("Ethiopic", ETHIOPIC), ("Georgian", GEORGIAN), ("Glagolitic", GLAGOLITIC),
+  ("Gothic", GOTHIC), ("Grantha", GRANTHA), ("Greek", GREEK),
+  ("Gujarati", GUJARATI), ("Gurmukhi", GURMUKHI), ("Han", HAN),
+  ("Hangul", HANGUL), ("Hanunoo", HANUNOO), ("Hatran", HATRAN),
+  ("Hebrew", HEBREW), ("Hiragana", HIRAGANA),
+  ("Imperial_Aramaic", IMPERIAL_ARAMAIC), ("Inherited", INHERITED),
+  ("Inscriptional_Pahlavi", INSCRIPTIONAL_PAHLAVI),
+  ("Inscriptional_Parthian", INSCRIPTIONAL_PARTHIAN), ("Javanese", JAVANESE),
+  ("Kaithi", KAITHI), ("Kannada", KANNADA), ("Katakana", KATAKANA),
+  ("Kayah_Li", KAYAH_LI), ("Kharoshthi", KHAROSHTHI), ("Khmer", KHMER),
+  ("Khojki", KHOJKI), ("Khudawadi", KHUDAWADI), ("Lao", LAO),
+  ("Latin", LATIN), ("Lepcha", LEPCHA), ("Limbu", LIMBU),
+  ("Linear_A", LINEAR_A), ("Linear_B", LINEAR_B), ("Lisu", LISU),
+  ("Lycian", LYCIAN), ("Lydian", LYDIAN), ("Mahajani", MAHAJANI),
+  ("Malayalam", MALAYALAM), ("Mandaic", MANDAIC), ("Manichaean", MANICHAEAN),
+  ("Marchen", MARCHEN), ("Masaram_Gondi", MASARAM_GONDI),
+  ("Meetei_Mayek", MEETEI_MAYEK), ("Mende_Kikakui", MENDE_KIKAKUI),
+  ("Meroitic_Cursive", MEROITIC_CURSIVE),
+  ("Meroitic_Hieroglyphs", MEROITIC_HIEROGLYPHS), ("Miao", MIAO),
+  ("Modi", MODI), ("Mongolian", MONGOLIAN), ("Mro", MRO),
+  ("Multani", MULTANI), ("Myanmar", MYANMAR), ("Nabataean", NABATAEAN),
+  ("New_Tai_Lue", NEW_TAI_LUE), ("Newa", NEWA), ("Nko", NKO),
+  ("Nushu", NUSHU), ("Ogham", OGHAM), ("Ol_Chiki", OL_CHIKI),
+  ("Old_Hungarian", OLD_HUNGARIAN), ("Old_Italic", OLD_ITALIC),
+  ("Old_North_Arabian", OLD_NORTH_ARABIAN), ("Old_Permic", OLD_PERMIC),
+  ("Old_Persian", OLD_PERSIAN), ("Old_South_Arabian", OLD_SOUTH_ARABIAN),
+  ("Old_Turkic", OLD_TURKIC), ("Oriya", ORIYA), ("Osage", OSAGE),
+  ("Osmanya", OSMANYA), ("Pahawh_Hmong", PAHAWH_HMONG),
+  ("Palmyrene", PALMYRENE), ("Pau_Cin_Hau", PAU_CIN_HAU),
+  ("Phags_Pa", PHAGS_PA), ("Phoenician", PHOENICIAN),
+  ("Psalter_Pahlavi", PSALTER_PAHLAVI), ("Rejang", REJANG), ("Runic", RUNIC),
+  ("Samaritan", SAMARITAN), ("Saurashtra", SAURASHTRA), ("Sharada", SHARADA),
+  ("Shavian", SHAVIAN), ("Siddham", SIDDHAM), ("SignWriting", SIGNWRITING),
+  ("Sinhala", SINHALA), ("Sora_Sompeng", SORA_SOMPENG), ("Soyombo", SOYOMBO),
+  ("Sundanese", SUNDANESE), ("Syloti_Nagri", SYLOTI_NAGRI),
+  ("Syriac", SYRIAC), ("Tagalog", TAGALOG), ("Tagbanwa", TAGBANWA),
+  ("Tai_Le", TAI_LE), ("Tai_Tham", TAI_THAM), ("Tai_Viet", TAI_VIET),
+  ("Takri", TAKRI), ("Tamil", TAMIL), ("Tangut", TANGUT), ("Telugu", TELUGU),
+  ("Thaana", THAANA), ("Thai", THAI), ("Tibetan", TIBETAN),
+  ("Tifinagh", TIFINAGH), ("Tirhuta", TIRHUTA), ("Ugaritic", UGARITIC),
+  ("Vai", VAI), ("Warang_Citi", WARANG_CITI), ("Yi", YI),
+  ("Zanabazar_Square", ZANABAZAR_SQUARE),
+];
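The `BY_NAME` table above is sorted by script-extension name, and each per-script table is sorted by starting code point, so a consumer can resolve a lookup with two binary searches. A minimal sketch of such a consumer, assuming the generated constants are in scope; the helper names are illustrative and not part of the vendored crate:

    // Find the ranges registered for a script-extension name, e.g. "Greek".
    // (Illustrative helper; not part of the generated file.)
    fn ranges_for(name: &str) -> Option<&'static [(char, char)]> {
        BY_NAME
            .binary_search_by(|&(n, _)| n.cmp(name))
            .ok()
            .map(|i| BY_NAME[i].1)
    }

    // Test whether `c` falls inside one of the inclusive (start, end) ranges.
    fn contains(ranges: &[(char, char)], c: char) -> bool {
        ranges
            .binary_search_by(|&(lo, hi)| {
                if c < lo {
                    std::cmp::Ordering::Greater
                } else if hi < c {
                    std::cmp::Ordering::Less
                } else {
                    std::cmp::Ordering::Equal
                }
            })
            .is_ok()
    }

For example, `ranges_for("Greek").map_or(false, |r| contains(r, 'λ'))` evaluates to true, because U+03BB lies inside the ('Σ', 'ϡ') range listed under GREEK above.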
+
+pub const ADLAM: &'static [(char, char)] = &[
+  ('ـ', 'ـ'), ('𞤀', '𞥊'), ('𞥐', '𞥙'), ('𞥞', '𞥟'),
+];
+
+pub const AHOM: &'static [(char, char)] = &[
+  ('𑜀', '𑜙'), ('𑜝', '𑜫'), ('𑜰', '𑜿'),
+];
+
+pub const ANATOLIAN_HIEROGLYPHS: &'static [(char, char)] = &[
+  ('𔐀', '𔙆'),
+];
+
+pub const ARABIC: &'static [(char, char)] = &[
+  ('\u{600}', '\u{604}'), ('؆', '\u{61c}'), ('؞', 'ۜ'), ('۞', 'ۿ'),
+  ('ݐ', 'ݿ'), ('ࢠ', 'ࢴ'), ('ࢶ', 'ࢽ'), ('ࣔ', '࣡'),
+  ('ࣣ', 'ࣿ'), ('ﭐ', '﯁'), ('ﯓ', 'ﴽ'), ('ﵐ', 'ﶏ'),
+  ('ﶒ', 'ﷇ'), ('ﷰ', '﷽'), ('ﹰ', 'ﹴ'), ('ﹶ', 'ﻼ'),
+  ('𐋠', '𐋻'), ('𐹠', '𐹾'), ('𞸀', '𞸃'), ('𞸅', '𞸟'),
+  ('𞸡', '𞸢'), ('𞸤', '𞸤'), ('𞸧', '𞸧'), ('𞸩', '𞸲'),
+  ('𞸴', '𞸷'), ('𞸹', '𞸹'), ('𞸻', '𞸻'), ('𞹂', '𞹂'),
+  ('𞹇', '𞹇'), ('𞹉', '𞹉'), ('𞹋', '𞹋'), ('𞹍', '𞹏'),
+  ('𞹑', '𞹒'), ('𞹔', '𞹔'), ('𞹗', '𞹗'), ('𞹙', '𞹙'),
+  ('𞹛', '𞹛'), ('𞹝', '𞹝'), ('𞹟', '𞹟'), ('𞹡', '𞹢'),
+  ('𞹤', '𞹤'), ('𞹧', '𞹪'), ('𞹬', '𞹲'), ('𞹴', '𞹷'),
+  ('𞹹', '𞹼'), ('𞹾', '𞹾'), ('𞺀', '𞺉'), ('𞺋', '𞺛'),
+  ('𞺡', '𞺣'), ('𞺥', '𞺩'), ('𞺫', '𞺻'), ('𞻰', '𞻱'),
+];
+
+pub const ARMENIAN: &'static [(char, char)] = &[
+  ('Ա', 'Ֆ'), ('ՙ', '՟'), ('ա', 'և'), ('։', '֊'), ('֍', '֏'),
+  ('ﬓ', 'ﬗ'),
+];
+
+pub const AVESTAN: &'static [(char, char)] = &[
+  ('𐬀', '𐬵'), ('𐬹', '𐬿'),
+];
+
+pub const BALINESE: &'static [(char, char)] = &[
+  ('ᬀ', 'ᭋ'), ('᭐', '᭼'),
+];
+
+pub const BAMUM: &'static [(char, char)] = &[
+  ('ꚠ', '꛷'), ('𖠀', '𖨸'),
+];
+
+pub const BASSA_VAH: &'static [(char, char)] = &[
+  ('𖫐', '𖫭'), ('𖫰', '𖫵'),
+];
+
+pub const BATAK: &'static [(char, char)] = &[
+  ('ᯀ', '᯳'), ('᯼', '᯿'),
+];
+
+pub const BENGALI: &'static [(char, char)] = &[
+  ('॑', '॒'), ('।', '॥'), ('ঀ', 'ঃ'), ('অ', 'ঌ'),
+  ('এ', 'ঐ'), ('ও', 'ন'), ('প', 'র'), ('ল', 'ল'),
+  ('শ', 'হ'), ('়', 'ৄ'), ('ে', 'ৈ'), ('ো', 'ৎ'),
+  ('ৗ', 'ৗ'), ('ড়', 'ঢ়'), ('য়', 'ৣ'), ('০', '৽'),
+  ('᳷', '᳷'), ('꣱', '꣱'),
+];
+
+pub const BHAIKSUKI: &'static [(char, char)] = &[
+  ('𑰀', '𑰈'), ('𑰊', '𑰶'), ('𑰸', '𑱅'), ('𑱐', '𑱬'),
+];
+
+pub const BOPOMOFO: &'static [(char, char)] = &[
+  ('˪', '˫'), ('、', '〃'), ('〈', '】'), ('〓', '〟'),
+  ('〪', '〭'), ('〰', '〰'), ('〷', '〷'), ('・', '・'),
+  ('ㄅ', 'ㄮ'), ('ㆠ', 'ㆺ'), ('﹅', '﹆'), ('。', '・'),
+];
+
+pub const BRAHMI: &'static [(char, char)] = &[
+  ('𑀀', '𑁍'), ('𑁒', '𑁯'), ('𑁿', '𑁿'),
+];
+
+pub const BRAILLE: &'static [(char, char)] = &[
+  ('⠀', '⣿'),
+];
+
+pub const BUGINESE: &'static [(char, char)] = &[
+  ('ᨀ', 'ᨛ'), ('᨞', '᨟'), ('ꧏ', 'ꧏ'),
+];
+
+pub const BUHID: &'static [(char, char)] = &[
+  ('᜵', '᜶'), ('ᝀ', 'ᝓ'),
+];
+
+pub const CANADIAN_ABORIGINAL: &'static [(char, char)] = &[
+  ('᐀', 'ᙿ'), ('ᢰ', 'ᣵ'),
+];
+
+pub const CARIAN: &'static [(char, char)] = &[
+  ('𐊠', '𐋐'),
+];
+
+pub const CAUCASIAN_ALBANIAN: &'static [(char, char)] = &[
+  ('𐔰', '𐕣'), ('𐕯', '𐕯'),
+];
+
+pub const CHAKMA: &'static [(char, char)] = &[
+  ('০', '৯'), ('၀', '၉'), ('𑄀', '𑄴'), ('𑄶', '𑅃'),
+];
+
+pub const CHAM: &'static [(char, char)] = &[
+  ('ꨀ', 'ꨶ'), ('ꩀ', 'ꩍ'), ('꩐', '꩙'), ('꩜', '꩟'),
+];
+
+pub const CHEROKEE: &'static [(char, char)] = &[
+  ('Ꭰ', 'Ᏽ'), ('ᏸ', 'ᏽ'), ('ꭰ', 'ꮿ'),
+];
+
+pub const COMMON: &'static [(char, char)] = &[
+  ('\u{0}', '@'), ('[', '`'), ('{', '©'), ('«', '¹'), ('»', '¿'),
+  ('×', '×'), ('÷', '÷'), ('ʹ', '˟'), ('˥', '˩'), ('ˬ', '˿'),
+  ('ʹ', 'ʹ'), (';', ';'), ('΅', '΅'), ('·', '·'),
+  ('\u{605}', '\u{605}'), ('\u{6dd}', '\u{6dd}'), ('\u{8e2}', '\u{8e2}'),
+  ('฿', '฿'), ('࿕', '࿘'), ('᛫', '᛭'), ('\u{2000}', '\u{200b}'),
+  ('\u{200e}', '\u{2064}'), ('\u{2066}', '⁰'), ('⁴', '⁾'),
+  ('₀', '₎'), ('₠', '₿'), ('℀', '℥'), ('℧', '℩'),
+  ('ℬ', 'ℱ'), ('ℳ', '⅍'), ('⅏', '⅟'), ('↉', '↋'),
+  ('←', '␦'), ('⑀', '⑊'), ('①', '⟿'), ('⤀', '⭳'),
+  ('⭶', '⮕'), ('⮘', '⮹'), ('⮽', '⯈'), ('⯊', '⯒'),
+  ('⯬', '⯯'), ('⸀', '⹂'), ('⹄', '⹉'), ('⿰', '⿻'),
+  ('\u{3000}', '\u{3000}'), ('〄', '〄'), ('〒', '〒'), ('〠', '〠'),
+  ('〶', '〶'), ('㉈', '㉟'), ('㉿', '㉿'), ('㊱', '㊿'),
+  ('㋌', '㋏'), ('㍱', '㍺'), ('㎀', '㏟'), ('㏿', '㏿'),
+  ('䷀', '䷿'), ('꜀', '꜡'), ('ꞈ', '꞊'), ('꭛', '꭛'),
+  ('﴾', '﴿'), ('︐', '︙'), ('︰', '﹄'), ('﹇', '﹒'),
+  ('﹔', '﹦'), ('﹨', '﹫'), ('\u{feff}', '\u{feff}'), ('!', '@'),
+  ('[', '`'), ('{', '⦆'), ('¢', '₩'), ('│', '○'),
+  ('\u{fff9}', '�'), ('𐆐', '𐆛'), ('𐇐', '𐇼'), ('𝀀', '𝃵'),
+  ('𝄀', '𝄦'), ('𝄩', '𝅦'), ('𝅪', '\u{1d17a}'), ('𝆃', '𝆄'),
+  ('𝆌', '𝆩'), ('𝆮', '𝇨'), ('𝌀', '𝍖'), ('𝐀', '𝑔'),
+  ('𝑖', '𝒜'), ('𝒞', '𝒟'), ('𝒢', '𝒢'), ('𝒥', '𝒦'),
+  ('𝒩', '𝒬'), ('𝒮', '𝒹'), ('𝒻', '𝒻'), ('𝒽', '𝓃'),
+  ('𝓅', '𝔅'), ('𝔇', '𝔊'), ('𝔍', '𝔔'), ('𝔖', '𝔜'),
+  ('𝔞', '𝔹'), ('𝔻', '𝔾'), ('𝕀', '𝕄'), ('𝕆', '𝕆'),
+  ('𝕊', '𝕐'), ('𝕒', '𝚥'), ('𝚨', '𝟋'), ('𝟎', '𝟿'),
+  ('🀀', '🀫'), ('🀰', '🂓'), ('🂠', '🂮'), ('🂱', '🂿'),
+  ('🃁', '🃏'), ('🃑', '🃵'), ('🄀', '🄌'), ('🄐', '🄮'),
+  ('🄰', '🅫'), ('🅰', '🆬'), ('🇦', '🇿'), ('🈁', '🈂'),
+  ('🈐', '🈻'), ('🉀', '🉈'), ('🉠', '🉥'), ('🌀', '🛔'),
+  ('🛠', '🛬'), ('🛰', '🛸'), ('🜀', '🝳'), ('🞀', '🟔'),
+  ('🠀', '🠋'), ('🠐', '🡇'), ('🡐', '🡙'), ('🡠', '🢇'),
+  ('🢐', '🢭'), ('🤀', '🤋'), ('🤐', '🤾'), ('🥀', '🥌'),
+  ('🥐', '🥫'), ('🦀', '🦗'), ('🧀', '🧀'), ('🧐', '🧦'),
+  ('\u{e0001}', '\u{e0001}'), ('\u{e0020}', '\u{e007f}'),
+];
+
+pub const COPTIC: &'static [(char, char)] = &[
+  ('Ϣ', 'ϯ'), ('Ⲁ', 'ⳳ'), ('⳹', '⳿'), ('𐋠', '𐋻'),
+];
+
+pub const CUNEIFORM: &'static [(char, char)] = &[
+  ('𒀀', '𒎙'), ('𒐀', '𒑮'), ('𒑰', '𒑴'), ('𒒀', '𒕃'),
+];
+
+pub const CYPRIOT: &'static [(char, char)] = &[
+  ('𐄀', '𐄂'), ('𐄇', '𐄳'), ('𐄷', '𐄿'), ('𐠀', '𐠅'),
+  ('𐠈', '𐠈'), ('𐠊', '𐠵'), ('𐠷', '𐠸'), ('𐠼', '𐠼'),
+  ('𐠿', '𐠿'),
+];
+
+pub const CYRILLIC: &'static [(char, char)] = &[
+  ('Ѐ', 'ԯ'), ('ᲀ', 'ᲈ'), ('ᴫ', 'ᴫ'), ('ᵸ', 'ᵸ'),
+  ('ⷠ', 'ⷿ'), ('⹃', '⹃'), ('Ꙁ', 'ꚟ'), ('︮', '︯'),
+];
+
+pub const DESERET: &'static [(char, char)] = &[
+  ('𐐀', '𐑏'),
+];
+
+pub const DEVANAGARI: &'static [(char, char)] = &[
+  ('ऀ', 'ॿ'), ('᳐', 'ᳶ'), ('᳸', '᳹'), ('⃰', '⃰'),
+  ('꠰', '꠹'), ('꣠', 'ꣽ'),
+];
+
+pub const DUPLOYAN: &'static [(char, char)] = &[
+  ('𛰀', '𛱪'), ('𛱰', '𛱼'), ('𛲀', '𛲈'), ('𛲐', '𛲙'),
+  ('𛲜', '\u{1bca3}'),
+];
+
+pub const EGYPTIAN_HIEROGLYPHS: &'static [(char, char)] = &[
+  ('𓀀', '𓐮'),
+];
+
+pub const ELBASAN: &'static [(char, char)] = &[
+  ('𐔀', '𐔧'),
+];
+
+pub const ETHIOPIC: &'static [(char, char)] = &[
+  ('ሀ', 'ቈ'), ('ቊ', 'ቍ'), ('ቐ', 'ቖ'), ('ቘ', 'ቘ'),
+  ('ቚ', 'ቝ'), ('በ', 'ኈ'), ('ኊ', 'ኍ'), ('ነ', 'ኰ'),
+  ('ኲ', 'ኵ'), ('ኸ', 'ኾ'), ('ዀ', 'ዀ'), ('ዂ', 'ዅ'),
+  ('ወ', 'ዖ'), ('ዘ', 'ጐ'), ('ጒ', 'ጕ'), ('ጘ', 'ፚ'),
+  ('፝', '፼'), ('ᎀ', '᎙'), ('ⶀ', 'ⶖ'), ('ⶠ', 'ⶦ'),
+  ('ⶨ', 'ⶮ'), ('ⶰ', 'ⶶ'), ('ⶸ', 'ⶾ'), ('ⷀ', 'ⷆ'),
+  ('ⷈ', 'ⷎ'), ('ⷐ', 'ⷖ'), ('ⷘ', 'ⷞ'), ('ꬁ', 'ꬆ'),
+  ('ꬉ', 'ꬎ'), ('ꬑ', 'ꬖ'), ('ꬠ', 'ꬦ'), ('ꬨ', 'ꬮ'),
+];
+
+pub const GEORGIAN: &'static [(char, char)] = &[
+  ('։', '։'), ('Ⴀ', 'Ⴥ'), ('Ⴧ', 'Ⴧ'), ('Ⴭ', 'Ⴭ'),
+  ('ა', 'ჿ'), ('ⴀ', 'ⴥ'), ('ⴧ', 'ⴧ'), ('ⴭ', 'ⴭ'),
+];
+
+pub const GLAGOLITIC: &'static [(char, char)] = &[
+  ('҄', '҄'), ('҇', '҇'), ('Ⰰ', 'Ⱞ'), ('ⰰ', 'ⱞ'), ('⹃', '⹃'),
+  ('꙯', '꙯'), ('𞀀', '𞀆'), ('𞀈', '𞀘'), ('𞀛', '𞀡'),
+  ('𞀣', '𞀤'), ('𞀦', '𞀪'),
+];
+
+pub const GOTHIC: &'static [(char, char)] = &[
+  ('𐌰', '𐍊'),
+];
+
+pub const GRANTHA: &'static [(char, char)] = &[
+  ('॑', '॒'), ('।', '॥'), ('ப', 'ப'), ('வ', 'வ'),
+  ('௦', '௲'), ('᳐', '᳐'), ('᳒', '᳓'), ('ᳲ', '᳴'),
+  ('᳸', '᳹'), ('⃰', '⃰'), ('𑌀', '𑌃'), ('𑌅', '𑌌'),
+  ('𑌏', '𑌐'), ('𑌓', '𑌨'), ('𑌪', '𑌰'), ('𑌲', '𑌳'),
+  ('𑌵', '𑌹'), ('𑌼', '𑍄'), ('𑍇', '𑍈'), ('𑍋', '𑍍'),
+  ('𑍐', '𑍐'), ('𑍗', '𑍗'), ('𑍝', '𑍣'), ('𑍦', '𑍬'),
+  ('𑍰', '𑍴'),
+];
+
+pub const GREEK: &'static [(char, char)] = &[
+  ('͂', '͂'), ('ͅ', 'ͅ'), ('Ͱ', 'ͳ'), ('͵', 'ͷ'), ('ͺ', 'ͽ'),
+  ('Ϳ', 'Ϳ'), ('΄', '΄'), ('Ά', 'Ά'), ('Έ', 'Ί'), ('Ό', 'Ό'),
+  ('Ύ', 'Ρ'), ('Σ', 'ϡ'), ('ϰ', 'Ͽ'), ('ᴦ', 'ᴪ'), ('ᵝ', 'ᵡ'),
+  ('ᵦ', 'ᵪ'), ('ᶿ', '᷁'), ('ἀ', 'ἕ'), ('Ἐ', 'Ἕ'),
+  ('ἠ', 'ὅ'), ('Ὀ', 'Ὅ'), ('ὐ', 'ὗ'), ('Ὑ', 'Ὑ'),
+  ('Ὓ', 'Ὓ'), ('Ὕ', 'Ὕ'), ('Ὗ', 'ώ'), ('ᾀ', 'ᾴ'),
+  ('ᾶ', 'ῄ'), ('ῆ', 'ΐ'), ('ῖ', 'Ί'), ('῝', '`'),
+  ('ῲ', 'ῴ'), ('ῶ', '῾'), ('Ω', 'Ω'), ('ꭥ', 'ꭥ'),
+  ('𐅀', '𐆎'), ('𐆠', '𐆠'), ('𝈀', '𝉅'),
+];
+
+pub const GUJARATI: &'static [(char, char)] = &[
+  ('॑', '॒'), ('।', '॥'), ('ઁ', 'ઃ'), ('અ', 'ઍ'),
+  ('એ', 'ઑ'), ('ઓ', 'ન'), ('પ', 'ર'), ('લ', 'ળ'),
+  ('વ', 'હ'), ('઼', 'ૅ'), ('ે', 'ૉ'), ('ો', '્'),
+  ('ૐ', 'ૐ'), ('ૠ', 'ૣ'), ('૦', '૱'), ('ૹ', '૿'),
+  ('꠰', '꠹'),
+];
+
+pub const GURMUKHI: &'static [(char, char)] = &[
+  ('॑', '॒'), ('।', '॥'), ('ਁ', 'ਃ'), ('ਅ', 'ਊ'),
+  ('ਏ', 'ਐ'), ('ਓ', 'ਨ'), ('ਪ', 'ਰ'), ('ਲ', 'ਲ਼'),
+  ('ਵ', 'ਸ਼'), ('ਸ', 'ਹ'), ('਼', '਼'), ('ਾ', 'ੂ'),
+  ('ੇ', 'ੈ'), ('ੋ', '੍'), ('ੑ', 'ੑ'), ('ਖ਼', 'ੜ'),
+  ('ਫ਼', 'ਫ਼'), ('੦', 'ੵ'), ('꠰', '꠹'),
+];
+
+pub const HAN: &'static [(char, char)] = &[
+  ('⺀', '⺙'), ('⺛', '⻳'), ('⼀', '⿕'), ('、', '〃'),
+  ('々', '】'), ('〓', '〟'), ('〡', '〭'), ('〰', '〰'),
+  ('〷', '〿'), ('・', '・'), ('㆐', '㆟'), ('㇀', '㇣'),
+  ('㈠', '㉇'), ('㊀', '㊰'), ('㋀', '㋋'), ('㍘', '㍰'),
+  ('㍻', '㍿'), ('㏠', '㏾'), ('㐀', '䶵'), ('一', '鿪'),
+  ('豈', '舘'), ('並', '龎'), ('﹅', '﹆'), ('。', '・'),
+  ('𝍠', '𝍱'), ('🉐', '🉑'), ('𠀀', '𪛖'), ('𪜀', '𫜴'),
+  ('𫝀', '𫠝'), ('𫠠', '𬺡'), ('𬺰', '𮯠'), ('丽', '𪘀'),
+];
+
+pub const HANGUL: &'static [(char, char)] = &[
+  ('ᄀ', 'ᇿ'), ('、', '〃'), ('〈', '】'), ('〓', '〟'),
+  ('〮', '〰'), ('〷', '〷'), ('・', '・'), ('ㄱ', 'ㆎ'),
+  ('㈀', '㈞'), ('㉠', '㉾'), ('ꥠ', 'ꥼ'), ('가', '힣'),
+  ('ힰ', 'ퟆ'), ('ퟋ', 'ퟻ'), ('﹅', '﹆'), ('。', '・'),
+  ('ᅠ', 'ᄒ'), ('ᅡ', 'ᅦ'), ('ᅧ', 'ᅬ'), ('ᅭ', 'ᅲ'),
+  ('ᅳ', 'ᅵ'),
+];
+
+pub const HANUNOO: &'static [(char, char)] = &[
+  ('ᜠ', '᜶'),
+];
+
+pub const HATRAN: &'static [(char, char)] = &[
+  ('𐣠', '𐣲'), ('𐣴', '𐣵'), ('𐣻', '𐣿'),
+];
+
+pub const HEBREW: &'static [(char, char)] = &[
+  ('֑', 'ׇ'), ('א', 'ת'), ('װ', '״'), ('יִ', 'זּ'), ('טּ', 'לּ'),
+  ('מּ', 'מּ'), ('נּ', 'סּ'), ('ףּ', 'פּ'), ('צּ', 'ﭏ'),
+];
+
+pub const HIRAGANA: &'static [(char, char)] = &[
+  ('、', '〃'), ('〈', '】'), ('〓', '〟'), ('〰', '〵'),
+  ('〷', '〷'), ('〼', '〽'), ('ぁ', 'ゖ'), ('゙', '゠'),
+  ('・', 'ー'), ('﹅', '﹆'), ('。', '・'), ('ー', 'ー'),
+  ('゙', '゚'), ('𛀁', '𛄞'), ('🈀', '🈀'),
+];
+
+pub const IMPERIAL_ARAMAIC: &'static [(char, char)] = &[
+  ('𐡀', '𐡕'), ('𐡗', '𐡟'),
+];
+
+pub const INHERITED: &'static [(char, char)] = &[
+  ('̀', '́'), ('̓', '̈́'), ('͆', '͢'), ('᪰', '᪾'), ('᷂', '᷹'),
+  ('᷻', '᷿'), ('\u{200c}', '\u{200d}'), ('⃐', '⃯'), ('︀', '️'),
+  ('︠', '︭'), ('𐇽', '𐇽'), ('𝅧', '𝅩'), ('𝅻', '𝆂'),
+  ('𝆅', '𝆋'), ('𝆪', '𝆭'), ('󠄀', '󠇯'),
+];
+
+pub const INSCRIPTIONAL_PAHLAVI: &'static [(char, char)] = &[
+  ('𐭠', '𐭲'), ('𐭸', '𐭿'),
+];
+
+pub const INSCRIPTIONAL_PARTHIAN: &'static [(char, char)] = &[
+  ('𐭀', '𐭕'), ('𐭘', '𐭟'),
+];
+
+pub const JAVANESE: &'static [(char, char)] = &[
+  ('ꦀ', '꧍'), ('ꧏ', '꧙'), ('꧞', '꧟'),
+];
+
+pub const KAITHI: &'static [(char, char)] = &[
+  ('०', '९'), ('꠰', '꠹'), ('𑂀', '𑃁'),
+];
+
+pub const KANNADA: &'static [(char, char)] = &[
+  ('॑', '॒'), ('।', '॥'), ('ಀ', 'ಃ'), ('ಅ', 'ಌ'),
+  ('ಎ', 'ಐ'), ('ಒ', 'ನ'), ('ಪ', 'ಳ'), ('ವ', 'ಹ'),
+  ('಼', 'ೄ'), ('ೆ', 'ೈ'), ('ೊ', '್'), ('ೕ', 'ೖ'),
+  ('ೞ', 'ೞ'), ('ೠ', 'ೣ'), ('೦', '೯'), ('ೱ', 'ೲ'),
+  ('᳚', '᳚'), ('ᳵ', 'ᳵ'), ('꠰', '꠵'),
+];
+
+pub const KATAKANA: &'static [(char, char)] = &[
+  ('、', '〃'), ('〈', '】'), ('〓', '〟'), ('〰', '〵'),
+  ('〷', '〷'), ('〼', '〽'), ('゙', '゜'), ('゠', 'ヿ'),
+  ('ㇰ', 'ㇿ'), ('㋐', '㋾'), ('㌀', '㍗'), ('﹅', '﹆'),
+  ('。', '゚'), ('𛀀', '𛀀'),
+];
+
+pub const KAYAH_LI: &'static [(char, char)] = &[
+  ('꤀', '꤯'),
+];
+
+pub const KHAROSHTHI: &'static [(char, char)] = &[
+  ('𐨀', '𐨃'), ('𐨅', '𐨆'), ('𐨌', '𐨓'), ('𐨕', '𐨗'),
+  ('𐨙', '𐨳'), ('𐨸', '𐨺'), ('𐨿', '𐩇'), ('𐩐', '𐩘'),
+];
+
+pub const KHMER: &'static [(char, char)] = &[
+  ('ក', '៝'), ('០', '៩'), ('៰', '៹'), ('᧠', '᧿'),
+];
+
+pub const KHOJKI: &'static [(char, char)] = &[
+  ('૦', '૯'), ('𑈀', '𑈑'), ('𑈓', '𑈾'),
+];
+
+pub const KHUDAWADI: &'static [(char, char)] = &[
+  ('।', '॥'), ('꠰', '꠹'), ('𑊰', '𑋪'), ('𑋰', '𑋹'),
+];
+
+pub const LAO: &'static [(char, char)] = &[
+  ('ກ', 'ຂ'), ('ຄ', 'ຄ'), ('ງ', 'ຈ'), ('ຊ', 'ຊ'),
+  ('ຍ', 'ຍ'), ('ດ', 'ທ'), ('ນ', 'ຟ'), ('ມ', 'ຣ'),
+  ('ລ', 'ລ'), ('ວ', 'ວ'), ('ສ', 'ຫ'), ('ອ', 'ູ'),
+  ('ົ', 'ຽ'), ('ເ', 'ໄ'), ('ໆ', 'ໆ'), ('່', 'ໍ'),
+  ('໐', '໙'), ('ໜ', 'ໟ'),
+];
+
+pub const LATIN: &'static [(char, char)] = &[
+  ('A', 'Z'), ('a', 'z'), ('ª', 'ª'), ('º', 'º'), ('À', 'Ö'),
+  ('Ø', 'ö'), ('ø', 'ʸ'), ('ˠ', 'ˤ'), ('ͣ', 'ͯ'), ('҅', '҆'),
+  ('॑', '॒'), ('჻', '჻'), ('ᴀ', 'ᴥ'), ('ᴬ', 'ᵜ'),
+  ('ᵢ', 'ᵥ'), ('ᵫ', 'ᵷ'), ('ᵹ', 'ᶾ'), ('Ḁ', 'ỿ'),
+  ('ⁱ', 'ⁱ'), ('ⁿ', 'ⁿ'), ('ₐ', 'ₜ'), ('⃰', '⃰'),
+  ('K', 'Å'), ('Ⅎ', 'Ⅎ'), ('ⅎ', 'ⅎ'), ('Ⅰ', 'ↈ'),
+  ('Ⱡ', 'Ɀ'), ('Ꜣ', 'ꞇ'), ('Ꞌ', 'Ɪ'), ('Ʞ', 'ꞷ'),
+  ('ꟷ', 'ꟿ'), ('꤮', '꤮'), ('ꬰ', 'ꭚ'), ('ꭜ', 'ꭤ'),
+  ('ff', 'st'), ('A', 'Z'), ('a', 'z'),
+];
+
+pub const LEPCHA: &'static [(char, char)] = &[
+  ('ᰀ', '᰷'), ('᰻', '᱉'), ('ᱍ', 'ᱏ'),
+];
+
+pub const LIMBU: &'static [(char, char)] = &[
+  ('॥', '॥'), ('ᤀ', 'ᤞ'), ('ᤠ', 'ᤫ'), ('ᤰ', '᤻'),
+  ('᥀', '᥀'), ('᥄', '᥏'),
+];
+
+pub const LINEAR_A: &'static [(char, char)] = &[
+  ('𐄇', '𐄳'), ('𐘀', '𐜶'), ('𐝀', '𐝕'), ('𐝠', '𐝧'),
+];
+
+pub const LINEAR_B: &'static [(char, char)] = &[
+  ('𐀀', '𐀋'), ('𐀍', '𐀦'), ('𐀨', '𐀺'), ('𐀼', '𐀽'),
+  ('𐀿', '𐁍'), ('𐁐', '𐁝'), ('𐂀', '𐃺'), ('𐄀', '𐄂'),
+  ('𐄇', '𐄳'), ('𐄷', '𐄿'),
+];
+
+pub const LISU: &'static [(char, char)] = &[
+  ('ꓐ', '꓿'),
+];
+
+pub const LYCIAN: &'static [(char, char)] = &[
+  ('𐊀', '𐊜'),
+];
+
+pub const LYDIAN: &'static [(char, char)] = &[
+  ('𐤠', '𐤹'), ('𐤿', '𐤿'),
+];
+
+pub const MAHAJANI: &'static [(char, char)] = &[
+  ('।', '९'), ('꠰', '꠹'), ('𑅐', '𑅶'),
+];
+
+pub const MALAYALAM: &'static [(char, char)] = &[
+  ('॑', '॒'), ('।', '॥'), ('ഀ', 'ഃ'), ('അ', 'ഌ'),
+  ('എ', 'ഐ'), ('ഒ', 'ൄ'), ('െ', 'ൈ'), ('ൊ', '൏'),
+  ('ൔ', 'ൣ'), ('൦', 'ൿ'), ('᳚', '᳚'),
+];
+
+pub const MANDAIC: &'static [(char, char)] = &[
+  ('ـ', 'ـ'), ('ࡀ', '࡛'), ('࡞', '࡞'),
+];
+
+pub const MANICHAEAN: &'static [(char, char)] = &[
+  ('ـ', 'ـ'), ('𐫀', '𐫦'), ('𐫫', '𐫶'),
+];
+
+pub const MARCHEN: &'static [(char, char)] = &[
+  ('𑱰', '𑲏'), ('𑲒', '𑲧'), ('𑲩', '𑲶'),
+];
+
+pub const MASARAM_GONDI: &'static [(char, char)] = &[
+  ('𑴀', '𑴆'), ('𑴈', '𑴉'), ('𑴋', '𑴶'), ('𑴺', '𑴺'),
+  ('𑴼', '𑴽'), ('𑴿', '𑵇'), ('𑵐', '𑵙'),
+];
+
+pub const MEETEI_MAYEK: &'static [(char, char)] = &[
+  ('ꫠ', '꫶'), ('ꯀ', '꯭'), ('꯰', '꯹'),
+];
+
+pub const MENDE_KIKAKUI: &'static [(char, char)] = &[
+  ('𞠀', '𞣄'), ('𞣇', '𞣖'),
+];
+
+pub const MEROITIC_CURSIVE: &'static [(char, char)] = &[
+  ('𐦠', '𐦷'), ('𐦼', '𐧏'), ('𐧒', '𐧿'),
+];
+
+pub const MEROITIC_HIEROGLYPHS: &'static [(char, char)] = &[
+  ('𐦀', '𐦟'),
+];
+
+pub const MIAO: &'static [(char, char)] = &[
+  ('𖼀', '𖽄'), ('𖽐', '𖽾'), ('𖾏', '𖾟'),
+];
+
+pub const MODI: &'static [(char, char)] = &[
+  ('꠰', '꠹'), ('𑘀', '𑙄'), ('𑙐', '𑙙'),
+];
+
+pub const MONGOLIAN: &'static [(char, char)] = &[
+  ('᠀', '\u{180e}'), ('᠐', '᠙'), ('ᠠ', 'ᡷ'), ('ᢀ', 'ᢪ'),
+  ('𑙠', '𑙬'),
+];
+
+pub const MRO: &'static [(char, char)] = &[
+  ('𖩀', '𖩞'), ('𖩠', '𖩩'), ('𖩮', '𖩯'),
+];
+
+pub const MULTANI: &'static [(char, char)] = &[
+  ('੦', '੯'), ('𑊀', '𑊆'), ('𑊈', '𑊈'), ('𑊊', '𑊍'),
+  ('𑊏', '𑊝'), ('𑊟', '𑊩'),
+];
+
+pub const MYANMAR: &'static [(char, char)] = &[
+  ('က', '႟'), ('꤮', '꤮'), ('ꧠ', 'ꧾ'), ('ꩠ', 'ꩿ'),
+];
+
+pub const NABATAEAN: &'static [(char, char)] = &[
+  ('𐢀', '𐢞'), ('𐢧', '𐢯'),
+];
+
+pub const NEW_TAI_LUE: &'static [(char, char)] = &[
+  ('ᦀ', 'ᦫ'), ('ᦰ', 'ᧉ'), ('᧐', '᧚'), ('᧞', '᧟'),
+];
+
+pub const NEWA: &'static [(char, char)] = &[
+  ('𑐀', '𑑙'), ('𑑛', '𑑛'), ('𑑝', '𑑝'),
+];
+
+pub const NKO: &'static [(char, char)] = &[
+  ('߀', 'ߺ'),
+];
+
+pub const NUSHU: &'static [(char, char)] = &[
+  ('𖿡', '𖿡'), ('𛅰', '𛋻'),
+];
+
+pub const OGHAM: &'static [(char, char)] = &[
+  ('\u{1680}', '᚜'),
+];
+
+pub const OL_CHIKI: &'static [(char, char)] = &[
+  ('᱐', '᱿'),
+];
+
+pub const OLD_HUNGARIAN: &'static [(char, char)] = &[
+  ('𐲀', '𐲲'), ('𐳀', '𐳲'), ('𐳺', '𐳿'),
+];
+
+pub const OLD_ITALIC: &'static [(char, char)] = &[
+  ('𐌀', '𐌣'), ('𐌭', '𐌯'),
+];
+
+pub const OLD_NORTH_ARABIAN: &'static [(char, char)] = &[
+  ('𐪀', '𐪟'),
+];
+
+pub const OLD_PERMIC: &'static [(char, char)] = &[
+  ('҃', '҃'), ('𐍐', '𐍺'),
+];
+
+pub const OLD_PERSIAN: &'static [(char, char)] = &[
+  ('𐎠', '𐏃'), ('𐏈', '𐏕'),
+];
+
+pub const OLD_SOUTH_ARABIAN: &'static [(char, char)] = &[
+  ('𐩠', '𐩿'),
+];
+
+pub const OLD_TURKIC: &'static [(char, char)] = &[
+  ('𐰀', '𐱈'),
+];
+
+pub const ORIYA: &'static [(char, char)] = &[
+  ('॑', '॒'), ('।', '॥'), ('ଁ', 'ଃ'), ('ଅ', 'ଌ'),
+  ('ଏ', 'ଐ'), ('ଓ', 'ନ'), ('ପ', 'ର'), ('ଲ', 'ଳ'),
+  ('ଵ', 'ହ'), ('଼', 'ୄ'), ('େ', 'ୈ'), ('ୋ', '୍'),
+  ('ୖ', 'ୗ'), ('ଡ଼', 'ଢ଼'), ('ୟ', 'ୣ'), ('୦', '୷'),
+];
+
+pub const OSAGE: &'static [(char, char)] = &[
+  ('𐒰', '𐓓'), ('𐓘', '𐓻'),
+];
+
+pub const OSMANYA: &'static [(char, char)] = &[
+  ('𐒀', '𐒝'), ('𐒠', '𐒩'),
+];
+
+pub const PAHAWH_HMONG: &'static [(char, char)] = &[
+  ('𖬀', '𖭅'), ('𖭐', '𖭙'), ('𖭛', '𖭡'), ('𖭣', '𖭷'),
+  ('𖭽', '𖮏'),
+];
+
+pub const PALMYRENE: &'static [(char, char)] = &[
+  ('𐡠', '𐡿'),
+];
+
+pub const PAU_CIN_HAU: &'static [(char, char)] = &[
+  ('𑫀', '𑫸'),
+];
+
+pub const PHAGS_PA: &'static [(char, char)] = &[
+  ('᠂', '᠃'), ('᠅', '᠅'), ('ꡀ', '꡷'),
+];
+
+pub const PHOENICIAN: &'static [(char, char)] = &[
+  ('𐤀', '𐤛'), ('𐤟', '𐤟'),
+];
+
+pub const PSALTER_PAHLAVI: &'static [(char, char)] = &[
+  ('ـ', 'ـ'), ('𐮀', '𐮑'), ('𐮙', '𐮜'), ('𐮩', '𐮯'),
+];
+
+pub const REJANG: &'static [(char, char)] = &[
+  ('ꤰ', '꥓'), ('꥟', '꥟'),
+];
+
+pub const RUNIC: &'static [(char, char)] = &[
+  ('ᚠ', 'ᛪ'), ('ᛮ', 'ᛸ'),
+];
+
+pub const SAMARITAN: &'static [(char, char)] = &[
+  ('ࠀ', '࠭'), ('࠰', '࠾'),
+];
+
+pub const SAURASHTRA: &'static [(char, char)] = &[
+  ('ꢀ', 'ꣅ'), ('꣎', '꣙'),
+];
+
+pub const SHARADA: &'static [(char, char)] = &[
+  ('॑', '॑'), ('᳗', '᳗'), ('᳙', '᳙'), ('᳜', '᳝'),
+  ('᳠', '᳠'), ('𑆀', '𑇍'), ('𑇐', '𑇟'),
+];
+
+pub const SHAVIAN: &'static [(char, char)] = &[
+  ('𐑐', '𐑿'),
+];
+
+pub const SIDDHAM: &'static [(char, char)] = &[
+  ('𑖀', '𑖵'), ('𑖸', '𑗝'),
+];
+
+pub const SIGNWRITING: &'static [(char, char)] = &[
+  ('𝠀', '𝪋'), ('𝪛', '𝪟'), ('𝪡', '𝪯'),
+];
+
+pub const SINHALA: &'static [(char, char)] = &[
+  ('।', '॥'), ('ං', 'ඃ'), ('අ', 'ඖ'), ('ක', 'න'),
+  ('ඳ', 'ර'), ('ල', 'ල'), ('ව', 'ෆ'), ('්', '්'),
+  ('ා', 'ු'), ('ූ', 'ූ'), ('ෘ', 'ෟ'), ('෦', '෯'),
+  ('ෲ', '෴'), ('𑇡', '𑇴'),
+];
+
+pub const SORA_SOMPENG: &'static [(char, char)] = &[
+  ('𑃐', '𑃨'), ('𑃰', '𑃹'),
+];
+
+pub const SOYOMBO: &'static [(char, char)] = &[
+  ('𑩐', '𑪃'), ('𑪆', '𑪜'), ('𑪞', '𑪢'),
+];
+
+pub const SUNDANESE: &'static [(char, char)] = &[
+  ('ᮀ', 'ᮿ'), ('᳀', '᳇'),
+];
+
+pub const SYLOTI_NAGRI: &'static [(char, char)] = &[
+  ('।', '॥'), ('০', '৯'), ('ꠀ', '꠫'),
+];
+
+pub const SYRIAC: &'static [(char, char)] = &[
+  ('،', '،'), ('؛', '\u{61c}'), ('؟', '؟'), ('ـ', 'ـ'), ('ً', 'ٕ'),
+  ('ٰ', 'ٰ'), ('܀', '܍'), ('\u{70f}', '݊'), ('ݍ', 'ݏ'), ('ࡠ', 'ࡪ'),
+];
+
+pub const TAGALOG: &'static [(char, char)] = &[
+  ('ᜀ', 'ᜌ'), ('ᜎ', '᜔'), ('᜵', '᜶'),
+];
+
+pub const TAGBANWA: &'static [(char, char)] = &[
+  ('᜵', '᜶'), ('ᝠ', 'ᝬ'), ('ᝮ', 'ᝰ'), ('ᝲ', 'ᝳ'),
+];
+
+pub const TAI_LE: &'static [(char, char)] = &[
+  ('၀', '၉'), ('ᥐ', 'ᥭ'), ('ᥰ', 'ᥴ'),
+];
+
+pub const TAI_THAM: &'static [(char, char)] = &[
+  ('ᨠ', 'ᩞ'), ('᩠', '᩼'), ('᩿', '᪉'), ('᪐', '᪙'),
+  ('᪠', '᪭'),
+];
+
+pub const TAI_VIET: &'static [(char, char)] = &[
+  ('ꪀ', 'ꫂ'), ('ꫛ', '꫟'),
+];
+
+pub const TAKRI: &'static [(char, char)] = &[
+  ('।', '॥'), ('꠰', '꠹'), ('𑚀', '𑚷'), ('𑛀', '𑛉'),
+];
+
+pub const TAMIL: &'static [(char, char)] = &[
+  ('॑', '॒'), ('।', '॥'), ('ஂ', 'ஃ'), ('அ', 'ஊ'),
+  ('எ', 'ஐ'), ('ஒ', 'க'), ('ங', 'ச'), ('ஜ', 'ஜ'),
+  ('ஞ', 'ட'), ('ண', 'த'), ('ந', 'ப'), ('ம', 'ஹ'),
+  ('ா', 'ூ'), ('ெ', 'ை'), ('ொ', '்'), ('ௐ', 'ௐ'),
+  ('ௗ', 'ௗ'), ('௦', '௺'), ('᳚', '᳚'), ('ꣳ', 'ꣳ'),
+  ('𑌁', '𑌁'), ('𑌃', '𑌃'), ('𑌼', '𑌼'),
+];
+
+pub const TANGUT: &'static [(char, char)] = &[
+  ('𖿠', '𖿠'), ('𗀀', '𘟬'), ('𘠀', '𘫲'),
+];
+
+pub const TELUGU: &'static [(char, char)] = &[
+  ('॑', '॒'), ('।', '॥'), ('ఀ', 'ః'), ('అ', 'ఌ'),
+  ('ఎ', 'ఐ'), ('ఒ', 'న'), ('ప', 'హ'), ('ఽ', 'ౄ'),
+  ('ె', 'ై'), ('ొ', '్'), ('ౕ', 'ౖ'), ('ౘ', 'ౚ'),
+  ('ౠ', 'ౣ'), ('౦', '౯'), ('౸', '౿'), ('᳚', '᳚'),
+];
+
+pub const THAANA: &'static [(char, char)] = &[
+  ('،', '،'), ('؛', '\u{61c}'), ('؟', '؟'), ('٠', '٩'), ('ހ', 'ޱ'),
+  ('ﷲ', 'ﷲ'), ('﷽', '﷽'),
+];
+
+pub const THAI: &'static [(char, char)] = &[
+  ('ก', 'ฺ'), ('เ', '๛'),
+];
+
+pub const TIBETAN: &'static [(char, char)] = &[
+  ('ༀ', 'ཇ'), ('ཉ', 'ཬ'), ('ཱ', 'ྗ'), ('ྙ', 'ྼ'),
+  ('྾', '࿌'), ('࿎', '࿔'), ('࿙', '࿚'),
+];
+
+pub const TIFINAGH: &'static [(char, char)] = &[
+  ('ⴰ', 'ⵧ'), ('ⵯ', '⵰'), ('⵿', '⵿'),
+];
+
+pub const TIRHUTA: &'static [(char, char)] = &[
+  ('।', '॥'), ('꠰', '꠹'), ('𑒀', '𑓇'), ('𑓐', '𑓙'),
+];
+
+pub const UGARITIC: &'static [(char, char)] = &[
+  ('𐎀', '𐎝'), ('𐎟', '𐎟'),
+];
+
+pub const VAI: &'static [(char, char)] = &[
+  ('ꔀ', 'ꘫ'),
+];
+
+pub const WARANG_CITI: &'static [(char, char)] = &[
+  ('𑢠', '𑣲'), ('𑣿', '𑣿'),
+];
+
+pub const YI: &'static [(char, char)] = &[
+  ('、', '。'), ('〈', '】'), ('〔', '〛'), ('・', '・'),
+  ('ꀀ', 'ꒌ'), ('꒐', '꓆'), ('。', '・'),
+];
+
+pub const ZANABAZAR_SQUARE: &'static [(char, char)] = &[
+  ('𑨀', '𑩇'),
+];
--- a/third_party/rust/regex/.cargo-checksum.json
+++ b/third_party/rust/regex/.cargo-checksum.json
@@ -1,1 +1,1 @@
-{"files":{".travis.yml":"28fc87d41a27bc03d9a8c6b5f7bc1bca55444dbd0001100b85060acada1449d7","CHANGELOG.md":"1056eb04d324b46354605713b40b76bb68d29e5d6ff7ab8d67a58f24534d1860","Cargo.toml":"db32762fb82d997942d0ccebdfab132e742a3d384b2f3b10b2e1000515bd40e7","HACKING.md":"37ea34650ce307a1d561aa915ada82cc8b44153085f327aa2b342fcf8e7afc62","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"6485b8ed310d3f0340bf1ad1f47645069ce4069dcc6bb46c7d5c6faf41de1fdb","PERFORMANCE.md":"d23d6dbe3791bc0689c5a876922b8264d6d3069b577772440b8caa01867f0cb4","README.md":"e88b7b2f1982115b1bfa8dd1b305ae1315af6f426c919009d80b63007509ed1c","appveyor.yml":"6855c14a64fec423d67c1ddcc47644593ae3ad1f26e8d157cd5c395efee1efc0","ci/after_success.sh":"6c9562098234f7484e2496918386505185c43018c3f3d17f8df95a59457d8f1f","ci/run-kcov":"721a0ebfb72deb34e4b45141279eae5b4a40c22d47159e01b4e6f176ed3bbc22","ci/run-shootout-test":"be7edb66a4a65eaef6a2b7e6036b4b732effa8dcd931bf4cbd591d0ca32b8264","ci/script.sh":"7f640b7a469c2764f6c12dc84a75fa956c24bc1f611cd72f1ae3a53bacd3ee06","examples/bug347.rs":"93fde9707dd78978dffed46b08bde095a515151bc94f892c0159ac3c0060852d","examples/regexdna-input.txt":"156a49710bb3e1ed4bc2bbb0af0f383b747b3d0281453cfff39c296124c598f8","examples/regexdna-output.txt":"35e85b19b70a893d752fd43e54e1e9da08bac43559191cea85b33387c24c4cc1","examples/shootout-regex-dna-bytes.rs":"3d36b08dd34c51d022e9b3a3dcfbc780bc9dc3d46a601256db5d4d5e2213119c","examples/shootout-regex-dna-cheat.rs":"4b5a590ec88a9ba67cc4a34621cb038a400d45e64a15a354f3e07fdaf41a935b","examples/shootout-regex-dna-replace.rs":"15677e7de356427fe2b983e918de073a7a730337b090d4ab0e815e1e66f74f73","examples/shootout-regex-dna-single-cheat.rs":"5e743b3a6ec968713ce064a289b1fbd11c302add824d1c749a2ffb546c73128e","examples/shootout-regex-dna-single.rs":"021292ec6999be33a9b8e833759bf3509dc0e6b6365fad8e3a882cf3835b0405","examples/shootout-regex-dna.rs":"b892242cea6b6b16f0511ea672c24a49909585224fb69085f0f2fca77ce30aea","examples/shootout-regex-redux-1.rs":"191f47847f5466261a308c8cdc19a3502603a208f40e4e650024dec80241b9e3","examples/shootout-regex-redux-chunked.rs":"6da0b6af66df64dfe45fb2e08532e2b3712cebcf0d3f19ff8d5f87fe712278c9","examples/shootout-regex-redux.rs":"c587bef968feb9c329f5c1896f79ec09e509885dec27091678b28a221a5d40e8","scripts/frequencies.py":"df2cac150bc4ed42d2d97762535a5f260f3fe06d9d516721f68a52671a3b7a3b","scripts/regex-match-tests.py":"f1b3ad870c893510172bd84f8328b66ee99cd7aee7715a95a858468ff3e33635","scripts/unicode.py":"4b1330e733bbc22e677e22855bf8a88ab134aae56a10fda97081320aa12a6228","src/backtrack.rs":"221e12b4afaff096f8a61bb274eade928845e194e1ec37fa75b684b18c126b1b","src/compile.rs":"4c2159961364d001e9191840a878c3c3ca0b641f2356ba2abcc3356581b7b47c","src/dfa.rs":"985b5f880814452edae862347584f2e53c08eaccae0e01ee5c5d8564dd8381ad","src/error.rs":"b4052872f9a7bc48c478fe73e145ac1acf7533140f827999808fadcba09754d3","src/exec.rs":"b696cd65cca692d4ddf55e60bcd29986c5f2fd43a2e32ffea23c0c95548915d9","src/expand.rs":"94d7fb06544639d94338fc426bd15d1883efd4e0204552c9def667b9b415ef63","src/freqs.rs":"e25871eec51d1c02c15abbc04d61dbea71fb2dbc15f370c98279687416c969de","src/input.rs":"c2710458e38398a0eba357c76c38fde42b50c22b6e39764ea68984fab3acfab7","src/lib.rs":"1652707e4c259cd39e5c45011c5c8d001f582944527af8d78637600bfe162b44","src/literals.rs":"7a2efe252c21660794bd27054e997151763c8f0ea68c2be7994e1a36f4811675","src/pattern.rs":"4e820c17a2b983050f47d3fd13282094ce9f27b75fd852fcec438d61f07f0b0b","src/pikevm.rs":"bc9d00bd5aed5463121fc98ce9
390381137f842c821da48242b2f6fae1137f22","src/prog.rs":"3b3d472ced5958e140e96d367067ab53edba72804c80009a46131ec904a75f2a","src/re_builder.rs":"d14baf810f1248a030eca31475989115f6254a438acbba1eec14b59ed58d2f12","src/re_bytes.rs":"a5b7afee1460fb1957284a2483e6708f6357acc7c13288dee409b6aa0fa15b74","src/re_plugin.rs":"74999c35abc02cb7c18adb74c1d8ebd08b56141f66f174c67b7157c5c27e2a49","src/re_set.rs":"43f40dba1273b8b359d4010616be62b41f71b1d803f0c6e4b04169dc57cb6318","src/re_trait.rs":"37d791af9cc737e0a144be64ddb7104114222e5034cfc9c948078b0b2c799295","src/re_unicode.rs":"34cff9ae054084d870803f1c8d092847fe32a34c9572e67bfc524da03cbfc905","src/simd_accel/mod.rs":"a3eb2c7fcc296137cfc135da47cdfe745606e8159c3263591bebced2c09fdd54","src/simd_accel/teddy128.rs":"4b913c67f5b7105f75b02fff092c3d76895612425c3f8b3e5ded2c41aae4279c","src/simd_fallback/mod.rs":"4cb8a77e2d3e167e9bfc47cb7e4734179f743c2f727e26838864d9959275239b","src/simd_fallback/teddy128.rs":"502d3bff4c78963f343875aa00b15e3625f3ee2ba1de01f029211292a5721912","src/sparse.rs":"04e70bb0bd006f806e8c9cf19825625e907036304823bc03b6c8f2e5046a38ef","src/testdata/LICENSE":"58cf078acc03da3e280a938c2bd9943f554fc9b6ced89ad93ba35ca436872899","src/testdata/README":"45f869e37f798905c773bfbe0ef19a5fb7e585cbf0b7c21b5b5a784e8cec3c14","src/testdata/basic.dat":"3756a5bdd6f387ed34731197fbdaab8521b0ae1726250100ba235756cb42b4b1","src/testdata/nullsubexpr.dat":"496ac0278eec3b6d9170faace14554569032dd3d909618364d9326156de39ecf","src/testdata/repetition.dat":"1f7959063015b284b18a4a2c1c8b416d438a2d6c4b1a362da43406b865f50e69","src/utf8.rs":"75a4516d636566938e896f10687fc16b3ecd8b09de0093015359eb0d11471d5d","tests/api.rs":"803faacd9ac5efc149959ff63a2132f5ca155f75a246b79807c549b464b5c497","tests/api_str.rs":"aef1388c9de8fe9044539745d1975c734e9d268ff02a2dbb1edc5b754cc56d77","tests/bytes.rs":"ae7601bf69307c541b56d85b6f77369051f55096dddfa9d81d470adb54f42a5d","tests/crazy.rs":"bf3a1c3b8620d7d4c9aa72ab5e027fec02ef4bcec45a884e89ad70e82c445a8d","tests/flags.rs":"cd3788760defc667b32adb2732b6657b289372e1051fc240dfd0e435e2835353","tests/fowler.rs":"e0b7420fa5b636301d4c11cd4dfb803ec09fa7b27be47c594d231167de1241e3","tests/macros.rs":"e0329eedbe073e9dca649e651bc869ef2e6030b4b2a2d5959033b58bfddeb858","tests/macros_bytes.rs":"647c42525b8df5151f4f4cab40f515dd2fa5a05246562f5575902e162b0af2c0","tests/macros_str.rs":"124767f6ff33766502e89050ad498d3aba21e975aefeaf64ae76b0abe13b4fdb","tests/misc.rs":"c8cc85ac916980ebd053df2444fe9b795a00f2ac42c5cd828fc3df487f689265","tests/multiline.rs":"4e872a9473bc229d955716c18c77aa3530625a8f3a28ecaefdb70b9aff0f0a8b","tests/noparse.rs":"9c5acf252655f8daba67d5aa15a98f556a8bb5de87de9ecc8e8e3b50614a65c2","tests/plugin.rs":"9a51dfcbdad4e2a19f43598d74e0dd745a862a01b4165fce387a94083e5b588f","tests/regression.rs":"28bd9e3b6df7b8b48b4c2e069f72f4f59d5e64b091ed2559bd3b0516e27f626a","tests/replace.rs":"4a65b863ad012366328062784e323f13c4bbccce89ff709196e4d84d94ef1636","tests/searcher.rs":"124c9909a1e8fcfddee8ecfae3a1fb9d76f9ddac62fda23d9b895744728a0aa8","tests/set.rs":"0cecf5d2acb3d0df2b051e0ab56a4a853bb58c0e922480f5e460b9a05a0d16af","tests/shortest_match.rs":"7ca223e0a61b1a26a1f36465ab49de021475295e18f4d6421497461af1e107be","tests/suffix_reverse.rs":"cd497c1a51246a8fc727062133470e0c6300620ad80333d20d63c0ee4224ef1c","tests/test_backtrack.rs":"b07a114b2eb7f1f17357629be9c8458e31f7952fb2c327d66d9415f08855c624","tests/test_backtrack_bytes.rs":"dd3cec3c630d6f41892c9111bee87227bf47126651b2402672c30b084fa9c28c","tests/test_backtrack_utf8bytes.rs":"b576b933d6be21f8cedb281e456441d4278350b0
145a139dbccb1861639a54f9","tests/test_default.rs":"768a1fabafc7eb815bfaf55c22606dc884e1dbb88d7fc40fd561e8faaa61e6d9","tests/test_default_bytes.rs":"c0b66b63abd263f3fc7e5fcacf4a93cb7fc40c17b764edf8700ae9ba1ab950ff","tests/test_nfa.rs":"aad36ca01f3f7eb23633a1207056e9056d686be2ef6e3661fad83805fa482927","tests/test_nfa_bytes.rs":"198f7b58c5c7dd0a05f16ddb3b9b63dab29ef2a56448378ac602c5d087c01e4e","tests/test_nfa_utf8bytes.rs":"854d80114ca1bed14d4ad3f2b3bf292ff0fa44e12d7d3f2ec6dd17cbbaa82175","tests/test_plugin.rs":"84be9cabe1cf8fb208638475436b020a75e9ec3e7f885af39e5404adb6fcae03","tests/unicode.rs":"7bd3095678fa227dc722f2b5f60a072c2b1752a5ac8df234cd023ece34c80d8a","tests/word_boundary.rs":"7081317ddcec1e82dd4a2090a571c6abf2ff4bbfa8cd10395e1eb3f386157fae","tests/word_boundary_ascii.rs":"cd0be5b5b485de0ba7994b42e2864585556c3d2d8bf5eab05b58931d9aaf4b87","tests/word_boundary_unicode.rs":"ae4ac0689c6b42ff7628a681d6d99a124d254f35eb1b809137859d3a8afe84fc"},"package":"1731164734096285ec2a5ec7fea5248ae2f5485b3feeb0115af4fda2183b2d1b"}
\ No newline at end of file
+{"files":{".travis.yml":"665e0da54606747aff058ac4ef16d93c64d80b8ff4bf25ccb90d56b78ec80063","CHANGELOG.md":"9c14dc291bd9c0d9834b2e7675d634066e2e077ccf5b6b36ac14b0e3565d956e","Cargo.toml":"ed770dfa3a16e64bef3b19f2032bf3121745d44de899b7e2199c34c8788ec5a8","HACKING.md":"2e32478f02437c77376e79c697bc871e364d55ef296d246e8e9cc23e68384b5e","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"6485b8ed310d3f0340bf1ad1f47645069ce4069dcc6bb46c7d5c6faf41de1fdb","PERFORMANCE.md":"c776b18758b6dc8f2f9d37e9a95261f75c5f744925e8ddf216b83953bf7467b7","README.md":"0cac07f6d9c515387c53bf327f89626fb52770e71bc579f83408ea088d90295d","UNICODE.md":"d7992c142299cb79f524c964ca8d4cf247b5db99fcf4103e4eebaf6c54cf06f6","appveyor.yml":"3cc9496bd76b8ce1cd4be1c369974fd43798352aad595a36d6f4229bdeeb13d7","build.rs":"2ad6bc9ef140544c1e99a14318459a763d1f9f0d659e769439968589d5137626","ci/after_success.sh":"9a6089e0a64057e97b11158219921c664a6ac956c0fad20f007da29ef11c19af","ci/run-kcov":"a81cad2c3de4c6d704502cdc4227d7432b7765613c7b8c426db66ab03fe23bfb","ci/run-shootout-test":"be7edb66a4a65eaef6a2b7e6036b4b732effa8dcd931bf4cbd591d0ca32b8264","ci/script.sh":"b045557bd690a11b93e02bbacf7e84477378f3403dded9bb59746b24691a7ccf","examples/regexdna-input.txt":"156a49710bb3e1ed4bc2bbb0af0f383b747b3d0281453cfff39c296124c598f8","examples/regexdna-output.txt":"35e85b19b70a893d752fd43e54e1e9da08bac43559191cea85b33387c24c4cc1","examples/shootout-regex-dna-bytes.rs":"f097a923fef2e9bd9a9abd3686eea84830e8050bb750bcab840b6575873227ff","examples/shootout-regex-dna-cheat.rs":"4b5a590ec88a9ba67cc4a34621cb038a400d45e64a15a354f3e07fdaf41a935b","examples/shootout-regex-dna-replace.rs":"15677e7de356427fe2b983e918de073a7a730337b090d4ab0e815e1e66f74f73","examples/shootout-regex-dna-single-cheat.rs":"5e743b3a6ec968713ce064a289b1fbd11c302add824d1c749a2ffb546c73128e","examples/shootout-regex-dna-single.rs":"c86969adb0fb08c5e23f749ec6922a58404da7d0e1925c848957a5144ddc8815","examples/shootout-regex-dna.rs":"e627f1e5e24c1cf4e9c9d52b844b6d971c59b7169b0e8762d8d9d1cb7debc6e6","scripts/frequencies.py":"df2cac150bc4ed42d2d97762535a5f260f3fe06d9d516721f68a52671a3b7a3b","scripts/regex-match-tests.py":"f1b3ad870c893510172bd84f8328b66ee99cd7aee7715a95a858468ff3e33635","src/backtrack.rs":"0372ada608dca9c8d3e1fabf56e33498f8ff4fbdfd445873c4a4be6657465c68","src/compile.rs":"32667252ac4b687d9ef1c9dd0f055f0616ab49050870daca338809d32616409b","src/dfa.rs":"163aa4ec0dcdb1ffc7dd687debc1d64acffeeca7861cb4bb303d2da0ad33b3a6","src/error.rs":"d26673a95e372665863c7e01558836ef6e957aadf421d5056525720e16a9683d","src/exec.rs":"94d4f72ead069720aee90625929a2aad237f978e431359f1c3c5ae73580d3266","src/expand.rs":"4333da32d6fbb05ec05bead810eb659c5b012e2d361a9bb9e24bb30e8939fb88","src/freqs.rs":"e25871eec51d1c02c15abbc04d61dbea71fb2dbc15f370c98279687416c969de","src/input.rs":"9b092fe88a66b46dee0dfe0a2e67e4b78b01a32f53b28443b5f382b8514d0cfb","src/lib.rs":"5a31b01bc633a724d9429ed5b455d75600b5d787a352609da2c9e7b23e13201c","src/literal/mod.rs":"c395221efe19f36edd9d92c8f8283e5b30553e0e9b408ccb1430d11f038730f8","src/literal/teddy_avx2/fallback.rs":"f24b70f9dd028eefe2802b7bbc6b2dca97663b03a931058e3a59750f0693a15c","src/literal/teddy_avx2/imp.rs":"b0e4634bc9fbc793e3b0938b97a236df4b8fb17a0ac08040a6394bc73042aa8b","src/literal/teddy_avx2/mod.rs":"2feb1a41e3df8a77e8dbb3a08492ee3cf0bac2f05abddc0e35b964d6f5a86c0c","src/literal/teddy_ssse3/fallback.rs":"f24b70f9dd028eefe2802b7bbc6b2dca97663b03a931058e3a59750f0693a15c","src/literal/teddy_ssse3/imp.rs":"be4565c47a3807ab0
9343f2da4ef0e3297d4776719472efb60dee8e55380c2ec","src/literal/teddy_ssse3/mod.rs":"7554e6598326f3662f7632cc0c7d05aa48baed4a765f4b99647fab3c1b40f750","src/pattern.rs":"4e820c17a2b983050f47d3fd13282094ce9f27b75fd852fcec438d61f07f0b0b","src/pikevm.rs":"9600a8b135075f8cf62683e6b08c73834873a203347d275f1e9ca3a97221dbeb","src/prog.rs":"7fdfa764562867c3b051554921fbf6452d08e6f860ade6657cf021b6a734b899","src/re_builder.rs":"45ce0a6b243d63f6ebb53ea3082df4bf45a9e20fbb00aa477dbd95f4bdbcdf2f","src/re_bytes.rs":"07e5a01581e304072dc710458211eefa84fa5984b9a33f46c605e4f7695f568e","src/re_set.rs":"f0fa1a98f8aa1eeee209fae70f9efd3f9043557de30dc1a940279555b21b688b","src/re_trait.rs":"8ecbdd8db8c8e4194acd71e4a751aa8409bbd495015a01bb7c8b425677f3fa39","src/re_unicode.rs":"02c95c63842144aacb6c74bc17e75266ad621b0fe993f29c8d6144fe24ff6179","src/sparse.rs":"04e70bb0bd006f806e8c9cf19825625e907036304823bc03b6c8f2e5046a38ef","src/testdata/LICENSE":"58cf078acc03da3e280a938c2bd9943f554fc9b6ced89ad93ba35ca436872899","src/testdata/README":"45f869e37f798905c773bfbe0ef19a5fb7e585cbf0b7c21b5b5a784e8cec3c14","src/testdata/basic.dat":"3756a5bdd6f387ed34731197fbdaab8521b0ae1726250100ba235756cb42b4b1","src/testdata/nullsubexpr.dat":"496ac0278eec3b6d9170faace14554569032dd3d909618364d9326156de39ecf","src/testdata/repetition.dat":"1f7959063015b284b18a4a2c1c8b416d438a2d6c4b1a362da43406b865f50e69","src/utf8.rs":"1a2d7bc43ab139bfc2b2610627efd59a7ead2a959492ba73eebb743d482c7957","src/vector/avx2.rs":"1ef39371a9215144b69bf72df9aa3533ea6be44d7425cdc5c8192de79dfc5e96","src/vector/mod.rs":"8fe24ee6848eb7bf14c33ed26f13221ea00a868c9857fda3784a71147087cd55","src/vector/ssse3.rs":"f8549244fddfcb73f538761c0ae5a81c535e5ae5e129cebf8e3e6f9db4fa79f4","tests/api.rs":"803faacd9ac5efc149959ff63a2132f5ca155f75a246b79807c549b464b5c497","tests/api_str.rs":"8ccb6c3dfe0220229ad33fcaf9dea420b414aea17c7498d978efd1b8aceaa980","tests/bytes.rs":"3564a933f12f84d65488c220d71067a9902ed9860522a7d9139a713f818a0b1c","tests/crazy.rs":"b0c593be5022341606ddb15b3d28aa39d2d52977db4b1b72c41c6133a7f7a7e0","tests/flags.rs":"cd3788760defc667b32adb2732b6657b289372e1051fc240dfd0e435e2835353","tests/fowler.rs":"e0b7420fa5b636301d4c11cd4dfb803ec09fa7b27be47c594d231167de1241e3","tests/macros.rs":"f021b62e3ce8d122f6450a5aab36972eccf4c21c62a53c604924d4d01d90c0d8","tests/macros_bytes.rs":"dffa91e2e572f5b223b83eba4d32732b1d76cfcfebfbd7f836eeaa929f637657","tests/macros_str.rs":"4f665f039cd5408fc7b4c0fab2983ef3ebd3dc1cd52f7b7e46d7644a2e85b428","tests/misc.rs":"c8cc85ac916980ebd053df2444fe9b795a00f2ac42c5cd828fc3df487f689265","tests/multiline.rs":"4e872a9473bc229d955716c18c77aa3530625a8f3a28ecaefdb70b9aff0f0a8b","tests/noparse.rs":"8850d31cb95e413e3a67edecce8590cd158f49779abcc2e5722381113346179c","tests/regression.rs":"c8293a778591a7862de4d526d856843b4e3f105bba0b7ebca0ed7ae5e0346232","tests/replace.rs":"f1b8ce1a6fede222c6151d37416458515541da285ec2720d6ac950cbad7991e2","tests/searcher.rs":"124c9909a1e8fcfddee8ecfae3a1fb9d76f9ddac62fda23d9b895744728a0aa8","tests/set.rs":"0cecf5d2acb3d0df2b051e0ab56a4a853bb58c0e922480f5e460b9a05a0d16af","tests/shortest_match.rs":"7ca223e0a61b1a26a1f36465ab49de021475295e18f4d6421497461af1e107be","tests/suffix_reverse.rs":"cd497c1a51246a8fc727062133470e0c6300620ad80333d20d63c0ee4224ef1c","tests/test_backtrack.rs":"b07a114b2eb7f1f17357629be9c8458e31f7952fb2c327d66d9415f08855c624","tests/test_backtrack_bytes.rs":"dd3cec3c630d6f41892c9111bee87227bf47126651b2402672c30b084fa9c28c","tests/test_backtrack_utf8bytes.rs":"b576b933d6be21f8cedb281e456441d4278350b0145a139dbcc
b1861639a54f9","tests/test_default.rs":"dc819a5973dd03a58165afa37da20727e8bf7085f1482414290654483fe20aa1","tests/test_default_bytes.rs":"c5228278e0a1d8fab5157dfd2b52642fd0ec68db346dc133f4c16f178d63e856","tests/test_nfa.rs":"aad36ca01f3f7eb23633a1207056e9056d686be2ef6e3661fad83805fa482927","tests/test_nfa_bytes.rs":"198f7b58c5c7dd0a05f16ddb3b9b63dab29ef2a56448378ac602c5d087c01e4e","tests/test_nfa_utf8bytes.rs":"854d80114ca1bed14d4ad3f2b3bf292ff0fa44e12d7d3f2ec6dd17cbbaa82175","tests/unicode.rs":"cadd620a5d70216d6a3100d18f21598bd3ed6c343023697a7d3c57b7dc45149e","tests/word_boundary.rs":"7081317ddcec1e82dd4a2090a571c6abf2ff4bbfa8cd10395e1eb3f386157fae","tests/word_boundary_ascii.rs":"cd0be5b5b485de0ba7994b42e2864585556c3d2d8bf5eab05b58931d9aaf4b87","tests/word_boundary_unicode.rs":"75dbcc35d3abc0f9795c2ea99e216dc227b0a5b58e9ca5eef767815ff0513921"},"package":"75ecf88252dce580404a22444fc7d626c01815debba56a7f4f536772a5ff19d3"}
\ No newline at end of file
--- a/third_party/rust/regex/.travis.yml
+++ b/third_party/rust/regex/.travis.yml
@@ -1,28 +1,29 @@
 dist: trusty
 sudo: false
 language: rust
 rust:
-  - 1.12.0
-  - stable
-  - beta
-  - nightly
+- 1.20.0
+- stable
+- beta
+- nightly
 script: ci/script.sh
 addons:
   apt:
     packages:
-      - libcurl4-openssl-dev
-      - libelf-dev
-      - libdw-dev
-      - binutils-dev
-      - wget
-      - clang-3.6
-      - cmake
-      - python
-      - python-virtualenv
+    - libcurl4-openssl-dev
+    - libelf-dev
+    - libdw-dev
+    - binutils-dev
+    - wget
+    - clang-3.6
+    - cmake
+    - python
+    - python-virtualenv
 after_success: ci/after_success.sh
-env:
-  global:
-    secure: "GdGE0kFYiJGccVZligur7JlWRCy49eH6uWPl71mrnaaW8jLHmkva0jVb0LB9e5ol3YMaMCgrwM2pcj3Uk2C08YcGIoEKOLlDVho351KaD6XfclZ29CQTAPMz4Xut2IcB4YeuZlDrOnM26guTIOtn2NAjeQgOhpM/ErzVBgkB+wQ="
 notifications:
   email:
     on_success: never
+branches:
+  only:
+  - master
+  - auto
--- a/third_party/rust/regex/CHANGELOG.md
+++ b/third_party/rust/regex/CHANGELOG.md
@@ -1,8 +1,209 @@
+1.0.0 (2018-05-01)
+==================
+This release marks the 1.0 release of regex.
+
+While this release includes some breaking changes, most users of older versions
+of the regex library should be able to migrate to 1.0 by simply bumping the
+version number. The important changes are as follows:
+
+* We adopt Rust 1.20 as the new minimum supported version of Rust for regex.
+  We also tentatively adopt a policy that permits bumping the minimum supported
+  version of Rust in minor version releases of regex, but not in patch releases.
+  That is, with respect to semver, we do not strictly consider bumping the
+  minimum version of Rust to be a breaking change, but adopt a conservative
+  stance as a compromise.
+* Octal syntax in regular expressions has been disabled by default. This
+  permits better error messages that inform users that backreferences aren't
+  available. Octal syntax can be re-enabled via the corresponding option on
+  `RegexBuilder`.
+* `(?-u:\B)` is no longer allowed in Unicode regexes since it can match at
+  invalid UTF-8 code unit boundaries. `(?-u:\b)` is still allowed in Unicode
+  regexes.
+* The `From<regex_syntax::Error>` impl has been removed. This formally removes
+  the public dependency on `regex-syntax`.
+* A new feature, `use_std`, has been added and enabled by default. Disabling
+  the feature will result in a compilation error. In the future, this may
+  permit us to support `no_std` environments (w/ `alloc`) in a backwards
+  compatible way.
+
+For more information and discussion, please see
+[1.0 release tracking issue](https://github.com/rust-lang/regex/issues/457).
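+
+As a rough sketch (assuming the `octal` option on `RegexBuilder` mentioned
+above, with the regex crate as a dependency), re-enabling octal syntax looks
+roughly like this:
+
+```rust
+use regex::RegexBuilder;
+
+// Octal escapes are rejected by default in 1.0; opt back in via the builder.
+let re = RegexBuilder::new(r"\141").octal(true).build().unwrap();
+assert!(re.is_match("a")); // \141 is the octal escape for 'a' (0o141 == 97)
+```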
+
+
+0.2.11 (2018-05-01)
+===================
+This release primarily contains bug fixes. Some of them resolve bugs where
+the parser could panic.
+
+New features:
+
+* [FEATURE #459](https://github.com/rust-lang/regex/pull/459):
+  Include C++'s standard regex library and Boost's regex library in the
+  benchmark harness. We now include D/libphobos, C++/std, C++/boost, Oniguruma,
+  PCRE1, PCRE2, RE2 and Tcl in the harness.
+
+Bug fixes:
+
+* [BUG #445](https://github.com/rust-lang/regex/issues/445):
+  Clarify order of indices returned by RegexSet match iterator.
+* [BUG #461](https://github.com/rust-lang/regex/issues/461):
+  Improve error messages for invalid regexes like `[\d-a]`.
+* [BUG #464](https://github.com/rust-lang/regex/issues/464):
+  Fix a bug in the error message pretty printer that could cause a panic when
+  a regex contained a literal `\n` character.
+* [BUG #465](https://github.com/rust-lang/regex/issues/465):
+  Fix a panic in the parser that was caused by applying a repetition operator
+  to `(?flags)`.
+* [BUG #466](https://github.com/rust-lang/regex/issues/466):
+  Fix a bug where `\pC` was not recognized as an alias for `\p{Other}`.
+* [BUG #470](https://github.com/rust-lang/regex/pull/470):
+  Fix a bug where literal searches did more work than necessary for anchored
+  regexes.
+
+
+0.2.10 (2018-03-16)
+===================
+This release primarily updates the regex crate to changes made in `std::arch`
+on nightly Rust.
+
+New features:
+
+* [FEATURE #458](https://github.com/rust-lang/regex/pull/458):
+  The `Hir` type in `regex-syntax` now has a printer.
+
+
+0.2.9 (2018-03-12)
+==================
+This release introduces a new nightly only feature, `unstable`, which enables
+SIMD optimizations for certain types of regexes. No additional compile time
+options are necessary, and the regex crate will automatically choose the
+best CPU features at run time. As a result, the `simd` (nightly only) crate
+dependency has been dropped.
+
+New features:
+
+* [FEATURE #456](https://github.com/rust-lang/regex/pull/456):
+  The regex crate now includes AVX2 optimizations in addition to the extant
+  SSSE3 optimization.
+
+Bug fixes:
+
+* [BUG #455](https://github.com/rust-lang/regex/pull/455):
+  Fix a bug where `(?x)[ / - ]` failed to parse.
+
+
+0.2.8 (2018-03-12)
+==================
+Bug fixes:
+
+* [BUG #454](https://github.com/rust-lang/regex/pull/454):
+  Fix a bug in the nest limit checker being too aggressive.
+
+
+0.2.7 (2018-03-07)
+==================
+This release includes a ground-up rewrite of the regex-syntax crate, which has
+been in development for over a year.
+
+New features:
+
+* Error messages for invalid regexes have been greatly improved. You get these
+  automatically; you don't need to do anything. In addition to better
+  formatting, error messages will now explicitly call out the use of look
+  around. When regex 1.0 is released, this will happen for backreferences as
+  well.
+* Full support for intersection, difference and symmetric difference of
+  character classes. These can be used via the `&&`, `--` and `~~` binary
+  operators within classes.
+* A Unicode Level 1 conformant implementation of `\p{..}` character classes.
+  Things like `\p{scx:Hira}`, `\p{age:3.2}` or `\p{Changes_When_Casefolded}`
+  now work. All property name and value aliases are supported, and properties
+  are selected via loose matching. e.g., `\p{Greek}` is the same as
+  `\p{G r E e K}`.
+* A new `UNICODE.md` document has been added to this repository that
+  exhaustively documents support for UTS#18.
+* Empty sub-expressions are now permitted in most places. That is, `()+` is
+  now a valid regex.
+* Almost everything in regex-syntax now uses constant stack space, even when
+  performing analysis that requires structural induction. This reduces the risk
+  of a user provided regular expression causing a stack overflow.
+* [FEATURE #174](https://github.com/rust-lang/regex/issues/174):
+  The `Ast` type in `regex-syntax` now contains span information.
+* [FEATURE #424](https://github.com/rust-lang/regex/issues/424):
+  Support `\u`, `\u{...}`, `\U` and `\U{...}` syntax for specifying code points
+  in a regular expression.
+* [FEATURE #449](https://github.com/rust-lang/regex/pull/449):
+  Add a `Replace::by_ref` adapter for use of a replacer without consuming it.
+
+Bug fixes:
+
+* [BUG #446](https://github.com/rust-lang/regex/issues/446):
+  We re-enable the Boyer-Moore literal matcher.
+
+
+0.2.6 (2018-02-08)
+==================
+Bug fixes:
+
+* [BUG #446](https://github.com/rust-lang/regex/issues/446):
+  Fixes a bug in the new Boyer-Moore searcher that results in a match failure.
+  We fix this bug by temporarily disabling Boyer-Moore.
+
+
+0.2.5 (2017-12-30)
+==================
+Bug fixes:
+
+* [BUG #437](https://github.com/rust-lang/regex/issues/437):
+  Fixes a bug in the new Boyer-Moore searcher that results in a panic.
+
+
+0.2.4 (2017-12-30)
+==================
+New features:
+
+* [FEATURE #348](https://github.com/rust-lang/regex/pull/348):
+  Improve performance for capture searches on anchored regex.
+  (Contributed by @ethanpailes. Nice work!)
+* [FEATURE #419](https://github.com/rust-lang/regex/pull/419):
+  Expand literal searching to include Tuned Boyer-Moore in some cases.
+  (Contributed by @ethanpailes. Nice work!)
+
+Bug fixes:
+
+* [BUG](https://github.com/rust-lang/regex/pull/436):
+  The regex compiler plugin has been removed.
+* [BUG](https://github.com/rust-lang/regex/pull/436):
+  `simd` has been bumped to `0.2.1`, which fixes a Rust nightly build error.
+* [BUG](https://github.com/rust-lang/regex/pull/436):
+  Bring the benchmark harness up to date.
+
+
+0.2.3 (2017-11-30)
+==================
+New features:
+
+* [FEATURE #374](https://github.com/rust-lang/regex/pull/374):
+  Add `impl From<Match> for &str`.
+* [FEATURE #380](https://github.com/rust-lang/regex/pull/380):
+  Derive `Clone` and `PartialEq` on `Error`.
+* [FEATURE #400](https://github.com/rust-lang/regex/pull/400):
+  Update to Unicode 10.
+
+Bug fixes:
+
+* [BUG #375](https://github.com/rust-lang/regex/issues/375):
+  Fix a bug that prevented the bounded backtracker from terminating.
+* [BUG #393](https://github.com/rust-lang/regex/issues/393),
+  [BUG #394](https://github.com/rust-lang/regex/issues/394):
+  Fix bug with `replace` methods for empty matches.
+
+
 0.2.2 (2017-05-21)
 ==================
 New features:
 
 * [FEATURE #341](https://github.com/rust-lang/regex/issues/341):
   Support nested character classes and intersection operation.
   For example, `[\p{Greek}&&\pL]` matches greek letters and
   `[[0-9]&&[^4]]` matches every decimal digit except `4`.
--- a/third_party/rust/regex/Cargo.toml
+++ b/third_party/rust/regex/Cargo.toml
@@ -1,94 +1,100 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g. crates.io) dependencies
+#
+# If you believe there's an error in this file please file an
+# issue against the rust-lang/cargo repository. If you're
+# editing this file be aware that the upstream Cargo.toml
+# will likely look very different (and much more reasonable)
+
 [package]
 name = "regex"
-version = "0.2.2"  #:version
+version = "1.0.0"
 authors = ["The Rust Project Developers"]
+description = "An implementation of regular expressions for Rust. This implementation uses\nfinite automata and guarantees linear time matching on all inputs.\n"
+homepage = "https://github.com/rust-lang/regex"
+documentation = "https://docs.rs/regex"
+readme = "README.md"
+categories = ["text-processing"]
 license = "MIT/Apache-2.0"
-readme = "README.md"
 repository = "https://github.com/rust-lang/regex"
-documentation = "https://doc.rust-lang.org/regex"
-homepage = "https://github.com/rust-lang/regex"
-description = """
-An implementation of regular expressions for Rust. This implementation uses
-finite automata and guarantees linear time matching on all inputs.
-"""
-categories = ["text-processing"]
+[profile.test]
+debug = true
 
-[dependencies]
-# For very fast prefix literal matching.
-aho-corasick = "0.6.0"
-# For skipping along search text quickly when a leading byte is known.
-memchr = "1.0.0"
-# For managing regex caches quickly across multiple threads.
-thread_local = "0.3.2"
-# For parsing regular expressions.
-regex-syntax = { path = "regex-syntax", version = "0.4.1" }
-# For accelerating text search.
-simd = { version = "0.1.1", optional = true }
-# For compiling UTF-8 decoding into automata.
-utf8-ranges = "1.0.0"
+[profile.bench]
+debug = true
 
-[dev-dependencies]
-# For examples.
-lazy_static = "0.2.2"
-# For property based tests.
-quickcheck = { version = "0.4.1", default-features = false }
-# For generating random test data.
-rand = "0.3.15"
-
-[features]
-# Enable to use the unstable pattern traits defined in std.
-pattern = []
-# Enable to use simd acceleration.
-simd-accel = ["simd"]
+[profile.release]
+debug = true
 
 [lib]
-# There are no benchmarks in the library code itself
 bench = false
 
-# Run the test suite on the default behavior of Regex::new.
-# This includes a mish mash of NFAs and DFAs, which are chosen automatically
-# based on the regex. We test both of the NFA implementations by forcing their
-# usage with the test definitions below. (We can't test the DFA implementations
-# in the same way since they can't be used for every regex tested.)
 [[test]]
+name = "default"
 path = "tests/test_default.rs"
-name = "default"
 
-# The same as the default tests, but run on bytes::Regex.
 [[test]]
+name = "default-bytes"
 path = "tests/test_default_bytes.rs"
-name = "default-bytes"
 
-# Run the test suite on the NFA algorithm over Unicode codepoints.
 [[test]]
-path = "tests/test_nfa.rs"
 name = "nfa"
+path = "tests/test_nfa.rs"
 
-# Run the test suite on the NFA algorithm over bytes that match UTF-8 only.
+[[test]]
+name = "nfa-utf8bytes"
+path = "tests/test_nfa_utf8bytes.rs"
+
 [[test]]
-path = "tests/test_nfa_utf8bytes.rs"
-name = "nfa-utf8bytes"
+name = "nfa-bytes"
+path = "tests/test_nfa_bytes.rs"
+
+[[test]]
+name = "backtrack"
+path = "tests/test_backtrack.rs"
+
+[[test]]
+name = "backtrack-utf8bytes"
+path = "tests/test_backtrack_utf8bytes.rs"
 
-# Run the test suite on the NFA algorithm over arbitrary bytes.
 [[test]]
-path = "tests/test_nfa_bytes.rs"
-name = "nfa-bytes"
+name = "backtrack-bytes"
+path = "tests/test_backtrack_bytes.rs"
+[dependencies.aho-corasick]
+version = "0.6.0"
 
-# Run the test suite on the backtracking engine over Unicode codepoints.
-[[test]]
-path = "tests/test_backtrack.rs"
-name = "backtrack"
+[dependencies.memchr]
+version = "2.0.0"
+
+[dependencies.regex-syntax]
+version = "0.6.0"
+
+[dependencies.thread_local]
+version = "0.3.2"
 
-# Run the test suite on the backtracking engine over bytes that match UTF-8
-# only.
-[[test]]
-path = "tests/test_backtrack_utf8bytes.rs"
-name = "backtrack-utf8bytes"
+[dependencies.utf8-ranges]
+version = "1.0.0"
+[dev-dependencies.lazy_static]
+version = "1"
+
+[dev-dependencies.quickcheck]
+version = "0.6"
+default-features = false
 
-# Run the test suite on the backtracking engine over arbitrary bytes.
-[[test]]
-path = "tests/test_backtrack_bytes.rs"
-name = "backtrack-bytes"
+[dev-dependencies.rand]
+version = "0.4"
 
-[profile.test]
-debug = true
+[features]
+default = ["use_std"]
+pattern = []
+unstable = ["pattern"]
+use_std = []
+[badges.appveyor]
+repository = "rust-lang-libs/regex"
+
+[badges.travis-ci]
+repository = "rust-lang/regex"
--- a/third_party/rust/regex/HACKING.md
+++ b/third_party/rust/regex/HACKING.md
@@ -180,47 +180,46 @@ engine. This scratch space is used durin
 DFA, it contains compiled states that are reused on subsequent searches).
 
 ### Programs
 
 A regular expression program is essentially a sequence of opcodes produced by
 the compiler plus various facts about the regular expression (such as whether
 it is anchored, its capture names, etc.).
 
-### The regex! macro (or why `regex::internal` exists)
-
-The `regex!` macro is defined in the `regex_macros` crate as a compiler plugin,
-which is maintained in this repository. The `regex!` macro compiles a regular
-expression at compile time into specialized Rust code.
+### The regex! macro
 
-The `regex!` macro was written when this library was first conceived and
-unfortunately hasn't changed much since then. In particular, it encodes the
-entire Pike VM into stack allocated space (no heap allocation is done). When
-`regex!` was first written, this provided a substantial speed boost over
-so-called "dynamic" regexes compiled at runtime, and in particular had much
-lower overhead per match. This was because the only matching engine at the
-time was the Pike VM. The addition of other matching engines has inverted
-the relationship; the `regex!` macro is almost never faster than the dynamic
-variant. (In fact, it is typically substantially slower.)
+The `regex!` macro no longer exists. It was developed in a bygone era as a
+compiler plugin during the infancy of the regex crate. Back then, the only
+matching engine in the crate was the Pike VM. The `regex!` macro was, itself,
+also a Pike VM. The only advantages it offered over the dynamic Pike VM that
+was built at runtime were the following:
+
+  1. Syntax checking was done at compile time. Your Rust program wouldn't
+     compile if your regex didn't compile.
+  2. Reduction of overhead that was proportional to the size of the regex.
+     For the most part, this overhead consisted of heap allocation, which
+     was nearly eliminated in the compiler plugin.
 
-In order to build the `regex!` macro this way, it must have access to some
-internals of the regex library, which is in a distinct crate. (Compiler plugins
-must be part of a distinct crate.) Namely, it must be able to compile a regular
-expression and access its opcodes. The necessary internals are exported as part
-of the top-level `internal` module in the regex library, but is hidden from
-public documentation. In order to present a uniform API between programs build
-by the `regex!` macro and their dynamic analoges, the `Regex` type is an enum
-whose variants are hidden from public documentation.
+The main takeaway here is that the compiler plugin was a marginally faster
+version of a slow regex engine. As the regex crate evolved, it grew other regex
+engines (DFA, bounded backtracker) and sophisticated literal optimizations.
+The regex macro didn't keep pace, and it therefore became (dramatically) slower
+than the dynamic engines. The only reason left to use it was for the compile
+time guarantee that your regex is correct. Fortunately, Clippy (the Rust lint
+tool) has a lint that checks the validity of your regular expressions, which
+replaces that use case.
 
-In the future, the `regex!` macro should probably work more like Ragel, but
-it's not clear how hard this is. In particular, the `regex!` macro should be
-able to support all the features of dynamic regexes, which may be hard to do
-with a Ragel-style implementation approach. (Which somewhat suggests that the
-`regex!` macro may also need to grow conditional execution logic like the
-dynamic variants, which seems rather grotesque.)
+Additionally, the regex compiler plugin stopped receiving maintenance. Nobody
+complained. At that point, it seemed prudent to just remove it.
+
+Will a compiler plugin be brought back? The future is murky, but there is
+definitely an opportunity there to build something that is faster than the
+dynamic engines in some cases. But it will be challenging! As of now, there
+are no plans to work on this.
 
 
 ## Testing
 
 A key aspect of any mature regex library is its test suite. A subset of the
 tests in this library come from Glenn Fowler's AT&T test suite (its online
 presence seems gone at the time of writing). The source of the test suite is
 located in src/testdata. The scripts/regex-match-tests.py takes the test suite
@@ -231,17 +230,16 @@ tests/tests.rs. Some of these tests were
 
 The biggest source of complexity in the tests is related to answering this
 question: how can we reuse the tests to check all of our matching engines? One
 approach would have been to encode every test into some kind of format (like
 the AT&T test suite) and code generate tests for each matching engine. The
 approach we use in this library is to create a Cargo.toml entry point for each
 matching engine we want to test. The entry points are:
 
-* `tests/test_plugin.rs` - tests the `regex!` macro
 * `tests/test_default.rs` - tests `Regex::new`
 * `tests/test_default_bytes.rs` - tests `bytes::Regex::new`
 * `tests/test_nfa.rs` - tests `Regex::new`, forced to use the NFA
   algorithm on every regex.
 * `tests/test_nfa_bytes.rs` - tests `Regex::new`, forced to use the NFA
   algorithm on every regex and use *arbitrary* byte based programs.
 * `tests/test_nfa_utf8bytes.rs` - tests `Regex::new`, forced to use the NFA
   algorithm on every regex and use *UTF-8* byte based programs.
@@ -256,60 +254,77 @@ The lazy DFA and pure literal engines ar
 they cannot be used on every regular expression. Instead, we rely on
 `tests/test_dynamic.rs` to test the lazy DFA and literal engines when possible.
 
 Since the tests are repeated several times, and because `cargo test` runs all
 entry points, it can take a while to compile everything. To reduce compile
 times slightly, try using `cargo test --test default`, which will only use the
 `tests/test_default.rs` entry point.
 
-N.B. To run tests for the `regex!` macro, use:
-
-    cargo test --manifest-path regex_macros/Cargo.toml
-
 
 ## Benchmarking
 
 The benchmarking in this crate is made up of many micro-benchmarks. Currently,
 there are two primary sets of benchmarks: the benchmarks that were adopted
-at this library's inception (in `benches/src/misc.rs`) and a newer set of
+at this library's inception (in `bench/src/misc.rs`) and a newer set of
 benchmarks meant to test various optimizations. Specifically, the latter set
-contain some analysis and are in `benches/src/sherlock.rs`. Also, the latter
+contain some analysis and are in `bench/src/sherlock.rs`. Also, the latter
 set are all executed on the same lengthy input whereas the former benchmarks
 are executed on strings of varying length.
 
 There is also a smattering of benchmarks for parsing and compilation.
 
 Benchmarks are in a separate crate so that its dependencies can be managed
 separately from the main regex crate.
 
 Benchmarking follows a similarly wonky setup as tests. There are multiple entry
 points:
 
-* `bench_rust_plugin.rs` - benchmarks the `regex!` macro
 * `bench_rust.rs` - benchmarks `Regex::new`
 * `bench_rust_bytes.rs` benchmarks `bytes::Regex::new`
 * `bench_pcre.rs` - benchmarks PCRE
 * `bench_onig.rs` - benchmarks Oniguruma
 
 The PCRE and Oniguruma benchmarks exist as a comparison point to a mature
 regular expression library. In general, this regex library compares favorably
 (there are even a few benchmarks that PCRE simply runs too slowly on or
 outright can't execute at all). I would love to add other regular expression
 library benchmarks (especially RE2).
 
 If you're hacking on one of the matching engines and just want to see
 benchmarks, then all you need to run is:
 
-    $ ./run-bench rust
+    $ ./bench/run rust
 
 If you want to compare your results with older benchmarks, then try:
 
-    $ ./run-bench rust | tee old
+    $ ./bench/run rust | tee old
     $ ... make it faster
-    $ ./run-bench rust | tee new
-    $ cargo-benchcmp old new --improvements
+    $ ./bench/run rust | tee new
+    $ cargo benchcmp old new --improvements
 
 The `cargo-benchcmp` utility is available here:
 https://github.com/BurntSushi/cargo-benchcmp
 
-The `run-bench` utility can run benchmarks for PCRE and Oniguruma too. See
-`./run-bench --help`.
+The `./bench/run` utility can run benchmarks for PCRE and Oniguruma too. See
+`./bench/run --help`.
+
+## Dev Docs
+
+When digging your teeth into the codebase for the first time, the
+crate documentation can be a great resource. By default `rustdoc`
+will strip out all documentation of private crate members in an
+effort to help consumers of the crate focus on the *interface*
+without having to concern themselves with the *implementation*.
+Normally this is a great thing, but if you want to start hacking
+on regex internals it is not what you want. Many of the private members
+of this crate are well documented with rustdoc style comments, and
+it would be a shame to miss out on the opportunity that this presents.
+You can generate the private docs with:
+
+```
+$ rustdoc --crate-name docs src/lib.rs -o target/doc -L target/debug/deps --no-defaults --passes collapse-docs --passes unindent-comments
+```
+
+Then just point your browser at `target/doc/regex/index.html`.
+
+See https://github.com/rust-lang/rust/issues/15347 for more info
+about generating developer docs for internal use.
--- a/third_party/rust/regex/PERFORMANCE.md
+++ b/third_party/rust/regex/PERFORMANCE.md
@@ -1,13 +1,13 @@
 Your friendly guide to understanding the performance characteristics of this
 crate.
 
 This guide assumes some familiarity with the public API of this crate, which
-can be found here: http://doc.rust-lang.org/regex/regex/index.html
+can be found here: https://docs.rs/regex
 
 ## Theory vs. Practice
 
 One of the design goals of this crate is to provide worst case linear time
 behavior with respect to the text searched using finite state automata. This
 means that, *in theory*, the performance of this crate is much better than most
 regex implementations, which typically use backtracking which has worst case
 exponential time.
--- a/third_party/rust/regex/README.md
+++ b/third_party/rust/regex/README.md
@@ -6,26 +6,27 @@ like look around and backreferences. In 
 linear time with respect to the size of the regular expression and search text.
 Much of the syntax and implementation is inspired
 by [RE2](https://github.com/google/re2).
 
 [![Build Status](https://travis-ci.org/rust-lang/regex.svg?branch=master)](https://travis-ci.org/rust-lang/regex)
 [![Build status](https://ci.appveyor.com/api/projects/status/github/rust-lang/regex?svg=true)](https://ci.appveyor.com/project/rust-lang-libs/regex)
 [![Coverage Status](https://coveralls.io/repos/github/rust-lang/regex/badge.svg?branch=master)](https://coveralls.io/github/rust-lang/regex?branch=master)
 [![](http://meritbadge.herokuapp.com/regex)](https://crates.io/crates/regex)
+[![Rust](https://img.shields.io/badge/rust-1.20%2B-blue.svg?maxAge=3600)](https://github.com/rust-lang/regex)
 
 ### Documentation
 
-[Module documentation with examples](https://doc.rust-lang.org/regex).
-The module documentation also include a comprehensive description of the syntax
-supported.
+[Module documentation with examples](https://docs.rs/regex).
+The module documentation also includes a comprehensive description of the
+syntax supported.
 
 Documentation with examples for the various matching functions and iterators
 can be found on the
-[`Regex` type](https://doc.rust-lang.org/regex/regex/struct.Regex.html).
+[`Regex` type](https://docs.rs/regex/*/regex/struct.Regex.html).
 
 ### Usage
 
 Add this to your `Cargo.toml`:
 
 ```toml
 [dependencies]
 regex = "0.2"
@@ -183,58 +184,55 @@ let matches: Vec<_> = set.matches("fooba
 assert_eq!(matches, vec![0, 2, 3, 4, 6]);
 
 // You can also test whether a particular regex matched:
 let matches = set.matches("foobar");
 assert!(!matches.matched(5));
 assert!(matches.matched(6));
 ```
 
-### Usage: `regex!` compiler plugin
-
-**WARNING**: The `regex!` compiler plugin is orders of magnitude slower than
-the normal `Regex::new(...)` usage. You should not use the compiler plugin
-unless you have a very special reason for doing so. The performance difference
-may be the temporary, but the path forward at this point isn't clear.
-
-The `regex!` compiler plugin will compile your regexes at compile time. **This
-only works with a nightly compiler.**
-
-Here is a small example:
-
-```rust
-#![feature(plugin)]
+### Usage: enable SIMD optimizations
 
-#![plugin(regex_macros)]
-extern crate regex;
-
-fn main() {
-    let re = regex!(r"(\d{4})-(\d{2})-(\d{2})");
-    let caps = re.captures("2010-03-14").unwrap();
+This crate provides an `unstable` feature that can only be enabled on nightly
+Rust. When this feature is enabled, the regex crate will use SIMD optimizations
+if your CPU supports them. No additional compile time flags are required; the
+regex crate will detect your CPU support at runtime.
 
-    assert_eq!("2010", caps[1]);
-    assert_eq!("03", caps[2]);
-    assert_eq!("14", caps[3]);
-}
-```
-
-Notice that we never `unwrap` the result of `regex!`. This is because your
-*program* won't compile if the regex doesn't compile. (Try `regex!("(")`.)
+When `std::arch` becomes stable, then these optimizations will be enabled
+automatically.
 
 
 ### Usage: a regular expression parser
 
 This repository contains a crate that provides a well tested regular expression
-parser and abstract syntax. It provides no facilities for compilation or
-execution. This may be useful if you're implementing your own regex engine or
-otherwise need to do analysis on the syntax of a regular expression. It is
-otherwise not recommended for general use.
+parser, abstract syntax and a high-level intermediate representation for
+convenient analysis. It provides no facilities for compilation or execution.
+This may be useful if you're implementing your own regex engine or otherwise
+need to do analysis on the syntax of a regular expression. It is otherwise not
+recommended for general use.
 
-[Documentation for `regex-syntax` with
-examples](https://doc.rust-lang.org/regex/regex_syntax/index.html).
+[Documentation for `regex-syntax`.](https://docs.rs/regex-syntax)
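+
+As a brief sketch (assuming the regex-syntax 0.6 `Parser` API), parsing a
+pattern into the high-level intermediate representation looks roughly like:
+
+```rust
+use regex_syntax::Parser;
+
+// Parse only; this analyzes the pattern but builds no matcher.
+let hir = Parser::new().parse("a|b").unwrap();
+println!("{:?}", hir);
+```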
+
+
+### Minimum Rust version policy
+
+This crate's minimum supported `rustc` version is `1.20.0`.
 
-# License
+The current **tentative** policy is that the minimum Rust version required to
+use this crate can be increased in minor version updates. For example, if
+regex 1.0.0 requires Rust 1.20.0, then regex 1.0.z for all values of `z` will
+also require Rust 1.20.0 or newer. However, regex 1.y for `y > 0` may require
+a newer minimum version of Rust.
+
+In general, this crate will be conservative with respect to the minimum
+supported version of Rust.
+
 
-`regex` is primarily distributed under the terms of both the MIT license and
-the Apache License (Version 2.0), with portions covered by various BSD-like
-licenses.
+### License
+
+This project is licensed under either of
 
-See LICENSE-APACHE, and LICENSE-MIT for details.
+ * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
+   http://www.apache.org/licenses/LICENSE-2.0)
+ * MIT license ([LICENSE-MIT](LICENSE-MIT) or
+   http://opensource.org/licenses/MIT)
+
+at your option.
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex/UNICODE.md
@@ -0,0 +1,250 @@
+# Unicode conformance
+
+This document describes the regex crate's conformance to Unicode's
+[UTS#18](http://unicode.org/reports/tr18/)
+report, which lays out 3 levels of support: Basic, Extended and Tailored.
+
+Full support for Level 1 ("Basic Unicode Support") is provided with two
+exceptions:
+
+1. Line boundaries are not Unicode aware. Namely, only the `\n`
+   (`END OF LINE`) character is recognized as a line boundary.
+2. The compatibility properties specified by
+   [RL1.2a](http://unicode.org/reports/tr18/#RL1.2a)
+   are ASCII-only definitions.
+
+Little to no support is provided for either Level 2 or Level 3. For the most
+part, this is because the features are either complex/hard to implement, or at
+the very least, very difficult to implement without sacrificing performance.
+For example, tackling canonical equivalence such that matching worked as one
+would expect regardless of normalization form would be a significant
+undertaking. This is at least partially a result of the fact that this regex
+engine is based on finite automata, which admits less flexibility than is
+normally associated with backtracking implementations.
+
+
+## RL1.1 Hex Notation
+
+[UTS#18 RL1.1](https://unicode.org/reports/tr18/#Hex_notation)
+
+Hex Notation refers to the ability to specify a Unicode code point in a regular
+expression via its hexadecimal code point representation. This is useful in
+environments that have poor Unicode font rendering or if you need to express a
+code point that is not normally displayable. All forms of hexadecimal notation
+are supported:
+
+    \x7F        hex character code (exactly two digits)
+    \x{10FFFF}  any hex character code corresponding to a Unicode code point
+    \u007F      hex character code (exactly four digits)
+    \u{7F}      any hex character code corresponding to a Unicode code point
+    \U0000007F  hex character code (exactly eight digits)
+    \U{7F}      any hex character code corresponding to a Unicode code point
+
+Briefly, the `\x{...}`, `\u{...}` and `\U{...}` are all exactly equivalent ways
+of expressing hexadecimal code points. Any number of digits can be written
+within the brackets. In contrast, `\xNN`, `\uNNNN`, `\UNNNNNNNN` are all
+fixed-width variants of the same idea.
+
+Note that when Unicode mode is disabled, any non-ASCII Unicode codepoint is
+banned. Additionally, the `\xNN` syntax represents arbitrary bytes when Unicode
+mode is disabled. That is, the regex `\xFF` matches the Unicode codepoint
+U+00FF (encoded as `\xC3\xBF` in UTF-8) while the regex `(?-u)\xFF` matches
+the literal byte `\xFF`.
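+
+A short sketch of that difference (using the `regex` and `regex::bytes` APIs
+described above):
+
+```rust
+use regex::Regex;
+use regex::bytes;
+
+// Unicode mode: \xFF is the code point U+00FF, encoded as UTF-8 in the haystack.
+assert!(Regex::new(r"\xFF").unwrap().is_match("\u{00FF}"));
+// Unicode disabled on a bytes regex: \xFF is the literal byte 0xFF.
+assert!(bytes::Regex::new(r"(?-u)\xFF").unwrap().is_match(b"\xFF"));
+```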
+
+
+## RL1.2 Properties
+
+[UTS#18 RL1.2](https://unicode.org/reports/tr18/#Categories)
+
+Full support for Unicode property syntax is provided. Unicode properties
+provide a convenient way to construct character classes of groups of code
+points specified by Unicode. The regex crate does not provide exhaustive
+support, but covers a useful subset. In particular:
+
+* [General categories](http://unicode.org/reports/tr18/#General_Category_Property)
+* [Scripts and Script Extensions](http://unicode.org/reports/tr18/#Script_Property)
+* [Age](http://unicode.org/reports/tr18/#Age)
+* A smattering of boolean properties, including all of those specified by
+  [RL1.2](http://unicode.org/reports/tr18/#RL1.2) explicitly.
+
+In all cases, property name and value abbreviations are supported, and all
+names/values are matched loosely without regard for case, whitespace or
+underscores. Property name aliases can be found in Unicode's
+[`PropertyAliases.txt`](http://www.unicode.org/Public/UCD/latest/ucd/PropertyAliases.txt)
+file, while property value aliases can be found in Unicode's
+[`PropertyValueAliases.txt`](http://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt)
+file.
+
+The syntax supported is also consistent with the UTS#18 recommendation:
+
+* `\p{Greek}` selects the `Greek` script. Equivalent expressions follow:
+  `\p{sc:Greek}`, `\p{Script:Greek}`, `\p{Sc=Greek}`, `\p{script=Greek}`,
+  `\P{sc!=Greek}`. Similarly for `General_Category` (or `gc` for short) and
+  `Script_Extensions` (or `scx` for short).
+* `\p{age:3.2}` selects all code points in Unicode 3.2.
+* `\p{Alphabetic}` selects the "alphabetic" property and can be abbreviated
+  via `\p{alpha}` (for example).
+* Single letter variants for properties with single letter abbreviations.
+  For example, `\p{Letter}` can be equivalently written as `\pL`.
+
+The following is a list of all properties supported by the regex crate (starred
+properties correspond to properties required by RL1.2):
+
+* `General_Category` \* (including `Any`, `ASCII` and `Assigned`)
+* `Script` \*
+* `Script_Extensions` \*
+* `Age`
+* `ASCII_Hex_Digit`
+* `Alphabetic` \*
+* `Bidi_Control`
+* `Case_Ignorable`
+* `Cased`
+* `Changes_When_Casefolded`
+* `Changes_When_Casemapped`
+* `Changes_When_Lowercased`
+* `Changes_When_Titlecased`
+* `Changes_When_Uppercased`
+* `Dash`
+* `Default_Ignorable_Code_Point` \*
+* `Deprecated`
+* `Diacritic`
+* `Extender`
+* `Grapheme_Base`
+* `Grapheme_Extend`
+* `Hex_Digit`
+* `IDS_Binary_Operator`
+* `IDS_Trinary_Operator`
+* `ID_Continue`
+* `ID_Start`
+* `Join_Control`
+* `Logical_Order_Exception`
+* `Lowercase` \*
+* `Math`
+* `Noncharacter_Code_Point` \*
+* `Pattern_Syntax`
+* `Pattern_White_Space`
+* `Prepended_Concatenation_Mark`
+* `Quotation_Mark`
+* `Radical`
+* `Regional_Indicator`
+* `Sentence_Terminal`
+* `Soft_Dotted`
+* `Terminal_Punctuation`
+* `Unified_Ideograph`
+* `Uppercase` \*
+* `Variation_Selector`
+* `White_Space` \*
+* `XID_Continue`
+* `XID_Start`
+
+
+## RL1.2a Compatibility Properties
+
+[UTS#18 RL1.2a](http://unicode.org/reports/tr18/#RL1.2a)
+
+The regex crate only provides ASCII definitions of the
+[compatibility properties documented in UTS#18 Annex C](http://unicode.org/reports/tr18/#Compatibility_Properties)
+(sans the `\X` class, for matching grapheme clusters, which isn't provided
+at all). This is because it seems to be consistent with most other regular
+expression engines, and in particular, because these are often referred to as
+"ASCII" or "POSIX" character classes.
+
+Note that the `\w`, `\s` and `\d` character classes **are** Unicode aware.
+Their traditional ASCII definition can be used by disabling Unicode. That is,
+`[[:word:]]` and `(?-u)\w` are equivalent.
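+
+For instance (a small sketch using the regex crate as described above):
+
+```rust
+use regex::Regex;
+
+// \w is Unicode-aware by default, so it matches 'δ'.
+assert!(Regex::new(r"^\w$").unwrap().is_match("δ"));
+// Disabling Unicode restricts \w to its ASCII definition.
+assert!(!Regex::new(r"(?-u)^\w$").unwrap().is_match("δ"));
+```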
+
+
+## RL1.3 Subtraction and Intersection
+
+[UTS#18 RL1.3](http://unicode.org/reports/tr18/#Subtraction_and_Intersection)
+
+The regex crate provides full support for nested character classes, along with
+union, intersection (`&&`), difference (`--`) and symmetric difference (`~~`)
+operations on arbitrary character classes.
+
+For example, to match all non-ASCII letters, you could use either
+`[\p{Letter}--\p{Ascii}]` (difference) or `[\p{Letter}&&[^\p{Ascii}]]`
+(intersecting the negation).
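+
+A small sketch of the difference operator in practice (assuming the `ASCII`
+General_Category property listed earlier):
+
+```rust
+use regex::Regex;
+
+// Unicode letters with the ASCII range subtracted via class difference.
+let re = Regex::new(r"^[\p{Letter}--\p{Ascii}]+$").unwrap();
+assert!(re.is_match("φθ"));
+assert!(!re.is_match("abc"));
+```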
+
+
+## RL1.4 Simple Word Boundaries
+
+[UTS#18 RL1.4](http://unicode.org/reports/tr18/#Simple_Word_Boundaries)
+
+The regex crate provides basic Unicode aware word boundary assertions. A word
+boundary assertion can be written as `\b`, or `\B` as its negation. A word
+boundary negation corresponds to a zero-width match, where its adjacent
+characters correspond to word and non-word, or non-word and word characters.
+
+Conformance in this case chooses to define word character in the same way that
+the `\w` character class is defined: a code point that is a member of one of
+the following classes:
+
+* `\p{Alphabetic}`
+* `\p{Join_Control}`
+* `\p{gc:Mark}`
+* `\p{gc:Decimal_Number}`
+* `\p{gc:Connector_Punctuation}`
+
+In particular, this differs slightly from the
+[prescription given in RL1.4](http://unicode.org/reports/tr18/#Simple_Word_Boundaries)
+but is permissible according to
+[UTS#18 Annex C](http://unicode.org/reports/tr18/#Compatibility_Properties).
+Namely, it is convenient and simpler to have `\w` and `\b` be in sync with
+one another.
+
+Finally, Unicode word boundaries can be disabled, which will cause ASCII word
+boundaries to be used instead. That is, `\b` is a Unicode word boundary while
+`(?-u)\b` is an ASCII-only word boundary. This can occasionally be beneficial
+if performance is important, since the implementation of Unicode word
+boundaries is currently sub-optimal on non-ASCII text.
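+
+A brief sketch of the two behaviours (scoping `(?-u)` to the assertion only,
+since non-ASCII literals are banned once Unicode is disabled):
+
+```rust
+use regex::Regex;
+
+// Unicode word boundaries treat 'δ' as a word character...
+assert!(Regex::new(r"\bδ\b").unwrap().is_match("δ"));
+// ...while ASCII word boundaries do not.
+assert!(!Regex::new(r"(?-u:\b)δ(?-u:\b)").unwrap().is_match("δ"));
+```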
+
+
+## RL1.5 Simple Loose Matches
+
+[UTS#18 RL1.5](http://unicode.org/reports/tr18/#Simple_Loose_Matches)
+
+The regex crate provides full support for case insensitive matching in
+accordance with RL1.5. That is, it uses the "simple" case folding mapping. The
+"simple" mapping was chosen because of a key convenient property: every
+"simple" mapping is a mapping from exactly one code point to exactly one other
+code point. This makes case insensitive matching of character classes, for
+example, straight-forward to implement.
+
+When case insensitive mode is enabled (e.g., `(?i)[a]` is equivalent to `a|A`),
+then all character classes are case folded as well.
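+
+For example (a minimal sketch):
+
+```rust
+use regex::Regex;
+
+// Simple case folding maps 'δ' and 'Δ' onto each other.
+let re = Regex::new(r"(?i)δ").unwrap();
+assert!(re.is_match("Δ"));
+assert!(re.is_match("δ"));
+```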
+
+
+## RL1.6 Line Boundaries
+
+[UTS#18 RL1.6](http://unicode.org/reports/tr18/#Line_Boundaries)
+
+The regex crate only provides support for recognizing the `\n` (`END OF LINE`)
+character as a line boundary. This choice was made mostly for implementation
+convenience, and to avoid performance cliffs that Unicode word boundaries are
+subject to.
+
+Ideally, it would be nice to at least support `\r\n` as a line boundary as
+well, and in theory, this could be done efficiently.
+
+
+## RL1.7 Code Points
+
+[UTS#18 RL1.7](http://unicode.org/reports/tr18/#Supplementary_Characters)
+
+The regex crate provides full support for Unicode code point matching. Namely,
+the fundamental atom of any match is always a single code point.
+
+Given Rust's strong ties to UTF-8, the following guarantees are also provided:
+
+* All matches are reported on valid UTF-8 code unit boundaries. That is, any
+  match range returned by the public regex API is guaranteed to successfully
+  slice the string that was searched.
+* As a consequence of the above, it is impossible to match surrogate code
+  points. No support for UTF-16 is provided, so this is never necessary.
+
+Note that when Unicode mode is disabled, the fundamental atom of matching is
+no longer a code point but a single byte. When Unicode mode is disabled, many
+Unicode features are disabled as well. For example, `(?-u)\pL` is not a valid
+regex but `\pL(?-u)\xFF` (matches any Unicode `Letter` followed by the literal
+byte `\xFF`) is.
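+
+A short sketch of the code point vs. byte distinction (assuming the `regex`
+and `regex::bytes` APIs described above):
+
+```rust
+use regex::Regex;
+use regex::bytes;
+
+// Unicode mode: `.` consumes a whole code point ('δ' is two bytes in UTF-8).
+assert_eq!(Regex::new(r".").unwrap().find("δ").unwrap().end(), 2);
+// Bytes regex with Unicode disabled: `.` consumes a single byte.
+assert_eq!(bytes::Regex::new(r"(?-u).").unwrap().find("δ".as_bytes()).unwrap().end(), 1);
+```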
--- a/third_party/rust/regex/appveyor.yml
+++ b/third_party/rust/regex/appveyor.yml
@@ -5,13 +5,15 @@ environment:
   - TARGET: i686-pc-windows-gnu
 install:
   - ps: Start-FileDownload "https://static.rust-lang.org/dist/rust-nightly-${env:TARGET}.exe"
   - rust-nightly-%TARGET%.exe /VERYSILENT /NORESTART /DIR="C:\Program Files (x86)\Rust"
   - SET PATH=%PATH%;C:\Program Files (x86)\Rust\bin
   - SET PATH=%PATH%;C:\MinGW\bin
   - rustc -V
   - cargo -V
-
 build: false
-
 test_script:
   - cargo test --verbose --jobs 4
+branches:
+  only:
+    - master
+    - auto
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex/build.rs
@@ -0,0 +1,27 @@
+use std::env;
+use std::ffi::OsString;
+use std::process::Command;
+
+fn main() {
+    let rustc = env::var_os("RUSTC").unwrap_or(OsString::from("rustc"));
+    let output = Command::new(&rustc)
+        .arg("--version")
+        .output()
+        .unwrap()
+        .stdout;
+    let version = String::from_utf8(output).unwrap();
+
+    // If we're using nightly Rust, then we can enable vector optimizations.
+    // Note that these aren't actually activated unless the `unstable` feature
+    // is enabled.
+    //
+    // We also don't activate these if we've explicitly disabled auto
+    // optimizations. Disabling auto optimizations is intended for use in
+    // tests, so that we can reliably test fallback implementations.
+    if env::var_os("CARGO_CFG_REGEX_DISABLE_AUTO_OPTIMIZATIONS").is_none() {
+        if version.contains("nightly") {
+            println!("cargo:rustc-cfg=regex_runtime_teddy_ssse3");
+            println!("cargo:rustc-cfg=regex_runtime_teddy_avx2");
+        }
+    }
+}
--- a/third_party/rust/regex/ci/after_success.sh
+++ b/third_party/rust/regex/ci/after_success.sh
@@ -6,25 +6,16 @@
 set -e
 
 if [ "$TRAVIS_RUST_VERSION" != "nightly" ] || [ "$TRAVIS_PULL_REQUEST" != "false" ] || [ "$TRAVIS_BRANCH" != "master" ]; then
   exit 0
 fi
 
 env
 
-# Build and upload docs.
-echo '<meta http-equiv=refresh content=0;url=regex/index.html>' > target/doc/index.html
-ve=$(mktemp -d)
-virtualenv "$ve"
-"$ve"/bin/pip install --upgrade pip
-"$ve"/bin/pip install ghp-import
-"$ve"/bin/ghp-import -n target/doc
-git push -qf https://${TOKEN}@github.com/${TRAVIS_REPO_SLUG}.git gh-pages
-
 # Install kcov.
 tmp=$(mktemp -d)
 pushd "$tmp"
 wget https://github.com/SimonKagstrom/kcov/archive/master.tar.gz
 tar zxf master.tar.gz
 mkdir kcov-master/build
 cd kcov-master/build
 cmake ..
--- a/third_party/rust/regex/ci/run-kcov
+++ b/third_party/rust/regex/ci/run-kcov
@@ -9,45 +9,31 @@ tests=(
   backtrack_utf8bytes
   backtrack_bytes
   nfa
   nfa_utf8bytes
   nfa_bytes
   regex
 )
 tmpdir=$(mktemp -d)
-with_plugin=
 coveralls_id=
 
 while true; do
   case "$1" in
-    --with-plugin)
-      with_plugin=yes
-      shift
-      ;;
     --coveralls-id)
       coveralls_id="$2"
       shift 2
       ;;
     *)
       break
       ;;
   esac
 done
 
-if [ -n "$with_plugin" ]; then
-  cargo test --manifest-path regex_macros/Cargo.toml --no-run --verbose
-  kcov \
-    --verify \
-    --include-pattern '/regex/src/' \
-    "$tmpdir/plugin" \
-    $(ls -t ./regex_macros/target/debug/plugin-* | head -n1)
-fi
-
-cargo test --no-run --verbose --jobs 4
+cargo test --no-run --verbose --jobs 4 --features unstable
 for t in ${tests[@]}; do
   kcov \
     --verify \
     --include-pattern '/regex/src/' \
     "$tmpdir/$t" \
     $(find ./target/debug -executable -wholename "./target/debug/$t-*" | head -n1)
 done
 
--- a/third_party/rust/regex/ci/script.sh
+++ b/third_party/rust/regex/ci/script.sh
@@ -1,35 +1,37 @@
 #!/bin/sh
 
 # This is the main CI script for testing the regex crate and its sub-crates.
 
-set -e
+set -ex
 
 # Builds the regex crate and runs tests.
 cargo build --verbose
 cargo doc --verbose
+
+# Run tests. If we have nightly, then enable our nightly features.
 if [ "$TRAVIS_RUST_VERSION" = "nightly" ]; then
-  cargo build --verbose --manifest-path regex-debug/Cargo.toml
-  RUSTFLAGS="-C target-feature=+ssse3" cargo test --verbose --features 'simd-accel pattern' --jobs 4
+  cargo test --verbose --features unstable
 else
-  cargo test --verbose --jobs 4
+  cargo test --verbose
 fi
 
 # Run a test that confirms the shootout benchmarks are correct.
 ci/run-shootout-test
 
 # Run tests on regex-syntax crate.
 cargo test --verbose --manifest-path regex-syntax/Cargo.toml
 cargo doc --verbose --manifest-path regex-syntax/Cargo.toml
 
 # Run tests on regex-capi crate.
 cargo build --verbose --manifest-path regex-capi/Cargo.toml
-(cd regex-capi/ctest && ./compile && LD_LIBRARY_PATH=../target/debug ./test)
-(cd regex-capi/examples && ./compile && LD_LIBRARY_PATH=../target/debug ./iter)
+(cd regex-capi/ctest && ./compile && LD_LIBRARY_PATH=../../target/debug ./test)
+(cd regex-capi/examples && ./compile && LD_LIBRARY_PATH=../../target/debug ./iter)
 
 # Make sure benchmarks compile. Don't run them though because they take a
-# very long time.
+# very long time. Also, check that we can build the regex-debug tool.
 if [ "$TRAVIS_RUST_VERSION" = "nightly" ]; then
+  cargo build --verbose --manifest-path regex-debug/Cargo.toml
   for x in rust rust-bytes pcre1 onig; do
-    (cd bench && ./run $x --no-run)
+    (cd bench && ./run $x --no-run --verbose)
   done
 fi
--- a/third_party/rust/regex/examples/shootout-regex-dna-bytes.rs
+++ b/third_party/rust/regex/examples/shootout-regex-dna-bytes.rs
@@ -50,17 +50,17 @@ fn main() {
         (regex!("N"), &b"(a|c|g|t)"[..]),
         (regex!("R"), &b"(a|g)"[..]),
         (regex!("S"), &b"(c|g)"[..]),
         (regex!("V"), &b"(a|c|g)"[..]),
         (regex!("W"), &b"(a|t)"[..]),
         (regex!("Y"), &b"(c|t)"[..]),
     ];
     let mut seq = seq;
-    for (re, replacement) in substs.into_iter() {
+    for (re, replacement) in substs {
         seq = re.replace_all(&seq, replacement).into_owned();
     }
 
     for (variant, count) in counts {
         println!("{} {}", variant, count.join().unwrap());
     }
     println!("\n{}\n{}\n{}", ilen, clen, seq.len());
 }
--- a/third_party/rust/regex/examples/shootout-regex-dna-single.rs
+++ b/third_party/rust/regex/examples/shootout-regex-dna-single.rs
@@ -43,13 +43,13 @@ fn main() {
         (regex!("N"), "(a|c|g|t)"),
         (regex!("R"), "(a|g)"),
         (regex!("S"), "(c|g)"),
         (regex!("V"), "(a|c|g)"),
         (regex!("W"), "(a|t)"),
         (regex!("Y"), "(c|t)"),
     ];
     let mut seq = seq;
-    for (re, replacement) in substs.into_iter() {
+    for (re, replacement) in substs {
         seq = re.replace_all(&seq, replacement).into_owned();
     }
     println!("\n{}\n{}\n{}", ilen, clen, seq.len());
 }
--- a/third_party/rust/regex/examples/shootout-regex-dna.rs
+++ b/third_party/rust/regex/examples/shootout-regex-dna.rs
@@ -50,17 +50,17 @@ fn main() {
         (regex!("N"), "(a|c|g|t)"),
         (regex!("R"), "(a|g)"),
         (regex!("S"), "(c|g)"),
         (regex!("V"), "(a|c|g)"),
         (regex!("W"), "(a|t)"),
         (regex!("Y"), "(c|t)"),
     ];
     let mut seq = seq;
-    for (re, replacement) in substs.into_iter() {
+    for (re, replacement) in substs {
         seq = re.replace_all(&seq, replacement).into_owned();
     }
 
     for (variant, count) in counts {
         println!("{} {}", variant, count.join().unwrap());
     }
     println!("\n{}\n{}\n{}", ilen, clen, seq.len());
 }
--- a/third_party/rust/regex/src/backtrack.rs
+++ b/third_party/rust/regex/src/backtrack.rs
@@ -95,17 +95,17 @@ impl<'a, 'm, 'r, 's, I: Input> Bounded<'
         prog: &'r Program,
         cache: &ProgramCache,
         matches: &'m mut [bool],
         slots: &'s mut [Slot],
         input: I,
         start: usize,
     ) -> bool {
         let mut cache = cache.borrow_mut();
-        let mut cache = &mut cache.backtrack;
+        let cache = &mut cache.backtrack;
         let start = input.at(start);
         let mut b = Bounded {
             prog: prog,
             input: input,
             matches: matches,
             slots: slots,
             m: cache,
         };
@@ -211,16 +211,19 @@ impl<'a, 'm, 'r, 's, I: Input> Bounded<'
 
     fn step(&mut self, mut ip: InstPtr, mut at: InputAt) -> bool {
         use prog::Inst::*;
         loop {
             // This loop is an optimization to avoid constantly pushing/popping
             // from the stack. Namely, if we're pushing a job only to run it
             // next, avoid the push and just mutate `ip` (and possibly `at`)
             // in place.
+            if self.has_visited(ip, at) {
+                return false;
+            }
             match self.prog[ip] {
                 Match(slot) => {
                     if slot < self.matches.len() {
                         self.matches[slot] = true;
                     }
                     return true;
                 }
                 Save(ref inst) => {
@@ -270,19 +273,16 @@ impl<'a, 'm, 'r, 's, I: Input> Bounded<'
                             ip = inst.goto;
                             at = self.input.at(at.next_pos());
                             continue;
                         }
                     }
                     return false;
                 }
             }
-            if self.has_visited(ip, at) {
-                return false;
-            }
         }
     }
 
     fn has_visited(&mut self, ip: InstPtr, at: InputAt) -> bool {
         let k = ip * (self.input.len() + 1) + at.pos();
         let k1 = k / BIT_SIZE;
         let k2 = usize_to_u32(1 << (k & (BIT_SIZE - 1)));
         if self.m.visited[k1] & k2 == 0 {
--- a/third_party/rust/regex/src/compile.rs
+++ b/third_party/rust/regex/src/compile.rs
@@ -8,20 +8,18 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
 use std::collections::HashMap;
 use std::iter;
 use std::result;
 use std::sync::Arc;
 
-use syntax::{
-    Expr, Repeater, CharClass, ClassRange, ByteClass, ByteRange,
-    is_word_byte,
-};
+use syntax::is_word_byte;
+use syntax::hir::{self, Hir};
 use utf8_ranges::{Utf8Range, Utf8Sequence, Utf8Sequences};
 
 use prog::{
     Program, Inst, InstPtr, EmptyLook,
     InstSave, InstSplit, InstEmptyLook, InstChar, InstRanges, InstBytes,
 };
 
 use Error;
@@ -118,478 +116,526 @@ impl Compiler {
 
     /// Compile a regular expression given its AST.
     ///
     /// The compiler is guaranteed to succeed unless the program exceeds the
     /// specified size limit. If the size limit is exceeded, then compilation
     /// stops and returns an error.
     pub fn compile(
         mut self,
-        exprs: &[Expr],
+        exprs: &[Hir],
     ) -> result::Result<Program, Error> {
         debug_assert!(exprs.len() >= 1);
         self.num_exprs = exprs.len();
         if exprs.len() == 1 {
             self.compile_one(&exprs[0])
         } else {
             self.compile_many(exprs)
         }
     }
 
-    fn compile_one(mut self, expr: &Expr) -> result::Result<Program, Error> {
+    fn compile_one(mut self, expr: &Hir) -> result::Result<Program, Error> {
         // If we're compiling a forward DFA and we aren't anchored, then
         // add a `.*?` before the first capture group.
         // Other matching engines handle this by baking the logic into the
         // matching engine itself.
         let mut dotstar_patch = Patch { hole: Hole::None, entry: 0 };
         self.compiled.is_anchored_start = expr.is_anchored_start();
         self.compiled.is_anchored_end = expr.is_anchored_end();
         if self.compiled.needs_dotstar() {
-            dotstar_patch = try!(self.c_dotstar());
+            dotstar_patch = self.c_dotstar()?;
             self.compiled.start = dotstar_patch.entry;
         }
         self.compiled.captures = vec![None];
-        let patch = try!(self.c_capture(0, expr));
+        let patch = self.c_capture(0, expr)?;
         if self.compiled.needs_dotstar() {
             self.fill(dotstar_patch.hole, patch.entry);
         } else {
             self.compiled.start = patch.entry;
         }
         self.fill_to_next(patch.hole);
         self.compiled.matches = vec![self.insts.len()];
         self.push_compiled(Inst::Match(0));
         self.compile_finish()
     }
 
     fn compile_many(
         mut self,
-        exprs: &[Expr],
+        exprs: &[Hir],
     ) -> result::Result<Program, Error> {
         debug_assert!(exprs.len() > 1);
 
         self.compiled.is_anchored_start =
             exprs.iter().all(|e| e.is_anchored_start());
         self.compiled.is_anchored_end =
             exprs.iter().all(|e| e.is_anchored_end());
         let mut dotstar_patch = Patch { hole: Hole::None, entry: 0 };
         if self.compiled.needs_dotstar() {
-            dotstar_patch = try!(self.c_dotstar());
+            dotstar_patch = self.c_dotstar()?;
             self.compiled.start = dotstar_patch.entry;
         } else {
             self.compiled.start = 0; // first instruction is always split
         }
         self.fill_to_next(dotstar_patch.hole);
 
         let mut prev_hole = Hole::None;
         for (i, expr) in exprs[0..exprs.len() - 1].iter().enumerate() {
             self.fill_to_next(prev_hole);
             let split = self.push_split_hole();
-            let Patch { hole, entry } = try!(self.c_capture(0, expr));
+            let Patch { hole, entry } = self.c_capture(0, expr)?;
             self.fill_to_next(hole);
             self.compiled.matches.push(self.insts.len());
             self.push_compiled(Inst::Match(i));
             prev_hole = self.fill_split(split, Some(entry), None);
         }
         let i = exprs.len() - 1;
-        let Patch { hole, entry } = try!(self.c_capture(0, &exprs[i]));
+        let Patch { hole, entry } = self.c_capture(0, &exprs[i])?;
         self.fill(prev_hole, entry);
         self.fill_to_next(hole);
         self.compiled.matches.push(self.insts.len());
         self.push_compiled(Inst::Match(i));
         self.compile_finish()
     }
 
     fn compile_finish(mut self) -> result::Result<Program, Error> {
         self.compiled.insts =
             self.insts.into_iter().map(|inst| inst.unwrap()).collect();
         self.compiled.byte_classes = self.byte_classes.byte_classes();
         self.compiled.capture_name_idx = Arc::new(self.capture_name_idx);
         Ok(self.compiled)
     }
 
-    fn c(&mut self, expr: &Expr) -> Result {
+    /// Compile expr into self.insts, returning a patch on success,
+    /// or an error if we run out of memory.
+    ///
+    /// All of the c_* methods of the compiler share the contract outlined
+    /// here.
+    ///
+    /// The main thing that a c_* method does is mutate `self.insts`
+    /// to add a list of mostly compiled instructions required to execute
+    /// the given expression. `self.insts` contains MaybeInsts rather than
+    /// Insts because there is some backpatching required.
+    ///
+    /// The `Patch` value returned by each c_* method provides metadata
+    /// about the compiled instructions emitted to `self.insts`. The
+    /// `entry` member of the patch refers to the first instruction
+    /// (the entry point), while the `hole` member contains zero or
+    /// more offsets to partial instructions that need to be backpatched.
+    /// The c_* routine can't know where its list of instructions is going to
+    /// jump to after execution, so it is up to the caller to patch
+    /// these jumps to point to the right place. So compiling some
+    /// expression, e, we would end up with a situation that looked like:
+    ///
+    /// ```text
+    /// self.insts = [ ..., i1, i2, ..., iexit1, ..., iexitn, ...]
+    ///                     ^              ^             ^
+    ///                     |                \         /
+    ///                   entry                \     /
+    ///                                         hole
+    /// ```
+    ///
+    /// To compile two expressions, e1 and e2, concatenated together we
+    /// would do:
+    ///
+    /// ```ignore
+    /// let patch1 = self.c(e1);
+    /// let patch2 = self.c(e2);
+    /// ```
+    ///
+    /// which leaves us with a situation that looks like
+    ///
+    /// ```text
+    /// self.insts = [ ..., i1, ..., iexit1, ..., i2, ..., iexit2 ]
+    ///                     ^        ^            ^        ^
+    ///                     |        |            |        |
+    ///                entry1        hole1   entry2        hole2
+    /// ```
+    ///
+    /// Then to merge the two patches together into one we would backpatch
+    /// hole1 with entry2 and return a new patch that enters at entry1
+    /// and has hole2 for a hole. In fact, if you look at the c_concat
+    /// method you will see that it does exactly this, though it handles
+    /// a list of expressions rather than just the two that we use for
+    /// an example.
+    fn c(&mut self, expr: &Hir) -> Result {
         use prog;
-        use syntax::Expr::*;
+        use syntax::hir::HirKind::*;
 
-        try!(self.check_size());
-        match *expr {
+        self.check_size()?;
+        match *expr.kind() {
             Empty => Ok(Patch { hole: Hole::None, entry: self.insts.len() }),
-            Literal { ref chars, casei } => self.c_literal(chars, casei),
-            LiteralBytes { ref bytes, casei } => self.c_bytes(bytes, casei),
-            AnyChar => self.c_class(&[ClassRange {
-                start: '\x00',
-                end: '\u{10ffff}',
-            }]),
-            AnyCharNoNL => {
-                self.c_class(&[
-                    ClassRange { start: '\x00', end: '\x09' },
-                    ClassRange { start: '\x0b', end: '\u{10ffff}' },
-                ])
+            Literal(hir::Literal::Unicode(c)) => {
+                self.c_literal(&[c])
+            }
+            Literal(hir::Literal::Byte(b)) => {
+                assert!(self.compiled.uses_bytes());
+                self.c_bytes(&[b])
+            }
+            Class(hir::Class::Unicode(ref cls)) => {
+                self.c_class(cls.ranges())
             }
-            AnyByte => {
-                self.c_class_bytes(&[ByteRange { start: 0, end: 0xFF }])
+            Class(hir::Class::Bytes(ref cls)) => {
+                if self.compiled.uses_bytes() {
+                    self.c_class_bytes(cls.ranges())
+                } else {
+                    assert!(cls.is_all_ascii());
+                    let mut char_ranges = vec![];
+                    for r in cls.iter() {
+                        let (s, e) = (r.start() as char, r.end() as char);
+                        char_ranges.push(hir::ClassUnicodeRange::new(s, e));
+                    }
+                    self.c_class(&char_ranges)
+                }
             }
-            AnyByteNoNL => {
-                self.c_class_bytes(&[
-                    ByteRange { start: 0, end: 0x9 },
-                    ByteRange { start: 0xB, end: 0xFF },
-                ])
-            }
-            Class(ref cls) => {
-                self.c_class(cls)
-            }
-            ClassBytes(ref cls) => {
-                self.c_class_bytes(cls)
-            }
-            StartLine if self.compiled.is_reverse => {
+            Anchor(hir::Anchor::StartLine) if self.compiled.is_reverse => {
                 self.byte_classes.set_range(b'\n', b'\n');
                 self.c_empty_look(prog::EmptyLook::EndLine)
             }
-            StartLine => {
+            Anchor(hir::Anchor::StartLine) => {
                 self.byte_classes.set_range(b'\n', b'\n');
                 self.c_empty_look(prog::EmptyLook::StartLine)
             }
-            EndLine if self.compiled.is_reverse => {
+            Anchor(hir::Anchor::EndLine) if self.compiled.is_reverse => {
                 self.byte_classes.set_range(b'\n', b'\n');
                 self.c_empty_look(prog::EmptyLook::StartLine)
             }
-            EndLine => {
+            Anchor(hir::Anchor::EndLine) => {
                 self.byte_classes.set_range(b'\n', b'\n');
                 self.c_empty_look(prog::EmptyLook::EndLine)
             }
-            StartText if self.compiled.is_reverse => {
+            Anchor(hir::Anchor::StartText) if self.compiled.is_reverse => {
                 self.c_empty_look(prog::EmptyLook::EndText)
             }
-            StartText => {
+            Anchor(hir::Anchor::StartText) => {
                 self.c_empty_look(prog::EmptyLook::StartText)
             }
-            EndText if self.compiled.is_reverse => {
+            Anchor(hir::Anchor::EndText) if self.compiled.is_reverse => {
                 self.c_empty_look(prog::EmptyLook::StartText)
             }
-            EndText => {
+            Anchor(hir::Anchor::EndText) => {
                 self.c_empty_look(prog::EmptyLook::EndText)
             }
-            WordBoundary => {
+            WordBoundary(hir::WordBoundary::Unicode) => {
                 self.compiled.has_unicode_word_boundary = true;
                 self.byte_classes.set_word_boundary();
                 self.c_empty_look(prog::EmptyLook::WordBoundary)
             }
-            NotWordBoundary => {
+            WordBoundary(hir::WordBoundary::UnicodeNegate) => {
                 self.compiled.has_unicode_word_boundary = true;
                 self.byte_classes.set_word_boundary();
                 self.c_empty_look(prog::EmptyLook::NotWordBoundary)
             }
-            WordBoundaryAscii => {
+            WordBoundary(hir::WordBoundary::Ascii) => {
                 self.byte_classes.set_word_boundary();
                 self.c_empty_look(prog::EmptyLook::WordBoundaryAscii)
             }
-            NotWordBoundaryAscii => {
+            WordBoundary(hir::WordBoundary::AsciiNegate) => {
                 self.byte_classes.set_word_boundary();
                 self.c_empty_look(prog::EmptyLook::NotWordBoundaryAscii)
             }
-            Group { ref e, i: None, name: None } => self.c(e),
-            Group { ref e, i, ref name } => {
-                // it's impossible to have a named capture without an index
-                let i = i.expect("capture index");
-                if i >= self.compiled.captures.len() {
-                    self.compiled.captures.push(name.clone());
-                    if let Some(ref name) = *name {
-                        self.capture_name_idx.insert(name.to_owned(), i);
+            Group(ref g) => {
+                match g.kind {
+                    hir::GroupKind::NonCapturing => self.c(&g.hir),
+                    hir::GroupKind::CaptureIndex(index) => {
+                        if index as usize >= self.compiled.captures.len() {
+                            self.compiled.captures.push(None);
+                        }
+                        self.c_capture(2 * index as usize, &g.hir)
+                    }
+                    hir::GroupKind::CaptureName { index, ref name } => {
+                        if index as usize >= self.compiled.captures.len() {
+                            let n = name.to_string();
+                            self.compiled.captures.push(Some(n.clone()));
+                            self.capture_name_idx.insert(n, index as usize);
+                        }
+                        self.c_capture(2 * index as usize, &g.hir)
                     }
                 }
-                self.c_capture(2 * i, e)
             }
             Concat(ref es) => {
                 if self.compiled.is_reverse {
                     self.c_concat(es.iter().rev())
                 } else {
                     self.c_concat(es)
                 }
             }
-            Alternate(ref es) => self.c_alternate(&**es),
-            Repeat { ref e, r, greedy } => self.c_repeat(e, r, greedy),
+            Alternation(ref es) => self.c_alternate(&**es),
+            Repetition(ref rep) => self.c_repeat(rep),
         }
     }
 
-    fn c_capture(&mut self, first_slot: usize, expr: &Expr) -> Result {
+    fn c_capture(&mut self, first_slot: usize, expr: &Hir) -> Result {
         if self.num_exprs > 1 || self.compiled.is_dfa {
             // Don't ever compile Save instructions for regex sets because
             // they are never used. They are also never used in DFA programs
             // because DFAs can't handle captures.
             self.c(expr)
         } else {
             let entry = self.insts.len();
             let hole = self.push_hole(InstHole::Save { slot: first_slot });
-            let patch = try!(self.c(expr));
+            let patch = self.c(expr)?;
             self.fill(hole, patch.entry);
             self.fill_to_next(patch.hole);
             let hole = self.push_hole(InstHole::Save { slot: first_slot + 1 });
             Ok(Patch { hole: hole, entry: entry })
         }
     }
 
     fn c_dotstar(&mut self) -> Result {
         Ok(if !self.compiled.only_utf8() {
-            try!(self.c(&Expr::Repeat {
-                e: Box::new(Expr::AnyByte),
-                r: Repeater::ZeroOrMore,
+            self.c(&Hir::repetition(hir::Repetition {
+                kind: hir::RepetitionKind::ZeroOrMore,
                 greedy: false,
-            }))
+                hir: Box::new(Hir::any(true)),
+            }))?
         } else {
-            try!(self.c(&Expr::Repeat {
-                e: Box::new(Expr::AnyChar),
-                r: Repeater::ZeroOrMore,
+            self.c(&Hir::repetition(hir::Repetition {
+                kind: hir::RepetitionKind::ZeroOrMore,
                 greedy: false,
-            }))
+                hir: Box::new(Hir::any(false)),
+            }))?
         })
     }
 
-    fn c_literal(&mut self, chars: &[char], casei: bool) -> Result {
+    fn c_literal(&mut self, chars: &[char]) -> Result {
         debug_assert!(!chars.is_empty());
         let mut chars: Box<Iterator<Item=&char>> =
             if self.compiled.is_reverse {
                 Box::new(chars.iter().rev())
             } else {
                 Box::new(chars.iter())
             };
         let first = *chars.next().expect("non-empty literal");
-        let Patch { mut hole, entry } = try!(self.c_char(first, casei));
+        let Patch { mut hole, entry } = self.c_char(first)?;
         for &c in chars {
-            let p = try!(self.c_char(c, casei));
+            let p = self.c_char(c)?;
             self.fill(hole, p.entry);
             hole = p.hole;
         }
         Ok(Patch { hole: hole, entry: entry })
     }
 
-    fn c_char(&mut self, c: char, casei: bool) -> Result {
-        if casei {
-            self.c_class(&CharClass::new(vec![
-                ClassRange { start: c, end: c },
-            ]).case_fold())
-        } else {
-            self.c_class(&[ClassRange { start: c, end: c }])
-        }
+    fn c_char(&mut self, c: char) -> Result {
+        self.c_class(&[hir::ClassUnicodeRange::new(c, c)])
     }
 
-    fn c_class(&mut self, ranges: &[ClassRange]) -> Result {
+    fn c_class(&mut self, ranges: &[hir::ClassUnicodeRange]) -> Result {
         assert!(!ranges.is_empty());
         if self.compiled.uses_bytes() {
             CompileClass {
                 c: self,
                 ranges: ranges,
             }.compile()
         } else {
             let ranges: Vec<(char, char)> =
-                ranges.iter().map(|r| (r.start, r.end)).collect();
+                ranges.iter().map(|r| (r.start(), r.end())).collect();
             let hole = if ranges.len() == 1 && ranges[0].0 == ranges[0].1 {
                 self.push_hole(InstHole::Char { c: ranges[0].0 })
             } else {
                 self.push_hole(InstHole::Ranges { ranges: ranges })
             };
             Ok(Patch { hole: hole, entry: self.insts.len() - 1 })
         }
     }
 
-    fn c_bytes(&mut self, bytes: &[u8], casei: bool) -> Result {
+    fn c_bytes(&mut self, bytes: &[u8]) -> Result {
         debug_assert!(!bytes.is_empty());
         let mut bytes: Box<Iterator<Item=&u8>> =
             if self.compiled.is_reverse {
                 Box::new(bytes.iter().rev())
             } else {
                 Box::new(bytes.iter())
             };
         let first = *bytes.next().expect("non-empty literal");
-        let Patch { mut hole, entry } = try!(self.c_byte(first, casei));
+        let Patch { mut hole, entry } = self.c_byte(first)?;
         for &b in bytes {
-            let p = try!(self.c_byte(b, casei));
+            let p = self.c_byte(b)?;
             self.fill(hole, p.entry);
             hole = p.hole;
         }
         Ok(Patch { hole: hole, entry: entry })
     }
 
-    fn c_byte(&mut self, b: u8, casei: bool) -> Result {
-        if casei {
-            self.c_class_bytes(&ByteClass::new(vec![
-                ByteRange { start: b, end: b },
-            ]).case_fold())
-        } else {
-            self.c_class_bytes(&[ByteRange { start: b, end: b }])
-        }
+    fn c_byte(&mut self, b: u8) -> Result {
+        self.c_class_bytes(&[hir::ClassBytesRange::new(b, b)])
     }
 
-    fn c_class_bytes(&mut self, ranges: &[ByteRange]) -> Result {
+    fn c_class_bytes(&mut self, ranges: &[hir::ClassBytesRange]) -> Result {
         debug_assert!(!ranges.is_empty());
 
         let first_split_entry = self.insts.len();
         let mut holes = vec![];
         let mut prev_hole = Hole::None;
         for r in &ranges[0..ranges.len() - 1] {
             self.fill_to_next(prev_hole);
             let split = self.push_split_hole();
             let next = self.insts.len();
-            self.byte_classes.set_range(r.start, r.end);
+            self.byte_classes.set_range(r.start(), r.end());
             holes.push(self.push_hole(InstHole::Bytes {
-                start: r.start, end: r.end,
+                start: r.start(), end: r.end(),
             }));
             prev_hole = self.fill_split(split, Some(next), None);
         }
         let next = self.insts.len();
         let r = &ranges[ranges.len() - 1];
-        self.byte_classes.set_range(r.start, r.end);
+        self.byte_classes.set_range(r.start(), r.end());
         holes.push(self.push_hole(InstHole::Bytes {
-            start: r.start, end: r.end,
+            start: r.start(), end: r.end(),
         }));
         self.fill(prev_hole, next);
         Ok(Patch { hole: Hole::Many(holes), entry: first_split_entry })
     }
 
     fn c_empty_look(&mut self, look: EmptyLook) -> Result {
         let hole = self.push_hole(InstHole::EmptyLook { look: look });
         Ok(Patch { hole: hole, entry: self.insts.len() - 1 })
     }
 
     fn c_concat<'a, I>(&mut self, exprs: I) -> Result
-            where I: IntoIterator<Item=&'a Expr> {
+            where I: IntoIterator<Item=&'a Hir> {
         let mut exprs = exprs.into_iter();
         let first = match exprs.next() {
             Some(expr) => expr,
             None => {
                 return Ok(Patch { hole: Hole::None, entry: self.insts.len() })
             }
         };
-        let Patch { mut hole, entry } = try!(self.c(first));
+        let Patch { mut hole, entry } = self.c(first)?;
         for e in exprs {
-            let p = try!(self.c(e));
+            let p = self.c(e)?;
             self.fill(hole, p.entry);
             hole = p.hole;
         }
         Ok(Patch { hole: hole, entry: entry })
     }
 
-    fn c_alternate(&mut self, exprs: &[Expr]) -> Result {
+    fn c_alternate(&mut self, exprs: &[Hir]) -> Result {
         debug_assert!(
             exprs.len() >= 2, "alternates must have at least 2 exprs");
 
         // Initial entry point is always the first split.
         let first_split_entry = self.insts.len();
 
         // Save up all of the holes from each alternate. They will all get
         // patched to point to the same location.
         let mut holes = vec![];
 
         let mut prev_hole = Hole::None;
         for e in &exprs[0..exprs.len() - 1] {
             self.fill_to_next(prev_hole);
             let split = self.push_split_hole();
-            let Patch { hole, entry } = try!(self.c(e));
+            let prev_entry = self.insts.len();
+            let Patch { hole, entry } = self.c(e)?;
+            if prev_entry == self.insts.len() {
+                // TODO(burntsushi): It is kind of silly that we don't support
+                // empty-subexpressions in alternates, but it is supremely
+                // awkward to support them in the existing compiler
+                // infrastructure. This entire compiler needs to be thrown out
+                // anyway, so don't feel too bad.
+                return Err(Error::Syntax(
+                    "alternations cannot currently contain \
+                     empty sub-expressions".to_string()));
+            }
             holes.push(hole);
             prev_hole = self.fill_split(split, Some(entry), None);
         }
-        let Patch { hole, entry } = try!(self.c(&exprs[exprs.len() - 1]));
+        let prev_entry = self.insts.len();
+        let Patch { hole, entry } = self.c(&exprs[exprs.len() - 1])?;
+        if prev_entry == self.insts.len() {
+            // TODO(burntsushi): See TODO above.
+            return Err(Error::Syntax(
+                "alternations cannot currently contain \
+                 empty sub-expressions".to_string()));
+        }
         holes.push(hole);
         self.fill(prev_hole, entry);
         Ok(Patch { hole: Hole::Many(holes), entry: first_split_entry })
     }
 
-    fn c_repeat(
-        &mut self,
-        expr: &Expr,
-        kind: Repeater,
-        greedy: bool,
-    ) -> Result {
-        match kind {
-            Repeater::ZeroOrOne => self.c_repeat_zero_or_one(expr, greedy),
-            Repeater::ZeroOrMore => self.c_repeat_zero_or_more(expr, greedy),
-            Repeater::OneOrMore => self.c_repeat_one_or_more(expr, greedy),
-            Repeater::Range { min, max: None } => {
-                self.c_repeat_range_min_or_more(expr, greedy, min)
+    fn c_repeat(&mut self, rep: &hir::Repetition) -> Result {
+        use syntax::hir::RepetitionKind::*;
+        match rep.kind {
+            ZeroOrOne => self.c_repeat_zero_or_one(&rep.hir, rep.greedy),
+            ZeroOrMore => self.c_repeat_zero_or_more(&rep.hir, rep.greedy),
+            OneOrMore => self.c_repeat_one_or_more(&rep.hir, rep.greedy),
+            Range(hir::RepetitionRange::Exactly(min_max)) => {
+                self.c_repeat_range(&rep.hir, rep.greedy, min_max, min_max)
             }
-            Repeater::Range { min, max: Some(max) } => {
-                self.c_repeat_range(expr, greedy, min, max)
+            Range(hir::RepetitionRange::AtLeast(min)) => {
+                self.c_repeat_range_min_or_more(&rep.hir, rep.greedy, min)
+            }
+            Range(hir::RepetitionRange::Bounded(min, max)) => {
+                self.c_repeat_range(&rep.hir, rep.greedy, min, max)
             }
         }
     }
 
-    fn c_repeat_zero_or_one(
-        &mut self,
-        expr: &Expr,
-        greedy: bool,
-    ) -> Result {
+    fn c_repeat_zero_or_one(&mut self, expr: &Hir, greedy: bool) -> Result {
         let split_entry = self.insts.len();
         let split = self.push_split_hole();
-        let Patch { hole: hole_rep, entry: entry_rep } = try!(self.c(expr));
+        let Patch { hole: hole_rep, entry: entry_rep } = self.c(expr)?;
 
         let split_hole = if greedy {
             self.fill_split(split, Some(entry_rep), None)
         } else {
             self.fill_split(split, None, Some(entry_rep))
         };
         let holes = vec![hole_rep, split_hole];
         Ok(Patch { hole: Hole::Many(holes), entry: split_entry })
     }
 
-    fn c_repeat_zero_or_more(
-        &mut self,
-        expr: &Expr,
-        greedy: bool,
-    ) -> Result {
+    fn c_repeat_zero_or_more(&mut self, expr: &Hir, greedy: bool) -> Result {
         let split_entry = self.insts.len();
         let split = self.push_split_hole();
-        let Patch { hole: hole_rep, entry: entry_rep } = try!(self.c(expr));
+        let Patch { hole: hole_rep, entry: entry_rep } = self.c(expr)?;
 
         self.fill(hole_rep, split_entry);
         let split_hole = if greedy {
             self.fill_split(split, Some(entry_rep), None)
         } else {
             self.fill_split(split, None, Some(entry_rep))
         };
         Ok(Patch { hole: split_hole, entry: split_entry })
     }
 
-    fn c_repeat_one_or_more(
-        &mut self,
-        expr: &Expr,
-        greedy: bool,
-    ) -> Result {
-        let Patch { hole: hole_rep, entry: entry_rep } = try!(self.c(expr));
+    fn c_repeat_one_or_more(&mut self, expr: &Hir, greedy: bool) -> Result {
+        let Patch { hole: hole_rep, entry: entry_rep } = self.c(expr)?;
         self.fill_to_next(hole_rep);
         let split = self.push_split_hole();
 
         let split_hole = if greedy {
             self.fill_split(split, Some(entry_rep), None)
         } else {
             self.fill_split(split, None, Some(entry_rep))
         };
         Ok(Patch { hole: split_hole, entry: entry_rep })
     }
 
     fn c_repeat_range_min_or_more(
         &mut self,
-        expr: &Expr,
+        expr: &Hir,
         greedy: bool,
         min: u32,
     ) -> Result {
         let min = u32_to_usize(min);
-        let patch_concat = try!(self.c_concat(iter::repeat(expr).take(min)));
-        let patch_rep = try!(self.c_repeat_zero_or_more(expr, greedy));
+        let patch_concat = self.c_concat(iter::repeat(expr).take(min))?;
+        let patch_rep = self.c_repeat_zero_or_more(expr, greedy)?;
         self.fill(patch_concat.hole, patch_rep.entry);
         Ok(Patch { hole: patch_rep.hole, entry: patch_concat.entry })
     }
 
     fn c_repeat_range(
         &mut self,
-        expr: &Expr,
+        expr: &Hir,
         greedy: bool,
         min: u32,
         max: u32,
     ) -> Result {
         let (min, max) = (u32_to_usize(min), u32_to_usize(max));
-        let patch_concat = try!(self.c_concat(iter::repeat(expr).take(min)));
+        let patch_concat = self.c_concat(iter::repeat(expr).take(min))?;
         let initial_entry = patch_concat.entry;
         if min == max {
             return Ok(patch_concat);
         }
         // It is much simpler to compile, e.g., `a{2,5}` as:
         //
         //     aaa?a?a?
         //
@@ -608,17 +654,17 @@ impl Compiler {
         // This is *incredibly* inefficient because the splits end
         // up forming a chain, which has to be resolved every time a
         // transition is followed.
         let mut holes = vec![];
         let mut prev_hole = patch_concat.hole;
         for _ in min..max {
             self.fill_to_next(prev_hole);
             let split = self.push_split_hole();
-            let Patch { hole, entry } = try!(self.c(expr));
+            let Patch { hole, entry } = self.c(expr)?;
             prev_hole = hole;
             if greedy {
                 holes.push(self.fill_split(split, Some(entry), None));
             } else {
                 holes.push(self.fill_split(split, None, Some(entry)));
             }
         }
         holes.push(prev_hole);
@@ -817,51 +863,51 @@ impl InstHole {
                 end: end,
             }),
         }
     }
 }
 
 struct CompileClass<'a, 'b> {
     c: &'a mut Compiler,
-    ranges: &'b [ClassRange],
+    ranges: &'b [hir::ClassUnicodeRange],
 }
 
 impl<'a, 'b> CompileClass<'a, 'b> {
     fn compile(mut self) -> Result {
         let mut holes = vec![];
         let mut initial_entry = None;
         let mut last_split = Hole::None;
         let mut utf8_seqs = self.c.utf8_seqs.take().unwrap();
         self.c.suffix_cache.clear();
 
-        for (i, ref range) in self.ranges.iter().enumerate() {
+        for (i, range) in self.ranges.iter().enumerate() {
             let is_last_range = i + 1 == self.ranges.len();
-            utf8_seqs.reset(range.start, range.end);
+            utf8_seqs.reset(range.start(), range.end());
             let mut it = (&mut utf8_seqs).peekable();
             loop {
                 let utf8_seq = match it.next() {
                     None => break,
                     Some(utf8_seq) => utf8_seq,
                 };
                 if is_last_range && it.peek().is_none() {
-                    let Patch { hole, entry } = try!(self.c_utf8_seq(&utf8_seq));
+                    let Patch { hole, entry } = self.c_utf8_seq(&utf8_seq)?;
                     holes.push(hole);
                     self.c.fill(last_split, entry);
                     last_split = Hole::None;
                     if initial_entry.is_none() {
                         initial_entry = Some(entry);
                     }
                 } else {
                     if initial_entry.is_none() {
                         initial_entry = Some(self.c.insts.len());
                     }
                     self.c.fill_to_next(last_split);
                     last_split = self.c.push_split_hole();
-                    let Patch { hole, entry } = try!(self.c_utf8_seq(&utf8_seq));
+                    let Patch { hole, entry } = self.c_utf8_seq(&utf8_seq)?;
                     holes.push(hole);
                     last_split = self.c.fill_split(last_split, Some(entry), None);
                 }
             }
         }
         self.c.utf8_seqs = Some(utf8_seqs);
         Ok(Patch {
             hole: Hole::Many(holes),
@@ -911,17 +957,17 @@ impl<'a, 'b> CompileClass<'a, 'b> {
             from_inst = self.c.insts.len().checked_sub(1).unwrap();
             debug_assert!(from_inst < ::std::usize::MAX);
         }
         debug_assert!(from_inst < ::std::usize::MAX);
         Ok(Patch { hole: last_hole, entry: from_inst })
     }
 }
 
-/// SuffixCache is a simple bounded hash map for caching suffix entries in
+/// `SuffixCache` is a simple bounded hash map for caching suffix entries in
 /// UTF-8 automata. For example, consider the Unicode range \u{0}-\u{FFFF}.
 /// The set of byte ranges looks like this:
 ///
 /// [0-7F]
 /// [C2-DF][80-BF]
 /// [E0][A0-BF][80-BF]
 /// [E1-EC][80-BF][80-BF]
 /// [ED][80-9F][80-BF]
@@ -1045,16 +1091,19 @@ impl ByteClassSet {
             }
             i += 1;
         }
         byte_classes
     }
 }
 
 fn u32_to_usize(n: u32) -> usize {
+    // In case usize is less than 32 bits, we need to guard against overflow.
+    // On most platforms this compiles to nothing.
+    // TODO Use `std::convert::TryFrom` once it's stable.
     if (n as u64) > (::std::usize::MAX as u64) {
         panic!("BUG: {} is too big to be pointer sized", n)
     }
     n as usize
 }
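
For reference, a minimal sketch of the `TryFrom`-based version the TODO above points at, assuming `std::convert::TryFrom` is available (it was not yet stable when this comment was written); the function name here is illustrative, not part of the crate:

    fn u32_to_usize_checked(n: u32) -> usize {
        use std::convert::TryFrom;
        // try_from can only fail on targets where usize is narrower than 32 bits.
        usize::try_from(n).expect("BUG: u32 is too big to be pointer sized")
    }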
 
 #[cfg(test)]
 mod tests {
--- a/third_party/rust/regex/src/dfa.rs
+++ b/third_party/rust/regex/src/dfa.rs
@@ -53,30 +53,30 @@ use std::mem;
 
 use exec::ProgramCache;
 use prog::{Inst, Program};
 use sparse::SparseSet;
 
 /// Return true if and only if the given program can be executed by a DFA.
 ///
 /// Generally, a DFA is always possible. A pathological case where it is not
-/// possible is if the number of NFA states exceeds u32::MAX, in which case,
+/// possible is if the number of NFA states exceeds `u32::MAX`, in which case,
 /// this function will return false.
 ///
 /// This function will also return false if the given program has any Unicode
 /// instructions (Char or Ranges) since the DFA operates on bytes only.
 pub fn can_exec(insts: &Program) -> bool {
     use prog::Inst::*;
     // If for some reason we manage to allocate a regex program with more
     // than i32::MAX instructions, then we can't execute the DFA because we
     // use 32 bit instruction pointer deltas for memory savings.
     // If i32::MAX is the largest positive delta,
     // then -i32::MAX == i32::MIN + 1 is the largest negative delta,
     // and we are OK to use 32 bits.
-    if insts.len() > ::std::i32::MAX as usize {
+    if insts.dfa_size_limit == 0 || insts.len() > ::std::i32::MAX as usize {
         return false;
     }
     for inst in insts {
         match *inst {
             Char(_) | Ranges(_) => return false,
             EmptyLook(_) | Match(_) | Save(_) | Split(_) | Bytes(_) => {}
         }
     }
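
A small sketch of the 32-bit delta reasoning in the comment above: if the program has at most `i32::MAX` instructions, the signed difference between any two instruction indexes fits in an `i32`. The helper below is illustrative only, not part of the crate:

    fn jump_delta(from: usize, to: usize) -> i32 {
        let d = to as i64 - from as i64;
        // With at most i32::MAX instructions, d lies in [-(i32::MAX), i32::MAX].
        debug_assert!(-(i64::from(i32::MAX)) <= d && d <= i64::from(i32::MAX));
        d as i32
    }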
@@ -99,17 +99,17 @@ pub struct Cache {
     /// cached DFA states. In particular, the order of the NFA states matters
     /// for leftmost-first style matching. Namely, when computing a cached
     /// state, the set of NFA states stops growing as soon as the first Match
     /// instruction is observed.
     qcur: SparseSet,
     qnext: SparseSet,
 }
 
-/// CacheInner is logically just a part of Cache, but groups together fields
+/// `CacheInner` is logically just a part of Cache, but groups together fields
 /// that aren't passed as function parameters throughout search. (This split
 /// is mostly an artifact of the borrow checker. It is happily paid.)
 #[derive(Clone, Debug)]
 struct CacheInner {
     /// A cache of pre-compiled DFA states, keyed by the set of NFA states
     /// and the set of empty-width flags set at the byte in the input when the
     /// state was observed.
     ///
@@ -157,18 +157,18 @@ struct CacheInner {
     size: usize,
 }
 
 /// The transition table.
 ///
 /// It is laid out in row-major order, with states as rows and byte class
 /// transitions as columns.
 ///
-/// The transition table is responsible for producing valid StatePtrs. A
-/// StatePtr points to the start of a particular row in this table. When
+/// The transition table is responsible for producing valid `StatePtrs`. A
+/// `StatePtr` points to the start of a particular row in this table. When
 /// indexing to find the next state this allows us to avoid a multiplication
 /// when computing an index into the table.
 #[derive(Clone)]
 struct Transitions {
     /// The table.
     table: Vec<StatePtr>,
     /// The stride.
     num_byte_classes: usize,
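
A minimal sketch of the row-major lookup described above, assuming a `StatePtr` already encodes the starting offset of its row (names are illustrative):

    fn next_state(table: &[u32], row_start: u32, byte_class: usize) -> u32 {
        // No multiplication by the stride: row_start is already the row's offset.
        table[row_start as usize + byte_class]
    }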
@@ -247,17 +247,17 @@ impl<T> Result<T> {
     fn set_non_match(self, at: usize) -> Result<T> {
         match self {
             Result::NoMatch(_) => Result::NoMatch(at),
             r => r,
         }
     }
 }
 
-/// State is a DFA state. It contains an ordered set of NFA states (not
+/// `State` is a DFA state. It contains an ordered set of NFA states (not
 /// necessarily complete) and a smattering of flags.
 ///
 /// The flags are packed into the first byte of data.
 ///
 /// States don't carry their transitions. Instead, transitions are stored in
 /// a single row-major table.
 ///
 /// Delta encoding is used to store the instruction pointers.
@@ -266,17 +266,17 @@ impl<T> Result<T> {
 /// to the previous one. If a delta is in the range -127..127,
 /// it is packed into a single byte; otherwise the byte 128 (-128 as an i8)
 /// is coded as a flag, followed by 4 bytes encoding the delta.
 #[derive(Clone, Eq, Hash, PartialEq)]
 struct State{
     data: Box<[u8]>,
 }
 
-/// InstPtr is a 32 bit pointer into a sequence of opcodes (i.e., it indexes
+/// `InstPtr` is a 32 bit pointer into a sequence of opcodes (i.e., it indexes
 /// an NFA state).
 ///
 /// Throughout this library, this is usually set to `usize`, but we force a
 /// `u32` here for the DFA to save on space.
 type InstPtr = u32;
 
 /// Adds ip to data using delta encoding with respect to prev.
 ///
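
A hedged sketch of the delta encoding described above; the helper name and byte order are illustrative assumptions, not the crate's actual encoder:

    fn push_delta(data: &mut Vec<u8>, prev: &mut u32, ip: u32) {
        let delta = i64::from(ip) - i64::from(*prev);
        if -127 <= delta && delta <= 127 {
            // Small delta: packed into a single byte.
            data.push(delta as i8 as u8);
        } else {
            // Flag byte (-128 as an i8), followed by the full delta in 4 bytes.
            data.push(128);
            data.extend_from_slice(&(delta as i32).to_le_bytes());
        }
        *prev = ip;
    }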
@@ -317,17 +317,18 @@ impl State {
     fn inst_ptrs(&self) -> InstPtrs {
         InstPtrs {
             base: 0,
             data: &self.data[1..],
         }
     }
 }
 
-/// StatePtr is a 32 bit pointer to the start of a row in the transition table.
+/// `StatePtr` is a 32 bit pointer to the start of a row in the transition
+/// table.
 ///
 /// It has many special values. There are two types of special values:
 /// sentinels and flags.
 ///
 /// Sentinels correspond to special states that carry some kind of
 /// significance. There are three such states: unknown, dead and quit states.
 ///
 /// Unknown states are states that haven't been computed yet. They indicate
@@ -340,17 +341,18 @@ impl State {
 /// immediately and return the longest match it has found thus far.
 ///
 /// Quit states are states that imply the DFA is not capable of matching the
 /// regex correctly. Currently, this is only used when a Unicode word boundary
 /// exists in the regex *and* a non-ASCII byte is observed.
 ///
 /// The other type of state pointer is a state pointer with special flag bits.
 /// There are two flags: a start flag and a match flag. The lower bits of both
-/// kinds always contain a "valid" StatePtr (indicated by the STATE_MAX mask).
+/// kinds always contain a "valid" `StatePtr` (indicated by the `STATE_MAX`
+/// mask).
 ///
 /// The start flag means that the state is a start state, and therefore may be
 /// subject to special prefix scanning optimizations.
 ///
 /// The match flag means that the state is a match state, and therefore the
 /// current position in the input (while searching) should be recorded.
 ///
 /// The above exists mostly in the service of making the inner loop fast.
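
To make the flag/pointer packing above concrete, a small sketch; the constant values are assumed for illustration only (the real `STATE_*` constants are defined elsewhere in this file and may use different bit positions):

    const STATE_MAX: u32 = (1 << 30) - 1;   // mask for the "valid" pointer bits
    const STATE_MATCH: u32 = 1 << 30;       // match flag
    const STATE_START: u32 = 1 << 31;       // start flag

    fn is_match(si: u32) -> bool { si & STATE_MATCH != 0 }
    fn is_start(si: u32) -> bool { si & STATE_START != 0 }
    fn valid_ptr(si: u32) -> u32 { si & STATE_MAX }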
@@ -457,17 +459,17 @@ impl<'a> Fsm<'a> {
     pub fn forward(
         prog: &'a Program,
         cache: &ProgramCache,
         quit_after_match: bool,
         text: &[u8],
         at: usize,
     ) -> Result<usize> {
         let mut cache = cache.borrow_mut();
-        let mut cache = &mut cache.dfa;
+        let cache = &mut cache.dfa;
         let mut dfa = Fsm {
             prog: prog,
             start: 0, // filled in below
             at: at,
             quit_after_match: quit_after_match,
             last_match_si: STATE_UNKNOWN,
             last_cache_flush: at,
             cache: &mut cache.inner,
@@ -490,17 +492,17 @@ impl<'a> Fsm<'a> {
     pub fn reverse(
         prog: &'a Program,
         cache: &ProgramCache,
         quit_after_match: bool,
         text: &[u8],
         at: usize,
     ) -> Result<usize> {
         let mut cache = cache.borrow_mut();
-        let mut cache = &mut cache.dfa_reverse;
+        let cache = &mut cache.dfa_reverse;
         let mut dfa = Fsm {
             prog: prog,
             start: 0, // filled in below
             at: at,
             quit_after_match: quit_after_match,
             last_match_si: STATE_UNKNOWN,
             last_cache_flush: at,
             cache: &mut cache.inner,
@@ -524,17 +526,17 @@ impl<'a> Fsm<'a> {
         prog: &'a Program,
         cache: &ProgramCache,
         matches: &mut [bool],
         text: &[u8],
         at: usize,
     ) -> Result<usize> {
         debug_assert!(matches.len() == prog.matches.len());
         let mut cache = cache.borrow_mut();
-        let mut cache = &mut cache.dfa;
+        let cache = &mut cache.dfa;
         let mut dfa = Fsm {
             prog: prog,
             start: 0, // filled in below
             at: at,
             quit_after_match: false,
             last_match_si: STATE_UNKNOWN,
             last_cache_flush: at,
             cache: &mut cache.inner,
@@ -993,26 +995,30 @@ impl<'a> Fsm<'a> {
                 Bytes(ref inst) => {
                     if b.as_byte().map_or(false, |b| inst.matches(b)) {
                         self.follow_epsilons(
                             inst.goto as InstPtr, qnext, empty_flags);
                     }
                 }
             }
         }
-        let mut cache = true;
-        if b.is_eof() && self.prog.matches.len() > 1 {
-            // If we're processing the last byte of the input and we're
-            // matching a regex set, then make the next state contain the
-            // previous states transitions. We do this so that the main
-            // matching loop can extract all of the match instructions.
-            mem::swap(qcur, qnext);
-            // And don't cache this state because it's totally bunk.
-            cache = false;
-        }
+
+        let cache =
+            if b.is_eof() && self.prog.matches.len() > 1 {
+                // If we're processing the last byte of the input and we're
+                // matching a regex set, then make the next state contain the
+                // previous state's transitions. We do this so that the main
+                // matching loop can extract all of the match instructions.
+                mem::swap(qcur, qnext);
+                // And don't cache this state because it's totally bunk.
+                false
+            } else {
+                true
+            };
+
         // We've now built up the set of NFA states that ought to comprise the
         // next DFA state, so try to find it in the cache, and if it doesn't
         // exist, cache it.
         //
         // N.B. We pass `&mut si` here because the cache may clear itself if
         // it has gotten too full. When that happens, the location of the
         // current state may change.
         let mut next = match self.cached_state(
@@ -1025,17 +1031,17 @@ impl<'a> Fsm<'a> {
         };
         if (self.start & !STATE_START) == next {
             // Start states can never be match states since all matches are
             // delayed by one byte.
             debug_assert!(!self.state(next).flags().is_match());
             next = self.start_ptr(next);
         }
         if next <= STATE_MAX && self.state(next).flags().is_match() {
-            next = STATE_MATCH | next;
+            next |= STATE_MATCH;
         }
         debug_assert!(next != STATE_UNKNOWN);
         // And now store our state in the current state's next list.
         if cache {
             let cls = self.byte_class(b);
             self.cache.trans.set_next(si, cls, next);
         }
         Some(next)
@@ -1071,62 +1077,72 @@ impl<'a> Fsm<'a> {
         flags: EmptyFlags,
     ) {
         use prog::Inst::*;
         use prog::EmptyLook::*;
 
         // We need to traverse the NFA to follow epsilon transitions, so avoid
         // recursion with an explicit stack.
         self.cache.stack.push(ip);
-        while let Some(ip) = self.cache.stack.pop() {
-            // Don't visit states we've already added.
-            if q.contains(ip as usize) {
-                continue;
-            }
-            q.insert(ip as usize);
-            match self.prog[ip as usize] {
-                Char(_) | Ranges(_) => unreachable!(),
-                Match(_) | Bytes(_) => {}
-                EmptyLook(ref inst) => {
-                    // Only follow empty assertion states if our flags satisfy
-                    // the assertion.
-                    match inst.look {
-                        StartLine if flags.start_line => {
-                            self.cache.stack.push(inst.goto as InstPtr);
-                        }
-                        EndLine if flags.end_line => {
-                            self.cache.stack.push(inst.goto as InstPtr);
-                        }
-                        StartText if flags.start => {
-                            self.cache.stack.push(inst.goto as InstPtr);
+        while let Some(mut ip) = self.cache.stack.pop() {
+            // Try to munch through as many states as possible without
+            // pushes/pops to the stack.
+            loop {
+                // Don't visit states we've already added.
+                if q.contains(ip as usize) {
+                    break;
+                }
+                q.insert(ip as usize);
+                match self.prog[ip as usize] {
+                    Char(_) | Ranges(_) => unreachable!(),
+                    Match(_) | Bytes(_) => {
+                        break;
+                    }
+                    EmptyLook(ref inst) => {
+                        // Only follow empty assertion states if our flags
+                        // satisfy the assertion.
+                        match inst.look {
+                            StartLine if flags.start_line => {
+                                ip = inst.goto as InstPtr;
+                            }
+                            EndLine if flags.end_line => {
+                                ip = inst.goto as InstPtr;
+                            }
+                            StartText if flags.start => {
+                                ip = inst.goto as InstPtr;
+                            }
+                            EndText if flags.end => {
+                                ip = inst.goto as InstPtr;
+                            }
+                            WordBoundaryAscii if flags.word_boundary => {
+                                ip = inst.goto as InstPtr;
+                            }
+                            NotWordBoundaryAscii if flags.not_word_boundary => {
+                                ip = inst.goto as InstPtr;
+                            }
+                            WordBoundary if flags.word_boundary => {
+                                ip = inst.goto as InstPtr;
+                            }
+                            NotWordBoundary if flags.not_word_boundary => {
+                                ip = inst.goto as InstPtr;
+                            }
+                            StartLine | EndLine | StartText | EndText
+                            | WordBoundaryAscii | NotWordBoundaryAscii
+                            | WordBoundary | NotWordBoundary => {
+                                break;
+                            }
                         }
-                        EndText if flags.end => {
-                            self.cache.stack.push(inst.goto as InstPtr);
-                        }
-                        WordBoundaryAscii if flags.word_boundary => {
-                            self.cache.stack.push(inst.goto as InstPtr);
-                        }
-                        NotWordBoundaryAscii if flags.not_word_boundary => {
-                            self.cache.stack.push(inst.goto as InstPtr);
-                        }
-                        WordBoundary if flags.word_boundary => {
-                            self.cache.stack.push(inst.goto as InstPtr);
-                        }
-                        NotWordBoundary if flags.not_word_boundary => {
-                            self.cache.stack.push(inst.goto as InstPtr);
-                        }
-                        StartLine | EndLine | StartText | EndText => {}
-                        WordBoundaryAscii | NotWordBoundaryAscii => {}
-                        WordBoundary | NotWordBoundary => {}
+                    }
+                    Save(ref inst) => {
+                        ip = inst.goto as InstPtr;
                     }
-                }
-                Save(ref inst) => self.cache.stack.push(inst.goto as InstPtr),
-                Split(ref inst) => {
-                    self.cache.stack.push(inst.goto2 as InstPtr);
-                    self.cache.stack.push(inst.goto1 as InstPtr);
+                    Split(ref inst) => {
+                        self.cache.stack.push(inst.goto2 as InstPtr);
+                        ip = inst.goto1 as InstPtr;
+                    }
                 }
             }
         }
     }
 
     /// Find a previously computed state matching the given set of instructions
     /// and is_match bool.
     ///
@@ -1162,22 +1178,22 @@ impl<'a> Fsm<'a> {
             None => return Some(STATE_DEAD),
             Some(v) => v,
         };
         // In the cache? Cool. Done.
         if let Some(&si) = self.cache.compiled.get(&key) {
             return Some(si);
         }
         // If the cache has gotten too big, wipe it.
-        if self.approximate_size() > self.prog.dfa_size_limit {
-            if !self.clear_cache_and_save(current_state) {
+        if self.approximate_size() > self.prog.dfa_size_limit
+            && !self.clear_cache_and_save(current_state)
+            {
                 // Ooops. DFA is giving up.
                 return None;
             }
-        }
         // Allocate room for our state and add it.
         self.add_state(key)
     }
 
     /// Produces a key suitable for describing a state in the DFA cache.
     ///
     /// The key invariant here is that equivalent keys are produced for any two
     /// sets of ordered NFA states (and toggling of whether the previous NFA
@@ -1205,18 +1221,17 @@ impl<'a> Fsm<'a> {
 
         // Reserve 1 byte for flags.
         let mut insts = vec![0];
         let mut prev = 0;
         for &ip in q {
             let ip = usize_to_u32(ip);
             match self.prog[ip as usize] {
                 Char(_) | Ranges(_) => unreachable!(),
-                Save(_) => {}
-                Split(_) => {}
+                Save(_) | Split(_) => {}
                 Bytes(_) => push_inst_ptr(&mut insts, &mut prev, ip),
                 EmptyLook(_) => {
                     state_flags.set_empty();
                     push_inst_ptr(&mut insts, &mut prev, ip)
                 }
                 Match(_) => {
                     push_inst_ptr(&mut insts, &mut prev, ip);
                     if !self.continue_past_first_match() {
@@ -1296,17 +1311,17 @@ impl<'a> Fsm<'a> {
             Some(self.state(self.last_match_si).clone())
         } else {
             None
         };
         self.cache.reset_size();
         self.cache.trans.clear();
         self.cache.states.clear();
         self.cache.compiled.clear();
-        for s in self.cache.start_states.iter_mut() {
+        for s in &mut self.cache.start_states {
             *s = STATE_UNKNOWN;
         }
         // The unwraps are OK because we just cleared the cache and therefore
         // know that the next state pointer won't exceed STATE_MAX.
         let start_ptr = self.restore_state(start).unwrap();
         self.start = self.start_ptr(start_ptr);
         if let Some(last_match) = last_match {
             self.last_match_si = self.restore_state(last_match).unwrap();
@@ -1406,19 +1421,19 @@ impl<'a> Fsm<'a> {
     /// Computes the set of starting flags for the given position in text.
     ///
     /// This should only be used when executing the DFA forwards over the
     /// input.
     fn start_flags(&self, text: &[u8], at: usize) -> (EmptyFlags, StateFlags) {
         let mut empty_flags = EmptyFlags::default();
         let mut state_flags = StateFlags::default();
         empty_flags.start = at == 0;
-        empty_flags.end = text.len() == 0;
+        empty_flags.end = text.is_empty();
         empty_flags.start_line = at == 0 || text[at - 1] == b'\n';
-        empty_flags.end_line = text.len() == 0;
+        empty_flags.end_line = text.is_empty();
 
         let is_word_last = at > 0 && Byte::byte(text[at - 1]).is_ascii_word();
         let is_word = at < text.len() && Byte::byte(text[at]).is_ascii_word();
         if is_word_last {
             state_flags.set_word();
         }
         if is_word == is_word_last {
             empty_flags.not_word_boundary = true;
@@ -1435,19 +1450,19 @@ impl<'a> Fsm<'a> {
     fn start_flags_reverse(
         &self,
         text: &[u8],
         at: usize,
     ) -> (EmptyFlags, StateFlags) {
         let mut empty_flags = EmptyFlags::default();
         let mut state_flags = StateFlags::default();
         empty_flags.start = at == text.len();
-        empty_flags.end = text.len() == 0;
+        empty_flags.end = text.is_empty();
         empty_flags.start_line = at == text.len() || text[at] == b'\n';
-        empty_flags.end_line = text.len() == 0;
+        empty_flags.end_line = text.is_empty();
 
         let is_word_last =
             at < text.len() && Byte::byte(text[at]).is_ascii_word();
         let is_word = at > 0 && Byte::byte(text[at - 1]).is_ascii_word();
         if is_word_last {
             state_flags.set_word();
         }
         if is_word == is_word_last {
--- a/third_party/rust/regex/src/error.rs
+++ b/third_party/rust/regex/src/error.rs
@@ -4,21 +4,20 @@
 //
 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
 use std::fmt;
-
-use syntax;
+use std::iter::repeat;
 
 /// An error that occurred during parsing or compiling a regular expression.
-#[derive(Debug)]
+#[derive(Clone, PartialEq)]
 pub enum Error {
     /// A syntax error.
     Syntax(String),
     /// The compiled program exceeded the set size limit.
     /// The argument is the size limit imposed.
     CompiledTooBig(usize),
     /// Hints that destructuring should not be exhaustive.
     ///
@@ -51,13 +50,35 @@ impl fmt::Display for Error {
                 write!(f, "Compiled regex exceeds size limit of {} bytes.",
                        limit)
             }
             Error::__Nonexhaustive => unreachable!(),
         }
     }
 }
 
-impl From<syntax::Error> for Error {
-    fn from(err: syntax::Error) -> Error {
-        Error::Syntax(err.to_string())
+// We implement our own Debug implementation so that we show nicer syntax
+// errors when people use `Regex::new(...).unwrap()`. It's a little weird,
+// but the `Syntax` variant is already storing a `String` anyway, so we might
+// as well format it nicely.
+impl fmt::Debug for Error {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match *self {
+            Error::Syntax(ref err) => {
+                let hr: String = repeat('~').take(79).collect();
+                writeln!(f, "Syntax(")?;
+                writeln!(f, "{}", hr)?;
+                writeln!(f, "{}", err)?;
+                writeln!(f, "{}", hr)?;
+                write!(f, ")")?;
+                Ok(())
+            }
+            Error::CompiledTooBig(limit) => {
+                f.debug_tuple("CompiledTooBig")
+                    .field(&limit)
+                    .finish()
+            }
+            Error::__Nonexhaustive => {
+                f.debug_tuple("__Nonexhaustive").finish()
+            }
+        }
     }
 }
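
A hedged usage sketch of what the custom `Debug` above buys: unwrapping a failed parse now prints the underlying syntax error between the tilde rules instead of an escaped one-line string. The pattern below is only an example of an invalid regex, and the snippet assumes the `regex` crate is available as a dependency:

    fn main() {
        // "(" is an invalid pattern (unclosed group); the Err's Debug output
        // uses the multi-line Syntax(...) rendering defined above.
        let err = regex::Regex::new("(").unwrap_err();
        println!("{:?}", err);
    }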
--- a/third_party/rust/regex/src/exec.rs
+++ b/third_party/rust/regex/src/exec.rs
@@ -9,60 +9,62 @@
 // except according to those terms.
 
 use std::cell::RefCell;
 use std::collections::HashMap;
 use std::cmp;
 use std::sync::Arc;
 
 use thread_local::CachedThreadLocal;
-use syntax::{Expr, ExprBuilder, Literals};
+use syntax::ParserBuilder;
+use syntax::hir::Hir;
+use syntax::hir::literal::Literals;
 
 use backtrack;
 use compile::Compiler;
 use dfa;
 use error::Error;
 use input::{ByteInput, CharInput};
-use literals::LiteralSearcher;
+use literal::LiteralSearcher;
 use pikevm;
 use prog::Program;
 use re_builder::RegexOptions;
 use re_bytes;
 use re_set;
 use re_trait::{RegularExpression, Slot, Locations, as_slots};
 use re_unicode;
 use utf8::next_utf8;
 
-/// Exec manages the execution of a regular expression.
+/// `Exec` manages the execution of a regular expression.
 ///
 /// In particular, this manages the various compiled forms of a single regular
 /// expression and the choice of which matching engine to use to execute a
 /// regular expression.
 pub struct Exec {
     /// All read only state.
     ro: Arc<ExecReadOnly>,
     /// Caches for the various matching engines.
     cache: CachedThreadLocal<ProgramCache>,
 }
 
-/// ExecNoSync is like Exec, except it embeds a reference to a cache. This
+/// `ExecNoSync` is like `Exec`, except it embeds a reference to a cache. This
 /// means it is no longer Sync, but we can now avoid the overhead of
 /// synchronization to fetch the cache.
 #[derive(Debug)]
 pub struct ExecNoSync<'c> {
     /// All read only state.
     ro: &'c Arc<ExecReadOnly>,
     /// Caches for the various matching engines.
     cache: &'c ProgramCache,
 }
 
-/// ExecNoSyncStr is like ExecNoSync, but matches on &str instead of &[u8].
+/// `ExecNoSyncStr` is like `ExecNoSync`, but matches on &str instead of &[u8].
 pub struct ExecNoSyncStr<'c>(ExecNoSync<'c>);
 
-/// ExecReadOnly comprises all read only state for a regex. Namely, all such
+/// `ExecReadOnly` comprises all read only state for a regex. Namely, all such
 /// state is determined at compile time and never changes during search.
 #[derive(Debug)]
 struct ExecReadOnly {
     /// The original regular expressions given by the caller to compile.
     res: Vec<String>,
     /// A compiled program that is used in the NFA simulation and backtracking.
     /// It can be byte-based or Unicode codepoint based.
     ///
@@ -97,17 +99,17 @@ pub struct ExecBuilder {
     match_type: Option<MatchType>,
     bytes: bool,
     only_utf8: bool,
 }
 
 /// Parsed represents a set of parsed regular expressions and their detected
 /// literals.
 struct Parsed {
-    exprs: Vec<Expr>,
+    exprs: Vec<Hir>,
     prefixes: Literals,
     suffixes: Literals,
     bytes: bool,
 }
 
 impl ExecBuilder {
     /// Create a regex execution builder.
     ///
@@ -209,67 +211,72 @@ impl ExecBuilder {
         let mut exprs = Vec::with_capacity(self.options.pats.len());
         let mut prefixes = Some(Literals::empty());
         let mut suffixes = Some(Literals::empty());
         let mut bytes = false;
         let is_set = self.options.pats.len() > 1;
         // If we're compiling a regex set and that set has any anchored
         // expressions, then disable all literal optimizations.
         for pat in &self.options.pats {
-            let parser =
-                ExprBuilder::new()
+            let mut parser =
+                ParserBuilder::new()
+                    .octal(self.options.octal)
                     .case_insensitive(self.options.case_insensitive)
                     .multi_line(self.options.multi_line)
                     .dot_matches_new_line(self.options.dot_matches_new_line)
                     .swap_greed(self.options.swap_greed)
                     .ignore_whitespace(self.options.ignore_whitespace)
                     .unicode(self.options.unicode)
-                    .allow_bytes(!self.only_utf8);
-            let expr = try!(parser.parse(pat));
-            bytes = bytes || expr.has_bytes();
+                    .allow_invalid_utf8(!self.only_utf8)
+                    .nest_limit(self.options.nest_limit)
+                    .build();
+            let expr = parser
+                .parse(pat)
+                .map_err(|e| Error::Syntax(e.to_string()))?;
+            bytes = bytes || !expr.is_always_utf8();
 
-            if !expr.is_anchored_start() && expr.has_anchored_start() {
+            if !expr.is_anchored_start() && expr.is_any_anchored_start() {
                 // Partial anchors unfortunately make it hard to use prefixes,
                 // so disable them.
                 prefixes = None;
             } else if is_set && expr.is_anchored_start() {
                 // Regex sets with anchors do not go well with literal
                 // optimizations.
                 prefixes = None;
             }
             prefixes = prefixes.and_then(|mut prefixes| {
                 if !prefixes.union_prefixes(&expr) {
                     None
                 } else {
                     Some(prefixes)
                 }
             });
 
-            if !expr.is_anchored_end() && expr.has_anchored_end() {
+            if !expr.is_anchored_end() && expr.is_any_anchored_end() {
                 // Partial anchors unfortunately make it hard to use suffixes,
                 // so disable them.
                 suffixes = None;
             } else if is_set && expr.is_anchored_end() {
                 // Regex sets with anchors do not go well with literal
                 // optimizations.
-                prefixes = None;
+                suffixes = None;
             }
             suffixes = suffixes.and_then(|mut suffixes| {
                 if !suffixes.union_suffixes(&expr) {
                     None
                 } else {
                     Some(suffixes)
                 }
             });
             exprs.push(expr);
         }
         Ok(Parsed {
             exprs: exprs,
-            prefixes: prefixes.unwrap_or(Literals::empty()),
-            suffixes: suffixes.unwrap_or(Literals::empty()),
+            prefixes: prefixes.unwrap_or_else(Literals::empty),
+            suffixes: suffixes.unwrap_or_else(Literals::empty),
             bytes: bytes,
         })
     }
 
     /// Build an executor that can run a regular expression.
     pub fn build(self) -> Result<Exec, Error> {
         // Special case when we have no patterns to compile.
         // This can happen when compiling a regex set.
@@ -279,36 +286,36 @@ impl ExecBuilder {
                 nfa: Program::new(),
                 dfa: Program::new(),
                 dfa_reverse: Program::new(),
                 suffixes: LiteralSearcher::empty(),
                 match_type: MatchType::Nothing,
             });
             return Ok(Exec { ro: ro, cache: CachedThreadLocal::new() });
         }
-        let parsed = try!(self.parse());
-        let mut nfa = try!(
+        let parsed = self.parse()?;
+        let mut nfa =
             Compiler::new()
                      .size_limit(self.options.size_limit)
                      .bytes(self.bytes || parsed.bytes)
                      .only_utf8(self.only_utf8)
-                     .compile(&parsed.exprs));
-        let mut dfa = try!(
+                     .compile(&parsed.exprs)?;
+        let mut dfa =
             Compiler::new()
                      .size_limit(self.options.size_limit)
                      .dfa(true)
                      .only_utf8(self.only_utf8)
-                     .compile(&parsed.exprs));
-        let mut dfa_reverse = try!(
+                     .compile(&parsed.exprs)?;
+        let mut dfa_reverse =
             Compiler::new()
                      .size_limit(self.options.size_limit)
                      .dfa(true)
                      .only_utf8(self.only_utf8)
                      .reverse(true)
-                     .compile(&parsed.exprs));
+                     .compile(&parsed.exprs)?;
 
         let prefixes = parsed.prefixes.unambiguous_prefixes();
         let suffixes = parsed.suffixes.unambiguous_suffixes();
         nfa.prefixes = LiteralSearcher::prefixes(prefixes);
         dfa.prefixes = nfa.prefixes.clone();
         dfa.dfa_size_limit = self.options.dfa_size_limit;
         dfa_reverse.dfa_size_limit = self.options.dfa_size_limit;
 
@@ -392,17 +399,17 @@ impl<'c> RegularExpression for ExecNoSyn
                     dfa::Result::Match(end) => Some(end),
                     dfa::Result::NoMatch(_) => None,
                     dfa::Result::Quit => self.shortest_nfa(text, start),
                 }
             }
             MatchType::DfaAnchoredReverse => {
                 match dfa::Fsm::reverse(
                     &self.ro.dfa_reverse,
-                    &self.cache,
+                    self.cache,
                     true,
                     &text[start..],
                     text.len(),
                 ) {
                     dfa::Result::Match(_) => Some(text.len()),
                     dfa::Result::NoMatch(_) => None,
                     dfa::Result::Quit => self.shortest_nfa(text, start),
                 }
@@ -440,17 +447,17 @@ impl<'c> RegularExpression for ExecNoSyn
                     dfa::Result::Match(_) => true,
                     dfa::Result::NoMatch(_) => false,
                     dfa::Result::Quit => self.match_nfa(text, start),
                 }
             }
             MatchType::DfaAnchoredReverse => {
                 match dfa::Fsm::reverse(
                     &self.ro.dfa_reverse,
-                    &self.cache,
+                    self.cache,
                     true,
                     &text[start..],
                     text.len(),
                 ) {
                     dfa::Result::Match(_) => true,
                     dfa::Result::NoMatch(_) => false,
                     dfa::Result::Quit => self.match_nfa(text, start),
                 }
@@ -549,22 +556,26 @@ impl<'c> RegularExpression for ExecNoSyn
         }
         match self.ro.match_type {
             MatchType::Literal(ty) => {
                 self.find_literals(ty, text, start).and_then(|(s, e)| {
                     self.captures_nfa_with_match(slots, text, s, e)
                 })
             }
             MatchType::Dfa => {
-                match self.find_dfa_forward(text, start) {
-                    dfa::Result::Match((s, e)) => {
-                        self.captures_nfa_with_match(slots, text, s, e)
+                if self.ro.nfa.is_anchored_start {
+                    self.captures_nfa(slots, text, start)
+                } else {
+                    match self.find_dfa_forward(text, start) {
+                        dfa::Result::Match((s, e)) => {
+                            self.captures_nfa_with_match(slots, text, s, e)
+                        }
+                        dfa::Result::NoMatch(_) => None,
+                        dfa::Result::Quit => self.captures_nfa(slots, text, start),
                     }
-                    dfa::Result::NoMatch(_) => None,
-                    dfa::Result::Quit => self.captures_nfa(slots, text, start),
                 }
             }
             MatchType::DfaAnchoredReverse => {
                 match self.find_dfa_anchored_reverse(text, start) {
                     dfa::Result::Match((s, e)) => {
                         self.captures_nfa_with_match(slots, text, s, e)
                     }
                     dfa::Result::NoMatch(_) => None,
@@ -604,18 +615,23 @@ impl<'c> ExecNoSync<'c> {
         match ty {
             Unanchored => {
                 let lits = &self.ro.nfa.prefixes;
                 lits.find(&text[start..])
                     .map(|(s, e)| (start + s, start + e))
             }
             AnchoredStart => {
                 let lits = &self.ro.nfa.prefixes;
-                lits.find_start(&text[start..])
-                    .map(|(s, e)| (start + s, start + e))
+                if !self.ro.nfa.is_anchored_start
+                    || (self.ro.nfa.is_anchored_start && start == 0) {
+                    lits.find_start(&text[start..])
+                        .map(|(s, e)| (start + s, start + e))
+                } else {
+                    None
+                }
             }
             AnchoredEnd => {
                 let lits = &self.ro.suffixes;
                 lits.find_end(&text[start..])
                     .map(|(s, e)| (start + s, start + e))
             }
         }
     }
@@ -628,30 +644,30 @@ impl<'c> ExecNoSync<'c> {
     fn find_dfa_forward(
         &self,
         text: &[u8],
         start: usize,
     ) -> dfa::Result<(usize, usize)> {
         use dfa::Result::*;
         let end = match dfa::Fsm::forward(
             &self.ro.dfa,
-            &self.cache,
+            self.cache,
             false,
             text,
             start,
         ) {
             NoMatch(i) => return NoMatch(i),
             Quit => return Quit,
             Match(end) if start == end => return Match((start, start)),
             Match(end) => end,
         };
         // Now run the DFA in reverse to find the start of the match.
         match dfa::Fsm::reverse(
             &self.ro.dfa_reverse,
-            &self.cache,
+            self.cache,
             false,
             &text[start..],
             end - start,
         ) {
             Match(s) => Match((start + s, end)),
             NoMatch(i) => NoMatch(i),
             Quit => Quit,
         }
@@ -667,31 +683,31 @@ impl<'c> ExecNoSync<'c> {
     fn find_dfa_anchored_reverse(
         &self,
         text: &[u8],
         start: usize,
     ) -> dfa::Result<(usize, usize)> {
         use dfa::Result::*;
         match dfa::Fsm::reverse(
             &self.ro.dfa_reverse,
-            &self.cache,
+            self.cache,
             false,
             &text[start..],
             text.len() - start,
         ) {
             Match(s) => Match((start + s, text.len())),
             NoMatch(i) => NoMatch(i),
             Quit => Quit,
         }
     }
 
     /// Finds the end of the shortest match using only the DFA.
     #[inline(always)] // reduces constant overhead
     fn shortest_dfa(&self, text: &[u8], start: usize) -> dfa::Result<usize> {
-        dfa::Fsm::forward(&self.ro.dfa, &self.cache, true, text, start)
+        dfa::Fsm::forward(&self.ro.dfa, self.cache, true, text, start)
     }
 
     /// Finds the end of the shortest match using only the DFA by scanning for
     /// suffix literals.
     ///
     #[inline(always)] // reduces constant overhead
     fn shortest_dfa_reverse_suffix(
         &self,
@@ -726,23 +742,23 @@ impl<'c> ExecNoSync<'c> {
         use dfa::Result::*;
 
         let lcs = self.ro.suffixes.lcs();
         debug_assert!(lcs.len() >= 1);
         let mut start = original_start;
         let mut end = start;
         while end <= text.len() {
             start = end;
-            end = end + match lcs.find(&text[end..]) {
+            end += match lcs.find(&text[end..]) {
                 None => return Some(NoMatch(text.len())),
                 Some(start) => start + lcs.len(),
             };
             match dfa::Fsm::reverse(
                 &self.ro.dfa_reverse,
-                &self.cache,
+                self.cache,
                 false,
                 &text[start..end],
                 end - start,
             ) {
                 Match(0) | NoMatch(0) => return None,
                 Match(s) => return Some(Match((s + start, end))),
                 NoMatch(_) => continue,
                 Quit => return Some(Quit),
@@ -773,17 +789,17 @@ impl<'c> ExecNoSync<'c> {
         // without a match is if the DFA gives up (seems unlikely).
         //
         // Now run the DFA forwards to find the proper end of the match.
         // (The suffix literal match can only indicate the earliest
         // possible end location, which may appear before the end of the
         // leftmost-first match.)
         match dfa::Fsm::forward(
             &self.ro.dfa,
-            &self.cache,
+            self.cache,
             false,
             text,
             match_start,
         ) {
             NoMatch(_) => panic!("BUG: reverse match implies forward match"),
             Quit => Quit,
             Match(e) => Match((match_start, e)),
         }
@@ -934,26 +950,26 @@ impl<'c> ExecNoSync<'c> {
         slots: &mut [Slot],
         quit_after_match: bool,
         text: &[u8],
         start: usize,
     ) -> bool {
         if self.ro.nfa.uses_bytes() {
             pikevm::Fsm::exec(
                 &self.ro.nfa,
-                &self.cache,
+                self.cache,
                 matches,
                 slots,
                 quit_after_match,
                 ByteInput::new(text, self.ro.nfa.only_utf8),
                 start)
         } else {
             pikevm::Fsm::exec(
                 &self.ro.nfa,
-                &self.cache,
+                self.cache,
                 matches,
                 slots,
                 quit_after_match,
                 CharInput::new(text),
                 start)
         }
     }
 
@@ -963,25 +979,25 @@ impl<'c> ExecNoSync<'c> {
         matches: &mut [bool],
         slots: &mut [Slot],
         text: &[u8],
         start: usize,
     ) -> bool {
         if self.ro.nfa.uses_bytes() {
             backtrack::Bounded::exec(
                 &self.ro.nfa,
-                &self.cache,
+                self.cache,
                 matches,
                 slots,
                 ByteInput::new(text, self.ro.nfa.only_utf8),
                 start)
         } else {
             backtrack::Bounded::exec(
                 &self.ro.nfa,
-                &self.cache,
+                self.cache,
                 matches,
                 slots,
                 CharInput::new(text),
                 start)
         }
     }
 
     /// Finds which regular expressions match the given text.
@@ -998,24 +1014,24 @@ impl<'c> ExecNoSync<'c> {
         start: usize,
     ) -> bool {
         use self::MatchType::*;
         if !self.is_anchor_end_match(text) {
             return false;
         }
         match self.ro.match_type {
             Literal(ty) => {
-                debug_assert!(matches.len() == 1);
+                debug_assert_eq!(matches.len(), 1);
                 matches[0] = self.find_literals(ty, text, start).is_some();
                 matches[0]
             }
             Dfa | DfaAnchoredReverse | DfaSuffix | DfaMany => {
                 match dfa::Fsm::forward_many(
                     &self.ro.dfa,
-                    &self.cache,
+                    self.cache,
                     matches,
                     text,
                     start,
                 ) {
                     dfa::Result::Match(_) => true,
                     dfa::Result::NoMatch(_) => false,
                     dfa::Result::Quit => {
                         self.exec_nfa(
@@ -1246,17 +1262,17 @@ enum MatchNfaType {
     Backtrack,
     /// The Pike VM.
     ///
     /// (This is only set by tests, since it never makes sense to always want
     /// the Pike VM.)
     PikeVM,
 }
 
-/// ProgramCache maintains reusable allocations for each matching engine
+/// `ProgramCache` maintains reusable allocations for each matching engine
 /// available to a particular program.
 pub type ProgramCache = RefCell<ProgramCacheInner>;
 
 #[derive(Clone, Debug)]
 pub struct ProgramCacheInner {
     pub pikevm: pikevm::Cache,
     pub backtrack: backtrack::Cache,
     pub dfa: dfa::Cache,
@@ -1268,8 +1284,67 @@ impl ProgramCacheInner {
         ProgramCacheInner {
             pikevm: pikevm::Cache::new(&ro.nfa),
             backtrack: backtrack::Cache::new(&ro.nfa),
             dfa: dfa::Cache::new(&ro.dfa),
             dfa_reverse: dfa::Cache::new(&ro.dfa_reverse),
         }
     }
 }
+
+#[cfg(test)]
+mod test {
+    #[test]
+    fn uppercut_s_backtracking_bytes_default_bytes_mismatch() {
+        use internal::ExecBuilder;
+
+        let backtrack_bytes_re = ExecBuilder::new("^S")
+            .bounded_backtracking()
+            .only_utf8(false)
+            .build()
+            .map(|exec| exec.into_byte_regex())
+            .map_err(|err| format!("{}", err))
+            .unwrap();
+
+        let default_bytes_re = ExecBuilder::new("^S")
+            .only_utf8(false)
+            .build()
+            .map(|exec| exec.into_byte_regex())
+            .map_err(|err| format!("{}", err))
+            .unwrap();
+
+        let input = vec![83, 83];
+
+        let s1 = backtrack_bytes_re.split(&input);
+        let s2 = default_bytes_re.split(&input);
+        for (chunk1, chunk2) in s1.zip(s2) {
+            assert_eq!(chunk1, chunk2);
+        }
+    }
+
+    #[test]
+    fn unicode_lit_star_backtracking_utf8bytes_default_utf8bytes_mismatch() {
+        use internal::ExecBuilder;
+
+        let backtrack_bytes_re = ExecBuilder::new(r"^(?u:\*)")
+            .bounded_backtracking()
+            .bytes(true)
+            .build()
+            .map(|exec| exec.into_regex())
+            .map_err(|err| format!("{}", err))
+            .unwrap();
+
+        let default_bytes_re = ExecBuilder::new(r"^(?u:\*)")
+            .bytes(true)
+            .build()
+            .map(|exec| exec.into_regex())
+            .map_err(|err| format!("{}", err))
+            .unwrap();
+
+        let input = "**";
+
+        let s1 = backtrack_bytes_re.split(input);
+        let s2 = default_bytes_re.split(input);
+        for (chunk1, chunk2) in s1.zip(s2) {
+            assert_eq!(chunk1, chunk2);
+        }
+    }
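+
+    // Editorial sketch, not part of the upstream test suite: with an
+    // anchored pattern, captures are routed through the NFA rather than the
+    // forward DFA (see the `is_anchored_start` branch above), and group
+    // offsets should still be reported correctly.
+    #[test]
+    fn anchored_capture_sketch() {
+        use internal::ExecBuilder;
+
+        let re = ExecBuilder::new("^(a+)")
+            .build()
+            .map(|exec| exec.into_regex())
+            .map_err(|err| format!("{}", err))
+            .unwrap();
+
+        let caps = re.captures("aaab").unwrap();
+        assert_eq!(&caps[1], "aaa");
+    }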
+}
--- a/third_party/rust/regex/src/expand.rs
+++ b/third_party/rust/regex/src/expand.rs
@@ -84,18 +84,18 @@ pub fn expand_bytes(
                 dst.extend(
                     caps.name(name).map(|m| m.as_bytes()).unwrap_or(b""));
             }
         }
     }
     dst.extend(replacement);
 }
 
-/// CaptureRef represents a reference to a capture group inside some text. The
-/// reference is either a capture group name or a number.
+/// `CaptureRef` represents a reference to a capture group inside some text.
+/// The reference is either a capture group name or a number.
 ///
 /// It is also tagged with the position in the text immediately following the
 /// capture reference.
 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
 struct CaptureRef<'a> {
     cap: Ref<'a>,
     end: usize,
 }
@@ -145,17 +145,17 @@ fn find_cap_ref<T: ?Sized + AsRef<[u8]>>
     }
     if cap_end == i {
         return None;
     }
     // We just verified that the range 0..cap_end is valid ASCII, so it must
     // therefore be valid UTF-8. If we really cared, we could avoid this UTF-8
     // check with either unsafe or by parsing the number straight from &[u8].
     let cap = str::from_utf8(&rep[i..cap_end])
-                  .ok().expect("valid UTF-8 capture name");
+                  .expect("valid UTF-8 capture name");
     if brace {
         if !rep.get(cap_end).map_or(false, |&b| b == b'}') {
             return None;
         }
         cap_end += 1;
     }
     Some(CaptureRef {
         cap: match cap.parse::<u32>() {
--- a/third_party/rust/regex/src/input.rs
+++ b/third_party/rust/regex/src/input.rs
@@ -11,17 +11,17 @@
 use std::char;
 use std::cmp::Ordering;
 use std::fmt;
 use std::ops;
 use std::u32;
 
 use syntax;
 
-use literals::LiteralSearcher;
+use literal::LiteralSearcher;
 use prog::InstEmptyLook;
 use utf8::{decode_utf8, decode_last_utf8};
 
 /// Represents a location in the input.
 #[derive(Clone, Copy, Debug)]
 pub struct InputAt {
     pos: usize,
     c: Char,
@@ -53,16 +53,22 @@ impl InputAt {
         self.byte
     }
 
     /// Returns the UTF-8 width of the character at this position.
     pub fn len(&self) -> usize {
         self.len
     }
 
+    /// Returns whether the UTF-8 width of the character at this position
+    /// is zero.
+    pub fn is_empty(&self) -> bool {
+        self.len == 0
+    }
+
     /// Returns the byte offset of this position.
     pub fn pos(&self) -> usize {
         self.pos
     }
 
     /// Returns the byte offset of the next position in the input.
     pub fn next_pos(&self) -> usize {
         self.pos + self.len
@@ -93,16 +99,19 @@ pub trait Input {
         &self,
         prefixes: &LiteralSearcher,
         at: InputAt,
     ) -> Option<InputAt>;
 
     /// The number of bytes in the input.
     fn len(&self) -> usize;
 
+    /// Whether the input is empty.
+    fn is_empty(&self) -> bool { self.len() == 0 }
+
     /// Return the given input as a sequence of bytes.
     fn as_bytes(&self) -> &[u8];
 }
 
 impl<'a, T: Input> Input for &'a T {
     fn at(&self, i: usize) -> InputAt { (**self).at(i) }
 
     fn next_char(&self, at: InputAt) -> Char { (**self).next_char(at) }
@@ -209,20 +218,16 @@ impl<'t> Input for CharInput<'t> {
     }
 
     fn as_bytes(&self) -> &[u8] {
         self.0
     }
 }
 
 /// An input reader over bytes.
-///
-/// N.B. We represent the reader with a string for now, since that gives us
-/// easy access to necessary Unicode decoding (used for word boundary look
-/// ahead/look behind).
 #[derive(Clone, Copy, Debug)]
 pub struct ByteInput<'t> {
     text: &'t [u8],
     only_utf8: bool,
 }
 
 impl<'t> ByteInput<'t> {
     /// Return a new byte-based input reader for the given string.
@@ -242,17 +247,17 @@ impl<'t> ops::Deref for ByteInput<'t> {
     }
 }
 
 impl<'t> Input for ByteInput<'t> {
     fn at(&self, i: usize) -> InputAt {
         InputAt {
             pos: i,
             c: None.into(),
-            byte: self.get(i).map(|&b| b),
+            byte: self.get(i).cloned(),
             len: 1,
         }
     }
 
     fn next_char(&self, at: InputAt) -> Char {
         decode_utf8(&self[at.pos()..]).map(|(c, _)| c).into()
     }
 
@@ -320,17 +325,17 @@ impl<'t> Input for ByteInput<'t> {
         prefixes.find(&self[at.pos()..]).map(|(s, _)| self.at(at.pos() + s))
     }
 
     fn len(&self) -> usize {
         self.text.len()
     }
 
     fn as_bytes(&self) -> &[u8] {
-        &self.text
+        self.text
     }
 }
 
 /// An inline representation of `Option<char>`.
 ///
 /// This eliminates the need to do case analysis on `Option<char>` to determine
 /// ordinality with other characters.
 ///
@@ -361,38 +366,28 @@ impl Char {
     pub fn len_utf8(self) -> usize {
         char::from_u32(self.0).map_or(0, |c| c.len_utf8())
     }
 
     /// Returns true iff the character is a word character.
     ///
     /// If the character is absent, then false is returned.
     pub fn is_word_char(self) -> bool {
-        char::from_u32(self.0).map_or(false, syntax::is_word_char)
+        char::from_u32(self.0).map_or(false, syntax::is_word_character)
     }
 
     /// Returns true iff the byte is a word byte.
     ///
     /// If the byte is absent, then false is returned.
     pub fn is_word_byte(self) -> bool {
         match char::from_u32(self.0) {
-            None => false,
             Some(c) if c <= '\u{7F}' => syntax::is_word_byte(c as u8),
-            Some(_) => false,
+            None | Some(_) => false,
         }
     }
-
-    /// Converts the character to a real primitive `char`.
-    ///
-    /// If the character is absent, then `None` is returned.
-    pub fn as_char(self) -> Option<char> {
-        // This is only used in the `regex!` macro because it expands char
-        // classes into `match` expressions (instead of binary search).
-        char::from_u32(self.0)
-    }
 }
 
 impl From<char> for Char {
     fn from(c: char) -> Char { Char(c as u32) }
 }
 
 impl From<Option<char>> for Char {
     fn from(c: Option<char>) -> Char {
--- a/third_party/rust/regex/src/lib.rs
+++ b/third_party/rust/regex/src/lib.rs
@@ -51,17 +51,17 @@ assert!(re.is_match("2014-01-01"));
 ```
 
 Notice the use of the `^` and `$` anchors. In this crate, every expression
 is executed with an implicit `.*?` at the beginning and end, which allows
 it to match anywhere in the text. Anchors can be used to ensure that the
 full text matches an expression.
 
 This example also demonstrates the utility of
-[raw strings](https://doc.rust-lang.org/stable/reference.html#raw-string-literals)
+[raw strings](https://doc.rust-lang.org/stable/reference/tokens.html#raw-string-literals)
 in Rust, which
 are just like regular strings except they are prefixed with an `r` and do
 not process any escape sequences. For example, `"\\d"` is the same
 expression as `r"\d"`.
 
 # Example: Avoid compiling the same regex in a loop
 
 It is an anti-pattern to compile the same regular expression in a loop
@@ -212,19 +212,18 @@ only need to test if an expression match
 instead.)
 
 # Unicode
 
 This implementation executes regular expressions **only** on valid UTF-8
 while exposing match locations as byte indices into the search string.
 
 Only simple case folding is supported. Namely, when matching
-case-insensitively, the characters are first mapped using the [simple case
-folding](ftp://ftp.unicode.org/Public/UNIDATA/CaseFolding.txt) mapping
-before matching.
+case-insensitively, the characters are first mapped using the "simple" case
+folding rules defined by Unicode.
 
 Regular expressions themselves are **only** interpreted as a sequence of
 Unicode scalar values. This means you can use Unicode characters directly
 in your expression:
 
 ```rust
 # extern crate regex; use regex::Regex;
 # fn main() {
@@ -243,29 +242,35 @@ are some examples:
   of whitespace categorized by Unicode.
 * `\b` matches a Unicode word boundary.
 * Negated character classes like `[^a]` match all Unicode scalar values except
   for `a`.
 * `^` and `$` are **not** Unicode aware in multi-line mode. Namely, they only
   recognize `\n` and not any of the other forms of line terminators defined
   by Unicode.
 
-Finally, Unicode general categories and scripts are available as character
-classes. For example, you can match a sequence of numerals, Greek or
-Cherokee letters:
+Unicode general categories, scripts, script extensions, ages and a smattering
+of boolean properties are available as character classes. For example, you can
+match a sequence of numerals, Greek or Cherokee letters:
 
 ```rust
 # extern crate regex; use regex::Regex;
 # fn main() {
 let re = Regex::new(r"[\pN\p{Greek}\p{Cherokee}]+").unwrap();
 let mat = re.find("abcΔᎠβⅠᏴγδⅡxyz").unwrap();
 assert_eq!((mat.start(), mat.end()), (3, 23));
 # }
 ```
 
+For a more detailed breakdown of Unicode support with respect to
+[UTS#18](http://unicode.org/reports/tr18/),
+please see the
+[UNICODE](https://github.com/rust-lang/regex/blob/master/UNICODE.md)
+document in the root of the regex repository.
+
 # Opt out of Unicode support
 
 The `bytes` sub-module provides a `Regex` type that can be used to match
 on `&[u8]`. By default, text is interpreted as UTF-8 just like it is with
 the main `Regex` type. However, this behavior can be disabled by turning
 off the `u` flag, even if doing so could result in matching invalid UTF-8.
 For example, when the `u` flag is disabled, `.` will match any byte instead
 of any Unicode scalar value.
@@ -302,16 +307,18 @@ a separate crate, [`regex-syntax`](../re
 [xyz]         A character class matching either x, y or z (union).
 [^xyz]        A character class matching any character except x, y and z.
 [a-z]         A character class matching any character in range a-z.
 [[:alpha:]]   ASCII character class ([A-Za-z])
 [[:^alpha:]]  Negated ASCII character class ([^A-Za-z])
 [x[^xyz]]     Nested/grouping character class (matching any character except y and z)
 [a-y&&xyz]    Intersection (matching x or y)
 [0-9&&[^4]]   Subtraction using intersection and negation (matching 0-9 except 4)
+[0-9--4]      Direct subtraction (matching 0-9 except 4)
+[a-g~~b-h]    Symmetric difference (matching `a` and `h` only)
 [\[\]]        Escaping in character classes (matching [ or ])
 </pre>
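+
+For example (an illustrative sketch added for clarity, not taken from the
+upstream crate docs), direct subtraction removes characters from a class:
+
+```rust
+# extern crate regex; use regex::Regex;
+# fn main() {
+// [0-9--4] is the digits 0-9 with 4 removed.
+let re = Regex::new(r"^[0-9--4]+$").unwrap();
+assert!(re.is_match("0123"));
+assert!(!re.is_match("4"));
+# }
+```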
 
 Any named character class may appear inside a bracketed `[...]` character
 class. For example, `[\p{Greek}[:digit:]]` matches any Greek or ASCII
 digit. `[\p{Greek}&&\pL]` matches Greek letters.
 
 Precedence in character classes, from most binding to least:
@@ -369,64 +376,87 @@ x{n}?     exactly n x
 Flags are each a single character. For example, `(?x)` sets the flag `x`
 and `(?-x)` clears the flag `x`. Multiple flags can be set or cleared at
 the same time: `(?xy)` sets both the `x` and `y` flags and `(?x-y)` sets
 the `x` flag and clears the `y` flag.
 
 All flags are by default disabled unless stated otherwise. They are:
 
 <pre class="rust">
-i     case-insensitive
+i     case-insensitive: letters match both upper and lower case
 m     multi-line mode: ^ and $ match begin/end of line
 s     allow . to match \n
 U     swap the meaning of x* and x*?
 u     Unicode support (enabled by default)
 x     ignore whitespace and allow line comments (starting with `#`)
 </pre>
 
-Here's an example that matches case-insensitively for only part of the
-expression:
+Flags can be toggled within a pattern. Here's an example that matches
+case-insensitively for the first part but case-sensitively for the second part:
 
 ```rust
 # extern crate regex; use regex::Regex;
 # fn main() {
 let re = Regex::new(r"(?i)a+(?-i)b+").unwrap();
 let cap = re.captures("AaAaAbbBBBb").unwrap();
 assert_eq!(&cap[0], "AaAaAbb");
 # }
 ```
 
 Notice that the `a+` matches either `a` or `A`, but the `b+` only matches
 `b`.
 
+Multi-line mode means `^` and `$` no longer match just at the beginning/end of
+the input, but at the beginning/end of lines:
+
+```
+# use regex::Regex;
+let re = Regex::new(r"(?m)^line \d+").unwrap();
+let m = re.find("line one\nline 2\n").unwrap();
+assert_eq!(m.as_str(), "line 2");
+```
+
+Note that `^` matches after new lines, even at the end of input:
+
+```
+# use regex::Regex;
+let re = Regex::new(r"(?m)^").unwrap();
+let m = re.find_iter("test\n").last().unwrap();
+assert_eq!((m.start(), m.end()), (5, 5));
+```
+
 Here is an example that uses an ASCII word boundary instead of a Unicode
 word boundary:
 
 ```rust
 # extern crate regex; use regex::Regex;
 # fn main() {
 let re = Regex::new(r"(?-u:\b).+(?-u:\b)").unwrap();
 let cap = re.captures("$$abc$$").unwrap();
 assert_eq!(&cap[0], "abc");
 # }
 ```
 
 ## Escape sequences
 
 <pre class="rust">
-\*         literal *, works for any punctuation character: \.+*?()|[]{}^$
-\a         bell (\x07)
-\f         form feed (\x0C)
-\t         horizontal tab
-\n         new line
-\r         carriage return
-\v         vertical tab (\x0B)
-\123       octal character code (up to three digits)
-\x7F       hex character code (exactly two digits)
-\x{10FFFF} any hex character code corresponding to a Unicode code point
+\*          literal *, works for any punctuation character: \.+*?()|[]{}^$
+\a          bell (\x07)
+\f          form feed (\x0C)
+\t          horizontal tab
+\n          new line
+\r          carriage return
+\v          vertical tab (\x0B)
+\123        octal character code (up to three digits) (when enabled)
+\x7F        hex character code (exactly two digits)
+\x{10FFFF}  any hex character code corresponding to a Unicode code point
+\u007F      hex character code (exactly four digits)
+\u{7F}      any hex character code corresponding to a Unicode code point
+\U0000007F  hex character code (exactly eight digits)
+\U{7F}      any hex character code corresponding to a Unicode code point
 </pre>
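+
+For example (an illustrative sketch added for clarity, not taken from the
+upstream crate docs), the `\x{...}` and `\u{...}` escapes can denote the same
+Unicode scalar value:
+
+```rust
+# extern crate regex; use regex::Regex;
+# fn main() {
+// U+00E9 ("é") written with two different escape forms.
+assert!(Regex::new(r"\x{E9}").unwrap().is_match("é"));
+assert!(Regex::new(r"\u{E9}").unwrap().is_match("é"));
+# }
+```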
 
 ## Perl character classes (Unicode friendly)
 
 These classes are based on the definitions provided in
 [UTS#18](http://www.unicode.org/reports/tr18/#Compatibility_Properties):
 
 <pre class="rust">
@@ -485,35 +515,41 @@ the limit is reached too frequently, it 
 another matching engine with fixed memory requirements.
 (The DFA size limit can also be tweaked. See
 [`RegexBuilder::dfa_size_limit`](struct.RegexBuilder.html#method.dfa_size_limit).)
 */
 
 #![deny(missing_docs)]
 #![cfg_attr(test, deny(warnings))]
 #![cfg_attr(feature = "pattern", feature(pattern))]
-#![cfg_attr(feature = "simd-accel", feature(cfg_target_feature))]
 
 extern crate aho_corasick;
 extern crate memchr;
 extern crate thread_local;
-#[cfg(test)] extern crate quickcheck;
+#[cfg(test)]
+#[macro_use]
+extern crate quickcheck;
 extern crate regex_syntax as syntax;
-#[cfg(feature = "simd-accel")] extern crate simd;
 extern crate utf8_ranges;
 
+#[cfg(feature = "use_std")]
 pub use error::Error;
+#[cfg(feature = "use_std")]
 pub use re_builder::unicode::*;
+#[cfg(feature = "use_std")]
 pub use re_builder::set_unicode::*;
+#[cfg(feature = "use_std")]
 pub use re_set::unicode::*;
+#[cfg(feature = "use_std")]
 pub use re_trait::Locations;
+#[cfg(feature = "use_std")]
 pub use re_unicode::{
     Regex, Match, Captures,
     CaptureNames, Matches, CaptureMatches, SubCaptureMatches,
-    Replacer, NoExpand, Split, SplitN,
+    Replacer, ReplacerRef, NoExpand, Split, SplitN,
     escape,
 };
 
 /**
 Match regular expressions on arbitrary bytes.
 
 This module provides a nearly identical API to the one found in the
 top-level of this crate. There are two important differences:
@@ -584,25 +620,27 @@ character classes are allowed.
 3. In ASCII compatible mode, Perl character classes (`\w`, `\d` and `\s`)
 revert to their typical ASCII definition. `\w` maps to `[[:word:]]`, `\d` maps
 to `[[:digit:]]` and `\s` maps to `[[:space:]]`.
 4. In ASCII compatible mode, word boundaries use the ASCII compatible `\w` to
 determine whether a byte is a word byte or not.
 5. Hexadecimal notation can be used to specify arbitrary bytes instead of
 Unicode codepoints. For example, in ASCII compatible mode, `\xFF` matches the
 literal byte `\xFF`, while in Unicode mode, `\xFF` is a Unicode codepoint that
-matches its UTF-8 encoding of `\xC3\xBF`. Similarly for octal notation.
+matches its UTF-8 encoding of `\xC3\xBF`. Similarly for octal notation when
+enabled.
 6. `.` matches any *byte* except for `\n` instead of any Unicode scalar value.
 When the `s` flag is enabled, `.` matches any byte.
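+
+For example (an illustrative sketch added for clarity, not taken from the
+upstream crate docs), disabling the `u` flag lets a pattern match a raw,
+non-UTF-8 byte:
+
+```rust
+# extern crate regex; use regex::bytes::Regex;
+# fn main() {
+// With the `u` flag disabled, \xFF is the literal byte 0xFF.
+let re = Regex::new(r"(?-u)\xFF").unwrap();
+assert!(re.is_match(b"\xFF"));
+# }
+```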
 
 # Performance
 
 In general, one should expect performance on `&[u8]` to be roughly similar to
 performance on `&str`.
 */
+#[cfg(feature = "use_std")]
 pub mod bytes {
     pub use re_builder::bytes::*;
     pub use re_builder::set_bytes::*;
     pub use re_bytes::*;
     pub use re_set::bytes::*;
     pub use re_trait::Locations;
 }
 
@@ -610,39 +648,34 @@ mod backtrack;
 mod utf8;
 mod compile;
 mod dfa;
 mod error;
 mod exec;
 mod expand;
 mod freqs;
 mod input;
-mod literals;
+mod literal;
 #[cfg(feature = "pattern")]
 mod pattern;
 mod pikevm;
 mod prog;
 mod re_builder;
 mod re_bytes;
-mod re_plugin;
 mod re_set;
 mod re_trait;
 mod re_unicode;
-#[cfg(feature = "simd-accel")]
-mod simd_accel;
-#[cfg(not(feature = "simd-accel"))]
-#[path = "simd_fallback/mod.rs"]
-mod simd_accel;
 mod sparse;
+#[cfg(feature = "unstable")]
+mod vector;
 
-/// The `internal` module exists to support the `regex!` macro and other
-/// suspicious activity, such as testing different matching engines and
-/// supporting the `regex-debug` CLI utility.
+/// The `internal` module exists to support suspicious activity, such as
+/// testing different matching engines and supporting the `regex-debug` CLI
+/// utility.
 #[doc(hidden)]
+#[cfg(feature = "use_std")]
 pub mod internal {
     pub use compile::Compiler;
     pub use exec::{Exec, ExecBuilder};
     pub use input::{Char, Input, CharInput, InputAt};
-    pub use literals::LiteralSearcher;
+    pub use literal::LiteralSearcher;
     pub use prog::{Program, Inst, EmptyLook, InstRanges};
-    pub use re_plugin::Plugin;
-    pub use re_unicode::_Regex;
 }
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex/src/literal/mod.rs
@@ -0,0 +1,1141 @@
+// Copyright 2014-2015 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use std::cmp;
+use std::mem;
+
+use aho_corasick::{Automaton, AcAutomaton, FullAcAutomaton};
+use memchr::{memchr, memchr2, memchr3};
+use syntax::hir::literal::{Literal, Literals};
+
+use freqs::BYTE_FREQUENCIES;
+use self::teddy_avx2::{Teddy as TeddyAVX2};
+use self::teddy_ssse3::{Teddy as TeddySSSE3};
+
+mod teddy_avx2;
+mod teddy_ssse3;
+
+/// A prefix extracted from a compiled regular expression.
+///
+/// A regex prefix is a set of literal strings that *must* be matched at the
+/// beginning of a regex in order for the entire regex to match. Similarly
+/// for a regex suffix.
+#[derive(Clone, Debug)]
+pub struct LiteralSearcher {
+    complete: bool,
+    lcp: FreqyPacked,
+    lcs: FreqyPacked,
+    matcher: Matcher,
+}
+
+#[derive(Clone, Debug)]
+enum Matcher {
+    /// No literals. (Never advances through the input.)
+    Empty,
+    /// A set of four or more single byte literals.
+    Bytes(SingleByteSet),
+    /// A single substring, find using memchr and frequency analysis.
+    FreqyPacked(FreqyPacked),
+    /// A single substring, find using Boyer-Moore.
+    BoyerMoore(BoyerMooreSearch),
+    /// An Aho-Corasick automaton.
+    AC(FullAcAutomaton<Literal>),
+    /// A simd accelerated multiple string matcher. Used only for a small
+    /// number of small literals.
+    TeddySSSE3(TeddySSSE3),
+    /// A simd accelerated multiple string matcher. Used only for a small
+    /// number of small literals. This uses 256-bit vectors.
+    TeddyAVX2(TeddyAVX2),
+}
+
+impl LiteralSearcher {
+    /// Returns a matcher that never matches and never advances the input.
+    pub fn empty() -> Self {
+        Self::new(Literals::empty(), Matcher::Empty)
+    }
+
+    /// Returns a matcher for literal prefixes from the given set.
+    pub fn prefixes(lits: Literals) -> Self {
+        let matcher = Matcher::prefixes(&lits);
+        Self::new(lits, matcher)
+    }
+
+    /// Returns a matcher for literal suffixes from the given set.
+    pub fn suffixes(lits: Literals) -> Self {
+        let matcher = Matcher::suffixes(&lits);
+        Self::new(lits, matcher)
+    }
+
+    fn new(lits: Literals, matcher: Matcher) -> Self {
+        let complete = lits.all_complete();
+        LiteralSearcher {
+            complete: complete,
+            lcp: FreqyPacked::new(lits.longest_common_prefix().to_vec()),
+            lcs: FreqyPacked::new(lits.longest_common_suffix().to_vec()),
+            matcher: matcher,
+        }
+    }
+
+    /// Returns true if all matches comprise the entire regular expression.
+    ///
+    /// This does not necessarily mean that a literal match implies a match
+    /// of the regular expression. For example, the regular expression `^a`
+    /// is comprised of a single complete literal `a`, but the regular
+    /// expression demands that it only match at the beginning of a string.
+    pub fn complete(&self) -> bool {
+        self.complete && !self.is_empty()
+    }
+
+    /// Find the position of a literal in `haystack` if it exists.
+    #[inline(always)] // reduces constant overhead
+    pub fn find(&self, haystack: &[u8]) -> Option<(usize, usize)> {
+        use self::Matcher::*;
+        match self.matcher {
+            Empty => Some((0, 0)),
+            Bytes(ref sset) => sset.find(haystack).map(|i| (i, i + 1)),
+            FreqyPacked(ref s) => s.find(haystack).map(|i| (i, i + s.len())),
+            BoyerMoore(ref s) => s.find(haystack).map(|i| (i, i + s.len())),
+            AC(ref aut) => aut.find(haystack).next().map(|m| (m.start, m.end)),
+            TeddySSSE3(ref t) => t.find(haystack).map(|m| (m.start, m.end)),
+            TeddyAVX2(ref t) => t.find(haystack).map(|m| (m.start, m.end)),
+        }
+    }
+
+    /// Like find, except matches must start at index `0`.
+    pub fn find_start(&self, haystack: &[u8]) -> Option<(usize, usize)> {
+        for lit in self.iter() {
+            if lit.len() > haystack.len() {
+                continue;
+            }
+            if lit == &haystack[0..lit.len()] {
+                return Some((0, lit.len()));
+            }
+        }
+        None
+    }
+
+    /// Like find, except matches must end at index `haystack.len()`.
+    pub fn find_end(&self, haystack: &[u8]) -> Option<(usize, usize)> {
+        for lit in self.iter() {
+            if lit.len() > haystack.len() {
+                continue;
+            }
+            if lit == &haystack[haystack.len() - lit.len()..] {
+                return Some((haystack.len() - lit.len(), haystack.len()));
+            }
+        }
+        None
+    }
+
+    /// Returns an iterator over all literals to be matched.
+    pub fn iter(&self) -> LiteralIter {
+        match self.matcher {
+            Matcher::Empty => LiteralIter::Empty,
+            Matcher::Bytes(ref sset) => LiteralIter::Bytes(&sset.dense),
+            Matcher::FreqyPacked(ref s) => LiteralIter::Single(&s.pat),
+            Matcher::BoyerMoore(ref s) => LiteralIter::Single(&s.pattern),
+            Matcher::AC(ref ac) => LiteralIter::AC(ac.patterns()),
+            Matcher::TeddySSSE3(ref ted) => {
+                LiteralIter::TeddySSSE3(ted.patterns())
+            }
+            Matcher::TeddyAVX2(ref ted) => {
+                LiteralIter::TeddyAVX2(ted.patterns())
+            }
+        }
+    }
+
+    /// Returns a matcher for the longest common prefix of this matcher.
+    pub fn lcp(&self) -> &FreqyPacked {
+        &self.lcp
+    }
+
+    /// Returns a matcher for the longest common suffix of this matcher.
+    pub fn lcs(&self) -> &FreqyPacked {
+        &self.lcs
+    }
+
+    /// Returns true iff this prefix is empty.
+    pub fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    /// Returns the number of prefixes in this machine.
+    pub fn len(&self) -> usize {
+        use self::Matcher::*;
+        match self.matcher {
+            Empty => 0,
+            Bytes(ref sset) => sset.dense.len(),
+            FreqyPacked(_) => 1,
+            BoyerMoore(_) => 1,
+            AC(ref aut) => aut.len(),
+            TeddySSSE3(ref ted) => ted.len(),
+            TeddyAVX2(ref ted) => ted.len(),
+        }
+    }
+
+    /// Return the approximate heap usage of literals in bytes.
+    pub fn approximate_size(&self) -> usize {
+        use self::Matcher::*;
+        match self.matcher {
+            Empty => 0,
+            Bytes(ref sset) => sset.approximate_size(),
+            FreqyPacked(ref single) => single.approximate_size(),
+            BoyerMoore(ref single) => single.approximate_size(),
+            AC(ref aut) => aut.heap_bytes(),
+            TeddySSSE3(ref ted) => ted.approximate_size(),
+            TeddyAVX2(ref ted) => ted.approximate_size(),
+        }
+    }
+}
+
+impl Matcher {
+    fn prefixes(lits: &Literals) -> Self {
+        let sset = SingleByteSet::prefixes(lits);
+        Matcher::new(lits, sset)
+    }
+
+    fn suffixes(lits: &Literals) -> Self {
+        let sset = SingleByteSet::suffixes(lits);
+        Matcher::new(lits, sset)
+    }
+
+    fn new(lits: &Literals, sset: SingleByteSet) -> Self {
+        if lits.literals().is_empty() {
+            return Matcher::Empty;
+        }
+        if sset.dense.len() >= 26 {
+            // Avoid trying to match a large number of single bytes.
+            // This is *very* sensitive to a frequency analysis comparison
+            // between the bytes in sset and the composition of the haystack.
+            // No matter the size of sset, if its members all are rare in the
+            // haystack, then it'd be worth using it. How to tune this... IDK.
+            // ---AG
+            return Matcher::Empty;
+        }
+        if sset.complete {
+            return Matcher::Bytes(sset);
+        }
+        if lits.literals().len() == 1 {
+            let lit = lits.literals()[0].to_vec();
+            if BoyerMooreSearch::should_use(lit.as_slice()) {
+                return Matcher::BoyerMoore(BoyerMooreSearch::new(lit));
+            } else {
+                return Matcher::FreqyPacked(FreqyPacked::new(lit));
+            }
+        }
+        let is_aho_corasick_fast = sset.dense.len() == 1 && sset.all_ascii;
+        if TeddyAVX2::available() && !is_aho_corasick_fast {
+            const MAX_TEDDY_LITERALS: usize = 32;
+            if lits.literals().len() <= MAX_TEDDY_LITERALS {
+                if let Some(ted) = TeddyAVX2::new(lits) {
+                    return Matcher::TeddyAVX2(ted);
+                }
+            }
+        }
+        if TeddySSSE3::available() && !is_aho_corasick_fast {
+            // Only try Teddy if Aho-Corasick can't use memchr on an ASCII
+            // byte. Also, in its current form, Teddy doesn't scale well to
+            // lots of literals.
+            //
+            // We impose the ASCII restriction since an alternation of
+            // non-ASCII string literals in the same language is likely to all
+            // start with the same byte. Even worse, the corpus being searched
+            // probably has a similar composition, which ends up completely
+            // negating the benefit of memchr.
+            const MAX_TEDDY_LITERALS: usize = 32;
+            if lits.literals().len() <= MAX_TEDDY_LITERALS {
+                if let Some(ted) = TeddySSSE3::new(lits) {
+                    return Matcher::TeddySSSE3(ted);
+                }
+            }
+            // Fallthrough to ol' reliable Aho-Corasick...
+        }
+        let pats = lits.literals().to_owned();
+        Matcher::AC(AcAutomaton::new(pats).into_full())
+    }
+}
+
+pub enum LiteralIter<'a> {
+    Empty,
+    Bytes(&'a [u8]),
+    Single(&'a [u8]),
+    AC(&'a [Literal]),
+    TeddySSSE3(&'a [Vec<u8>]),
+    TeddyAVX2(&'a [Vec<u8>]),
+}
+
+impl<'a> Iterator for LiteralIter<'a> {
+    type Item = &'a [u8];
+
+    fn next(&mut self) -> Option<Self::Item> {
+        match *self {
+            LiteralIter::Empty => None,
+            LiteralIter::Bytes(ref mut many) => {
+                if many.is_empty() {
+                    None
+                } else {
+                    let next = &many[0..1];
+                    *many = &many[1..];
+                    Some(next)
+                }
+            }
+            LiteralIter::Single(ref mut one) => {
+                if one.is_empty() {
+                    None
+                } else {
+                    let next = &one[..];
+                    *one = &[];
+                    Some(next)
+                }
+            }
+            LiteralIter::AC(ref mut lits) => {
+                if lits.is_empty() {
+                    None
+                } else {
+                    let next = &lits[0];
+                    *lits = &lits[1..];
+                    Some(&**next)
+                }
+            }
+            LiteralIter::TeddySSSE3(ref mut lits) => {
+                if lits.is_empty() {
+                    None
+                } else {
+                    let next = &lits[0];
+                    *lits = &lits[1..];
+                    Some(&**next)
+                }
+            }
+            LiteralIter::TeddyAVX2(ref mut lits) => {
+                if lits.is_empty() {
+                    None
+                } else {
+                    let next = &lits[0];
+                    *lits = &lits[1..];
+                    Some(&**next)
+                }
+            }
+        }
+    }
+}
+
+#[derive(Clone, Debug)]
+struct SingleByteSet {
+    sparse: Vec<bool>,
+    dense: Vec<u8>,
+    complete: bool,
+    all_ascii: bool,
+}
+
+impl SingleByteSet {
+    fn new() -> SingleByteSet {
+        SingleByteSet {
+            sparse: vec![false; 256],
+            dense: vec![],
+            complete: true,
+            all_ascii: true,
+        }
+    }
+
+    fn prefixes(lits: &Literals) -> SingleByteSet {
+        let mut sset = SingleByteSet::new();
+        for lit in lits.literals() {
+            sset.complete = sset.complete && lit.len() == 1;
+            if let Some(&b) = lit.get(0) {
+                if !sset.sparse[b as usize] {
+                    if b > 0x7F {
+                        sset.all_ascii = false;
+                    }
+                    sset.dense.push(b);
+                    sset.sparse[b as usize] = true;
+                }
+            }
+        }
+        sset
+    }
+
+    fn suffixes(lits: &Literals) -> SingleByteSet {
+        let mut sset = SingleByteSet::new();
+        for lit in lits.literals() {
+            sset.complete = sset.complete && lit.len() == 1;
+            if let Some(&b) = lit.get(lit.len().checked_sub(1).unwrap()) {
+                if !sset.sparse[b as usize] {
+                    if b > 0x7F {
+                        sset.all_ascii = false;
+                    }
+                    sset.dense.push(b);
+                    sset.sparse[b as usize] = true;
+                }
+            }
+        }
+        sset
+    }
+
+    /// Faster find that special cases certain sizes to use memchr.
+    #[inline(always)] // reduces constant overhead
+    fn find(&self, text: &[u8]) -> Option<usize> {
+        match self.dense.len() {
+            0 => None,
+            1 => memchr(self.dense[0], text),
+            2 => memchr2(self.dense[0], self.dense[1], text),
+            3 => memchr3(self.dense[0], self.dense[1], self.dense[2], text),
+            _ => self._find(text),
+        }
+    }
+
+    /// Generic find that works on any sized set.
+    fn _find(&self, haystack: &[u8]) -> Option<usize> {
+        for (i, &b) in haystack.iter().enumerate() {
+            if self.sparse[b as usize] {
+                return Some(i);
+            }
+        }
+        None
+    }
+
+    fn approximate_size(&self) -> usize {
+        (self.dense.len() * mem::size_of::<u8>())
+        + (self.sparse.len() * mem::size_of::<bool>())
+    }
+}
+
+/// Provides an implementation of fast substring search using frequency
+/// analysis.
+///
+/// memchr is so fast that we do everything we can to keep the loop in memchr
+/// for as long as possible. The easiest way to do this is to intelligently
+/// pick the byte to send to memchr. The best byte is the byte that occurs
+/// least frequently in the haystack. Since doing frequency analysis on the
+/// haystack is far too expensive, we compute a set of fixed frequencies up
+/// front and hard code them in src/freqs.rs. Frequency analysis is done via
+/// scripts/frequencies.py.
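+///
+/// As a rough editorial illustration (not upstream documentation): for a
+/// pattern such as `"http"`, a byte like `p` is likely ranked as rarer than
+/// `t` in the precomputed table, so memchr is pointed at `p` and the full
+/// pattern is only verified at offsets implied by where `p` is found.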
+#[derive(Clone, Debug)]
+pub struct FreqyPacked {
+    /// The pattern.
+    pat: Vec<u8>,
+    /// The number of Unicode characters in the pattern. This is useful for
+    /// determining the effective length of a pattern when deciding which
+    /// optimizations to perform. A trailing incomplete UTF-8 sequence counts
+    /// as one character.
+    char_len: usize,
+    /// The rarest byte in the pattern, according to pre-computed frequency
+    /// analysis.
+    rare1: u8,
+    /// The offset of the rarest byte in `pat`.
+    rare1i: usize,
+    /// The second rarest byte in the pattern, according to pre-computed
+    /// frequency analysis. (This may be equivalent to the rarest byte.)
+    ///
+    /// The second rarest byte is used as a type of guard for quickly detecting
+    /// a mismatch after memchr locates an instance of the rarest byte. This
+    /// is a hedge against pathological cases where the pre-computed frequency
+    /// analysis may be off. (But of course, does not prevent *all*
+    /// pathological cases.)
+    rare2: u8,
+    /// The offset of the second rarest byte in `pat`.
+    rare2i: usize,
+}
+
+impl FreqyPacked {
+    fn new(pat: Vec<u8>) -> FreqyPacked {
+        if pat.is_empty() {
+            return FreqyPacked::empty();
+        }
+
+        // Find the rarest two bytes. Try to make them distinct (but it's not
+        // required).
+        let mut rare1 = pat[0];
+        let mut rare2 = pat[0];
+        for b in pat[1..].iter().cloned() {
+            if freq_rank(b) < freq_rank(rare1) {
+                rare1 = b;
+            }
+        }
+        for &b in &pat {
+            if rare1 == rare2 {
+                rare2 = b
+            } else if b != rare1 && freq_rank(b) < freq_rank(rare2) {
+                rare2 = b;
+            }
+        }
+
+        // And find the offsets of their last occurrences.
+        let rare1i = pat.iter().rposition(|&b| b == rare1).unwrap();
+        let rare2i = pat.iter().rposition(|&b| b == rare2).unwrap();
+
+        let char_len = char_len_lossy(&pat);
+        FreqyPacked {
+            pat: pat,
+            char_len: char_len,
+            rare1: rare1,
+            rare1i: rare1i,
+            rare2: rare2,
+            rare2i: rare2i,
+        }
+    }
+
+    fn empty() -> FreqyPacked {
+        FreqyPacked {
+            pat: vec![],
+            char_len: 0,
+            rare1: 0,
+            rare1i: 0,
+            rare2: 0,
+            rare2i: 0,
+        }
+    }
+
+    #[inline(always)] // reduces constant overhead
+    pub fn find(&self, haystack: &[u8]) -> Option<usize> {
+        let pat = &*self.pat;
+        if haystack.len() < pat.len() || pat.is_empty() {
+            return None;
+        }
+        let mut i = self.rare1i;
+        while i < haystack.len() {
+            i += match memchr(self.rare1, &haystack[i..]) {
+                None => return None,
+                Some(i) => i,
+            };
+            let start = i - self.rare1i;
+            let end = start + pat.len();
+            if end > haystack.len() {
+                return None;
+            }
+            let aligned = &haystack[start..end];
+            if aligned[self.rare2i] == self.rare2 && aligned == &*self.pat {
+                return Some(start);
+            }
+            i += 1;
+        }
+        None
+    }
+
+    #[inline(always)] // reduces constant overhead
+    pub fn is_suffix(&self, text: &[u8]) -> bool {
+        if text.len() < self.len() {
+            return false;
+        }
+        text[text.len() - self.len()..] == *self.pat
+    }
+
+    pub fn len(&self) -> usize {
+        self.pat.len()
+    }
+
+    pub fn char_len(&self) -> usize {
+        self.char_len
+    }
+
+    fn approximate_size(&self) -> usize {
+        self.pat.len() * mem::size_of::<u8>()
+    }
+}
+
+fn char_len_lossy(bytes: &[u8]) -> usize {
+    String::from_utf8_lossy(bytes).chars().count()
+}
+
+/// An implementation of Tuned Boyer-Moore as laid out by
+/// Andrew Hume and Daniel Sunday in "Fast String Searching".
+/// O(n) in the size of the input.
+///
+/// Fast string searching algorithms come in many variations,
+/// but they can generally be described in terms of three main
+/// components.
+///
+/// The skip loop is where the string searcher wants to spend
+/// as much time as possible. Exactly which character in the
+/// pattern the skip loop examines varies from algorithm to
+/// algorithm, but in the simplest case this loop repeated
+/// looks at the last character in the pattern and jumps
+/// forward in the input if it is not in the pattern.
+/// Robert Boyer and J Moore called this the "fast" loop in
+/// their original paper.
+///
+/// The match loop is responsible for actually examining the
+/// whole potentially matching substring. In order to fail
+/// faster, the match loop sometimes has a guard test attached.
+/// The guard test uses frequency analysis of the different
+/// characters in the pattern to choose the least frequently
+/// occurring character and use it to find match failures
+/// as quickly as possible.
+///
+/// The shift rule governs how the algorithm will shuffle its
+/// test window in the event of a failure during the match loop.
+/// Certain shift rules allow the worst-case run time of the
+/// algorithm to be shown to be O(n) in the size of the input
+/// rather than O(nm) in the size of the input and the size
+/// of the pattern (as naive Boyer-Moore is).
+///
+/// "Fast String Searching", in addition to presenting a tuned
+/// algorithm, provides a comprehensive taxonomy of the many
+/// different flavors of string searchers. Under that taxonomy
+/// TBM, the algorithm implemented here, uses an unrolled fast
+/// skip loop with memchr fallback, a forward match loop with guard,
+/// and the mini Sunday's delta shift rule. To unpack that you'll have to
+/// read the paper.
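+///
+/// As a rough editorial illustration (not part of the upstream docs): with
+/// the usual skip-table construction, a pattern like `abc` maps `a -> 2`,
+/// `b -> 1`, `c -> 0`, and every byte that does not occur in the pattern to
+/// `3` (the pattern length), so a window that ends on an absent byte can be
+/// shifted forward by the whole pattern length at once.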
+#[derive(Clone, Debug)]
+pub struct BoyerMooreSearch {
+    /// The pattern we are going to look for in the haystack.
+    pattern: Vec<u8>,
+
+    /// The skip table for the skip loop.
+    ///
+    /// Maps the character at the end of the input
+    /// to a shift.
+    skip_table: Vec<usize>,
+
+    /// The guard character (least frequently occurring char).
+    guard: u8,
+    /// The reverse-index of the guard character in the pattern.
+    guard_reverse_idx: usize,
+
+    /// Daniel Sunday's mini generalized delta2 shift table.
+    ///
+    /// We use a skip loop, so we only have to provide a shift
+    /// for the skip char (last char). This is why it is a mini
+    /// shift rule.
+    md2_shift: usize,
+}
+
+impl BoyerMooreSearch {
+    /// Create a new string searcher, performing whatever
+    /// compilation steps are required.
+    fn new(pattern: Vec<u8>) -> Self {
+        debug_assert!(pattern.len() > 0);
+
+        let (g, gi) = Self::select_guard(pattern.as_slice());
+        let skip_table = Self::compile_skip_table(pattern.as_slice());
+        let md2_shift = Self::compile_md2_shift(pattern.as_slice());
+        BoyerMooreSearch {
+            pattern: pattern,
+            skip_table: skip_table,
+            guard: g,
+            guard_reverse_idx: gi,
+            md2_shift: md2_shift,
+        }
+    }
+
+    /// Find the pattern in `haystack`, returning the offset
+    /// of the start of the first occurrence of the pattern
+    /// in `haystack`.
+    #[inline]
+    fn find(&self, haystack: &[u8]) -> Option<usize> {
+        if haystack.len() < self.pattern.len() {
+            return None;
+        }
+
+        let mut window_end = self.pattern.len() - 1;
+
+        // Inspired by the grep source. It is a way
+        // to do correct loop unrolling without having to place
+        // a crashpad of terminating characters at the end in
+        // the way described in the Fast String Searching paper.
+        const NUM_UNROLL: usize = 10;
+        // 1 for the initial position, and 1 for the md2 shift
+        let short_circuit = (NUM_UNROLL + 2) * self.pattern.len();
+
+        if haystack.len() > short_circuit {
+            // just 1 for the md2 shift
+            let backstop = haystack.len() - ((NUM_UNROLL + 1) * self.pattern.len());
+            loop {
+                window_end = match self.skip_loop(haystack, window_end, backstop) {
+                    Some(i) => i,
+                    None => return None,
+                };
+                if window_end >= backstop {
+                    break;
+                }
+
+                if self.check_match(haystack, window_end) {
+                    return Some(window_end - (self.pattern.len() - 1));
+                } else {
+                    let skip = self.skip_table[haystack[window_end] as usize];
+                    window_end +=
+                        if skip == 0 { self.md2_shift } else { skip };
+                    continue;
+                }
+            }
+        }
+
+        // now process the input after the backstop
+        while window_end < haystack.len() {
+            let mut skip = self.skip_table[haystack[window_end] as usize];
+            if skip == 0 {
+                if self.check_match(haystack, window_end) {
+                    return Some(window_end - (self.pattern.len() - 1));
+                } else {
+                    skip = self.md2_shift;
+                }
+            }
+            window_end += skip;
+        }
+
+        None
+    }
+
+    fn len(&self) -> usize {
+        self.pattern.len()
+    }
+
+    /// The key heuristic behind which the BoyerMooreSearch lives.
+    ///
+    /// See `rust-lang/regex/issues/408`.
+    ///
+    /// Tuned Boyer-Moore is actually pretty slow! It turns out a handrolled
+    /// platform-specific memchr routine with a bit of frequency
+    /// analysis sprinkled on top actually wins most of the time.
+    /// However, there are a few cases where Tuned Boyer-Moore still
+    /// wins.
+    ///
+    /// If the haystack is random, frequency analysis doesn't help us,
+    /// so Boyer-Moore will win for sufficiently large needles.
+    /// Unfortunately, there is no obvious way to determine this
+    /// ahead of time.
+    ///
+    /// If the pattern itself consists of very common characters,
+    /// frequency analysis won't get us anywhere. The most extreme
+    /// example of this is a pattern like `eeeeeeeeeeeeeeee`. Fortunately,
+    /// this case is wholly determined by the pattern, so we can actually
+    /// implement the heuristic.
+    ///
+    /// A third case is if the pattern is sufficiently long. The idea
+    /// here is that once the pattern gets long enough the Tuned
+    /// Boyer-Moore skip loop will start making strides long enough
+    /// to beat the asm deep magic that is memchr.
+    fn should_use(pattern: &[u8]) -> bool {
+        // The minimum pattern length required to use TBM.
+        const MIN_LEN: usize = 9;
+        // The minimum frequency rank (lower is rarer) that every byte in the
+        // pattern must have in order to use TBM. That is, if the pattern
+        // contains _any_ byte with a lower rank, then TBM won't be used.
+        const MIN_CUTOFF: usize = 150;
+        // The maximum frequency rank for any byte.
+        const MAX_CUTOFF: usize = 255;
+        // The scaling factor used to determine the actual cutoff frequency
+        // to use (keeping in mind that the minimum frequency rank is bounded
+        // by MIN_CUTOFF). This scaling factor is an attempt to make TBM more
+        // likely to be used as the pattern grows longer. That is, longer
+        // patterns permit somewhat less frequent bytes than shorter patterns,
+        // under the assumption that TBM gets better as the pattern gets
+        // longer.
+        const LEN_CUTOFF_PROPORTION: usize = 4;
+
+        let scaled_rank = pattern.len().wrapping_mul(LEN_CUTOFF_PROPORTION);
+        let cutoff = cmp::max(
+            MIN_CUTOFF,
+            MAX_CUTOFF - cmp::min(MAX_CUTOFF, scaled_rank),
+        );
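+        // For example (illustrative arithmetic only): a 10 byte pattern gives
+        // scaled_rank = 40 and cutoff = max(150, 255 - 40) = 215, so only
+        // very common bytes are allowed, while a 40 byte pattern gives
+        // scaled_rank = 160 and cutoff = max(150, 255 - 160) = 150, so
+        // somewhat rarer bytes are tolerated.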
+        // The pattern must be long enough to be worthwhile. E.g., memchr will
+        // be faster on the pattern `e` because it is so short, even though
+        // `e` is quite common.
+        pattern.len() > MIN_LEN
+            // all the bytes must be more common than the cutoff.
+            && pattern.iter().all(|c| freq_rank(*c) >= cutoff)
+    }
+
+    /// Check to see if there is a match at the given position
+    #[inline]
+    fn check_match(&self, haystack: &[u8], window_end: usize) -> bool {
+        // guard test
+        if haystack[window_end - self.guard_reverse_idx] != self.guard {
+            return false;
+        }
+
+        // match loop
+        let window_start = window_end - (self.pattern.len() - 1);
+        for i in 0..self.pattern.len() {
+            if self.pattern[i] != haystack[window_start + i] {
+                return false;
+            }
+        }
+
+        true
+    }
+
+    /// Skip forward according to the shift table.
+    ///
+    /// Returns the offset of the next occurrence
+    /// of the last char in the pattern, or `None`
+    /// if it never reappears. If `skip_loop` hits the backstop
+    /// it will leave early.
+    #[inline]
+    fn skip_loop(&self,
+        haystack: &[u8],
+        mut window_end: usize,
+        backstop: usize,
+    ) -> Option<usize> {
+        use std::mem;
+
+        let window_end_snapshot = window_end;
+        let skip_of = |we: usize| -> usize {
+            // Unsafe might make this faster, but the benchmarks
+            // were hard to interpret.
+            self.skip_table[haystack[we] as usize]
+        };
+
+        loop {
+            let mut skip = skip_of(window_end); window_end += skip;
+            skip = skip_of(window_end); window_end += skip;
+            if skip != 0 {
+                skip = skip_of(window_end); window_end += skip;
+                skip = skip_of(window_end); window_end += skip;
+                skip = skip_of(window_end); window_end += skip;
+                if skip != 0 {
+                    skip = skip_of(window_end); window_end += skip;
+                    skip = skip_of(window_end); window_end += skip;
+                    skip = skip_of(window_end); window_end += skip;
+                    if skip != 0 {
+                        skip = skip_of(window_end); window_end += skip;
+                        skip = skip_of(window_end); window_end += skip;
+
+                        // If ten iterations did not make at least 16 words
+                        // worth of progress, we just fall back on memchr.
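+                        // (On a 64-bit target that is 16 * 8 = 128 bytes of
+                        // progress across the ten unrolled skips.)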
+                        if window_end - window_end_snapshot >
+                             16 * mem::size_of::<usize>() {
+
+                            // Returning a window_end >= backstop will immediately
+                            // break us out of the inner loop in `find`.
+                            if window_end >= backstop {
+                                return Some(window_end);
+                            }
+
+                            continue; // we made enough progress
+                        } else {
+                            // In case we are already there, and so that
+                            // we will catch the guard char.
+                            window_end = window_end
+                                .checked_sub(1 + self.guard_reverse_idx)
+                                .unwrap_or(0);
+
+                            match memchr(self.guard, &haystack[window_end..]) {
+                                None => return None,
+                                Some(g_idx) => {
+                                    return Some(window_end + g_idx + self.guard_reverse_idx);
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+
+            return Some(window_end);
+        }
+    }
+
+    /// Compute the ufast (unrolled fast) skip table.
+    fn compile_skip_table(pattern: &[u8]) -> Vec<usize> {
+        let mut tab = vec![pattern.len(); 256];
+
+        // For every char in the pattern, we write a skip
+        // that will line us up with the rightmost occurrence.
+        //
+        // N.B. the sentinel (0) is written by the last
+        // loop iteration.
+        for (i, c) in pattern.iter().enumerate() {
+            tab[*c as usize] = (pattern.len() - 1) - i;
+        }
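+        // For example, for the pattern `abcab` this leaves tab['a'] = 1,
+        // tab['b'] = 0 (the sentinel, written by the final iteration),
+        // tab['c'] = 2, and tab[..] = 5 for every byte not in the pattern.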
+
+        tab
+    }
+
+    /// Select the guard character based off of the precomputed
+    /// frequency table.
+    fn select_guard(pattern: &[u8]) -> (u8, usize) {
+        let mut rarest = pattern[0];
+        let mut rarest_rev_idx = pattern.len() - 1;
+        for (i, c) in pattern.iter().enumerate() {
+            if freq_rank(*c) < freq_rank(rarest) {
+                rarest = *c;
+                rarest_rev_idx = (pattern.len() - 1) - i;
+            }
+        }
+
+        (rarest, rarest_rev_idx)
+    }
+
+    /// If there is another occurrence of the skip
+    /// char, shift to it, otherwise just shift to
+    /// the next window.
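+    ///
+    /// For example (illustrative only), for the pattern `abcab` the skip
+    /// char is `b`, whose previous occurrence is at index 1, so the shift
+    /// is (5 - 1) - 1 = 3.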
+    fn compile_md2_shift(pattern: &[u8]) -> usize {
+        let shiftc = *pattern.last().unwrap();
+
+        // For a pattern of length 1 we will never apply the
+        // shift rule, so we use a poison value on the principle
+        // that failing fast is a good thing.
+        if pattern.len() == 1 {
+            return 0xDEADBEAF;
+        }
+
+        let mut i = pattern.len() - 2;
+        while i > 0 {
+            if pattern[i] == shiftc {
+                return (pattern.len() - 1) - i;
+            }
+            i -= 1;
+        }
+
+        // The skip char never re-occurs in the pattern, so
+        // we can just shift the whole window length.
+        pattern.len() - 1
+    }
+
+    fn approximate_size(&self) -> usize {
+        (self.pattern.len() * mem::size_of::<u8>())
+            + (256 * mem::size_of::<usize>()) // skip table
+    }
+}
+
+fn freq_rank(b: u8) -> usize {
+    BYTE_FREQUENCIES[b as usize] as usize
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{BoyerMooreSearch, FreqyPacked};
+
+    //
+    // Unit Tests
+    //
+
+    // The "hello, world" of string searching
+    #[test]
+    fn bm_find_subs() {
+        let searcher = BoyerMooreSearch::new(Vec::from(&b"pattern"[..]));
+        let haystack = b"I keep seeing patterns in this text";
+        assert_eq!(14, searcher.find(haystack).unwrap());
+    }
+
+    #[test]
+    fn bm_find_no_subs() {
+        let searcher = BoyerMooreSearch::new(Vec::from(&b"pattern"[..]));
+        let haystack = b"I keep seeing needles in this text";
+        assert_eq!(None, searcher.find(haystack));
+    }
+
+    //
+    // Regression Tests
+    //
+
+    #[test]
+    fn bm_skip_reset_bug() {
+        let haystack = vec![0, 0, 0, 0, 0, 1, 1, 0];
+        let needle = vec![0, 1, 1, 0];
+
+        let searcher = BoyerMooreSearch::new(needle);
+        let offset = searcher.find(haystack.as_slice()).unwrap();
+        assert_eq!(4, offset);
+    }
+
+    #[test]
+    fn bm_backstop_underflow_bug() {
+        let haystack = vec![0, 0];
+        let needle = vec![0, 0];
+
+        let searcher = BoyerMooreSearch::new(needle);
+        let offset = searcher.find(haystack.as_slice()).unwrap();
+        assert_eq!(0, offset);
+    }
+
+    #[test]
+    fn bm_naive_off_by_one_bug() {
+        let haystack = vec![91];
+        let needle = vec![91];
+
+        let naive_offset = naive_find(&needle, &haystack).unwrap();
+        assert_eq!(0, naive_offset);
+    }
+
+    #[test]
+    fn bm_memchr_fallback_indexing_bug() {
+        let mut haystack = vec![
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 87, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
+        let needle = vec![1, 1, 1, 1, 32, 32, 87];
+        let needle_start = haystack.len();
+        haystack.extend(needle.clone());
+
+        let searcher = BoyerMooreSearch::new(needle);
+        assert_eq!(needle_start, searcher.find(haystack.as_slice()).unwrap());
+    }
+
+    #[test]
+    fn bm_backstop_boundary() {
+        let haystack = b"\
+// aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+e_data.clone_created(entity_id, entity_to_add.entity_id);
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
+".to_vec();
+        let needle = b"clone_created".to_vec();
+
+        let searcher = BoyerMooreSearch::new(needle);
+        let result = searcher.find(&haystack);
+        assert_eq!(Some(43), result);
+    }
+
+    #[test]
+    fn bm_win_gnu_indexing_bug() {
+        let haystack_raw = vec![
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
+        let needle = vec![1, 1, 1, 1, 1, 1, 1];
+        let haystack = haystack_raw.as_slice();
+
+        BoyerMooreSearch::new(needle.clone()).find(haystack);
+    }
+
+    //
+    // QuickCheck Properties
+    //
+
+    use quickcheck::TestResult;
+
+    fn naive_find(needle: &[u8], haystack: &[u8]) -> Option<usize> {
+        assert!(needle.len() <= haystack.len());
+
+        for i in 0..(haystack.len() - (needle.len() - 1)) {
+            if haystack[i] == needle[0]
+                && &haystack[i..(i+needle.len())] == needle {
+                return Some(i)
+            }
+        }
+
+        None
+    }
+
+    quickcheck! {
+        fn qc_bm_equals_naive_find(pile1: Vec<u8>, pile2: Vec<u8>) -> TestResult {
+            if pile1.len() == 0 || pile2.len() == 0 {
+                return TestResult::discard();
+            }
+
+            let (needle, haystack) = if pile1.len() < pile2.len() {
+                (pile1, pile2.as_slice())
+            } else {
+                (pile2, pile1.as_slice())
+            };
+
+            let searcher = BoyerMooreSearch::new(needle.clone());
+            TestResult::from_bool(
+                searcher.find(haystack) == naive_find(&needle, haystack))
+        }
+
+        fn qc_bm_equals_single(pile1: Vec<u8>, pile2: Vec<u8>) -> TestResult {
+            if pile1.len() == 0 || pile2.len() == 0 {
+                return TestResult::discard();
+            }
+
+            let (needle, haystack) = if pile1.len() < pile2.len() {
+                (pile1, pile2.as_slice())
+            } else {
+                (pile2, pile1.as_slice())
+            };
+
+            let bm_searcher = BoyerMooreSearch::new(needle.clone());
+            let freqy_memchr = FreqyPacked::new(needle);
+            TestResult::from_bool(
+                bm_searcher.find(haystack) == freqy_memchr.find(haystack))
+        }
+
+        fn qc_bm_finds_trailing_needle(
+            haystack_pre: Vec<u8>,
+            needle: Vec<u8>
+        ) -> TestResult {
+            if needle.len() == 0 {
+                return TestResult::discard();
+            }
+
+            let mut haystack = haystack_pre.clone();
+            let searcher = BoyerMooreSearch::new(needle.clone());
+
+            if haystack.len() >= needle.len() &&
+                searcher.find(haystack.as_slice()).is_some() {
+                return TestResult::discard();
+            }
+
+            haystack.extend(needle.clone());
+
+            // What if the tail of the haystack can start the
+            // needle?
+            let start = haystack_pre.len()
+                .checked_sub(needle.len())
+                .unwrap_or(0);
+            for i in 0..(needle.len() - 1) {
+                if searcher.find(&haystack[(i + start)..]).is_some() {
+                    return TestResult::discard();
+                }
+            }
+
+            TestResult::from_bool(
+                searcher.find(haystack.as_slice())
+                        .map(|x| x == haystack_pre.len())
+                        .unwrap_or(false))
+        }
+
+        // qc_equals_* is only testing the negative case as @burntsushi
+        // pointed out in https://github.com/rust-lang/regex/issues/446.
+        // This quickcheck prop represents an effort to force testing of
+        // the positive case. qc_bm_finds_first and qc_bm_finds_trailing_needle
+        // already check some of the positive cases, but they don't cover
+        // cases where the needle is in the middle of haystack. This prop
+        // fills that hole.
+        fn qc_bm_finds_subslice(
+            haystack: Vec<u8>,
+            needle_start: usize,
+            needle_length: usize
+        ) -> TestResult {
+            if haystack.len() == 0 {
+                return TestResult::discard();
+            }
+
+            let needle_start = needle_start % haystack.len();
+            let needle_length = needle_length % (haystack.len() - needle_start);
+
+            if needle_length == 0 {
+                return TestResult::discard();
+            }
+
+            let needle = &haystack[needle_start..(needle_start + needle_length)];
+
+            let bm_searcher = BoyerMooreSearch::new(needle.to_vec());
+
+            let start = naive_find(&needle, &haystack);
+            match start {
+                None => TestResult::from_bool(false),
+                Some(nf_start) =>
+                    TestResult::from_bool(
+                        nf_start <= needle_start
+                            && bm_searcher.find(&haystack) == start
+                    )
+            }
+        }
+
+        fn qc_bm_finds_first(needle: Vec<u8>) -> TestResult {
+            if needle.len() == 0 {
+                return TestResult::discard();
+            }
+
+            let mut haystack = needle.clone();
+            let searcher = BoyerMooreSearch::new(needle.clone());
+            haystack.extend(needle);
+
+            TestResult::from_bool(
+                searcher.find(haystack.as_slice())
+                        .map(|x| x == 0)
+                        .unwrap_or(false))
+        }
+    }
+}
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex/src/literal/teddy_avx2/fallback.rs
@@ -0,0 +1,20 @@
+use syntax::hir::literal::Literals;
+
+#[derive(Debug, Clone)]
+pub struct Teddy(());
+
+#[derive(Debug, Clone)]
+pub struct Match {
+    pub pat: usize,
+    pub start: usize,
+    pub end: usize,
+}
+
+impl Teddy {
+    pub fn available() -> bool { false }
+    pub fn new(_pats: &Literals) -> Option<Teddy> { None }
+    pub fn patterns(&self) -> &[Vec<u8>] { &[] }
+    pub fn len(&self) -> usize { 0 }
+    pub fn approximate_size(&self) -> usize { 0 }
+    pub fn find(&self, _haystack: &[u8]) -> Option<Match> { None }
+}
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex/src/literal/teddy_avx2/imp.rs
@@ -0,0 +1,468 @@
+/*!
+This is the Teddy searcher, but ported to AVX2.
+
+See the module comments in the SSSE3 Teddy searcher for a more in-depth
+explanation of how this algorithm works. For the most part, this port is
+the same as the SSSE3 version, but it uses 256-bit vectors instead of
+128-bit vectors, which increases throughput.
+*/
+
+use std::cmp;
+
+use aho_corasick::{Automaton, AcAutomaton, FullAcAutomaton};
+use syntax::hir::literal::Literals;
+
+use vector::avx2::{AVX2VectorBuilder, u8x32};
+
+/// Corresponds to the number of bytes read at a time in the haystack.
+const BLOCK_SIZE: usize = 32;
+
+/// Match reports match information.
+#[derive(Debug, Clone)]
+pub struct Match {
+    /// The index of the pattern that matched. The index is in correspondence
+    /// with the order of the patterns given at construction.
+    pub pat: usize,
+    /// The start byte offset of the match.
+    pub start: usize,
+    /// The end byte offset of the match. This is always `start + pat.len()`.
+    pub end: usize,
+}
+
+/// A SIMD accelerated multi substring searcher.
+#[derive(Debug, Clone)]
+pub struct Teddy {
+    /// A builder for AVX2 empowered vectors.
+    vb: AVX2VectorBuilder,
+    /// A list of substrings to match.
+    pats: Vec<Vec<u8>>,
+    /// An Aho-Corasick automaton of the patterns. We use this when we need to
+    /// search pieces smaller than the Teddy block size.
+    ac: FullAcAutomaton<Vec<u8>>,
+    /// A set of 8 buckets. Each bucket corresponds to a single member of a
+    /// bitset. A bucket contains zero or more substrings. This is useful
+    /// when the number of substrings exceeds 8, since our bitsets cannot have
+    /// more than 8 members.
+    buckets: Vec<Vec<usize>>,
+    /// Our set of masks. There's one mask for each byte in the fingerprint.
+    masks: Masks,
+}
+
+impl Teddy {
+    /// Returns true if and only if Teddy is supported on this platform.
+    ///
+    /// If this returns `false`, then `Teddy::new(...)` is guaranteed to
+    /// return `None`.
+    pub fn available() -> bool {
+        AVX2VectorBuilder::new().is_some()
+    }
+
+    /// Create a new `Teddy` multi substring matcher.
+    ///
+    /// If a `Teddy` matcher could not be created (e.g., `pats` is empty or has
+    /// an empty substring), then `None` is returned.
+    pub fn new(pats: &Literals) -> Option<Teddy> {
+        let vb = match AVX2VectorBuilder::new() {
+            None => return None,
+            Some(vb) => vb,
+        };
+        if !Teddy::available() {
+            return None;
+        }
+
+        let pats: Vec<_> = pats.literals().iter().map(|p|p.to_vec()).collect();
+        let min_len = pats.iter().map(|p| p.len()).min().unwrap_or(0);
+        // Don't allow any empty patterns and require that we have at
+        // least one pattern.
+        if min_len < 1 {
+            return None;
+        }
+        // Pick the largest mask possible, but no larger than 3.
+        let nmasks = cmp::min(3, min_len);
+        let mut masks = Masks::new(vb, nmasks);
+        let mut buckets = vec![vec![]; 8];
+        // Assign a substring to each bucket, and add the bucket's bitfield to
+        // the appropriate position in the mask.
+        for (pati, pat) in pats.iter().enumerate() {
+            let bucket = pati % 8;
+            buckets[bucket].push(pati);
+            masks.add(bucket as u8, pat);
+        }
+        Some(Teddy {
+            vb: vb,
+            pats: pats.to_vec(),
+            ac: AcAutomaton::new(pats.to_vec()).into_full(),
+            buckets: buckets,
+            masks: masks,
+        })
+    }
+
+    /// Returns all of the substrings matched by this `Teddy`.
+    pub fn patterns(&self) -> &[Vec<u8>] {
+        &self.pats
+    }
+
+    /// Returns the number of substrings in this matcher.
+    pub fn len(&self) -> usize {
+        self.pats.len()
+    }
+
+    /// Returns the approximate size on the heap used by this matcher.
+    pub fn approximate_size(&self) -> usize {
+        self.pats.iter().fold(0, |a, b| a + b.len())
+    }
+
+    /// Searches `haystack` for the substrings in this `Teddy`. If a match was
+    /// found, then it is returned. Otherwise, `None` is returned.
+    pub fn find(&self, haystack: &[u8]) -> Option<Match> {
+        // This is safe because the only way we can construct a Teddy type
+        // is if AVX2 is available.
+        unsafe { self.find_impl(haystack) }
+    }
+
+    #[allow(unused_attributes)]
+    #[target_feature(enable = "avx2")]
+    unsafe fn find_impl(&self, haystack: &[u8]) -> Option<Match> {
+        // If our haystack is too small for the block based approach (it must
+        // be at least BLOCK_SIZE + 2 bytes long), fall back to a naive brute
+        // force search.
+        if haystack.is_empty() || haystack.len() < (BLOCK_SIZE + 2) {
+            return self.slow(haystack, 0);
+        }
+        match self.masks.len() {
+            0 => None,
+            1 => self.find1(haystack),
+            2 => self.find2(haystack),
+            3 => self.find3(haystack),
+            _ => unreachable!(),
+        }
+    }
+
+    /// `find1` is used when there is only 1 mask. This is the easy case and is
+    /// pretty much as described in the module documentation.
+    #[inline(always)]
+    fn find1(&self, haystack: &[u8]) -> Option<Match> {
+        let mut pos = 0;
+        let zero = self.vb.u8x32_splat(0);
+        let len = haystack.len();
+        debug_assert!(len >= BLOCK_SIZE);
+        while pos <= len - BLOCK_SIZE {
+            let h = unsafe {
+                // I tried and failed to eliminate bounds checks in safe code.
+                // This is safe because of our loop invariant: pos is always
+                // <= len-32.
+                let p = haystack.get_unchecked(pos..);
+                self.vb.u8x32_load_unchecked_unaligned(p)
+            };
+            // N.B. `res0` is our `C` in the module documentation.
+            let res0 = self.masks.members1(h);
+            // Only do expensive verification if there are any non-zero bits.
+            let bitfield = res0.ne(zero).movemask();
+            if bitfield != 0 {
+                if let Some(m) = self.verify(haystack, pos, res0, bitfield) {
+                    return Some(m);
+                }
+            }
+            pos += BLOCK_SIZE;
+        }
+        self.slow(haystack, pos)
+    }
+
+    /// `find2` is used when there are 2 masks, i.e., the fingerprint is 2 bytes
+    /// long.
+    #[inline(always)]
+    fn find2(&self, haystack: &[u8]) -> Option<Match> {
+        // This is an exotic way to right shift a SIMD vector across lanes.
+        // See below at use for more details.
+        let zero = self.vb.u8x32_splat(0);
+        let len = haystack.len();
+        // The previous value of `C` (from the module documentation) for the
+        // *first* byte in the fingerprint. On subsequent iterations, we take
+        // the last bitset from the previous `C` and insert it into the first
+        // position of the current `C`, shifting all other bitsets to the right
+        // one lane. This causes `C` for the first byte to line up with `C` for
+        // the second byte, so that they can be `AND`'d together.
+        let mut prev0 = self.vb.u8x32_splat(0xFF);
+        let mut pos = 1;
+        debug_assert!(len >= BLOCK_SIZE);
+        while pos <= len - BLOCK_SIZE {
+            let h = unsafe {
+                // I tried and failed to eliminate bounds checks in safe code.
+                // This is safe because of our loop invariant: pos is always
+                // <= len-32.
+                let p = haystack.get_unchecked(pos..);
+                self.vb.u8x32_load_unchecked_unaligned(p)
+            };
+            let (res0, res1) = self.masks.members2(h);
+
+            // Do this:
+            //
+            //     (prev0 << 15) | (res0 >> 1)
+            //
+            // This lets us line up our C values for each byte.
+            let res0prev0 = res0.alignr_15(prev0);
+
+            // `AND`'s our `C` values together.
+            let res = res0prev0.and(res1);
+            prev0 = res0;
+
+            let bitfield = res.ne(zero).movemask();
+            if bitfield != 0 {
+                let pos = pos.checked_sub(1).unwrap();
+                if let Some(m) = self.verify(haystack, pos, res, bitfield) {
+                    return Some(m);
+                }
+            }
+            pos += BLOCK_SIZE;
+        }
+        // The windowing above doesn't check the last byte in the last
+        // window, so start the slow search at the last byte of the last
+        // window.
+        self.slow(haystack, pos.checked_sub(1).unwrap())
+    }
+
+    /// `find3` is used when there are 3 masks, i.e., the fingerprint is 3 bytes
+    /// long.
+    ///
+    /// N.B. This is a straight-forward extrapolation of `find2`. The only
+    /// difference is that we need to keep track of two previous values of `C`,
+    /// since we now need to align for three bytes.
+    #[inline(always)]
+    fn find3(&self, haystack: &[u8]) -> Option<Match> {
+        let zero = self.vb.u8x32_splat(0);
+        let len = haystack.len();
+        let mut prev0 = self.vb.u8x32_splat(0xFF);
+        let mut prev1 = self.vb.u8x32_splat(0xFF);
+        let mut pos = 2;
+
+        while pos <= len - BLOCK_SIZE {
+            let h = unsafe {
+                // I tried and failed to eliminate bounds checks in safe code.
+                // This is safe because of our loop invariant: pos is always
+                // <= len-32.
+                let p = haystack.get_unchecked(pos..);
+                self.vb.u8x32_load_unchecked_unaligned(p)
+            };
+            let (res0, res1, res2) = self.masks.members3(h);
+
+            let res0prev0 = res0.alignr_14(prev0);
+            let res1prev1 = res1.alignr_15(prev1);
+            let res = res0prev0.and(res1prev1).and(res2);
+
+            prev0 = res0;
+            prev1 = res1;
+
+            let bitfield = res.ne(zero).movemask();
+            if bitfield != 0 {
+                let pos = pos.checked_sub(2).unwrap();
+                if let Some(m) = self.verify(haystack, pos, res, bitfield) {
+                    return Some(m);
+                }
+            }
+            pos += BLOCK_SIZE;
+        }
+        // The windowing above doesn't check the last two bytes in the last
+        // window, so start the slow search at the penultimate byte of the
+        // last window.
+        // self.slow(haystack, pos.saturating_sub(2))
+        self.slow(haystack, pos.checked_sub(2).unwrap())
+    }
+
+    /// Runs the verification procedure on `res` (i.e., `C` from the module
+    /// documentation), where the haystack block starts at `pos` in
+    /// `haystack`. `bitfield` has ones in the bit positions that `res` has
+    /// non-zero bytes.
+    ///
+    /// If a match exists, it returns the first one.
+    #[inline(always)]
+    fn verify(
+        &self,
+        haystack: &[u8],
+        pos: usize,
+        res: u8x32,
+        mut bitfield: u32,
+    ) -> Option<Match> {
+        while bitfield != 0 {
+            // The next offset, relative to pos, where some fingerprint
+            // matched.
+            let byte_pos = bitfield.trailing_zeros() as usize;
+            bitfield &= !(1 << byte_pos);
+
+            // Offset relative to the beginning of the haystack.
+            let start = pos + byte_pos;
+
+            // The bitfield telling us which patterns had fingerprints that
+            // match at this starting position.
+            let mut patterns = res.extract(byte_pos);
+            while patterns != 0 {
+                let bucket = patterns.trailing_zeros() as usize;
+                patterns &= !(1 << bucket);
+
+                // Actual substring search verification.
+                if let Some(m) = self.verify_bucket(haystack, bucket, start) {
+                    return Some(m);
+                }
+            }
+        }
+
+        None
+    }
+
+    /// Verifies whether any substring in the given bucket matches in haystack
+    /// at the given starting position.
+    #[inline(always)]
+    fn verify_bucket(
+        &self,
+        haystack: &[u8],
+        bucket: usize,
+        start: usize,
+    ) -> Option<Match> {
+        // This cycles through the patterns in the bucket in the order that
+        // the patterns were given. Therefore, we guarantee leftmost-first
+        // semantics.
+        for &pati in &self.buckets[bucket] {
+            let pat = &*self.pats[pati];
+            if start + pat.len() > haystack.len() {
+                continue;
+            }
+            if pat == &haystack[start..start + pat.len()] {
+                return Some(Match {
+                    pat: pati,
+                    start: start,
+                    end: start + pat.len(),
+                });
+            }
+        }
+        None
+    }
+
+    /// Slow substring search through all patterns in this matcher.
+    ///
+    /// This is used when we don't have enough bytes in the haystack for our
+    /// block based approach.
+    #[inline(never)]
+    fn slow(&self, haystack: &[u8], pos: usize) -> Option<Match> {
+        self.ac.find(&haystack[pos..]).next().map(|m| {
+            Match {
+                pat: m.pati,
+                start: pos + m.start,
+                end: pos + m.end,
+            }
+        })
+    }
+}
+
+/// A list of masks. This has length equal to the length of the fingerprint.
+/// The length of the fingerprint is always `min(3, len(smallest_substring))`.
+#[derive(Debug, Clone)]
+struct Masks {
+    vb: AVX2VectorBuilder,
+    masks: [Mask; 3],
+    size: usize,
+}
+
+impl Masks {
+    /// Create a new set of masks of size `n`, where `n` corresponds to the
+    /// number of bytes in a fingerprint.
+    fn new(vb: AVX2VectorBuilder, n: usize) -> Masks {
+        Masks {
+            vb: vb,
+            masks: [Mask::new(vb), Mask::new(vb), Mask::new(vb)],
+            size: n,
+        }
+    }
+
+    /// Returns the number of masks.
+    fn len(&self) -> usize {
+        self.size
+    }
+
+    /// Adds the given pattern to the given bucket. The bucket should be a
+    /// power of `2 <= 2^7`.
+    fn add(&mut self, bucket: u8, pat: &[u8]) {
+        for i in 0..self.len() {
+            self.masks[i].add(bucket, pat[i]);
+        }
+    }
+
+    /// Finds the fingerprints that are in the given haystack block. i.e., this
+    /// returns `C` as described in the module documentation.
+    ///
+    /// More specifically, for `i in 0..32` and `j in 0..8`, `C[i][j] == 1` if
+    /// and only if `haystack_block[i]` corresponds to a fingerprint that is
+    /// part of a pattern in bucket `j`.
+    #[inline(always)]
+    fn members1(&self, haystack_block: u8x32) -> u8x32 {
+        let masklo = self.vb.u8x32_splat(0xF);
+        let hlo = haystack_block.and(masklo);
+        let hhi = haystack_block.bit_shift_right_4().and(masklo);
+
+        self.masks[0].lo.shuffle(hlo).and(self.masks[0].hi.shuffle(hhi))
+    }
+
+    /// Like `members1`, but computes `C` for the first and second bytes in
+    /// the fingerprint.
+    #[inline(always)]
+    fn members2(&self, haystack_block: u8x32) -> (u8x32, u8x32) {
+        let masklo = self.vb.u8x32_splat(0xF);
+        let hlo = haystack_block.and(masklo);
+        let hhi = haystack_block.bit_shift_right_4().and(masklo);
+
+        let res0 =
+            self.masks[0].lo.shuffle(hlo).and(self.masks[0].hi.shuffle(hhi));
+        let res1 =
+            self.masks[1].lo.shuffle(hlo).and(self.masks[1].hi.shuffle(hhi));
+        (res0, res1)
+    }
+
+    /// Like `members1`, but computes `C` for the first, second and third bytes
+    /// in the fingerprint.
+    #[inline(always)]
+    fn members3(&self, haystack_block: u8x32) -> (u8x32, u8x32, u8x32) {
+        let masklo = self.vb.u8x32_splat(0xF);
+        let hlo = haystack_block.and(masklo);
+        let hhi = haystack_block.bit_shift_right_4().and(masklo);
+
+        let res0 =
+            self.masks[0].lo.shuffle(hlo).and(self.masks[0].hi.shuffle(hhi));
+        let res1 =
+            self.masks[1].lo.shuffle(hlo).and(self.masks[1].hi.shuffle(hhi));
+        let res2 =
+            self.masks[2].lo.shuffle(hlo).and(self.masks[2].hi.shuffle(hhi));
+        (res0, res1, res2)
+    }
+}
+
+/// A single mask.
+#[derive(Debug, Clone, Copy)]
+struct Mask {
+    /// Bitsets for the low nybbles in a fingerprint.
+    lo: u8x32,
+    /// Bitsets for the high nybbles in a fingerprint.
+    hi: u8x32,
+}
+
+impl Mask {
+    /// Create a new mask with no members.
+    fn new(vb: AVX2VectorBuilder) -> Mask {
+        Mask {
+            lo: vb.u8x32_splat(0),
+            hi: vb.u8x32_splat(0),
+        }
+    }
+
+    /// Adds the given byte to the given bucket.
+    fn add(&mut self, bucket: u8, byte: u8) {
+        // Split our byte into two nybbles, and add each nybble to our
+        // mask.
+        let byte_lo = (byte & 0xF) as usize;
+        let byte_hi = (byte >> 4) as usize;
+
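+        // The same entry is written to both 16 byte halves of each mask,
+        // since (to our understanding) AVX2's VPSHUFB shuffles each 128-bit
+        // lane independently, so the nybble table must appear in both lanes.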
+        let lo = self.lo.extract(byte_lo) | ((1 << bucket) as u8);
+        self.lo.replace(byte_lo, lo);
+        self.lo.replace(byte_lo + 16, lo);
+
+        let hi = self.hi.extract(byte_hi) | ((1 << bucket) as u8);
+        self.hi.replace(byte_hi, hi);
+        self.hi.replace(byte_hi + 16, hi);
+    }
+}
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex/src/literal/teddy_avx2/mod.rs
@@ -0,0 +1,16 @@
+pub use self::imp::*;
+
+#[cfg(all(
+    feature = "unstable",
+    regex_runtime_teddy_avx2,
+    any(target_arch = "x86_64"),
+))]
+mod imp;
+
+#[cfg(not(all(
+    feature = "unstable",
+    regex_runtime_teddy_avx2,
+    any(target_arch = "x86_64"),
+)))]
+#[path = "fallback.rs"]
+mod imp;
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex/src/literal/teddy_ssse3/fallback.rs
@@ -0,0 +1,20 @@
+use syntax::hir::literal::Literals;
+
+#[derive(Debug, Clone)]
+pub struct Teddy(());
+
+#[derive(Debug, Clone)]
+pub struct Match {
+    pub pat: usize,
+    pub start: usize,
+    pub end: usize,
+}
+
+impl Teddy {
+    pub fn available() -> bool { false }
+    pub fn new(_pats: &Literals) -> Option<Teddy> { None }
+    pub fn patterns(&self) -> &[Vec<u8>] { &[] }
+    pub fn len(&self) -> usize { 0 }
+    pub fn approximate_size(&self) -> usize { 0 }
+    pub fn find(&self, _haystack: &[u8]) -> Option<Match> { None }
+}
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex/src/literal/teddy_ssse3/imp.rs
@@ -0,0 +1,776 @@
+/*!
+Teddy is a SIMD accelerated multiple substring matching algorithm. The name
+and the core ideas in the algorithm were learned from the [Hyperscan][1_u]
+project.
+
+
+Background
+----------
+
+The key idea of Teddy is to do *packed* substring matching. In the literature,
+packed substring matching is the idea of examining multiple bytes in a haystack
+at a time to detect matches. Implementations of, for example, memchr (which
+detects matches of a single byte) have been doing this for years. Only
+recently, with the introduction of various SIMD instructions, has this been
+extended to substring matching. The PCMPESTRI instruction (and its relatives),
+for example, implements substring matching in hardware. It is, however, limited
+to substrings of length 16 bytes or fewer, but this restriction is fine in a
+regex engine, since we rarely care about the performance difference between
+searching for a 16 byte literal and a 16 + N literal; 16 is already long
+enough. The key downside of the PCMPESTRI instruction, on current (2016) CPUs
+at least, is its latency and throughput. As a result, it is often faster to do
+substring search with a Boyer-Moore variant and a well placed memchr to quickly
+skip through the haystack.
+
+There are fewer results from the literature on packed substring matching,
+and even fewer for packed multiple substring matching. Ben-Kiki et al. [2]
+describes use of PCMPESTRI for substring matching, but is mostly theoretical
+and hand-waves performance. There is other theoretical work done by Bille [3]
+as well.
+
+The rest of the work in the field, as far as I'm aware, is by Faro and Kulekci
+and is generally focused on multiple pattern search. Their first paper [4a]
+introduces the concept of a fingerprint, which is computed for every block of
+N bytes in every pattern. The haystack is then scanned N bytes at a time and
+a fingerprint is computed in the same way it was computed for blocks in the
+patterns. If the fingerprint corresponds to one that was found in a pattern,
+then a verification step follows to confirm that one of the substrings with the
+corresponding fingerprint actually matches at the current location. Various
+implementation tricks are employed to make sure the fingerprint lookup is fast;
+typically by truncating the fingerprint. (This may, of course, provoke more
+steps in the verification process, so a balance must be struck.)
+
+The main downside of [4a] is that the minimum substring length is 32 bytes,
+presumably because of how the algorithm uses certain SIMD instructions. This
+essentially makes it useless for general purpose regex matching, where a small
+number of short patterns is far more likely.
+
+Faro and Kulekci published another paper [4b] that is conceptually very similar
+to [4a]. The key difference is that it uses the CRC32 instruction (introduced
+as part of SSE 4.2) to compute fingerprint values. This also enables the
+algorithm to work effectively on substrings as short as 7 bytes with 4 byte
+windows. 7 bytes is unfortunately still too long. The window could technically
+be shrunk to 2 bytes, thereby reducing the minimum length to 3, but the small
+window size ends up negating most of the performance benefits, and such short
+patterns are likely the common case in a general purpose regex engine.
+
+Faro and Kulekci also published [4c] that appears to be intended as a
+replacement to using PCMPESTRI. In particular, it is specifically motivated by
+the high throughput/latency time of PCMPESTRI and therefore chooses other SIMD
+instructions that are faster. While this approach works for short substrings,
+I personally couldn't see a way to generalize it to multiple substring search.
+
+Faro and Kulekci have another paper [4d] that I haven't been able to read
+because it is behind a paywall.
+
+
+Teddy
+-----
+
+Finally, we get to Teddy. If the above literature review is complete, then it
+appears that Teddy is a novel algorithm. More than that, in my experience, it
+completely blows away the competition for short substrings, which is exactly
+what we want in a general purpose regex engine. Again, the algorithm appears
+to be developed by the authors of [Hyperscan][1_u]. Hyperscan was open sourced
+late 2015, and no earlier history could be found. Therefore, tracking the exact
+provenance of the algorithm with respect to the published literature seems
+difficult.
+
+DISCLAIMER: My understanding of Teddy is limited to reading auto-generated C
+code, its disassembly and observing its runtime behavior.
+
+At a high level, Teddy works somewhat similarly to the fingerprint algorithms
+published by Faro and Kulekci, but Teddy does it in a way that scales a bit
+better. Namely:
+
+1. Teddy's core algorithm scans the haystack in 16 byte chunks. 16 is
+   significant because it corresponds to the number of bytes in a SIMD vector.
+   If one used AVX2 instructions, then we could scan the haystack in 32 byte
+   chunks. Similarly, if one used AVX512 instructions, we could scan the
+   haystack in 64 byte chunks. Hyperscan implements SSE + AVX2; we only
+   implement SSE for the moment.
+2. Bitwise operations are performed on each chunk to discover if any region of
+   it matches a set of precomputed fingerprints from the patterns. If there are
+   matches, then a verification step is performed. In this implementation, our
+   verification step is naive. This can be improved upon.
+
+The details to make this work are quite clever. First, we must choose how to
+pick our fingerprints. In Hyperscan's implementation, I *believe* they use the
+last N bytes of each substring, where N must be at least the minimum length of
+any substring in the set being searched. In this implementation, we use the
+first N bytes of each substring. (The tradeoffs between these choices aren't
+yet clear to me.) We then must figure out how to quickly test whether an
+occurrence of any fingerprint from the set of patterns appears in a 16 byte
+block from the haystack. To keep things simple, let's assume N = 1 and examine
+some examples to motivate the approach. Here are our patterns:
+
+```ignore
+foo
+bar
+baz
+```
+
+The corresponding fingerprints, for N = 1, are `f`, `b` and `b`. Now let's set
+our 16 byte block to:
+
+```ignore
+bat cat foo bump
+xxxxxxxxxxxxxxxx
+```
+
+To cut to the chase, Teddy works by using bitsets. In particular, Teddy creates
+a mask that allows us to quickly compute membership of a fingerprint in a 16
+byte block that also tells which pattern the fingerprint corresponds to. In
+this case, our fingerprint is a single byte, so an appropriate abstraction is
+a map from a single byte to a list of patterns that contain that fingerprint:
+
+```ignore
+f |--> foo
+b |--> bar, baz
+```
+
+Now, all we need to do is figure out how to represent this map in vector space
+and use normal SIMD operations to perform a lookup. The first simplification
+we can make is to represent our patterns as bit fields occupying a single
+byte. This is important, because a single SIMD vector can store 16 bytes.
+
+```ignore
+f |--> 00000001
+b |--> 00000010, 00000100
+```
+
+How do we perform lookup though? It turns out that SSSE3 introduced a very cool
+instruction called PSHUFB. The instruction takes two SIMD vectors, `A` and `B`,
+and returns a third vector `C`. All vectors are treated as 16 8-bit integers.
+`C` is formed by `C[i] = A[B[i]]`. (This is a bit of a simplification, but true
+for the purposes of this algorithm. For full details, see [Intel's Intrinsics
+Guide][5_u].) This essentially lets us use the values in `B` to lookup values
+in `A`.
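+
+As a rough scalar model (a sketch for intuition only; among other things it
+ignores the zeroing PSHUFB performs when the high bit of `B[i]` is set), the
+lookup can be pictured as:
+
+```ignore
+fn pshufb(a: [u8; 16], b: [u8; 16]) -> [u8; 16] {
+    let mut c = [0u8; 16];
+    for i in 0..16 {
+        // Only the low four bits of b[i] are used as an index into a.
+        c[i] = a[(b[i] & 0xF) as usize];
+    }
+    c
+}
+```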
+
+If we could somehow cause `B` to contain our 16 byte block from the haystack,
+and if `A` could contain our bitmasks, then we'd end up with something like
+this for `A`:
+
+```ignore
+    0x00 0x01 ... 0x62      ... 0x66      ... 0xFF
+A = 0    0        00000110      00000001      0
+```
+
+And if `B` contains our window from our haystack, we could use shuffle to take
+the values from `B` and use them to look up our bitsets in `A`. But of course,
+we can't do this because `A` in the above example contains 256 bytes, which
+is much larger than the size of a SIMD vector.
+
+Nybbles to the rescue! A nybble is 4 bits. Instead of one mask to hold all of
+our bitsets, we can use two masks, where one mask corresponds to the lower four
+bits of our fingerprint and the other mask corresponds to the upper four bits.
+So our map now looks like:
+
+```ignore
+'f' & 0xF = 0x6 |--> 00000001
+'f' >> 4  = 0x6 |--> 00000111
+'b' & 0xF = 0x2 |--> 00000110
+'b' >> 4  = 0x6 |--> 00000111
+```
+
+Notice that the bitsets for each nybble correspond to the union of all
+fingerprints that contain that nybble. For example, both `f` and `b` have the
+same upper 4 bits but differ on the lower 4 bits. Putting this together, we
+have `A0`, `A1` and `B`, where `A0` is our mask for the lower nybble, `A1` is
+our mask for the upper nybble and `B` is our 16 byte block from the haystack:
+
+```ignore
+      0x00 0x01 0x02      0x03 ... 0x06      ... 0xF
+A0 =  0    0    00000110  0        00000001      0
+A1 =  0    0    0         0        00000111      0
+B  =  b    a    t         _        t             p
+B  =  0x62 0x61 0x74      0x20     0x74          0x70
+```
+
+But of course, we can't use `B` with `PSHUFB` yet, since its values are 8 bits,
+and we need indexes that are at most 4 bits (corresponding to one of 16
+values). We can apply the same transformation to split `B` into lower and upper
+nybbles as we did `A`. As before, `B0` corresponds to the lower nybbles and
+`B1` corresponds to the upper nybbles:
+
+```ignore
+     b   a   t   _   c   a   t   _   f   o   o   _   b   u   m   p
+B0 = 0x2 0x1 0x4 0x0 0x3 0x1 0x4 0x0 0x6 0xF 0xF 0x0 0x2 0x5 0xD 0x0
+B1 = 0x6 0x6 0x7 0x2 0x6 0x6 0x7 0x2 0x6 0x6 0x6 0x2 0x6 0x7 0x6 0x7
+```
+
+And now we have a nice correspondence. `B0` can index `A0` and `B1` can index
+`A1`. Here's what we get when we apply `C0 = PSHUFB(A0, B0)`:
+
+```ignore
+     b         a        ... f         o         ... p
+     A0[0x2]   A0[0x1]      A0[0x6]   A0[0xF]       A0[0x0]
+C0 = 00000110  0            00000001  0             0
+```
+
+And `C1 = PSHUFB(A1, B1)`:
+
+```ignore
+     b         a        ... f         o        ... p
+     A1[0x6]   A1[0x6]      A1[0x6]   A1[0x6]      A1[0x7]
+C1 = 00000111  00000111     00000111  00000111     0
+```
+
+Notice that neither `C0` nor `C1` is guaranteed to report fully correct
+results on its own. For example, `C1` claims that `b` is a fingerprint for
+the pattern `foo` (since `A1[0x6] = 00000111`), and that `o` is a fingerprint
+for all of our patterns. But if we combined `C0` and `C1` with an `AND`
+operation:
+
+```ignore
+     b         a        ... f         o        ... p
+C  = 00000110  0            00000001  0            0
+```
+
+Then we now have that `C[i]` contains a bitset corresponding to the matching
+fingerprints in a haystack's 16 byte block, where `i` is the `ith` byte in that
+block.
+
+Once we have that, we can look for the position of the least significant bit
+in `C`. That position, modulo `8`, gives us the pattern that the fingerprint
+matches. That position, integer divided by `8`, also gives us the byte offset
+that the fingerprint occurs in inside the 16 byte haystack block. Using those
+two pieces of information, we can run a verification procedure that tries
+to match all substrings containing that fingerprint at that position in the
+haystack.
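+
+For example (an illustrative decoding only; the implementation below instead
+walks a movemask of the non-zero bytes of `C`): if the least significant set
+bit of `C` sits at bit position 17, then
+
+```ignore
+let byte_offset = 17 / 8; // == 2, the offset within the 16 byte block
+let bucket      = 17 % 8; // == 1, the pattern (bucket) that matched
+```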
+
+
+Implementation notes
+--------------------
+
+The problem with the algorithm as described above is that it uses a single byte
+for a fingerprint. This will work well if the fingerprints are rare in the
+haystack (e.g., capital letters or special characters in normal English text),
+but if the fingerprints are common, you'll wind up spending too much time in
+the verification step, which effectively negates the performance benefits of
+scanning 16 bytes at a time. Remember, the key to the performance of this
+algorithm is to do as little work as possible per 16 bytes.
+
+This algorithm can be extrapolated in a relatively straight-forward way to use
+larger fingerprints. That is, instead of a single byte prefix, we might use a
+three byte prefix. The implementation below implements N = {1, 2, 3} and always
+picks the largest N possible. The rationale is that the bigger the fingerprint,
+the fewer verification steps we'll do. Of course, if N is too large, then we'll
+end up doing too much on each step.
+
+The way to extend it is:
+
+1. Add a mask for each byte in the fingerprint. (Remember that each mask is
+   composed of two SIMD vectors.) This results in a value of `C` for each byte
+   in the fingerprint while searching.
+2. When testing each 16 byte block, the values of `C` must be shifted so that
+   they are aligned. Once aligned, they should all be `AND`'d together. This
+   will give you only the bitsets corresponding to the full match of the
+   fingerprint.
+
+The implementation below is commented to fill in the nitty gritty details.
+
+References
+----------
+
+- **[1]** [Hyperscan on GitHub](https://github.com/01org/hyperscan),
+    [webpage](https://01.org/hyperscan)
+- **[2a]** Ben-Kiki, O., Bille, P., Breslauer, D., Gasieniec, L., Grossi, R.,
+    & Weimann, O. (2011).
+    _Optimal packed string matching_.
+    In LIPIcs-Leibniz International Proceedings in Informatics (Vol. 13).
+    Schloss Dagstuhl-Leibniz-Zentrum fuer Informatik.
+    DOI: 10.4230/LIPIcs.FSTTCS.2011.423.
+    [PDF](http://drops.dagstuhl.de/opus/volltexte/2011/3355/pdf/37.pdf).
+- **[2b]** Ben-Kiki, O., Bille, P., Breslauer, D., Ga̧sieniec, L., Grossi, R.,
+    & Weimann, O. (2014).
+    _Towards optimal packed string matching_.
+    Theoretical Computer Science, 525, 111-129.
+    DOI: 10.1016/j.tcs.2013.06.013.
+    [PDF](http://www.cs.haifa.ac.il/~oren/Publications/bpsm.pdf).
+- **[3]** Bille, P. (2011).
+    _Fast searching in packed strings_.
+    Journal of Discrete Algorithms, 9(1), 49-56.
+    DOI: 10.1016/j.jda.2010.09.003.
+    [PDF](http://www.sciencedirect.com/science/article/pii/S1570866710000353).
+- **[4a]** Faro, S., & Külekci, M. O. (2012, October).
+    _Fast multiple string matching using streaming SIMD extensions technology_.
+    In String Processing and Information Retrieval (pp. 217-228).
+    Springer Berlin Heidelberg.
+    DOI: 10.1007/978-3-642-34109-0_23.
+    [PDF](http://www.dmi.unict.it/~faro/papers/conference/faro32.pdf).
+- **[4b]** Faro, S., & Külekci, M. O. (2013, September).
+    _Towards a Very Fast Multiple String Matching Algorithm for Short Patterns_.
+    In Stringology (pp. 78-91).
+    [PDF](http://www.dmi.unict.it/~faro/papers/conference/faro36.pdf).
+- **[4c]** Faro, S., & Külekci, M. O. (2013, January).
+    _Fast packed string matching for short patterns_.
+    In Proceedings of the Meeting on Algorithm Engineering & Experiments
+    (pp. 113-121).
+    Society for Industrial and Applied Mathematics.
+    [PDF](http://arxiv.org/pdf/1209.6449.pdf).
+- **[4d]** Faro, S., & Külekci, M. O. (2014).
+    _Fast and flexible packed string matching_.
+    Journal of Discrete Algorithms, 28, 61-72.
+    DOI: 10.1016/j.jda.2014.07.003.
+
+[1_u]: https://github.com/01org/hyperscan
+[5_u]: https://software.intel.com/sites/landingpage/IntrinsicsGuide
+*/
+
+use std::cmp;
+
+use aho_corasick::{Automaton, AcAutomaton, FullAcAutomaton};
+use syntax::hir::literal::Literals;
+
+use vector::ssse3::{SSSE3VectorBuilder, u8x16};
+
+/// Corresponds to the number of bytes read at a time in the haystack.
+const BLOCK_SIZE: usize = 16;
+
+/// Match reports match information.
+#[derive(Debug, Clone)]
+pub struct Match {
+    /// The index of the pattern that matched. The index is in correspondence
+    /// with the order of the patterns given at construction.
+    pub pat: usize,
+    /// The start byte offset of the match.
+    pub start: usize,
+    /// The end byte offset of the match. This is always `start + pat.len()`.
+    pub end: usize,
+}
+
+/// A SIMD accelerated multi substring searcher.
+#[derive(Debug, Clone)]
+pub struct Teddy {
+    /// A builder for SSSE3 empowered vectors.
+    vb: SSSE3VectorBuilder,
+    /// A list of substrings to match.
+    pats: Vec<Vec<u8>>,
+    /// An Aho-Corasick automaton of the patterns. We use this when we need to
+    /// search pieces smaller than the Teddy block size.
+    ac: FullAcAutomaton<Vec<u8>>,
+    /// A set of 8 buckets. Each bucket corresponds to a single member of a
+    /// bitset. A bucket contains zero or more substrings. This is useful
+    /// when the number of substrings exceeds 8, since our bitsets cannot have
+    /// more than 8 members.
+    buckets: Vec<Vec<usize>>,
+    /// Our set of masks. There's one mask for each byte in the fingerprint.
+    masks: Masks,
+}
+
+impl Teddy {
+    /// Returns true if and only if Teddy is supported on this platform.
+    ///
+    /// If this returns `false`, then `Teddy::new(...)` is guaranteed to
+    /// return `None`.
+    pub fn available() -> bool {
+        SSSE3VectorBuilder::new().is_some()
+    }
+
+    /// Create a new `Teddy` multi substring matcher.
+    ///
+    /// If a `Teddy` matcher could not be created (e.g., `pats` is empty or has
+    /// an empty substring), then `None` is returned.
+    pub fn new(pats: &Literals) -> Option<Teddy> {
+        let vb = match SSSE3VectorBuilder::new() {
+            None => return None,
+            Some(vb) => vb,
+        };
+        if !Teddy::available() {
+            return None;
+        }
+
+        let pats: Vec<_> = pats.literals().iter().map(|p|p.to_vec()).collect();
+        let min_len = pats.iter().map(|p| p.len()).min().unwrap_or(0);
+        // Don't allow any empty patterns and require that we have at
+        // least one pattern.
+        if min_len < 1 {
+            return None;
+        }
+        // Pick the largest mask possible, but no larger than 3.
+        let nmasks = cmp::min(3, min_len);
+        let mut masks = Masks::new(vb, nmasks);
+        let mut buckets = vec![vec![]; 8];
+        // Assign a substring to each bucket, and add the bucket's bitfield to
+        // the appropriate position in the mask.
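+        // (With more than 8 patterns the buckets wrap around: for instance,
+        // patterns 0 and 8 share bucket 0, patterns 1 and 9 share bucket 1,
+        // and so on, and the verification step disambiguates between them.)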
+        for (pati, pat) in pats.iter().enumerate() {
+            let bucket = pati % 8;
+            buckets[bucket].push(pati);
+            masks.add(bucket as u8, pat);
+        }
+        Some(Teddy {
+            vb: vb,
+            pats: pats.to_vec(),
+            ac: AcAutomaton::new(pats.to_vec()).into_full(),
+            buckets: buckets,
+            masks: masks,
+        })
+    }
+
+    /// Returns all of the substrings matched by this `Teddy`.
+    pub fn patterns(&self) -> &[Vec<u8>] {
+        &self.pats
+    }
+
+    /// Returns the number of substrings in this matcher.
+    pub fn len(&self) -> usize {
+        self.pats.len()
+    }
+
+    /// Returns the approximate size on the heap used by this matcher.
+    pub fn approximate_size(&self) -> usize {
+        self.pats.iter().fold(0, |a, b| a + b.len())
+    }
+
+    /// Searches `haystack` for the substrings in this `Teddy`. If a match was
+    /// found, then it is returned. Otherwise, `None` is returned.
+    pub fn find(&self, haystack: &[u8]) -> Option<Match> {
+        // This is safe because the only way we can construct a Teddy type
+        // is if SSSE3 is available.
+        unsafe { self.find_impl(haystack) }
+    }
+
+    #[allow(unused_attributes)]
+    #[target_feature(enable = "ssse3")]
+    unsafe fn find_impl(&self, haystack: &[u8]) -> Option<Match> {
+        // If our haystack is too small for the block based approach (it must
+        // be at least BLOCK_SIZE + 2 bytes long), fall back to a naive brute
+        // force search.
+        if haystack.is_empty() || haystack.len() < (BLOCK_SIZE + 2) {
+            return self.slow(haystack, 0);
+        }
+        match self.masks.len() {
+            0 => None,
+            1 => self.find1(haystack),
+            2 => self.find2(haystack),
+            3 => self.find3(haystack),
+            _ => unreachable!(),
+        }
+    }
+
+    /// `find1` is used when there is only 1 mask. This is the easy case and is
+    /// pretty much as described in the module documentation.
+    #[inline(always)]
+    fn find1(&self, haystack: &[u8]) -> Option<Match> {
+        let mut pos = 0;
+        let zero = self.vb.u8x16_splat(0);
+        let len = haystack.len();
+        debug_assert!(len >= BLOCK_SIZE);
+        while pos <= len - BLOCK_SIZE {
+            let h = unsafe {
+                // I tried and failed to eliminate bounds checks in safe code.
+                // This is safe because of our loop invariant: pos is always
+                // <= len-16.
+                let p = haystack.get_unchecked(pos..);
+                self.vb.u8x16_load_unchecked_unaligned(p)
+            };
+            // N.B. `res0` is our `C` in the module documentation.
+            let res0 = self.masks.members1(h);
+            // Only do expensive verification if there are any non-zero bits.
+            let bitfield = res0.ne(zero).movemask();
+            if bitfield != 0 {
+                if let Some(m) = self.verify(haystack, pos, res0, bitfield) {
+                    return Some(m);
+                }
+            }
+            pos += BLOCK_SIZE;
+        }
+        self.slow(haystack, pos)
+    }
+
+    /// `find2` is used when there are 2 masks, i.e., the fingerprint is 2 bytes
+    /// long.
+    #[inline(always)]
+    fn find2(&self, haystack: &[u8]) -> Option<Match> {
+        // This is an exotic way to right shift a SIMD vector across lanes.
+        // See its use below for more details.
+        let zero = self.vb.u8x16_splat(0);
+        let len = haystack.len();
+        // The previous value of `C` (from the module documentation) for the
+        // *first* byte in the fingerprint. On subsequent iterations, we take
+        // the last bitset from the previous `C` and insert it into the first
+        // position of the current `C`, shifting all other bitsets to the right
+        // one lane. This causes `C` for the first byte to line up with `C` for
+        // the second byte, so that they can be `AND`'d together.
+        let mut prev0 = self.vb.u8x16_splat(0xFF);
+        let mut pos = 1;
+        debug_assert!(len >= BLOCK_SIZE);
+        while pos <= len - BLOCK_SIZE {
+            let h = unsafe {
+                // I tried and failed to eliminate bounds checks in safe code.
+                // This is safe because of our loop invariant: pos is always
+                // <= len-16.
+                let p = haystack.get_unchecked(pos..);
+                self.vb.u8x16_load_unchecked_unaligned(p)
+            };
+            let (res0, res1) = self.masks.members2(h);
+
+            // Do this:
+            //
+            //     (prev0 << 15) | (res0 >> 1)
+            //
+            // This lets us line up our C values for each byte.
+            let res0prev0 = res0.alignr_15(prev0);
+
+            // `AND`'s our `C` values together.
+            let res = res0prev0.and(res1);
+            prev0 = res0;
+
+            let bitfield = res.ne(zero).movemask();
+            if bitfield != 0 {
+                let pos = pos.checked_sub(1).unwrap();
+                if let Some(m) = self.verify(haystack, pos, res, bitfield) {
+                    return Some(m);
+                }
+            }
+            pos += BLOCK_SIZE;
+        }
+        // The windowing above doesn't check the last byte in the last
+        // window, so start the slow search at the last byte of the last
+        // window.
+        self.slow(haystack, pos.checked_sub(1).unwrap())
+    }
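+
+    // A concrete lane picture of the `alignr_15` step above (an illustrative
+    // sketch, derived from the comments in `find2`): if the 16 lanes of
+    // `prev0` are [p0, p1, ..., p15] and the lanes of `res0` are
+    // [r0, r1, ..., r15], then `res0.alignr_15(prev0)` yields
+    // [p15, r0, r1, ..., r14]. That places the last `C` column of the
+    // previous block in front of the current block's columns, which is
+    // exactly the alignment the `AND` with `res1` requires.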
+
+    /// `find3` is used when there are 3 masks, i.e., the fingerprint is 3 bytes
+    /// long.
+    ///
+    /// N.B. This is a straightforward extrapolation of `find2`. The only
+    /// difference is that we need to keep track of two previous values of `C`,
+    /// since we now need to align for three bytes.
+    #[inline(always)]
+    fn find3(&self, haystack: &[u8]) -> Option<Match> {
+        let zero = self.vb.u8x16_splat(0);
+        let len = haystack.len();
+        let mut prev0 = self.vb.u8x16_splat(0xFF);
+        let mut prev1 = self.vb.u8x16_splat(0xFF);
+        let mut pos = 2;
+        while pos <= len - BLOCK_SIZE {
+            let h = unsafe {
+                // I tried and failed to eliminate bounds checks in safe code.
+                // This is safe because of our loop invariant: pos is always
+                // <= len-16.
+                let p = haystack.get_unchecked(pos..);
+                self.vb.u8x16_load_unchecked_unaligned(p)
+            };
+            let (res0, res1, res2) = self.masks.members3(h);
+
+            let res0prev0 = res0.alignr_14(prev0);
+            let res1prev1 = res1.alignr_15(prev1);
+            let res = res0prev0.and(res1prev1).and(res2);
+
+            prev0 = res0;
+            prev1 = res1;
+
+            let bitfield = res.ne(zero).movemask();
+            if bitfield != 0 {
+                let pos = pos.checked_sub(2).unwrap();
+                if let Some(m) = self.verify(haystack, pos, res, bitfield) {
+                    return Some(m);
+                }
+            }
+            pos += BLOCK_SIZE;
+        }
+        // The windowing above doesn't check the last two bytes in the last
+        // window, so start the slow search at the penultimate byte of the
+        // last window.
+        self.slow(haystack, pos.checked_sub(2).unwrap())
+    }
+
+    /// Runs the verification procedure on `res` (i.e., `C` from the module
+    /// documentation), where the haystack block starts at `pos` in
+    /// `haystack`. `bitfield` has ones in the bit positions where `res` has
+    /// non-zero bytes.
+    ///
+    /// If a match exists, it returns the first one.
+    #[inline(always)]
+    fn verify(
+        &self,
+        haystack: &[u8],
+        pos: usize,
+        res: u8x16,
+        mut bitfield: u32,
+    ) -> Option<Match> {
+        while bitfield != 0 {
+            // The next offset, relative to pos, where some fingerprint
+            // matched.
+            let byte_pos = bitfield.trailing_zeros() as usize;
+            bitfield &= !(1 << byte_pos);
+
+            // Offset relative to the beginning of the haystack.
+            let start = pos + byte_pos;
+
+            // The bitfield telling us which patterns had fingerprints that
+            // match at this starting position.
+            let mut patterns = res.extract(byte_pos);
+            while patterns != 0 {
+                let bucket = patterns.trailing_zeros() as usize;
+                patterns &= !(1 << bucket);
+
+                // Actual substring search verification.
+                if let Some(m) = self.verify_bucket(haystack, bucket, start) {
+                    return Some(m);
+                }
+            }
+        }
+
+        None
+    }
+
+    /// Verifies whether any substring in the given bucket matches in haystack
+    /// at the given starting position.
+    #[inline(always)]
+    fn verify_bucket(
+        &self,
+        haystack: &[u8],
+        bucket: usize,
+        start: usize,
+    ) -> Option<Match> {
+        // This cycles through the patterns in the bucket in the order that
+        // the patterns were given. Therefore, we guarantee leftmost-first
+        // semantics.
+        for &pati in &self.buckets[bucket] {
+            let pat = &*self.pats[pati];
+            if start + pat.len() > haystack.len() {
+                continue;
+            }
+            if pat == &haystack[start..start + pat.len()] {
+                return Some(Match {
+                    pat: pati,
+                    start: start,
+                    end: start + pat.len(),
+                });
+            }
+        }
+        None
+    }
+
+    /// Slow substring search through all patterns in this matcher.
+    ///
+    /// This is used when we don't have enough bytes in the haystack for our
+    /// block based approach.
+    #[inline(never)]
+    fn slow(&self, haystack: &[u8], pos: usize) -> Option<Match> {
+        self.ac.find(&haystack[pos..]).next().map(|m| {
+            Match {
+                pat: m.pati,
+                start: pos + m.start,
+                end: pos + m.end,
+            }
+        })
+    }
+}
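+
+// A minimal usage sketch for the matcher above, assuming a `Literals` value
+// produced by this crate's literal extraction (the `lits` and `haystack`
+// names below are hypothetical):
+//
+//     if let Some(teddy) = Teddy::new(&lits) {
+//         if let Some(m) = teddy.find(haystack) {
+//             // `m.pat` indexes into `teddy.patterns()`;
+//             // `m.start..m.end` is the matched span.
+//         }
+//     }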
+
+/// A list of masks. This has length equal to the length of the fingerprint.
+/// The length of the fingerprint is always `min(3, len(smallest_substring))`.
+#[derive(Debug, Clone)]
+struct Masks {
+    vb: SSSE3VectorBuilder,
+    masks: [Mask; 3],
+    size: usize,
+}
+
+impl Masks {
+    /// Create a new set of masks of size `n`, where `n` corresponds to the
+    /// number of bytes in a fingerprint.
+    fn new(vb: SSSE3VectorBuilder, n: usize) -> Masks {
+        Masks {
+            vb: vb,
+            masks: [Mask::new(vb), Mask::new(vb), Mask::new(vb)],
+            size: n,
+        }
+    }
+
+    /// Returns the number of masks.
+    fn len(&self) -> usize {
+        self.size
+    }
+
+    /// Adds the given pattern to the given bucket. The bucket must be in the
+    /// range `0..8`, since it is used as a bit index into an 8-bit bucket set.
+    fn add(&mut self, bucket: u8, pat: &[u8]) {
+        for i in 0..self.len() {
+            self.masks[i].add(bucket, pat[i]);
+        }
+    }
+
+    /// Finds the fingerprints that are in the given haystack block, i.e., this
+    /// returns `C` as described in the module documentation.
+    ///
+    /// More specifically, for `i in 0..16` and `j in 0..8`, `C[i][j] == 1` if and
+    /// only if `haystack_block[i]` corresponds to a fingerprint that is part
+    /// of a pattern in bucket `j`.
+    #[inline(always)]
+    fn members1(&self, haystack_block: u8x16) -> u8x16 {
+        let masklo = self.vb.u8x16_splat(0xF);
+        let hlo = haystack_block.and(masklo);
+        let hhi = haystack_block.bit_shift_right_4().and(masklo);
+
+        self.masks[0].lo.shuffle(hlo).and(self.masks[0].hi.shuffle(hhi))
+    }
+
+    /// Like members1, but computes C for the first and second bytes in the
+    /// fingerprint.
+    #[inline(always)]
+    fn members2(&self, haystack_block: u8x16) -> (u8x16, u8x16) {
+        let masklo = self.vb.u8x16_splat(0xF);
+        let hlo = haystack_block.and(masklo);
+        let hhi = haystack_block.bit_shift_right_4().and(masklo);
+
+        let res0 =
+            self.masks[0].lo.shuffle(hlo).and(self.masks[0].hi.shuffle(hhi));
+        let res1 =
+            self.masks[1].lo.shuffle(hlo).and(self.masks[1].hi.shuffle(hhi));
+        (res0, res1)
+    }
+
+    /// Like `members1`, but computes `C` for the first, second and third bytes
+    /// in the fingerprint.
+    #[inline(always)]
+    fn members3(&self, haystack_block: u8x16) -> (u8x16, u8x16, u8x16) {
+        let masklo = self.vb.u8x16_splat(0xF);
+        let hlo = haystack_block.and(masklo);
+        let hhi = haystack_block.bit_shift_right_4().and(masklo);
+
+        let res0 =
+            self.masks[0].lo.shuffle(hlo).and(self.masks[0].hi.shuffle(hhi));
+        let res1 =
+            self.masks[1].lo.shuffle(hlo).and(self.masks[1].hi.shuffle(hhi));
+        let res2 =
+            self.masks[2].lo.shuffle(hlo).and(self.masks[2].hi.shuffle(hhi));
+        (res0, res1, res2)
+    }
+}
+
+/// A single mask.
+#[derive(Debug, Clone, Copy)]
+struct Mask {
+    /// Bitsets for the low nybbles in a fingerprint.
+    lo: u8x16,
+    /// Bitsets for the high nybbles in a fingerprint.
+    hi: u8x16,
+}
+
+impl Mask {
+    /// Create a new mask with no members.
+    fn new(vb: SSSE3VectorBuilder) -> Mask {
+        Mask {
+            lo: vb.u8x16_splat(0),
+            hi: vb.u8x16_splat(0),
+        }
+    }
+
+    /// Adds the given byte to the given bucket.
+    fn add(&mut self, bucket: u8, byte: u8) {
+        // Split our byte into two nybbles, and add each nybble to our
+        // mask.
+        let byte_lo = (byte & 0xF) as usize;
+        let byte_hi = (byte >> 4) as usize;
+
+        let lo = self.lo.extract(byte_lo);
+        self.lo.replace(byte_lo, ((1 << bucket) as u8) | lo);
+
+        let hi = self.hi.extract(byte_hi);
+        self.hi.replace(byte_hi, ((1 << bucket) as u8) | hi);
+    }
+}
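+
+// Worked example of the nybble lookups above (illustrative): adding byte
+// 0x61 (`a`) to bucket 3 splits it into lo nybble 0x1 and hi nybble 0x6, so
+// `lo[0x1]` and `hi[0x6]` both get bit `1 << 3` set. When a haystack lane
+// later holds 0x61, `members1` shuffles `lo` by 0x1 and `hi` by 0x6, and the
+// `AND` of the two lookups leaves bit 3 set in that lane, flagging bucket 3
+// as a candidate for verification.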
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex/src/literal/teddy_ssse3/mod.rs
@@ -0,0 +1,16 @@
+pub use self::imp::*;
+
+#[cfg(all(
+    feature = "unstable",
+    regex_runtime_teddy_ssse3,
+    any(target_arch = "x86", target_arch = "x86_64"),
+))]
+mod imp;
+
+#[cfg(not(all(
+    feature = "unstable",
+    regex_runtime_teddy_ssse3,
+    any(target_arch = "x86", target_arch = "x86_64"),
+)))]
+#[path = "fallback.rs"]
+mod imp;
--- a/third_party/rust/regex/src/pikevm.rs
+++ b/third_party/rust/regex/src/pikevm.rs
@@ -104,17 +104,17 @@ impl<'r, I: Input> Fsm<'r, I> {
         cache: &ProgramCache,
         matches: &mut [bool],
         slots: &mut [Slot],
         quit_after_match: bool,
         input: I,
         start: usize,
     ) -> bool {
         let mut cache = cache.borrow_mut();
-        let mut cache = &mut cache.pikevm;
+        let cache = &mut cache.pikevm;
         cache.clist.resize(prog.len(), prog.captures.len());
         cache.nlist.resize(prog.len(), prog.captures.len());
         let at = input.at(start);
         Fsm {
             prog: prog,
             stack: &mut cache.stack,
             input: input,
         }.exec_(
@@ -336,17 +336,17 @@ impl<'r, I: Input> Fsm<'r, I> {
                     }
                     ip = inst.goto;
                 }
                 Split(ref inst) => {
                     self.stack.push(FollowEpsilon::IP(inst.goto2));
                     ip = inst.goto1;
                 }
                 Match(_) | Char(_) | Ranges(_) | Bytes(_) => {
-                    let mut t = &mut nlist.caps(ip);
+                    let t = &mut nlist.caps(ip);
                     for (slot, val) in t.iter_mut().zip(thread_caps.iter()) {
                         *slot = *val;
                     }
                     return;
                 }
             }
         }
     }
--- a/third_party/rust/regex/src/prog.rs
+++ b/third_party/rust/regex/src/prog.rs
@@ -2,19 +2,19 @@ use std::collections::HashMap;
 use std::cmp::Ordering;
 use std::fmt;
 use std::ops::Deref;
 use std::mem;
 use std::slice;
 use std::sync::Arc;
 
 use input::Char;
-use literals::LiteralSearcher;
+use literal::LiteralSearcher;
 
-/// InstPtr represents the index of an instruction in a regex program.
+/// `InstPtr` represents the index of an instruction in a regex program.
 pub type InstPtr = usize;
 
 /// Program is a sequence of instructions and various facts about those
 /// instructions.
 #[derive(Clone)]
 pub struct Program {
     /// A sequence of instructions that represents an NFA.
     pub insts: Vec<Inst>,
@@ -182,59 +182,55 @@ impl fmt::Debug for Program {
             use std::ascii::escape_default;
             let escaped = escape_default(b).collect::<Vec<u8>>();
             String::from_utf8_lossy(&escaped).into_owned()
         }
 
         for (pc, inst) in self.iter().enumerate() {
             match *inst {
                 Match(slot) => {
-                    try!(write!(f, "{:04} Match({:?})", pc, slot))
+                    write!(f, "{:04} Match({:?})", pc, slot)?
                 }
                 Save(ref inst) => {
                     let s = format!("{:04} Save({})", pc, inst.slot);
-                    try!(write!(f, "{}", with_goto(pc, inst.goto, s)));
+                    write!(f, "{}", with_goto(pc, inst.goto, s))?;
                 }
                 Split(ref inst) => {
-                    try!(write!(f, "{:04} Split({}, {})",
-                                pc, inst.goto1, inst.goto2));
+                    write!(
+                        f, "{:04} Split({}, {})", pc, inst.goto1, inst.goto2)?;
                 }
                 EmptyLook(ref inst) => {
                     let s = format!("{:?}", inst.look);
-                    try!(write!(f, "{:04} {}",
-                                pc, with_goto(pc, inst.goto, s)));
+                    write!(f, "{:04} {}", pc, with_goto(pc, inst.goto, s))?;
                 }
                 Char(ref inst) => {
                     let s = format!("{:?}", inst.c);
-                    try!(write!(f, "{:04} {}",
-                                pc, with_goto(pc, inst.goto, s)));
+                    write!(f, "{:04} {}", pc, with_goto(pc, inst.goto, s))?;
                 }
                 Ranges(ref inst) => {
                     let ranges = inst.ranges
                         .iter()
                         .map(|r| format!("{:?}-{:?}", r.0, r.1))
                         .collect::<Vec<String>>()
                         .join(", ");
-                    let s = format!("{}", ranges);
-                    try!(write!(f, "{:04} {}",
-                                pc, with_goto(pc, inst.goto, s)));
+                    write!(
+                        f, "{:04} {}", pc, with_goto(pc, inst.goto, ranges))?;
                 }
                 Bytes(ref inst) => {
                     let s = format!(
                         "Bytes({}, {})",
                         visible_byte(inst.start),
                         visible_byte(inst.end));
-                    try!(write!(f, "{:04} {}",
-                                pc, with_goto(pc, inst.goto, s)));
+                    write!(f, "{:04} {}", pc, with_goto(pc, inst.goto, s))?;
                 }
             }
             if pc == self.start {
-                try!(write!(f, " (start)"));
+                write!(f, " (start)")?;
             }
-            try!(write!(f, "\n"));
+            write!(f, "\n")?;
         }
         Ok(())
     }
 }
 
 impl<'a> IntoIterator for &'a Program {
     type Item = &'a Inst;
     type IntoIter = slice::Iter<'a, Inst>;
@@ -315,17 +311,17 @@ pub struct InstSplit {
     /// The first instruction to try. A match resulting from following goto1
     /// has precedence over a match resulting from following goto2.
     pub goto1: InstPtr,
     /// The second instruction to try. A match resulting from following goto1
     /// has precedence over a match resulting from following goto2.
     pub goto2: InstPtr,
 }
 
-/// Representation of the EmptyLook instruction.
+/// Representation of the `EmptyLook` instruction.
 #[derive(Clone, Debug)]
 pub struct InstEmptyLook {
     /// The next location to execute in the program if this instruction
     /// succeeds.
     pub goto: InstPtr,
     /// The type of zero-width assertion to check.
     pub look: EmptyLook,
 }
--- a/third_party/rust/regex/src/re_builder.rs
+++ b/third_party/rust/regex/src/re_builder.rs
@@ -10,36 +10,40 @@
 
 /// The set of user configurable options for compiling zero or more regexes.
 #[derive(Clone, Debug)]
 #[allow(missing_docs)]
 pub struct RegexOptions {
     pub pats: Vec<String>,
     pub size_limit: usize,
     pub dfa_size_limit: usize,
+    pub nest_limit: u32,
     pub case_insensitive: bool,
     pub multi_line: bool,
     pub dot_matches_new_line: bool,
     pub swap_greed: bool,
     pub ignore_whitespace: bool,
     pub unicode: bool,
+    pub octal: bool,
 }
 
 impl Default for RegexOptions {
     fn default() -> Self {
         RegexOptions {
             pats: vec![],
             size_limit: 10 * (1<<20),
             dfa_size_limit: 2 * (1<<20),
+            nest_limit: 250,
             case_insensitive: false,
             multi_line: false,
             dot_matches_new_line: false,
             swap_greed: false,
             ignore_whitespace: false,
             unicode: true,
+            octal: false,
         }
     }
 }
 
 macro_rules! define_builder {
     ($name:ident, $regex_mod:ident, $only_utf8:expr) => {
         pub mod $name {
             use error::Error;
@@ -54,17 +58,17 @@ macro_rules! define_builder {
 /// setting the default flags (which can be overridden in the expression
 /// itself) or setting various limits.
 pub struct RegexBuilder(RegexOptions);
 
 impl RegexBuilder {
     /// Create a new regular expression builder with the given pattern.
     ///
     /// If the pattern is invalid, then an error will be returned when
-    /// `compile` is called.
+    /// `build` is called.
     pub fn new(pattern: &str) -> RegexBuilder {
         let mut builder = RegexBuilder(RegexOptions::default());
         builder.0.pats.push(pattern.to_owned());
         builder
     }
 
     /// Consume the builder and compile the regular expression.
     ///
@@ -74,22 +78,30 @@ impl RegexBuilder {
     pub fn build(&self) -> Result<Regex, Error> {
         ExecBuilder::new_options(self.0.clone())
             .only_utf8($only_utf8)
             .build()
             .map(Regex::from)
     }
 
     /// Set the value for the case insensitive (`i`) flag.
+    ///
+    /// When enabled, letters in the pattern will match both upper case and
+    /// lower case variants.
     pub fn case_insensitive(&mut self, yes: bool) -> &mut RegexBuilder {
         self.0.case_insensitive = yes;
         self
     }
 
     /// Set the value for the multi-line matching (`m`) flag.
+    ///
+    /// When enabled, `^` matches the beginning of lines and `$` matches the
+    /// end of lines.
+    ///
+    /// By default, they match beginning/end of the input.
     pub fn multi_line(&mut self, yes: bool) -> &mut RegexBuilder {
         self.0.multi_line = yes;
         self
     }
 
     /// Set the value for the any character (`s`) flag, where in `.` matches
     /// anything when `s` is set and matches anything except for new line when
     /// it is not set (the default).
@@ -98,33 +110,65 @@ impl RegexBuilder {
     /// expressions and means "any Unicode scalar value" for `regex::Regex`
     /// expressions.
     pub fn dot_matches_new_line(&mut self, yes: bool) -> &mut RegexBuilder {
         self.0.dot_matches_new_line = yes;
         self
     }
 
     /// Set the value for the greedy swap (`U`) flag.
+    ///
+    /// When enabled, a pattern like `a*` is lazy (tries to find shortest
+    /// match) and `a*?` is greedy (tries to find longest match).
+    ///
+    /// By default, `a*` is greedy and `a*?` is lazy.
     pub fn swap_greed(&mut self, yes: bool) -> &mut RegexBuilder {
         self.0.swap_greed = yes;
         self
     }
 
     /// Set the value for the ignore whitespace (`x`) flag.
+    ///
+    /// When enabled, whitespace such as new lines and spaces will be ignored
+    /// between expressions of the pattern, and `#` can be used to start a
+    /// comment until the next new line.
     pub fn ignore_whitespace(&mut self, yes: bool) -> &mut RegexBuilder {
         self.0.ignore_whitespace = yes;
         self
     }
 
     /// Set the value for the Unicode (`u`) flag.
+    ///
+    /// Enabled by default. When disabled, character classes such as `\w` only
+    /// match ASCII word characters instead of all Unicode word characters.
     pub fn unicode(&mut self, yes: bool) -> &mut RegexBuilder {
         self.0.unicode = yes;
         self
     }
 
+    /// Whether to support octal syntax or not.
+    ///
+    /// Octal syntax is a little-known way of uttering Unicode codepoints in
+    /// a regular expression. For example, `a`, `\x61`, `\u0061` and
+    /// `\141` are all equivalent regular expressions, where the last example
+    /// shows octal syntax.
+    ///
+    /// While supporting octal syntax isn't in and of itself a problem, it does
+    /// make good error messages harder. That is, in PCRE based regex engines,
+    /// syntax like `\0` invokes a backreference, which is explicitly
+    /// unsupported in Rust's regex engine. However, many users expect it to
+    /// be supported. Therefore, when octal support is disabled, the error
+    /// message will explicitly mention that backreferences aren't supported.
+    ///
+    /// Octal syntax is disabled by default.
+    pub fn octal(&mut self, yes: bool) -> &mut RegexBuilder {
+        self.0.octal = yes;
+        self
+    }
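+
+    // For illustration (a sketch based on the doc comment above): with octal
+    // support enabled, `\141` parses as the codepoint for `a`, whereas it is
+    // rejected under the default settings.
+    //
+    //     assert!(RegexBuilder::new(r"\141").octal(true).build().is_ok());
+    //     assert!(RegexBuilder::new(r"\141").build().is_err());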
+
     /// Set the approximate size limit of the compiled regular expression.
     ///
     /// This roughly corresponds to the number of bytes occupied by a single
     /// compiled program. If the program exceeds this number, then a
     /// compilation error is returned.
     pub fn size_limit(&mut self, limit: usize) -> &mut RegexBuilder {
         self.0.size_limit = limit;
         self
@@ -132,22 +176,52 @@ impl RegexBuilder {
 
     /// Set the approximate size of the cache used by the DFA.
     ///
     /// This roughly corresponds to the number of bytes that the DFA will
     /// use while searching.
     ///
     /// Note that this is a *per thread* limit. There is no way to set a global
     /// limit. In particular, if a regex is used from multiple threads
-    /// simulanteously, then each thread may use up to the number of bytes
+    /// simultaneously, then each thread may use up to the number of bytes
     /// specified here.
     pub fn dfa_size_limit(&mut self, limit: usize) -> &mut RegexBuilder {
         self.0.dfa_size_limit = limit;
         self
     }
+
+    /// Set the nesting limit for this parser.
+    ///
+    /// The nesting limit controls how deep the abstract syntax tree is allowed
+    /// to be. If the AST exceeds the given limit (e.g., with too many nested
+    /// groups), then an error is returned by the parser.
+    ///
+    /// The purpose of this limit is to act as a heuristic to prevent stack
+    /// overflow for consumers that do structural induction on an `Ast` using
+    /// explicit recursion. While this crate never does this (instead using
+    /// constant stack space and moving the call stack to the heap), other
+    /// crates may.
+    ///
+    /// This limit is not checked until the entire Ast is parsed. Therefore,
+    /// if callers want to put a limit on the amount of heap space used, then
+    /// they should impose a limit on the length, in bytes, of the concrete
+    /// pattern string. In particular, this is viable since this parser
+    /// implementation will limit itself to heap space proportional to the
+    /// length of the pattern string.
+    ///
+    /// Note that a nest limit of `0` will return a nest limit error for most
+    /// patterns but not all. For example, a nest limit of `0` permits `a` but
+    /// not `ab`, since `ab` requires a concatenation, which results in a nest
+    /// depth of `1`. In general, a nest limit is not something that manifests
+    /// in an obvious way in the concrete syntax; therefore, it should not be
+    /// used in a granular way.
+    pub fn nest_limit(&mut self, limit: u32) -> &mut RegexBuilder {
+        self.0.nest_limit = limit;
+        self
+    }
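+
+    // Illustrative sketch of the limit described above, using the doc's own
+    // `a` vs `ab` example:
+    //
+    //     assert!(RegexBuilder::new("a").nest_limit(0).build().is_ok());
+    //     assert!(RegexBuilder::new("ab").nest_limit(0).build().is_err());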
 }
         }
     }
 }
 
 define_builder!(bytes, re_bytes, false);
 define_builder!(unicode, re_unicode, true);
 
@@ -166,17 +240,17 @@ macro_rules! define_set_builder {
 /// by setting the default flags (which can be overridden in the expression
 /// itself) or setting various limits.
 pub struct RegexSetBuilder(RegexOptions);
 
 impl RegexSetBuilder {
     /// Create a new regular expression builder with the given pattern.
     ///
     /// If the pattern is invalid, then an error will be returned when
-    /// `compile` is called.
+    /// `build` is called.
     pub fn new<I, S>(patterns: I) -> RegexSetBuilder
             where S: AsRef<str>, I: IntoIterator<Item=S> {
         let mut builder = RegexSetBuilder(RegexOptions::default());
         for pat in patterns {
             builder.0.pats.push(pat.as_ref().to_owned());
         }
         builder
     }
@@ -226,16 +300,36 @@ impl RegexSetBuilder {
     }
 
     /// Set the value for the Unicode (`u`) flag.
     pub fn unicode(&mut self, yes: bool) -> &mut RegexSetBuilder {
         self.0.unicode = yes;
         self
     }
 
+    /// Whether to support octal syntax or not.
+    ///
+    /// Octal syntax is a little-known way of uttering Unicode codepoints in
+    /// a regular expression. For example, `a`, `\x61`, `\u0061` and
+    /// `\141` are all equivalent regular expressions, where the last example
+    /// shows octal syntax.
+    ///
+    /// While supporting octal syntax isn't in and of itself a problem, it does
+    /// make good error messages harder. That is, in PCRE based regex engines,
+    /// syntax like `\0` invokes a backreference, which is explicitly
+    /// unsupported in Rust's regex engine. However, many users expect it to
+    /// be supported. Therefore, when octal support is disabled, the error
+    /// message will explicitly mention that backreferences aren't supported.
+    ///
+    /// Octal syntax is disabled by default.
+    pub fn octal(&mut self, yes: bool) -> &mut RegexSetBuilder {
+        self.0.octal = yes;
+        self
+    }
+
     /// Set the approximate size limit of the compiled regular expression.
     ///
     /// This roughly corresponds to the number of bytes occupied by a single
     /// compiled program. If the program exceeds this number, then a
     /// compilation error is returned.
     pub fn size_limit(&mut self, limit: usize) -> &mut RegexSetBuilder {
         self.0.size_limit = limit;
         self
@@ -249,15 +343,46 @@ impl RegexSetBuilder {
     /// Note that this is a *per thread* limit. There is no way to set a global
     /// limit. In particular, if a regex is used from multiple threads
     /// simultaneously, then each thread may use up to the number of bytes
     /// specified here.
     pub fn dfa_size_limit(&mut self, limit: usize) -> &mut RegexSetBuilder {
         self.0.dfa_size_limit = limit;
         self
     }
+
+    /// Set the nesting limit for this parser.
+    ///
+    /// The nesting limit controls how deep the abstract syntax tree is allowed
+    /// to be. If the AST exceeds the given limit (e.g., with too many nested
+    /// groups), then an error is returned by the parser.
+    ///
+    /// The purpose of this limit is to act as a heuristic to prevent stack
+    /// overflow for consumers that do structural induction on an `Ast` using
+    /// explicit recursion. While this crate never does this (instead using
+    /// constant stack space and moving the call stack to the heap), other
+    /// crates may.
+    ///
+    /// This limit is not checked until the entire Ast is parsed. Therefore,
+    /// if callers want to put a limit on the amount of heap space used, then
+    /// they should impose a limit on the length, in bytes, of the concrete
+    /// pattern string. In particular, this is viable since this parser
+    /// implementation will limit itself to heap space proportional to the
+    /// length of the pattern string.
+    ///
+    /// Note that a nest limit of `0` will return a nest limit error for most
+    /// patterns but not all. For example, a nest limit of `0` permits `a` but
+    /// not `ab`, since `ab` requires a concatenation, which results in a nest
+    /// depth of `1`. In general, a nest limit is not something that manifests
+    /// in an obvious way in the concrete syntax; therefore, it should not be
+    /// used in a granular way.
+    pub fn nest_limit(&mut self, limit: u32) -> &mut RegexSetBuilder {
+        self.0.nest_limit = limit;
+        self
+    }
+
 }
         }
     }
 }
 
 define_set_builder!(set_bytes, bytes, false);
 define_set_builder!(set_unicode, unicode, true);
--- a/third_party/rust/regex/src/re_bytes.rs
+++ b/third_party/rust/regex/src/re_bytes.rs
@@ -245,17 +245,17 @@ impl Regex {
     /// ```
     ///
     /// Here we name the capture groups, which we can access with the `name`
     /// method or the `Index` notation with a `&str`. Note that the named
     /// capture groups are still accessible with `get` or the `Index` notation
     /// with a `usize`.
     ///
     /// The `0`th capture group is always unnamed, so it must always be
-    /// accessed with `at(0)` or `[0]`.
+    /// accessed with `get(0)` or `[0]`.
     pub fn captures<'t>(&self, text: &'t [u8]) -> Option<Captures<'t>> {
         let mut locs = self.locations();
         self.read_captures_at(&mut locs, text, 0).map(|_| Captures {
             text: text,
             locs: locs,
             named_groups: self.0.capture_name_idx().clone(),
         })
     }
@@ -489,29 +489,30 @@ impl Regex {
     /// capturing group matches in the replacement text.
     pub fn replacen<'t, R: Replacer>(
         &self,
         text: &'t [u8],
         limit: usize,
         mut rep: R,
     ) -> Cow<'t, [u8]> {
         if let Some(rep) = rep.no_expansion() {
+            let mut it = self.find_iter(text).enumerate().peekable();
+            if it.peek().is_none() {
+                return Cow::Borrowed(text);
+            }
             let mut new = Vec::with_capacity(text.len());
             let mut last_match = 0;
-            for (i, m) in self.find_iter(text).enumerate() {
+            for (i, m) in it {
                 if limit > 0 && i >= limit {
                     break
                 }
                 new.extend_from_slice(&text[last_match..m.start()]);
                 new.extend_from_slice(&rep);
                 last_match = m.end();
             }
-            if last_match == 0 {
-                return Cow::Borrowed(text);
-            }
             new.extend_from_slice(&text[last_match..]);
             return Cow::Owned(new);
         }
 
         // The slower path, which we use if the replacement needs access to
         // capture groups.
         let mut it = self.captures_iter(text).enumerate().peekable();
         if it.peek().is_none() {
@@ -822,18 +823,18 @@ impl<'t> Captures<'t> {
     /// The first match always corresponds to the overall match of the regex.
     pub fn iter<'c>(&'c self) -> SubCaptureMatches<'c, 't> {
         SubCaptureMatches {
             caps: self,
             it: self.locs.iter(),
         }
     }
 
-    /// Expands all instances of `$name` in `text` to the corresponding capture
-    /// group `name`, and writes them to the `dst` buffer given.
+    /// Expands all instances of `$name` in `replacement` to the corresponding
+    /// capture group `name`, and writes them to the `dst` buffer given.
     ///
     /// `name` may be an integer corresponding to the index of the
     /// capture group (counted by order of opening parenthesis where `0` is the
     /// entire match) or it can be a name (consisting of letters, digits or
     /// underscores) corresponding to a named capture group.
     ///
     /// If `name` isn't a valid capture group (whether the name doesn't exist
     /// or isn't a valid index), then it is replaced with the empty string.
@@ -884,17 +885,17 @@ impl<'c, 't> fmt::Debug for CapturesDebu
 
         // We'd like to show something nice here, even if it means an
         // allocation to build a reverse index.
         let slot_to_name: HashMap<&usize, &String> =
             self.0.named_groups.iter().map(|(a, b)| (b, a)).collect();
         let mut map = f.debug_map();
         for (slot, m) in self.0.locs.iter().enumerate() {
             let m = m.map(|(s, e)| escape_bytes(&self.0.text[s..e]));
-            if let Some(ref name) = slot_to_name.get(&slot) {
+            if let Some(name) = slot_to_name.get(&slot) {
                 map.entry(&name, &m);
             } else {
                 map.entry(&slot, &m);
             }
         }
         map.finish()
     }
 }
@@ -984,48 +985,88 @@ pub trait Replacer {
     /// When doing replacements, if access to `Captures` is not needed (e.g.,
     /// the replacement byte string does not need `$` expansion), then it can
     /// be beneficial to avoid finding sub-captures.
     ///
     /// In general, this is called once for every call to `replacen`.
     fn no_expansion<'r>(&'r mut self) -> Option<Cow<'r, [u8]>> {
         None
     }
+
+    /// Return a `Replacer` that borrows and wraps this `Replacer`.
+    ///
+    /// This is useful when you want to take a generic `Replacer` (which might
+    /// not be cloneable) and use it without consuming it, so it can be used
+    /// more than once.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::bytes::{Regex, Replacer};
+    ///
+    /// fn replace_all_twice<R: Replacer>(
+    ///     re: Regex,
+    ///     src: &[u8],
+    ///     mut rep: R,
+    /// ) -> Vec<u8> {
+    ///     let dst = re.replace_all(src, rep.by_ref());
+    ///     let dst = re.replace_all(&dst, rep.by_ref());
+    ///     dst.into_owned()
+    /// }
+    /// ```
+    fn by_ref<'r>(&'r mut self) -> ReplacerRef<'r, Self> {
+        ReplacerRef(self)
+    }
+}
+
+/// By-reference adaptor for a `Replacer`
+///
+/// Returned by [`Replacer::by_ref`](trait.Replacer.html#method.by_ref).
+#[derive(Debug)]
+pub struct ReplacerRef<'a, R: ?Sized + 'a>(&'a mut R);
+
+impl<'a, R: Replacer + ?Sized + 'a> Replacer for ReplacerRef<'a, R> {
+    fn replace_append(&mut self, caps: &Captures, dst: &mut Vec<u8>) {
+        self.0.replace_append(caps, dst)
+    }
+    fn no_expansion<'r>(&'r mut self) -> Option<Cow<'r, [u8]>> {
+        self.0.no_expansion()
+    }
 }
 
 impl<'a> Replacer for &'a [u8] {
     fn replace_append(&mut self, caps: &Captures, dst: &mut Vec<u8>) {
         caps.expand(*self, dst);
     }
 
-    fn no_expansion<'r>(&'r mut self) -> Option<Cow<'r, [u8]>> {
+    fn no_expansion(&mut self) -> Option<Cow<[u8]>> {
         match memchr(b'$', *self) {
             Some(_) => None,
             None => Some(Cow::Borrowed(*self)),
         }
     }
 }
 
 impl<F> Replacer for F where F: FnMut(&Captures) -> Vec<u8> {
     fn replace_append(&mut self, caps: &Captures, dst: &mut Vec<u8>) {
         dst.extend_from_slice(&(*self)(caps));
     }
 }
 
-/// NoExpand indicates literal byte string replacement.
+/// `NoExpand` indicates literal byte string replacement.
 ///
 /// It can be used with `replace` and `replace_all` to do a literal byte string
 /// replacement without expanding `$name` to their corresponding capture
 /// groups. This can be both convenient (to avoid escaping `$`, for example)
 /// and performant (since capture groups don't need to be found).
 ///
 /// `'t` is the lifetime of the literal text.
 pub struct NoExpand<'t>(pub &'t [u8]);
 
 impl<'t> Replacer for NoExpand<'t> {
     fn replace_append(&mut self, _: &Captures, dst: &mut Vec<u8>) {
         dst.extend_from_slice(self.0);
     }
 
-    fn no_expansion<'r>(&'r mut self) -> Option<Cow<'r, [u8]>> {
+    fn no_expansion(&mut self) -> Option<Cow<[u8]>> {
         Some(Cow::Borrowed(self.0))
     }
 }
--- a/third_party/rust/regex/src/re_set.rs
+++ b/third_party/rust/regex/src/re_set.rs
@@ -244,16 +244,20 @@ impl SetMatches {
     }
 
     /// The total number of regexes in the set that created these matches.
     pub fn len(&self) -> usize {
         self.matches.len()
     }
 
     /// Returns an iterator over indexes in the regex that matched.
+    ///
+    /// This will always produce matches in ascending order of index, where
+    /// the index corresponds to the index of the regex that matched with
+    /// respect to its position when initially building the set.
     pub fn iter(&self) -> SetMatchesIter {
         SetMatchesIter((&*self.matches).into_iter().enumerate())
     }
 }
 
 impl IntoIterator for SetMatches {
     type IntoIter = SetMatchesIntoIter;
     type Item = usize;
@@ -268,16 +272,20 @@ impl<'a> IntoIterator for &'a SetMatches
     type Item = usize;
 
     fn into_iter(self) -> Self::IntoIter {
         self.iter()
     }
 }
 
 /// An owned iterator over the set of matches from a regex set.
+///
+/// This will always produce matches in ascending order of index, where the
+/// index corresponds to the index of the regex that matched with respect to
+/// its position when initially building the set.
 pub struct SetMatchesIntoIter(iter::Enumerate<vec::IntoIter<bool>>);
 
 impl Iterator for SetMatchesIntoIter {
     type Item = usize;
 
     fn next(&mut self) -> Option<usize> {
         loop {
             match self.0.next() {
@@ -299,16 +307,20 @@ impl DoubleEndedIterator for SetMatchesI
             }
         }
     }
 }
 
 /// A borrowed iterator over the set of matches from a regex set.
 ///
 /// The lifetime `'a` refers to the lifetime of a `SetMatches` value.
+///
+/// This will always produce matches in ascending order of index, where the
+/// index corresponds to the index of the regex that matched with respect to
+/// its position when initially building the set.
 #[derive(Clone)]
 pub struct SetMatchesIter<'a>(iter::Enumerate<slice::Iter<'a, bool>>);
 
 impl<'a> Iterator for SetMatchesIter<'a> {
     type Item = usize;
 
     fn next(&mut self) -> Option<usize> {
         loop {
--- a/third_party/rust/regex/src/re_trait.rs
+++ b/third_party/rust/regex/src/re_trait.rs
@@ -32,17 +32,17 @@ impl Locations {
             _ => None,
         }
     }
 
     /// Creates an iterator of all the capture group positions in order of
     /// appearance in the regular expression. Positions are byte indices
     /// in terms of the original string matched.
     pub fn iter(&self) -> SubCapturesPosIter {
-        SubCapturesPosIter { idx: 0, locs: &self }
+        SubCapturesPosIter { idx: 0, locs: self }
     }
 
     /// Returns the total number of capturing groups.
     ///
     /// This is always at least `1` since every regex has at least `1`
     /// capturing group that corresponds to the entire match.
     pub fn len(&self) -> usize {
         self.0.len() / 2
@@ -79,17 +79,17 @@ impl<'c> Iterator for SubCapturesPosIter
                 Some(Some((s, e)))
             }
         };
         self.idx += 1;
         x
     }
 }
 
-/// RegularExpression describes types that can implement regex searching.
+/// `RegularExpression` describes types that can implement regex searching.
 ///
 /// This trait is my attempt at reducing code duplication and to standardize
 /// the internal API. Specific duplication that is avoided are the `find`
 /// and `capture` iterators, which are slightly tricky.
 ///
 /// It's not clear whether this trait is worth it, and it also isn't
 /// clear whether it's useful as a public trait or not. Methods like
 /// `next_after_empty` reek of bad design, but the rest of the methods seem
@@ -143,34 +143,34 @@ pub trait RegularExpression: Sized {
         &self,
         locs: &mut Locations,
         text: &Self::Text,
         start: usize,
     ) -> Option<(usize, usize)>;
 
     /// Returns an iterator over all non-overlapping successive leftmost-first
     /// matches.
-    fn find_iter<'t>(
+    fn find_iter(
         self,
-        text: &'t Self::Text,
-    ) -> Matches<'t, Self> {
+        text: &Self::Text,
+    ) -> Matches<Self> {
         Matches {
             re: self,
             text: text,
             last_end: 0,
             last_match: None,
         }
     }
 
     /// Returns an iterator over all non-overlapping successive leftmost-first
     /// matches with captures.
-    fn captures_iter<'t>(
+    fn captures_iter(
         self,
-        text: &'t Self::Text,
-    ) -> CaptureMatches<'t, Self> {
+        text: &Self::Text,
+    ) -> CaptureMatches<Self> {
         CaptureMatches(self.find_iter(text))
     }
 }
 
 /// An iterator over all non-overlapping successive leftmost-first matches.
 pub struct Matches<'t, R> where R: RegularExpression, R::Text: 't {
     re: R,
     text: &'t R::Text,
@@ -201,17 +201,17 @@ impl<'t, R> Iterator for Matches<'t, R>
         let (s, e) = match self.re.find_at(self.text, self.last_end) {
             None => return None,
             Some((s, e)) => (s, e),
         };
         if s == e {
             // This is an empty match. To ensure we make progress, start
             // the next search at the smallest possible starting position
             // of the next match following this one.
-            self.last_end = self.re.next_after_empty(&self.text, e);
+            self.last_end = self.re.next_after_empty(self.text, e);
             // Don't accept empty matches immediately following a match.
             // Just move on to the next match.
             if Some(e) == self.last_match {
                 return self.next();
             }
         } else {
             self.last_end = e;
         }
@@ -250,17 +250,17 @@ impl<'t, R> Iterator for CaptureMatches<
             &mut locs,
             self.0.text,
             self.0.last_end,
         ) {
             None => return None,
             Some((s, e)) => (s, e),
         };
         if s == e {
-            self.0.last_end = self.0.re.next_after_empty(&self.0.text, e);
+            self.0.last_end = self.0.re.next_after_empty(self.0.text, e);
             if Some(e) == self.0.last_match {
                 return self.next();
             }
         } else {
             self.0.last_end = e;
         }
         self.0.last_match = Some(e);
         Some(locs)
--- a/third_party/rust/regex/src/re_unicode.rs
+++ b/third_party/rust/regex/src/re_unicode.rs
@@ -17,17 +17,16 @@ use std::sync::Arc;
 
 use memchr::memchr;
 use syntax;
 
 use error::Error;
 use exec::{Exec, ExecNoSyncStr};
 use expand::expand_str;
 use re_builder::unicode::RegexBuilder;
-use re_plugin::Plugin;
 use re_trait::{self, RegularExpression, Locations, SubCapturesPosIter};
 
 /// Escapes all regular expression meta characters in `text`.
 ///
 /// The string returned may be safely used as a literal in a regular
 /// expression.
 pub fn escape(text: &str) -> String {
     syntax::escape(text)
@@ -68,16 +67,22 @@ impl<'t> Match<'t> {
         Match {
             text: haystack,
             start: start,
             end: end,
         }
     }
 }
 
+impl<'t> From<Match<'t>> for &'t str {
+    fn from(m: Match<'t>) -> &'t str {
+        m.as_str()
+    }
+}
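+
+// Illustrative use of the conversion above (assuming `re` found a match in
+// `text`):
+//
+//     let matched: &str = re.find(text).unwrap().into();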
+
 /// A compiled regular expression for matching Unicode strings.
 ///
 /// It is represented as either a sequence of bytecode instructions (dynamic)
 /// or as a specialized Rust function (native). It can be used to search, split
 /// or replace text. All searching is done with an implicit `.*?` at the
 /// beginning and end of an expression. To force an expression to match the
 /// whole string (or a prefix or a suffix), you must use an anchor like `^` or
 /// `$` (or `\A` and `\z`).
@@ -124,31 +129,17 @@ impl<'t> Match<'t> {
 ///
 /// assert!(haystack.contains(&re));
 /// assert_eq!(haystack.find(&re), Some(1));
 /// assert_eq!(haystack.match_indices(&re).collect::<Vec<_>>(),
 ///            vec![(1, 4), (5, 8)]);
 /// assert_eq!(haystack.split(&re).collect::<Vec<_>>(), vec!["a", "b", "c"]);
 /// ```
 #[derive(Clone)]
-pub struct Regex(#[doc(hidden)] pub _Regex);
-
-#[derive(Clone)]
-#[doc(hidden)]
-pub enum _Regex {
-    // The representation of `Regex` is exported to support the `regex!`
-    // syntax extension. Do not rely on it.
-    //
-    // See the comments for the `internal` module in `lib.rs` for a more
-    // detailed explanation for what `regex!` requires.
-    #[doc(hidden)]
-    Dynamic(Exec),
-    #[doc(hidden)]
-    Plugin(Plugin),
-}
+pub struct Regex(Exec);
 
 impl fmt::Display for Regex {
     /// Shows the original regular expression.
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         write!(f, "{}", self.as_str())
     }
 }
 
@@ -157,17 +148,17 @@ impl fmt::Debug for Regex {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         fmt::Display::fmt(self, f)
     }
 }
 
 #[doc(hidden)]
 impl From<Exec> for Regex {
     fn from(exec: Exec) -> Regex {
-        Regex(_Regex::Dynamic(exec))
+        Regex(exec)
     }
 }
 
 impl FromStr for Regex {
     type Err = Error;
 
     /// Attempts to parse a string into a regular expression
     fn from_str(s: &str) -> Result<Regex, Error> {
@@ -246,26 +237,17 @@ impl Regex {
     /// # fn main() {
     /// let text = "Retroactively relinquishing remunerations is reprehensible.";
     /// for mat in Regex::new(r"\b\w{13}\b").unwrap().find_iter(text) {
     ///     println!("{:?}", mat);
     /// }
     /// # }
     /// ```
     pub fn find_iter<'r, 't>(&'r self, text: &'t str) -> Matches<'r, 't> {
-        match self.0 {
-            _Regex::Dynamic(ref exec) => {
-                let it = exec.searcher_str().find_iter(text);
-                Matches(MatchesInner::Dynamic(it))
-            }
-            _Regex::Plugin(ref plug) => {
-                let it = plug.find_iter(text);
-                Matches(MatchesInner::Plugin(it))
-            }
-        }
+        Matches(self.0.searcher_str().find_iter(text))
     }
 
     /// Returns the capture groups corresponding to the leftmost-first
     /// match in `text`. Capture group `0` always corresponds to the entire
     /// match. If no match is found, then `None` is returned.
     ///
     /// You should only use `captures` if you need access to the location of
     /// capturing group matches. Otherwise, `find` is faster for discovering
@@ -320,23 +302,23 @@ impl Regex {
     /// ```
     ///
     /// Here we name the capture groups, which we can access with the `name`
     /// method or the `Index` notation with a `&str`. Note that the named
     /// capture groups are still accessible with `get` or the `Index` notation
     /// with a `usize`.
     ///
     /// The `0`th capture group is always unnamed, so it must always be
-    /// accessed with `at(0)` or `[0]`.
+    /// accessed with `get(0)` or `[0]`.
     pub fn captures<'t>(&self, text: &'t str) -> Option<Captures<'t>> {
         let mut locs = self.locations();
         self.read_captures_at(&mut locs, text, 0).map(|_| Captures {
             text: text,
             locs: locs,
-            named_groups: NamedGroups::from_regex(self)
+            named_groups: self.0.capture_name_idx().clone(),
         })
     }
 
     /// Returns an iterator over all the non-overlapping capture groups matched
     /// in `text`. This is operationally the same as `find_iter`, except it
     /// yields information about capturing group matches.
     ///
     /// # Example
@@ -359,26 +341,17 @@ impl Regex {
     /// // Movie: The Wizard of Oz, Released: 1939
     /// // Movie: M, Released: 1931
     /// # }
     /// ```
     pub fn captures_iter<'r, 't>(
         &'r self,
         text: &'t str,
     ) -> CaptureMatches<'r, 't> {
-        match self.0 {
-            _Regex::Dynamic(ref exec) => {
-                let it = exec.searcher_str().captures_iter(text);
-                CaptureMatches(CaptureMatchesInner::Dynamic(it))
-            }
-            _Regex::Plugin(ref plug) => {
-                let it = plug.captures_iter(text);
-                CaptureMatches(CaptureMatchesInner::Plugin(it))
-            }
-        }
+        CaptureMatches(self.0.searcher_str().captures_iter(text))
     }
 
     /// Returns an iterator of substrings of `text` delimited by a match of the
     /// regular expression. Namely, each element of the iterator corresponds to
     /// text that *isn't* matched by the regular expression.
     ///
     /// This method will *not* copy the text given.
     ///
@@ -572,29 +545,30 @@ impl Regex {
         // difference:
         //
         //   1) We use `find_iter` instead of `captures_iter`. Not asking for
         //      captures generally makes the regex engines faster.
         //   2) We don't need to look up all of the capture groups and do
         //      replacements inside the replacement string. We just push it
         //      at each match and be done with it.
         if let Some(rep) = rep.no_expansion() {
+            let mut it = self.find_iter(text).enumerate().peekable();
+            if it.peek().is_none() {
+                return Cow::Borrowed(text);
+            }
             let mut new = String::with_capacity(text.len());
             let mut last_match = 0;
-            for (i, m) in self.find_iter(text).enumerate() {
+            for (i, m) in it {
                 if limit > 0 && i >= limit {
                     break
                 }
                 new.push_str(&text[last_match..m.start()]);
                 new.push_str(&rep);
                 last_match = m.end();
             }
-            if last_match == 0 {
-                return Cow::Borrowed(text);
-            }
             new.push_str(&text[last_match..]);
             return Cow::Owned(new);
         }
 
         // The slower path, which we use if the replacement needs access to
         // capture groups.
         let mut it = self.captures_iter(text).enumerate().peekable();
         if it.peek().is_none() {
@@ -651,22 +625,17 @@ impl Regex {
     /// context into consideration. For example, the `\A` anchor can only
     /// match when `start == 0`.
     #[doc(hidden)]
     pub fn shortest_match_at(
         &self,
         text: &str,
         start: usize,
     ) -> Option<usize> {
-        match self.0 {
-            _Regex::Dynamic(ref exec) => {
-                exec.searcher_str().shortest_match_at(text, start)
-            }
-            _Regex::Plugin(ref plug) => plug.shortest_match_at(text, start),
-        }
+        self.0.searcher_str().shortest_match_at(text, start)
     }
 
     /// Returns the same as is_match, but starts the search at the given
     /// offset.
     ///
     /// The significance of the starting point is that it takes the surrounding
     /// context into consideration. For example, the `\A` anchor can only
     /// match when `start == 0`.
@@ -682,142 +651,103 @@ impl Regex {
     /// context into consideration. For example, the `\A` anchor can only
     /// match when `start == 0`.
     #[doc(hidden)]
     pub fn find_at<'t>(
         &self,
         text: &'t str,
         start: usize,
     ) -> Option<Match<'t>> {
-        match self.0 {
-            _Regex::Dynamic(ref exec) => {
-                exec.searcher_str().find_at(text, start).map(|(s, e)| {
-                    Match::new(text, s, e)
-                })
-            }
-            _Regex::Plugin(ref plug) => {
-                plug.find_at(text, start).map(|(s, e)| Match::new(text, s, e))
-            }
-        }
+        self.0.searcher_str().find_at(text, start).map(|(s, e)| {
+            Match::new(text, s, e)
+        })
     }
 
     /// Returns the same as captures, but starts the search at the given
     /// offset and populates the capture locations given.
     ///
     /// The significance of the starting point is that it takes the surrounding
     /// context into consideration. For example, the `\A` anchor can only
     /// match when `start == 0`.
     #[doc(hidden)]
     pub fn read_captures_at<'t>(
         &self,
         locs: &mut Locations,
         text: &'t str,
         start: usize,
     ) -> Option<Match<'t>> {
-        match self.0 {
-            _Regex::Dynamic(ref exec) => {
-                exec.searcher_str().read_captures_at(locs, text, start)
-                    .map(|(s, e)| Match::new(text, s, e))
-            }
-            _Regex::Plugin(ref plug) => {
-                plug.read_captures_at(locs, text, start)
-                    .map(|(s, e)| Match::new(text, s, e))
-            }
-        }
+        self.0
+            .searcher_str()
+            .read_captures_at(locs, text, start)
+            .map(|(s, e)| Match::new(text, s, e))
     }
 }
 
 /// Auxiliary methods.
 impl Regex {
     /// Returns the original string of this regex.
     pub fn as_str(&self) -> &str {
-        match self.0 {
-            _Regex::Dynamic(ref exec) => &exec.regex_strings()[0],
-            _Regex::Plugin(ref plug) => &plug.original,
-        }
+        &self.0.regex_strings()[0]
     }
 
     /// Returns an iterator over the capture names.
     pub fn capture_names(&self) -> CaptureNames {
-        CaptureNames(match self.0 {
-            _Regex::Plugin(ref n) => _CaptureNames::Plugin(n.names.iter()),
-            _Regex::Dynamic(ref d) => {
-                _CaptureNames::Dynamic(d.capture_names().iter())
-            }
-        })
+        CaptureNames(self.0.capture_names().iter())
     }
 
     /// Returns the number of captures.
     pub fn captures_len(&self) -> usize {
-        match self.0 {
-            _Regex::Plugin(ref n) => n.names.len(),
-            _Regex::Dynamic(ref d) => d.capture_names().len()
-        }
+        self.0.capture_names().len()
     }
 
     /// Returns an empty set of locations that can be reused in multiple calls
     /// to `read_captures`.
     #[doc(hidden)]
     pub fn locations(&self) -> Locations {
-        match self.0 {
-            _Regex::Dynamic(ref exec) => {
-                exec.searcher_str().locations()
-            }
-            _Regex::Plugin(ref plug) => plug.locations(),
-        }
+        self.0.searcher_str().locations()
     }
 }
 
 /// An iterator over the names of all possible captures.
 ///
 /// `None` indicates an unnamed capture; the first element (capture 0, the
 /// whole matched region) is always unnamed.
 ///
 /// `'r` is the lifetime of the compiled regular expression.
-pub struct CaptureNames<'r>(_CaptureNames<'r>);
-
-enum _CaptureNames<'r> {
-    Plugin(::std::slice::Iter<'r, Option<&'static str>>),
-    Dynamic(::std::slice::Iter<'r, Option<String>>)
-}
+pub struct CaptureNames<'r>(::std::slice::Iter<'r, Option<String>>);
 
 impl<'r> Iterator for CaptureNames<'r> {
     type Item = Option<&'r str>;
 
     fn next(&mut self) -> Option<Option<&'r str>> {
-        match self.0 {
-            _CaptureNames::Plugin(ref mut i) => i.next().cloned(),
-            _CaptureNames::Dynamic(ref mut i) => {
-                i.next().as_ref().map(|o| o.as_ref().map(|s| s.as_ref()))
-            }
-        }
+        self.0
+            .next()
+            .as_ref()
+            .map(|slot| slot.as_ref().map(|name| name.as_ref()))
     }
 
     fn size_hint(&self) -> (usize, Option<usize>) {
-        match self.0 {
-            _CaptureNames::Plugin(ref i)  => i.size_hint(),
-            _CaptureNames::Dynamic(ref i) => i.size_hint(),
-        }
+        self.0.size_hint()
     }
 }
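
A quick, hedged illustration of the flattened `CaptureNames` iterator through the public API (the pattern below is made up for the example):

    use regex::Regex;

    let re = Regex::new(r"(?P<year>\d{4})-(\d{2})").unwrap();
    let names: Vec<Option<&str>> = re.capture_names().collect();
    // Capture 0 (the whole match) and the unnamed group come back as `None`.
    assert_eq!(names, vec![None, Some("year"), None]);
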
 
 /// Yields all substrings delimited by a regular expression match.
 ///
 /// `'r` is the lifetime of the compiled regular expression and `'t` is the
 /// lifetime of the string being split.
 pub struct Split<'r, 't> {
     finder: Matches<'r, 't>,
     last: usize,
 }
 
 impl<'r, 't> Iterator for Split<'r, 't> {
     type Item = &'t str;
 
     fn next(&mut self) -> Option<&'t str> {
-        let text = self.finder.text();
+        let text = self.finder.0.text();
         match self.finder.next() {
             None => {
                 if self.last >= text.len() {
                     None
                 } else {
                     let s = &text[self.last..];
                     self.last = text.len();
                     Some(s)
@@ -847,92 +777,39 @@ impl<'r, 't> Iterator for SplitN<'r, 't>
     type Item = &'t str;
 
     fn next(&mut self) -> Option<&'t str> {
         if self.n == 0 {
             return None
         }
         self.n -= 1;
         if self.n == 0 {
-            let text = self.splits.finder.text();
+            let text = self.splits.finder.0.text();
             Some(&text[self.splits.last..])
         } else {
             self.splits.next()
         }
     }
 }
 
-enum NamedGroups {
-    Plugin(&'static [(&'static str, usize)]),
-    Dynamic(Arc<HashMap<String, usize>>),
-}
-
-impl NamedGroups {
-    fn from_regex(regex: &Regex) -> NamedGroups {
-        match regex.0 {
-            _Regex::Plugin(ref plug) => NamedGroups::Plugin(&plug.groups),
-            _Regex::Dynamic(ref exec) => {
-                NamedGroups::Dynamic(exec.capture_name_idx().clone())
-            }
-        }
-    }
-
-    fn pos(&self, name: &str) -> Option<usize> {
-        match *self {
-            NamedGroups::Plugin(groups) => {
-                groups.binary_search_by(|&(n, _)| n.cmp(name))
-                      .ok().map(|i| groups[i].1)
-            },
-            NamedGroups::Dynamic(ref groups) => {
-                groups.get(name).map(|i| *i)
-            },
-        }
-    }
-
-    fn iter<'n>(&'n self) -> NamedGroupsIter<'n> {
-        match *self {
-            NamedGroups::Plugin(g) => NamedGroupsIter::Plugin(g.iter()),
-            NamedGroups::Dynamic(ref g) => NamedGroupsIter::Dynamic(g.iter()),
-        }
-    }
-}
-
-enum NamedGroupsIter<'n> {
-    Plugin(::std::slice::Iter<'static, (&'static str, usize)>),
-    Dynamic(::std::collections::hash_map::Iter<'n, String, usize>),
-}
-
-impl<'n> Iterator for NamedGroupsIter<'n> {
-    type Item = (&'n str, usize);
-
-    fn next(&mut self) -> Option<Self::Item> {
-        match *self {
-            NamedGroupsIter::Plugin(ref mut it) => it.next().map(|&v| v),
-            NamedGroupsIter::Dynamic(ref mut it) => {
-                it.next().map(|(s, i)| (s.as_ref(), *i))
-            }
-        }
-    }
-}
-
 /// Captures represents a group of captured strings for a single match.
 ///
 /// The 0th capture always corresponds to the entire match. Each subsequent
 /// index corresponds to the next capture group in the regex. If a capture
 /// group is named, then the matched string is *also* available via the `name`
 /// method. (Note that the 0th capture is always unnamed and so must be
 /// accessed with the `get` method.)
 ///
 /// Positions returned from a capture group are always byte indices.
 ///
 /// `'t` is the lifetime of the matched text.
 pub struct Captures<'t> {
     text: &'t str,
     locs: Locations,
-    named_groups: NamedGroups,
+    named_groups: Arc<HashMap<String, usize>>,
 }
 
 impl<'t> Captures<'t> {
     /// Returns the match associated with the capture group at index `i`. If
     /// `i` does not correspond to a capture group, or if the capture group
     /// did not participate in the match, then `None` is returned.
     ///
     /// # Examples
@@ -952,33 +829,33 @@ impl<'t> Captures<'t> {
     /// ```
     pub fn get(&self, i: usize) -> Option<Match<'t>> {
         self.locs.pos(i).map(|(s, e)| Match::new(self.text, s, e))
     }
 
     /// Returns the match for the capture group named `name`. If `name` isn't a
     /// valid capture group or didn't match anything, then `None` is returned.
     pub fn name(&self, name: &str) -> Option<Match<'t>> {
-        self.named_groups.pos(name).and_then(|i| self.get(i))
+        self.named_groups.get(name).and_then(|&i| self.get(i))
     }
 
     /// An iterator that yields all capturing matches in the order in which
     /// they appear in the regex. If a particular capture group didn't
     /// participate in the match, then `None` is yielded for that capture.
     ///
     /// The first match always corresponds to the overall match of the regex.
     pub fn iter<'c>(&'c self) -> SubCaptureMatches<'c, 't> {
         SubCaptureMatches {
             caps: self,
             it: self.locs.iter(),
         }
     }
 
-    /// Expands all instances of `$name` in `text` to the corresponding capture
-    /// group `name`, and writes them to the `dst` buffer given.
+    /// Expands all instances of `$name` in `replacement` to the corresponding
+    /// capture group `name`, and writes them to the `dst` buffer given.
     ///
     /// `name` may be an integer corresponding to the index of the
     /// capture group (counted by order of opening parenthesis where `0` is the
     /// entire match) or it can be a name (consisting of letters, digits or
     /// underscores) corresponding to a named capture group.
     ///
     /// If `name` isn't a valid capture group (whether the name doesn't exist
     /// or isn't a valid index), then it is replaced with the empty string.
@@ -1009,22 +886,22 @@ impl<'t> fmt::Debug for Captures<'t> {
 }
 
 struct CapturesDebug<'c, 't: 'c>(&'c Captures<'t>);
 
 impl<'c, 't> fmt::Debug for CapturesDebug<'c, 't> {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         // We'd like to show something nice here, even if it means an
         // allocation to build a reverse index.
-        let slot_to_name: HashMap<usize, &str> =
+        let slot_to_name: HashMap<&usize, &String> =
             self.0.named_groups.iter().map(|(a, b)| (b, a)).collect();
         let mut map = f.debug_map();
         for (slot, m) in self.0.locs.iter().enumerate() {
             let m = m.map(|(s, e)| &self.0.text[s..e]);
-            if let Some(ref name) = slot_to_name.get(&slot) {
+            if let Some(name) = slot_to_name.get(&slot) {
                 map.entry(&name, &m);
             } else {
                 map.entry(&slot, &m);
             }
         }
         map.finish()
     }
 }
@@ -1095,83 +972,45 @@ impl<'c, 't> Iterator for SubCaptureMatc
 
 /// An iterator that yields all non-overlapping capture groups matching a
 /// particular regular expression.
 ///
 /// The iterator stops when no more matches can be found.
 ///
 /// `'r` is the lifetime of the compiled regular expression and `'t` is the
 /// lifetime of the matched string.
-pub struct CaptureMatches<'r, 't>(CaptureMatchesInner<'r, 't>);
-
-enum CaptureMatchesInner<'r, 't> {
-    Dynamic(re_trait::CaptureMatches<'t, ExecNoSyncStr<'r>>),
-    Plugin(re_trait::CaptureMatches<'t, Plugin>),
-}
+pub struct CaptureMatches<'r, 't>(re_trait::CaptureMatches<'t, ExecNoSyncStr<'r>>);
 
 impl<'r, 't> Iterator for CaptureMatches<'r, 't> {
     type Item = Captures<'t>;
 
     fn next(&mut self) -> Option<Captures<'t>> {
-        match self.0 {
-            CaptureMatchesInner::Dynamic(ref mut it) => {
-                let named = it.regex().capture_name_idx().clone();
-                it.next().map(|locs| Captures {
-                    text: it.text(),
-                    locs: locs,
-                    named_groups: NamedGroups::Dynamic(named),
-                })
-            }
-            CaptureMatchesInner::Plugin(ref mut it) => {
-                it.next().map(|locs| Captures {
-                    text: it.text(),
-                    locs: locs,
-                    named_groups: NamedGroups::Plugin(it.regex().groups),
-                })
-            }
-        }
+        self.0.next().map(|locs| Captures {
+            text: self.0.text(),
+            locs: locs,
+            named_groups: self.0.regex().capture_name_idx().clone(),
+        })
     }
 }
 
 /// An iterator over all non-overlapping matches for a particular string.
 ///
 /// The iterator yields a `Match` value. The iterator stops when no more
 /// matches can be found.
 ///
 /// `'r` is the lifetime of the compiled regular expression and `'t` is the
 /// lifetime of the matched string.
-pub struct Matches<'r, 't>(MatchesInner<'r, 't>);
-
-enum MatchesInner<'r, 't> {
-    Dynamic(re_trait::Matches<'t, ExecNoSyncStr<'r>>),
-    Plugin(re_trait::Matches<'t, Plugin>),
-}
-
-impl<'r, 't> Matches<'r, 't> {
-    fn text(&self) -> &'t str {
-        match self.0 {
-            MatchesInner::Dynamic(ref it) => it.text(),
-            MatchesInner::Plugin(ref it) => it.text(),
-        }
-    }
-}
+pub struct Matches<'r, 't>(re_trait::Matches<'t, ExecNoSyncStr<'r>>);
 
 impl<'r, 't> Iterator for Matches<'r, 't> {
     type Item = Match<'t>;
 
     fn next(&mut self) -> Option<Match<'t>> {
-        let text = self.text();
-        match self.0 {
-            MatchesInner::Dynamic(ref mut it) => {
-                it.next().map(|(s, e)| Match::new(text, s, e))
-            }
-            MatchesInner::Plugin(ref mut it) => {
-                it.next().map(|(s, e)| Match::new(text, s, e))
-            }
-        }
+        let text = self.0.text();
+        self.0.next().map(|(s, e)| Match::new(text, s, e))
     }
 }
 
 /// Replacer describes types that can be used to replace matches in a string.
 ///
 /// In general, users of this crate shouldn't need to implement this trait,
 /// since implementations are already provided for `&str` and
 /// `FnMut(&Captures) -> String`, which covers most use cases.
@@ -1190,48 +1029,88 @@ pub trait Replacer {
     /// When doing replacements, if access to `Captures` is not needed (e.g.,
     /// the replacement byte string does not need `$` expansion), then it can
     /// be beneficial to avoid finding sub-captures.
     ///
     /// In general, this is called once for every call to `replacen`.
     fn no_expansion<'r>(&'r mut self) -> Option<Cow<'r, str>> {
         None
     }
+
+    /// Return a `Replacer` that borrows and wraps this `Replacer`.
+    ///
+    /// This is useful when you want to take a generic `Replacer` (which might
+    /// not be cloneable) and use it without consuming it, so it can be used
+    /// more than once.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use regex::{Regex, Replacer};
+    ///
+    /// fn replace_all_twice<R: Replacer>(
+    ///     re: Regex,
+    ///     src: &str,
+    ///     mut rep: R,
+    /// ) -> String {
+    ///     let dst = re.replace_all(src, rep.by_ref());
+    ///     let dst = re.replace_all(&dst, rep.by_ref());
+    ///     dst.into_owned()
+    /// }
+    /// ```
+    fn by_ref<'r>(&'r mut self) -> ReplacerRef<'r, Self> {
+        ReplacerRef(self)
+    }
+}
+
+/// By-reference adaptor for a `Replacer`
+///
+/// Returned by [`Replacer::by_ref`](trait.Replacer.html#method.by_ref).
+#[derive(Debug)]
+pub struct ReplacerRef<'a, R: ?Sized + 'a>(&'a mut R);
+
+impl<'a, R: Replacer + ?Sized + 'a> Replacer for ReplacerRef<'a, R> {
+    fn replace_append(&mut self, caps: &Captures, dst: &mut String) {
+        self.0.replace_append(caps, dst)
+    }
+    fn no_expansion(&mut self) -> Option<Cow<str>> {
+        self.0.no_expansion()
+    }
 }
 
 impl<'a> Replacer for &'a str {
     fn replace_append(&mut self, caps: &Captures, dst: &mut String) {
         caps.expand(*self, dst);
     }
 
-    fn no_expansion<'r>(&'r mut self) -> Option<Cow<'r, str>> {
+    fn no_expansion(&mut self) -> Option<Cow<str>> {
         match memchr(b'$', self.as_bytes()) {
             Some(_) => None,
             None => Some(Cow::Borrowed(*self)),
         }
     }
 }
 
 impl<F> Replacer for F where F: FnMut(&Captures) -> String {
     fn replace_append(&mut self, caps: &Captures, dst: &mut String) {
         dst.push_str(&(*self)(caps));
     }
 }
 
-/// NoExpand indicates literal string replacement.
+/// `NoExpand` indicates literal string replacement.
 ///
 /// It can be used with `replace` and `replace_all` to do a literal string
 /// replacement without expanding `$name` to their corresponding capture
 /// groups. This can be both convenient (to avoid escaping `$`, for example)
 /// and performant (since capture groups don't need to be found).
 ///
 /// `'t` is the lifetime of the literal text.
 pub struct NoExpand<'t>(pub &'t str);
 
 impl<'t> Replacer for NoExpand<'t> {
     fn replace_append(&mut self, _: &Captures, dst: &mut String) {
         dst.push_str(self.0);
     }
 
-    fn no_expansion<'r>(&'r mut self) -> Option<Cow<'r, str>> {
+    fn no_expansion(&mut self) -> Option<Cow<str>> {
         Some(Cow::Borrowed(self.0))
     }
 }
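
Since `NoExpand` sits next to the new `by_ref` adaptor, a small hedged sketch of the difference between expanded and literal replacement (illustrative strings only):

    use regex::{NoExpand, Regex};

    let re = Regex::new(r"(?P<first>\w+) (?P<last>\w+)").unwrap();
    // `$name` expansion through the plain `&str` Replacer impl:
    assert_eq!(re.replace("John Smith", "$last, $first"), "Smith, John");
    // Literal replacement: no `$` expansion, and no capture lookups needed:
    assert_eq!(re.replace("John Smith", NoExpand("$last, $first")), "$last, $first");
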
--- a/third_party/rust/regex/src/utf8.rs
+++ b/third_party/rust/regex/src/utf8.rs
@@ -33,47 +33,16 @@ pub fn next_utf8(text: &[u8], i: usize) 
     } else if b <= 0b1110_1111 {
         3
     } else {
         4
     };
     i + inc
 }
 
-/// Encode the given Unicode character to `dst` as a single UTF-8 sequence.
-///
-/// If `dst` is not long enough, then `None` is returned. Otherwise, the number
-/// of bytes written is returned.
-#[allow(dead_code)]
-#[inline]
-pub fn encode_utf8(character: char, dst: &mut [u8]) -> Option<usize> {
-    let code = character as u32;
-    if code <= 0x7F && !dst.is_empty() {
-        dst[0] = code as u8;
-        Some(1)
-    } else if code <= 0x7FF && dst.len() >= 2 {
-        dst[0] = (code >> 6 & 0x1F) as u8 | TAG_TWO;
-        dst[1] = (code & 0x3F) as u8 | TAG_CONT;
-        Some(2)
-    } else if code <= 0xFFFF && dst.len() >= 3  {
-        dst[0] = (code >> 12 & 0x0F) as u8 | TAG_THREE;
-        dst[1] = (code >>  6 & 0x3F) as u8 | TAG_CONT;
-        dst[2] = (code & 0x3F) as u8 | TAG_CONT;
-        Some(3)
-    } else if dst.len() >= 4 {
-        dst[0] = (code >> 18 & 0x07) as u8 | TAG_FOUR;
-        dst[1] = (code >> 12 & 0x3F) as u8 | TAG_CONT;
-        dst[2] = (code >>  6 & 0x3F) as u8 | TAG_CONT;
-        dst[3] = (code & 0x3F) as u8 | TAG_CONT;
-        Some(4)
-    } else {
-        None
-    }
-}
-
 /// Decode a single UTF-8 sequence into a single Unicode codepoint from `src`.
 ///
 /// If no valid UTF-8 sequence could be found, then `None` is returned.
 /// Otherwise, the decoded codepoint and the number of bytes read is returned.
 /// The number of bytes read (for a valid UTF-8 sequence) is guaranteed to be
 /// 1, 2, 3 or 4.
 ///
 /// Note that a UTF-8 sequence is invalid if it is incorrect UTF-8, encodes a
@@ -144,18 +113,18 @@ pub fn decode_utf8(src: &[u8]) -> Option
                 0x10000 ... 0x10FFFF => char::from_u32(cp).map(|cp| (cp, 4)),
                 _ => None,
             }
         }
         _ => None,
     }
 }
 
-/// Like decode_utf8, but decodes the last UTF-8 sequence in `src` instead of
-/// the first.
+/// Like `decode_utf8`, but decodes the last UTF-8 sequence in `src` instead
+/// of the first.
 pub fn decode_last_utf8(src: &[u8]) -> Option<(char, usize)> {
     if src.is_empty() {
         return None;
     }
     let mut start = src.len() - 1;
     if src[start] <= 0x7F {
         return Some((src[start] as char, 1));
     }
@@ -179,71 +148,71 @@ fn is_start_byte(b: u8) -> bool {
 #[cfg(test)]
 mod tests {
     use std::str;
 
     use quickcheck::quickcheck;
 
     use super::{
         TAG_CONT, TAG_TWO, TAG_THREE, TAG_FOUR,
-        decode_utf8, decode_last_utf8, encode_utf8,
+        decode_utf8, decode_last_utf8,
     };
 
     #[test]
     fn prop_roundtrip() {
         fn p(given_cp: char) -> bool {
             let mut tmp = [0; 4];
-            let encoded_len = encode_utf8(given_cp, &mut tmp).unwrap();
+            let encoded_len = given_cp.encode_utf8(&mut tmp).len();
             let (got_cp, got_len) = decode_utf8(&tmp[..encoded_len]).unwrap();
             encoded_len == got_len && given_cp == got_cp
         }
         quickcheck(p as fn(char) -> bool)
     }
 
     #[test]
     fn prop_roundtrip_last() {
         fn p(given_cp: char) -> bool {
             let mut tmp = [0; 4];
-            let encoded_len = encode_utf8(given_cp, &mut tmp).unwrap();
+            let encoded_len = given_cp.encode_utf8(&mut tmp).len();
             let (got_cp, got_len) =
                 decode_last_utf8(&tmp[..encoded_len]).unwrap();
             encoded_len == got_len && given_cp == got_cp
         }
         quickcheck(p as fn(char) -> bool)
     }
 
     #[test]
     fn prop_encode_matches_std() {
         fn p(cp: char) -> bool {
             let mut got = [0; 4];
-            let n = encode_utf8(cp, &mut got).unwrap();
+            let n = cp.encode_utf8(&mut got).len();
             let expected = cp.to_string();
             &got[..n] == expected.as_bytes()
         }
         quickcheck(p as fn(char) -> bool)
     }
 
     #[test]
     fn prop_decode_matches_std() {
         fn p(given_cp: char) -> bool {
             let mut tmp = [0; 4];
-            let n = encode_utf8(given_cp, &mut tmp).unwrap();
+            let n = given_cp.encode_utf8(&mut tmp).len();
             let (got_cp, _) = decode_utf8(&tmp[..n]).unwrap();
             let expected_cp =
                 str::from_utf8(&tmp[..n]).unwrap().chars().next().unwrap();
             got_cp == expected_cp
         }
         quickcheck(p as fn(char) -> bool)
     }
 
     #[test]
     fn prop_decode_last_matches_std() {
         fn p(given_cp: char) -> bool {
             let mut tmp = [0; 4];
-            let n = encode_utf8(given_cp, &mut tmp).unwrap();
+            let n = given_cp.encode_utf8(&mut tmp).len();
             let (got_cp, _) = decode_last_utf8(&tmp[..n]).unwrap();
             let expected_cp =
                 str::from_utf8(&tmp[..n]).unwrap()
                     .chars().rev().next().unwrap();
             got_cp == expected_cp
         }
         quickcheck(p as fn(char) -> bool)
     }
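
The removed `encode_utf8` helper is replaced throughout these tests by `char::encode_utf8` from std; a minimal sketch of that call, assuming nothing beyond the standard library:

    let mut buf = [0u8; 4];
    let n = '☃'.encode_utf8(&mut buf).len();
    assert_eq!(n, 3);
    assert_eq!(&buf[..n], "☃".as_bytes());
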
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex/src/vector/avx2.rs
@@ -0,0 +1,187 @@
+#![allow(dead_code)]
+
+use std::arch::x86_64::*;
+use std::fmt;
+
+#[derive(Clone, Copy, Debug)]
+pub struct AVX2VectorBuilder(());
+
+impl AVX2VectorBuilder {
+    pub fn new() -> Option<AVX2VectorBuilder> {
+        if is_x86_feature_detected!("avx2") {
+            Some(AVX2VectorBuilder(()))
+        } else {
+            None
+        }
+    }
+
+    /// Create a new u8x32 AVX2 vector where all of the bytes are set to
+    /// the given value.
+    #[inline]
+    pub fn u8x32_splat(self, n: u8) -> u8x32 {
+        // Safe because we know AVX2 is enabled.
+        unsafe { u8x32::splat(n) }
+    }
+
+    /// Load 32 bytes from the given slice, with bounds checks.
+    #[inline]
+    pub fn u8x32_load_unaligned(self, slice: &[u8]) -> u8x32 {
+        // Safe because we know AVX2 is enabled.
+        unsafe { u8x32::load_unaligned(slice) }
+    }
+
+    /// Load 32 bytes from the given slice, without bounds checks.
+    #[inline]
+    pub unsafe fn u8x32_load_unchecked_unaligned(self, slice: &[u8]) -> u8x32 {
+        // Safe because we know AVX2 is enabled, but still unsafe
+        // because we aren't doing bounds checks.
+        u8x32::load_unchecked_unaligned(slice)
+    }
+
+    /// Load 32 bytes from the given slice, with bound and alignment checks.
+    #[inline]
+    pub fn u8x32_load(self, slice: &[u8]) -> u8x32 {
+        // Safe because we know AVX2 is enabled.
+        unsafe { u8x32::load(slice) }
+    }
+
+    /// Load 32 bytes from the given slice, without bound or alignment checks.
+    #[inline]
+    pub unsafe fn u8x32_load_unchecked(self, slice: &[u8]) -> u8x32 {
+        // Safe because we know AVX2 is enabled, but still unsafe
+        // because we aren't doing bounds checks.
+        u8x32::load_unchecked(slice)
+    }
+}
+
+#[derive(Clone, Copy)]
+#[allow(non_camel_case_types)]
+pub union u8x32 {
+    vector: __m256i,
+    bytes: [u8; 32],
+}
+
+impl u8x32 {
+    #[inline]
+    unsafe fn splat(n: u8) -> u8x32 {
+        u8x32 { vector: _mm256_set1_epi8(n as i8) }
+    }
+
+    #[inline]
+    unsafe fn load_unaligned(slice: &[u8]) -> u8x32 {
+        assert!(slice.len() >= 32);
+        u8x32::load_unchecked_unaligned(slice)
+    }
+
+    #[inline]
+    unsafe fn load_unchecked_unaligned(slice: &[u8]) -> u8x32 {
+        let p = slice.as_ptr() as *const u8 as *const __m256i;
+        u8x32 { vector: _mm256_loadu_si256(p) }
+    }
+
+    #[inline]
+    unsafe fn load(slice: &[u8]) -> u8x32 {
+        assert!(slice.len() >= 32);
+        assert!(slice.as_ptr() as usize % 32 == 0);
+        u8x32::load_unchecked(slice)
+    }
+
+    #[inline]
+    unsafe fn load_unchecked(slice: &[u8]) -> u8x32 {
+        let p = slice.as_ptr() as *const u8 as *const __m256i;
+        u8x32 { vector: _mm256_load_si256(p) }
+    }
+
+    #[inline]
+    pub fn extract(self, i: usize) -> u8 {
+        // Safe because `bytes` is always accessible.
+        unsafe { self.bytes[i] }
+    }
+
+    #[inline]
+    pub fn replace(&mut self, i: usize, byte: u8) {
+        // Safe because `bytes` is always accessible.
+        unsafe { self.bytes[i] = byte; }
+    }
+
+    #[inline]
+    pub fn shuffle(self, indices: u8x32) -> u8x32 {
+        // Safe because we know AVX2 is enabled.
+        unsafe {
+            u8x32 { vector: _mm256_shuffle_epi8(self.vector, indices.vector) }
+        }
+    }
+
+    #[inline]
+    pub fn ne(self, other: u8x32) -> u8x32 {
+        // Safe because we know AVX2 is enabled.
+        unsafe {
+            let boolv = _mm256_cmpeq_epi8(self.vector, other.vector);
+            let ones = _mm256_set1_epi8(0xFF as u8 as i8);
+            u8x32 { vector: _mm256_andnot_si256(boolv, ones) }
+        }
+    }
+
+    #[inline]
+    pub fn and(self, other: u8x32) -> u8x32 {
+        // Safe because we know AVX2 is enabled.
+        unsafe {
+            u8x32 { vector: _mm256_and_si256(self.vector, other.vector) }
+        }
+    }
+
+    #[inline]
+    pub fn movemask(self) -> u32 {
+        // Safe because we know AVX2 is enabled.
+        unsafe {
+            _mm256_movemask_epi8(self.vector) as u32
+        }
+    }
+
+    #[inline]
+    pub fn alignr_14(self, other: u8x32) -> u8x32 {
+        // Safe because we know AVX2 is enabled.
+        unsafe {
+            // Credit goes to jneem for figuring this out:
+            // https://github.com/jneem/teddy/blob/9ab5e899ad6ef6911aecd3cf1033f1abe6e1f66c/src/x86/teddy_simd.rs#L145-L184
+            //
+            // TL;DR avx2's PALIGNR instruction is actually just two 128-bit
+            // PALIGNR instructions, which is not what we want, so we need to
+            // do some extra shuffling.
+            let v = _mm256_permute2x128_si256(other.vector, self.vector, 0x21);
+            let v = _mm256_alignr_epi8(self.vector, v, 14);
+            u8x32 { vector: v }
+        }
+    }
+
+    #[inline]
+    pub fn alignr_15(self, other: u8x32) -> u8x32 {
+        // Safe because we know AVX2 is enabled.
+        unsafe {
+            // Credit goes to jneem for figuring this out:
+            // https://github.com/jneem/teddy/blob/9ab5e899ad6ef6911aecd3cf1033f1abe6e1f66c/src/x86/teddy_simd.rs#L145-L184
+            //
+            // TL;DR avx2's PALIGNR instruction is actually just two 128-bit
+            // PALIGNR instructions, which is not what we want, so we need to
+            // do some extra shuffling.
+            let v = _mm256_permute2x128_si256(other.vector, self.vector, 0x21);
+            let v = _mm256_alignr_epi8(self.vector, v, 15);
+            u8x32 { vector: v }
+        }
+    }
+
+    #[inline]
+    pub fn bit_shift_right_4(self) -> u8x32 {
+        // Safe because we know AVX2 is enabled.
+        unsafe {
+            u8x32 { vector: _mm256_srli_epi16(self.vector, 4) }
+        }
+    }
+}
+
+impl fmt::Debug for u8x32 {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        // Safe because `bytes` is always accessible.
+        unsafe { self.bytes.fmt(f) }
+    }
+}
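
To make the `ne`/`movemask` pairing above concrete, a hedged sketch of in-crate usage; `first_difference` is a hypothetical helper, not something this patch adds:

    // Hypothetical helper: index of the first byte where two 32-byte blocks
    // differ, or None if they are identical. Panics if either slice is
    // shorter than 32 bytes (the loads assert on length).
    fn first_difference(vb: AVX2VectorBuilder, a: &[u8], b: &[u8]) -> Option<usize> {
        let x = vb.u8x32_load_unaligned(&a[..32]);
        let y = vb.u8x32_load_unaligned(&b[..32]);
        // `ne` yields 0xFF in every differing lane; `movemask` packs each
        // lane's high bit into bit i of a u32, so the lowest set bit marks
        // the first mismatch.
        let mask = x.ne(y).movemask();
        if mask == 0 { None } else { Some(mask.trailing_zeros() as usize) }
    }
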
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex/src/vector/mod.rs
@@ -0,0 +1,4 @@
+#[cfg(target_arch = "x86_64")]
+pub mod avx2;
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+pub mod ssse3;
new file mode 100644
--- /dev/null
+++ b/third_party/rust/regex/src/vector/ssse3.rs
@@ -0,0 +1,192 @@
+#![allow(dead_code)]
+
+use std::arch::x86_64::*;
+use std::fmt;
+
+/// A builder for SSSE3 empowered vectors.
+///
+/// This builder represents a receipt that the SSSE3 target feature is enabled
+/// on the currently running CPU. Namely, the only way to get a value of this
+/// type is if the SSSE3 feature is enabled.
+///
+/// This type can then be used to build vector types that use SSSE3 features
+/// safely.
+#[derive(Clone, Copy, Debug)]
+pub struct SSSE3VectorBuilder(());
+
+impl SSSE3VectorBuilder {
+    /// Create a new SSSE3 vector builder.
+    ///
+    /// If the SSSE3 feature is not enabled for the current target, then
+    /// return `None`.
+    pub fn new() -> Option<SSSE3VectorBuilder> {
+        if is_x86_feature_detected!("ssse3") {
+            Some(SSSE3VectorBuilder(()))
+        } else {
+            None
+        }
+    }
+
+    /// Create a new u8x16 SSSE3 vector where all of the bytes are set to
+    /// the given value.
+    #[inline]
+    pub fn u8x16_splat(self, n: u8) -> u8x16 {
+        // Safe because we know SSSE3 is enabled.
+        unsafe { u8x16::splat(n) }
+    }
+
+    /// Load 16 bytes from the given slice, with bounds checks.
+    #[inline]
+    pub fn u8x16_load_unaligned(self, slice: &[u8]) -> u8x16 {
+        // Safe because we know SSSE3 is enabled.
+        unsafe { u8x16::load_unaligned(slice) }
+    }
+
+    /// Load 16 bytes from the given slice, without bounds checks.
+    #[inline]
+    pub unsafe fn u8x16_load_unchecked_unaligned(self, slice: &[u8]) -> u8x16 {
+        // Safe because we know SSSE3 is enabled, but still unsafe
+        // because we aren't doing bounds checks.
+        u8x16::load_unchecked_unaligned(slice)
+    }
+
+    /// Load 16 bytes from the given slice, with bound and alignment checks.
+    #[inline]
+    pub fn u8x16_load(self, slice: &[u8]) -> u8x16 {
+        // Safe because we know SSSE3 is enabled.
+        unsafe { u8x16::load(slice) }
+    }
+
+    /// Load 16 bytes from the given slice, without bound or alignment checks.
+    #[inline]
+    pub unsafe fn u8x16_load_unchecked(self, slice: &[u8]) -> u8x16 {
+        // Safe because we know SSSE3 is enabled, but still unsafe
+        // because we aren't doing bounds checks.
+        u8x16::load_unchecked(slice)
+    }
+}
+
+/// A u8x16 is a 128-bit vector with 16 single-byte lanes.
+///
+/// It provides a safe API that uses only SSE2 or SSSE3 instructions.
+/// The only way for callers to construct a value of this type is
+/// through the SSSE3VectorBuilder type, and the only way to get a
+/// SSSE3VectorBuilder is if the `ssse3` target feature is enabled.
+///
+/// Note that generally speaking, all uses of this type should get
+/// inlined, otherwise you probably have a performance bug.
+#[derive(Clone, Copy)]
+#[allow(non_camel_case_types)]
+pub union u8x16 {
+    vector: __m128i,
+    bytes: [u8; 16],
+}
+
+impl u8x16 {
+    #[inline]
+    unsafe fn splat(n: u8) -> u8x16 {
+        u8x16 { vector: _mm_set1_epi8(n as i8) }
+    }
+
+    #[inline]
+    unsafe fn load_unaligned(slice: &[u8]) -> u8x16 {
+        assert!(slice.len() >= 16);
+        u8x16::load_unchecked_unaligned(slice)
+    }
+
+    #[inline]
+    unsafe fn load_unchecked_unaligned(slice: &[u8]) -> u8x16 {
+        let v = _mm_loadu_si128(slice.as_ptr() as *const u8 as *const __m128i);
+        u8x16 { vector: v }
+    }
+
+    #[inline]
+    unsafe fn load(slice: &[u8]) -> u8x16 {
+        assert!(slice.len() >= 16);
+        assert!(slice.as_ptr() as usize % 16 == 0);
+        u8x16::load_unchecked(slice)
+    }
+
+    #[inline]
+    unsafe fn load_unchecked(slice: &[u8]) -> u8x16 {
+        let v = _mm_load_si128(slice.as_ptr() as *const u8 as *const __m128i);
+        u8x16 { vector: v }
+    }
+
+    #[inline]
+    pub fn extract(self, i: usize) -> u8 {
+        // Safe because `bytes` is always accessible.
+        unsafe { self.bytes[i] }
+    }
+
+    #[inline]
+    pub fn replace(&mut self, i: usize, byte: u8) {
+        // Safe because `bytes` is always accessible.
+        unsafe { self.bytes[i] = byte; }
+    }
+
+    #[inline]
+    pub fn shuffle(self, indices: u8x16) -> u8x16 {
+        // Safe because we know SSSE3 is enabled.
+        unsafe {
+            u8x16 { vector: _mm_shuffle_epi8(self.vector, indices.vector) }
+        }
+    }
+
+    #[inline]
+    pub fn ne(self, other: u8x16) -> u8x16 {
+        // Safe because we know SSSE3 is enabled.
+        unsafe {
+            let boolv = _mm_cmpeq_epi8(self.vector, other.vector);
+            let ones = _mm_set1_epi8(0xFF as u8 as i8);
+            u8x16 { vector: _mm_andnot_si128(boolv, ones) }
+        }
+    }
+
+    #[inline]
+    pub fn and(self, other: u8x16) -> u8x16 {
+        // Safe because we know SSSE3 is enabled.
+        unsafe {
+            u8x16 { vector: _mm_and_si128(self.vector, other.vector) }
+        }
+    }
+
+    #[inline]
+    pub fn movemask(self) -> u32 {
+        // Safe because we know SSSE3 is enabled.
+        unsafe {
+            _mm_movemask_epi8(self.vector) as u32
+        }
+    }
+
+    #[inline]
+    pub fn alignr_14(self, other: u8x16) -> u8x16 {
+        // Safe because we know SSSE3 is enabled.
+        unsafe {
+            u8x16 { vector: _mm_alignr_epi8(self.vector, other.vector, 14) }
+        }
+    }
+
+    #[inline]
+    pub fn alignr_15(self, other: u8x16) -> u8x16 {
+        // Safe because we know SSSE3 is enabled.
+        unsafe {
+            u8x16 { vector: _mm_alignr_epi8(self.vector, other.vector, 15) }
+        }
+    }
+
+    #[inline]
+    pub fn bit_shift_right_4(self) -> u8x16 {
+        // Safe because we know SSSE3 is enabled.
+        unsafe {
+            u8x16 { vector: _mm_srli_epi16(self.vector, 4) }
+        }
+    }
+}
+
+impl fmt::Debug for u8x16 {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        // Safe because `bytes` is always accessible.
+        unsafe { self.bytes.fmt(f) }
+    }
+}
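
The `shuffle` and `bit_shift_right_4` primitives above exist to support Teddy-style nibble lookups; a hedged sketch of that pattern (`classify`, `lut_lo` and `lut_hi` are hypothetical names, not part of this patch):

    // Hypothetical sketch: per-byte classification of a 16-byte chunk using
    // two 16-entry lookup tables, one indexed by the low nibble and one by
    // the high nibble of each input byte.
    fn classify(vb: SSSE3VectorBuilder, chunk: &[u8],
                lut_lo: &[u8; 16], lut_hi: &[u8; 16]) -> u8x16 {
        let input = vb.u8x16_load_unaligned(chunk);
        let lo_nibbles = input.and(vb.u8x16_splat(0x0F));
        // `bit_shift_right_4` shifts 16-bit lanes, so mask again to keep only
        // the bits that came from each byte's own high nibble.
        let hi_nibbles = input.bit_shift_right_4().and(vb.u8x16_splat(0x0F));
        // `shuffle` treats `self` as a 16-entry table indexed by the low
        // 4 bits of each lane of its argument.
        let lo = vb.u8x16_load_unaligned(&lut_lo[..]).shuffle(lo_nibbles);
        let hi = vb.u8x16_load_unaligned(&lut_hi[..]).shuffle(hi_nibbles);
        lo.and(hi)
    }
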
--- a/third_party/rust/regex/tests/api_str.rs
+++ b/third_party/rust/regex/tests/api_str.rs
@@ -15,8 +15,17 @@ fn empty_match_unicode_captures_iter() {
     // Same as empty_match_unicode_find_iter, but tests capture iteration.
     let re = regex!(r".*?");
     let ms: Vec<_> = re.captures_iter(text!("Ⅰ1Ⅱ2"))
                        .map(|c| c.get(0).unwrap())
                        .map(|m| (m.start(), m.end()))
                        .collect();
     assert_eq!(vec![(0, 0), (3, 3), (4, 4), (7, 7), (8, 8)], ms);
 }
+
+#[test]
+fn match_as_str() {
+    let re = regex!(r"fo+");
+    let caps = re.captures("barfoobar").unwrap();
+    assert_eq!(caps.get(0).map(|m| m.as_str()), Some("foo"));
+    assert_eq!(caps.get(0).map(From::from), Some("foo"));
+    assert_eq!(caps.get(0).map(Into::into), Some("foo"));
+}
--- a/third_party/rust/regex/tests/bytes.rs
+++ b/third_party/rust/regex/tests/bytes.rs
@@ -1,14 +1,14 @@
 // These are tests specifically crafted for regexes that can match arbitrary
 // bytes.
 
 // A silly wrapper to make it possible to write and match raw bytes.
 struct R<'a>(&'a [u8]);
-impl<'a> R<'a> { fn as_bytes(&self) -> &'a [u8] { &self.0 } }
+impl<'a> R<'a> { fn as_bytes(&self) -> &'a [u8] { self.0 } }
 
 mat!(word_boundary, r"(?-u) \b", " δ", None);
 mat!(word_boundary_unicode, r" \b", " δ", Some((0, 1)));
 mat!(word_not_boundary, r"(?-u) \B", " δ", Some((0, 1)));
 mat!(word_not_boundary_unicode, r" \B", " δ", None);
 
 mat!(perl_w_ascii, r"(?-u)\w+", "aδ", Some((0, 1)));
 mat!(perl_w_unicode, r"\w+", "aδ", Some((0, 3)));
@@ -55,8 +55,18 @@ matiter!(invalidutf8_anchor3,
          R(b"\x8d#;\x1a\xa4s3\x05foobarX\\\x0f0t\xe4\x9b\xa4"),
          (0, 0));
 
 // See https://github.com/rust-lang/regex/issues/303
 #[test]
 fn negated_full_byte_range() {
      assert!(::regex::bytes::Regex::new(r#"(?-u)[^\x00-\xff]"#).is_err());
 }
+
+matiter!(word_boundary_ascii1, r"(?-u:\B)x(?-u:\B)", "áxβ");
+matiter!(word_boundary_ascii2, r"(?-u:\B)", "0\u{7EF5E}", (2, 2), (3, 3), (4, 4), (5, 5));
+
+// See: https://github.com/rust-lang/regex/issues/264
+mat!(ascii_boundary_no_capture, r"(?-u)\B", "\u{28f3e}", Some((0, 0)));
+mat!(ascii_boundary_capture, r"(?-u)(\B)", "\u{28f3e}", Some((0, 0)));
+
+// See: https://github.com/rust-lang/regex/issues/271
+mat!(end_not_wb, r"$(?-u:\B)", "\u{5c124}\u{b576c}", Some((8, 8)));
--- a/third_party/rust/regex/tests/crazy.rs
+++ b/third_party/rust/regex/tests/crazy.rs
@@ -42,16 +42,43 @@ mat!(negclass_letters, r"[^ac]", "acx", 
 mat!(negclass_letter_comma, r"[^a,]", "a,x", Some((2, 3)));
 mat!(negclass_letter_space, r"[^a\s]", "a x", Some((2, 3)));
 mat!(negclass_comma, r"[^,]", ",,x", Some((2, 3)));
 mat!(negclass_space, r"[^\s]", " a", Some((1, 2)));
 mat!(negclass_space_comma, r"[^,\s]", ", a", Some((2, 3)));
 mat!(negclass_comma_space, r"[^\s,]", " ,a", Some((2, 3)));
 mat!(negclass_ascii, r"[^[:alpha:]Z]", "A1", Some((1, 2)));
 
+// Test that repeated empty expressions don't loop forever.
+mat!(lazy_many_many, r"((?:.*)*?)=", "a=b", Some((0, 2)));
+mat!(lazy_many_optional, r"((?:.?)*?)=", "a=b", Some((0, 2)));
+mat!(lazy_one_many_many, r"((?:.*)+?)=", "a=b", Some((0, 2)));
+mat!(lazy_one_many_optional, r"((?:.?)+?)=", "a=b", Some((0, 2)));
+mat!(lazy_range_min_many, r"((?:.*){1,}?)=", "a=b", Some((0, 2)));
+mat!(lazy_range_many, r"((?:.*){1,2}?)=", "a=b", Some((0, 2)));
+mat!(greedy_many_many, r"((?:.*)*)=", "a=b", Some((0, 2)));
+mat!(greedy_many_optional, r"((?:.?)*)=", "a=b", Some((0, 2)));
+mat!(greedy_one_many_many, r"((?:.*)+)=", "a=b", Some((0, 2)));
+mat!(greedy_one_many_optional, r"((?:.?)+)=", "a=b", Some((0, 2)));
+mat!(greedy_range_min_many, r"((?:.*){1,})=", "a=b", Some((0, 2)));
+mat!(greedy_range_many, r"((?:.*){1,2})=", "a=b", Some((0, 2)));
+
+// Test that we handle various flavors of empty expressions.
+matiter!(match_empty1, r"", "", (0, 0));
+matiter!(match_empty2, r"", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty3, r"()", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty4, r"()*", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty5, r"()+", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty6, r"()?", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty7, r"()()", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty8, r"()+|z", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty9, r"z|()+", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty10, r"()+|b", "abc", (0, 0), (1, 1), (2, 2), (3, 3));
+matiter!(match_empty11, r"b|()+", "abc", (0, 0), (1, 2), (3, 3));
+
 // Test that the DFA can handle pathological cases.
 // (This should result in the DFA's cache being flushed too frequently, which
 // should cause it to quit and fall back to the NFA algorithm.)
 #[test]
 fn dfa_handles_pathological_case() {
     fn ones_and_zeroes(count: usize) -> String {
         use rand::{Rng, thread_rng};
 
@@ -71,8 +98,304 @@ fn dfa_handles_pathological_case() {
     let text = {
         let mut pieces = ones_and_zeroes(100_000);
         pieces.push('1');
         pieces.push_str(&ones_and_zeroes(20));
         pieces
     };
     assert!(re.is_match(text!(&*text)));
 }
+
+#[test]
+fn nest_limit_makes_it_parse() {
+    use regex::RegexBuilder;
+
+    RegexBuilder::new(
+        r#"
+        2(?:
+          [45]\d{3}|
+          7(?:
+            1[0-267]|
+            2[0-289]|
+            3[0-29]|
+            4[01]|
+            5[1-3]|
+            6[013]|
+            7[0178]|
+            91
+          )|
+          8(?:
+            0[125]|
+            [139][1-6]|
+            2[0157-9]|
+            41|
+            6[1-35]|
+            7[1-5]|
+            8[1-8]|
+            90
+          )|
+          9(?:
+            0[0-2]|
+            1[0-4]|
+            2[568]|
+            3[3-6]|
+            5[5-7]|
+            6[0167]|
+            7[15]|
+            8[0146-9]
+          )
+        )\d{4}|
+        3(?:
+          12?[5-7]\d{2}|
+          0(?:
+            2(?:
+              [025-79]\d|
+              [348]\d{1,2}
+            )|
+            3(?:
+              [2-4]\d|
+              [56]\d?
+            )
+          )|
+          2(?:
+            1\d{2}|
+            2(?:
+              [12]\d|
+              [35]\d{1,2}|
+              4\d?
+            )
+          )|
+          3(?:
+            1\d{2}|
+            2(?:
+              [2356]\d|
+              4\d{1,2}
+            )
+          )|
+          4(?:
+            1\d{2}|
+            2(?:
+              2\d{1,2}|
+              [47]|
+              5\d{2}
+            )
+          )|
+          5(?:
+            1\d{2}|
+            29
+          )|
+          [67]1\d{2}|
+          8(?:
+            1\d{2}|
+            2(?:
+              2\d{2}|
+              3|
+              4\d
+            )
+          )
+        )\d{3}|
+        4(?:
+          0(?:
+            2(?:
+              [09]\d|
+              7
+            )|
+            33\d{2}
+          )|
+          1\d{3}|
+          2(?:
+            1\d{2}|
+            2(?:
+              [25]\d?|
+              [348]\d|
+              [67]\d{1,2}
+            )
+          )|
+          3(?:
+            1\d{2}(?:
+              \d{2}
+            )?|
+            2(?:
+              [045]\d|
+              [236-9]\d{1,2}
+            )|
+            32\d{2}
+          )|
+          4(?:
+            [18]\d{2}|
+            2(?:
+              [2-46]\d{2}|
+              3
+            )|
+            5[25]\d{2}
+          )|
+          5(?:
+            1\d{2}|
+            2(?:
+              3\d|
+              5
+            )
+          )|
+          6(?:
+            [18]\d{2}|
+            2(?:
+              3(?:
+                \d{2}
+              )?|
+              [46]\d{1,2}|
+              5\d{2}|
+              7\d
+            )|
+            5(?:
+              3\d?|
+              4\d|
+              [57]\d{1,2}|
+              6\d{2}|
+              8
+            )
+          )|
+          71\d{2}|
+          8(?:
+            [18]\d{2}|
+            23\d{2}|
+            54\d{2}
+          )|
+          9(?:
+            [18]\d{2}|
+            2[2-5]\d{2}|
+            53\d{1,2}
+          )
+        )\d{3}|
+        5(?:
+          02[03489]\d{2}|
+          1\d{2}|
+          2(?:
+            1\d{2}|
+            2(?:
+              2(?:
+                \d{2}
+              )?|
+              [457]\d{2}
+            )
+          )|
+          3(?:
+            1\d{2}|
+            2(?:
+              [37](?:
+                \d{2}
+              )?|
+              [569]\d{2}
+            )
+          )|
+          4(?:
+            1\d{2}|
+            2[46]\d{2}
+          )|
+          5(?:
+            1\d{2}|
+            26\d{1,2}
+          )|
+          6(?:
+            [18]\d{2}|
+            2|
+            53\d{2}
+          )|
+          7(?:
+            1|
+            24
+          )\d{2}|
+          8(?:
+            1|
+            26
+          )\d{2}|
+          91\d{2}
+        )\d{3}|
+        6(?:
+          0(?:
+            1\d{2}|
+            2(?:
+              3\d{2}|
+              4\d{1,2}
+            )
+          )|
+          2(?:
+            2[2-5]\d{2}|
+            5(?:
+              [3-5]\d{2}|
+              7
+            )|
+            8\d{2}
+          )|
+          3(?:
+            1|
+            2[3478]
+          )\d{2}|
+          4(?:
+            1|
+            2[34]
+          )\d{2}|
+          5(?:
+            1|
+            2[47]
+          )\d{2}|
+          6(?:
+            [18]\d{2}|
+            6(?:
+              2(?:
+                2\d|
+                [34]\d{2}
+              )|
+              5(?:
+                [24]\d{2}|
+                3\d|
+                5\d{1,2}
+              )
+            )
+          )|
+          72[2-5]\d{2}|
+          8(?:
+            1\d{2}|
+            2[2-5]\d{2}
+          )|
+          9(?:
+            1\d{2}|
+            2[2-6]\d{2}
+          )
+        )\d{3}|
+        7(?:
+          (?:
+            02|
+            [3-589]1|
+            6[12]|
+            72[24]
+          )\d{2}|
+          21\d{3}|
+          32
+        )\d{3}|
+        8(?:
+          (?:
+            4[12]|
+            [5-7]2|
+            1\d?
+          )|
+          (?:
+            0|
+            3[12]|
+            [5-7]1|
+            217
+          )\d
+        )\d{4}|
+        9(?:
+          [35]1|
+          (?:
+            [024]2|
+            81
+          )\d|
+          (?:
+            1|
+            [24]1
+          )\d{2}
+        )\d{3}
+        "#
+    )
+    .build()
+    .unwrap();
+}
--- a/third_party/rust/regex/tests/macros.rs
+++ b/third_party/rust/regex/tests/macros.rs
@@ -9,17 +9,17 @@ macro_rules! findall {
 
 // Macros for automatically producing tests.
 
 macro_rules! ismatch {
     ($name:ident, $re:expr, $text:expr, $ismatch:expr) => {
         #[test]
         fn $name() {
             let re = regex!($re);
-            assert!($ismatch == re.is_match(text!($text)));
+            assert_eq!($ismatch, re.is_match(text!($text)));
         }
     };
 }
 
 macro_rules! mat(
     ($name:ident, $re:expr, $text:expr, $($loc:tt)+) => (
         #[test]
         fn $name() {
--- a/third_party/rust/regex/tests/macros_bytes.rs
+++ b/third_party/rust/regex/tests/macros_bytes.rs
@@ -1,17 +1,14 @@
 // Macros for use in writing tests generic over &str/&[u8].
 macro_rules! text { ($text:expr) => { $text.as_bytes() } }
 macro_rules! t { ($re:expr) => { text!($re) } }
 macro_rules! match_text { ($text:expr) => { $text.as_bytes() } }
 
 macro_rules! bytes { ($text:expr) => { $text } }
-macro_rules! b { ($text:expr) => { bytes!($text) } }
-
-// macro_rules! u { ($re:expr) => { concat!("(?u)", $re) } }
 
 macro_rules! no_expand {
     ($text:expr) => {{
         use regex::bytes::NoExpand;
         NoExpand(text!($text))
     }}
 }
 
--- a/third_party/rust/regex/tests/macros_str.rs
+++ b/third_party/rust/regex/tests/macros_str.rs
@@ -1,18 +1,13 @@
 // Macros for use in writing tests generic over &str/&[u8].
 macro_rules! text { ($text:expr) => { $text } }
 macro_rules! t { ($text:expr) => { text!($text) } }
 macro_rules! match_text { ($text:expr) => { $text.as_str() } }
 
-macro_rules! bytes { ($text:expr) => { $text.as_bytes() } }
-macro_rules! b { ($text:expr) => { bytes!($text) } }
-
-// macro_rules! u { ($re:expr) => { $re } }
-
 macro_rules! no_expand {
     ($text:expr) => {{
         use regex::NoExpand;
         NoExpand(text!($text))
     }}
 }
 
 macro_rules! show { ($text:expr) => { $text } }
--- a/third_party/rust/regex/tests/noparse.rs
+++ b/third_party/rust/regex/tests/noparse.rs
@@ -6,43 +6,45 @@ macro_rules! noparse(
             match regex_new!(re) {
                 Err(_) => {},
                 Ok(_) => panic!("Regex '{}' should cause a parse error.", re),
             }
         }
     );
 );
 
-noparse!(fail_double_repeat, "a**");
 noparse!(fail_no_repeat_arg, "*");
 noparse!(fail_incomplete_escape, "\\");
 noparse!(fail_class_incomplete, "[A-");
 noparse!(fail_class_not_closed, "[A");
 noparse!(fail_class_no_begin, r"[\A]");
 noparse!(fail_class_no_end, r"[\z]");
 noparse!(fail_class_no_boundary, r"[\b]");
 noparse!(fail_open_paren, "(");
 noparse!(fail_close_paren, ")");
 noparse!(fail_invalid_range, "[a-Z]");
 noparse!(fail_empty_capture_name, "(?P<>a)");
-noparse!(fail_empty_capture_exp, "(?P<name>)");
 noparse!(fail_bad_capture_name, "(?P<na-me>)");
 noparse!(fail_bad_flag, "(?a)a");
-noparse!(fail_empty_alt_before, "|a");
-noparse!(fail_empty_alt_after, "a|");
 noparse!(fail_too_big, "a{10000000}");
 noparse!(fail_counted_no_close, "a{1001");
 noparse!(fail_unfinished_cap, "(?");
 noparse!(fail_unfinished_escape, "\\");
 noparse!(fail_octal_digit, r"\8");
 noparse!(fail_hex_digit, r"\xG0");
 noparse!(fail_hex_short, r"\xF");
 noparse!(fail_hex_long_digits, r"\x{fffg}");
 noparse!(fail_flag_bad, "(?a)");
 noparse!(fail_flag_empty, "(?)");
 noparse!(fail_double_neg, "(?-i-i)");
 noparse!(fail_neg_empty, "(?i-)");
-noparse!(fail_empty_group, "()");
 noparse!(fail_dupe_named, "(?P<a>.)(?P<a>.)");
 noparse!(fail_range_end_no_class, "[a-[:lower:]]");
 noparse!(fail_range_end_no_begin, r"[a-\A]");
 noparse!(fail_range_end_no_end, r"[a-\z]");
 noparse!(fail_range_end_no_boundary, r"[a-\b]");
+noparse!(fail_empty_alt1, r"|z");
+noparse!(fail_empty_alt2, r"z|");
+noparse!(fail_empty_alt3, r"|");
+noparse!(fail_empty_alt4, r"||");
+noparse!(fail_empty_alt5, r"()|z");
+noparse!(fail_empty_alt6, r"z|()");
+noparse!(fail_empty_alt7, r"(|)");
--- a/third_party/rust/regex/tests/regression.rs
+++ b/third_party/rust/regex/tests/regression.rs
@@ -56,37 +56,39 @@ split!(split_on_word_boundary, r"\b", r"
        &[t!(""), t!("Should"), t!(" "), t!("this"),
          t!(" ("), t!("work"), t!("?)")]);
 matiter!(word_boundary_dfa, r"\b", "a b c",
          (0, 0), (1, 1), (2, 2), (3, 3), (4, 4), (5, 5));
 
 // See: https://github.com/rust-lang/regex/issues/268
 matiter!(partial_anchor, r"^a|b", "ba", (0, 1));
 
-// See: https://github.com/rust-lang/regex/issues/264
-mat!(ascii_boundary_no_capture, r"(?-u)\B", "\u{28f3e}", Some((0, 0)));
-mat!(ascii_boundary_capture, r"(?-u)(\B)", "\u{28f3e}", Some((0, 0)));
-
 // See: https://github.com/rust-lang/regex/issues/280
 ismatch!(partial_anchor_alternate_begin, r"^a|z", "yyyyya", false);
 ismatch!(partial_anchor_alternate_end, r"a$|z", "ayyyyy", false);
 
 // See: https://github.com/rust-lang/regex/issues/289
 mat!(lits_unambiguous1, r"(ABC|CDA|BC)X", "CDAX", Some((0, 4)));
 
 // See: https://github.com/rust-lang/regex/issues/291
 mat!(lits_unambiguous2, r"((IMG|CAM|MG|MB2)_|(DSCN|CIMG))(?P<n>[0-9]+)$",
      "CIMG2341", Some((0, 8)), Some((0, 4)), None, Some((0, 4)), Some((4, 8)));
 
 // See: https://github.com/rust-lang/regex/issues/271
-mat!(end_not_wb, r"$(?-u:\B)", "\u{5c124}\u{b576c}", Some((8, 8)));
 mat!(endl_or_wb, r"(?m:$)|(?-u:\b)", "\u{6084e}", Some((4, 4)));
 mat!(zero_or_end, r"(?i-u:\x00)|$", "\u{e682f}", Some((4, 4)));
 mat!(y_or_endl, r"(?i-u:y)|(?m:$)", "\u{b4331}", Some((4, 4)));
 mat!(wb_start_x, r"(?u:\b)^(?-u:X)", "X", Some((0, 1)));
 
 // See: https://github.com/rust-lang/regex/issues/321
 ismatch!(strange_anchor_non_complete_prefix, r"a^{2}", "", false);
 ismatch!(strange_anchor_non_complete_suffix, r"${2}a", "", false);
 
 // See: https://github.com/rust-lang/regex/issues/334
 mat!(captures_after_dfa_premature_end, r"a(b*(X|$))?", "abcbX",
      Some((0, 1)), None, None);
+
+// See: https://github.com/rust-lang/regex/issues/437
+ismatch!(
+    literal_panic,
+    r"typename type\-parameter\-\d+\-\d+::.+",
+    "test",
+    false);
--- a/third_party/rust/regex/tests/replace.rs
+++ b/third_party/rust/regex/tests/replace.rs
@@ -31,8 +31,14 @@ replace!(literal_dollar2, replace_all,
          r"(\w+) (\w+)", "a b", t!("$2 $$c $1"), "b $c a");
 replace!(no_expand1, replace,
          r"(\S+)\s+(\S+)", "w1 w2", no_expand!("$2 $1"), "$2 $1");
 replace!(no_expand2, replace,
          r"(\S+)\s+(\S+)", "w1 w2", no_expand!("$$1"), "$$1");
 
 // See https://github.com/rust-lang/regex/issues/314
 replace!(match_at_start_replace_with_empty, replace_all, r"foo", "foobar", t!(""), "bar");
+
+// See https://github.com/rust-lang/regex/issues/393
+replace!(single_empty_match, replace, r"^", "bar", t!("foo"), "foobar");
+
+// See https://github.com/rust-lang/regex/issues/399
+replace!(capture_longest_possible_name, replace_all, r"(.)", "b", t!("${1}a $1a"), "ba ");
--- a/third_party/rust/regex/tests/test_default.rs
+++ b/third_party/rust/regex/tests/test_default.rs
@@ -70,8 +70,18 @@ mod word_boundary_unicode;
 
 #[test]
 fn disallow_non_utf8() {
     assert!(regex::Regex::new(r"(?-u)\xFF").is_err());
     assert!(regex::Regex::new(r"(?-u).").is_err());
     assert!(regex::Regex::new(r"(?-u)[\xFF]").is_err());
     assert!(regex::Regex::new(r"(?-u)☃").is_err());
 }
+
+#[test]
+fn disallow_octal() {
+    assert!(regex::Regex::new(r"\0").is_err());
+}
+
+#[test]
+fn allow_octal() {
+    assert!(regex::RegexBuilder::new(r"\0").octal(true).build().is_ok());
+}
--- a/third_party/rust/regex/tests/test_default_bytes.rs
+++ b/third_party/rust/regex/tests/test_default_bytes.rs
@@ -38,17 +38,17 @@ macro_rules! regex_set {
 }
 
 // Must come before other module definitions.
 include!("macros_bytes.rs");
 include!("macros.rs");
 
 // A silly wrapper to make it possible to write and match raw bytes.
 struct R<'a>(&'a [u8]);
-impl<'a> R<'a> { fn as_bytes(&self) -> &'a [u8] { &self.0 } }
+impl<'a> R<'a> { fn as_bytes(&self) -> &'a [u8] { self.0 } }
 
 // See: https://github.com/rust-lang/regex/issues/321
 //
 // These tests are here because they do not have the same behavior in every
 // regex engine.
 mat!(invalid_utf8_nfa1, r".", R(b"\xD4\xC2\x65\x2B\x0E\xFE"), Some((2, 3)));
 mat!(invalid_utf8_nfa2, r"${2}ä", R(b"\xD4\xC2\x65\x2B\x0E\xFE"), None);
 mat!(invalid_utf8_nfa3, r".", R(b"\x0A\xDB\x82\x6E\x33\x01\xDD\x33\xCD"),
--- a/third_party/rust/regex/tests/unicode.rs
+++ b/third_party/rust/regex/tests/unicode.rs
@@ -24,8 +24,87 @@ mat!(uni_perl_s, r"\s+", " ", Some((0, 3)));
 mat!(uni_perl_s_not, r"\s+", "☃", None);
 mat!(uni_perl_s_neg, r"\S+", "☃", Some((0, 3)));
 
 // And do the same for word boundaries.
 mat!(uni_boundary_none, r"\d\b", "6δ", None);
 mat!(uni_boundary_ogham, r"\d\b", "6 ", Some((0, 1)));
 mat!(uni_not_boundary_none, r"\d\B", "6δ", Some((0, 1)));
 mat!(uni_not_boundary_ogham, r"\d\B", "6 ", None);
+
+// Test general categories.
+//
+// We should test more, but there's a lot. Write a script to generate more of
+// these tests.
+mat!(uni_class_gencat_cased_letter,
+     r"\p{Cased_Letter}", "A", Some((0, 3)));
+mat!(uni_class_gencat_close_punctuation,
+     r"\p{Close_Punctuation}", "❯", Some((0, 3)));
+mat!(uni_class_gencat_connector_punctuation,
+     r"\p{Connector_Punctuation}", "⁀", Some((0, 3)));
+mat!(uni_class_gencat_control,
+     r"\p{Control}", "\u{9f}", Some((0, 2)));
+mat!(uni_class_gencat_currency_symbol,
+     r"\p{Currency_Symbol}", "£", Some((0, 3)));
+mat!(uni_class_gencat_dash_punctuation,
+     r"\p{Dash_Punctuation}", "〰", Some((0, 3)));
+mat!(uni_class_gencat_decimal_numer,
+     r"\p{Decimal_Number}", "𑓙", Some((0, 4)));
+mat!(uni_class_gencat_enclosing_mark,
+     r"\p{Enclosing_Mark}", "\u{A672}", Some((0, 3)));
+mat!(uni_class_gencat_final_punctuation,
+     r"\p{Final_Punctuation}", "⸡", Some((0, 3)));
+mat!(uni_class_gencat_format,
+     r"\p{Format}", "\u{E007F}", Some((0, 4)));
+mat!(uni_class_gencat_initial_punctuation,
+     r"\p{Initial_Punctuation}", "⸜", Some((0, 3)));
+mat!(uni_class_gencat_letter,
+     r"\p{Letter}", "Έ", Some((0, 2)));
+mat!(uni_class_gencat_letter_number,
+     r"\p{Letter_Number}", "ↂ", Some((0, 3)));
+mat!(uni_class_gencat_line_separator,
+     r"\p{Line_Separator}", "\u{2028}", Some((0, 3)));
+mat!(uni_class_gencat_lowercase_letter,
+     r"\p{Lowercase_Letter}", "ϛ", Some((0, 2)));
+mat!(uni_class_gencat_mark,
+     r"\p{Mark}", "\u{E01EF}", Some((0, 4)));
+mat!(uni_class_gencat_math,
+     r"\p{Math}", "⋿", Some((0, 3)));
+mat!(uni_class_gencat_modifier_letter,
+     r"\p{Modifier_Letter}", "𖭃", Some((0, 4)));
+mat!(uni_class_gencat_modifier_symbol,
+     r"\p{Modifier_Symbol}", "🏿", Some((0, 4)));
+mat!(uni_class_gencat_nonspacing_mark,
+     r"\p{Nonspacing_Mark}", "\u{1E94A}", Some((0, 4)));
+mat!(uni_class_gencat_number,
+     r"\p{Number}", "⓿", Some((0, 3)));
+mat!(uni_class_gencat_open_punctuation,
+     r"\p{Open_Punctuation}", "⦅", Some((0, 3)));
+mat!(uni_class_gencat_other,
+     r"\p{Other}", "\u{bc9}", Some((0, 3)));
+mat!(uni_class_gencat_other_letter,
+     r"\p{Other_Letter}", "ꓷ", Some((0, 3)));
+mat!(uni_class_gencat_other_number,
+     r"\p{Other_Number}", "㉏", Some((0, 3)));
+mat!(uni_class_gencat_other_punctuation,
+     r"\p{Other_Punctuation}", "𞥞", Some((0, 4)));
+mat!(uni_class_gencat_other_symbol,
+     r"\p{Other_Symbol}", "⅌", Some((0, 3)));
+mat!(uni_class_gencat_paragraph_separator,
+     r"\p{Paragraph_Separator}", "\u{2029}", Some((0, 3)));
+mat!(uni_class_gencat_private_use,
+     r"\p{Private_Use}", "\u{10FFFD}", Some((0, 4)));
+mat!(uni_class_gencat_punctuation,
+     r"\p{Punctuation}", "𑁍", Some((0, 4)));
+mat!(uni_class_gencat_separator,
+     r"\p{Separator}", "\u{3000}", Some((0, 3)));
+mat!(uni_class_gencat_space_separator,
+     r"\p{Space_Separator}", "\u{205F}", Some((0, 3)));
+mat!(uni_class_gencat_spacing_mark,
+     r"\p{Spacing_Mark}", "\u{16F7E}", Some((0, 4)));
+mat!(uni_class_gencat_symbol,
+     r"\p{Symbol}", "⯈", Some((0, 3)));
+mat!(uni_class_gencat_titlecase_letter,
+     r"\p{Titlecase_Letter}", "ῼ", Some((0, 3)));
+mat!(uni_class_gencat_unassigned,
+     r"\p{Unassigned}", "\u{10FFFF}", Some((0, 4)));
+mat!(uni_class_gencat_uppercase_letter,
+     r"\p{Uppercase_Letter}", "Ꝋ", Some((0, 3)));
--- a/third_party/rust/regex/tests/word_boundary_unicode.rs
+++ b/third_party/rust/regex/tests/word_boundary_unicode.rs
@@ -1,8 +1,6 @@
 // Unicode word boundaries know about Unicode characters.
 // For ASCII word boundaries, the tests are precisely inverted.
 matiter!(unicode1, r"\bx\b", "áxβ");
 matiter!(unicode2, r"\Bx\B", "áxβ", (2, 3));
 
 matiter!(ascii1, r"(?-u:\b)x(?-u:\b)", "áxβ", (2, 3));
-matiter!(ascii2, r"(?-u:\B)x(?-u:\B)", "áxβ");
-matiter!(ascii3, r"(?-u:\B)", "0\u{7EF5E}", (5, 5));
new file mode 100644
--- /dev/null
+++ b/third_party/rust/ucd-util/.cargo-checksum.json
@@ -0,0 +1,1 @@
+{"files":{"Cargo.toml":"e6e01e3bdb2baed522658098495701ca8020cd0c59a8fdcb63367fe40ac001b0","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"0f96a83840e146e43c0ec96a22ec1f392e0680e6c1226e6f3ba87e0740af850f","README.md":"147712a0aa07cb69e3635ce7a0e8cee2bc45d35988a7af75d352ee18b4cdf86f","src/hangul.rs":"304c4986a9d9ae2da04a96727294cede38dba412acdb5d70c470ff51627e63ee","src/ideograph.rs":"e14e315a6c800e7962564eeb0c26e1a000f7f90c518aa98b06d7fba3124ec8a7","src/lib.rs":"234fdc5d7d94c4a184fe482ffc28d4f2994905375e4a16a9e011ea7f7df3241c","src/name.rs":"a350733283862ce670d520e15f4d4854fbc3ebcbfeab100e2c364ce250a53c68","src/property.rs":"ff22f15d83bb0d96049d8bd6fef5fdb07d71a612c77cb3fd434f528f01ef953f","src/unicode_tables/jamo_short_name.rs":"65b6319fc199a88aa73302c0f307029f21692d9d93308aefe4a942934de87f1c","src/unicode_tables/mod.rs":"4aad26e2df3b9611dc7906a814e3dc442b7ca3928ccdd6e2a31173d5e7dbf677","src/unicode_tables/property_names.rs":"68689cdabe287bd16d8379250e02d9d0f209114a35daf72f88c4d15bc6d51646","src/unicode_tables/property_values.rs":"89cf36aa70d5d09367f2c6e03f2d48353be2f71c94ae5c769ab834f628f7e680"},"package":"fd2be2d6639d0f8fe6cdda291ad456e23629558d466e2789d2c3e9892bda285d"}
\ No newline at end of file
new file mode 100644
--- /dev/null
+++ b/third_party/rust/ucd-util/Cargo.toml
@@ -0,0 +1,23 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g. crates.io) dependencies
+#
+# If you believe there's an error in this file please file an
+# issue against the rust-lang/cargo repository. If you're
+# editing this file be aware that the upstream Cargo.toml
+# will likely look very different (and much more reasonable)
+
+[package]
+name = "ucd-util"
+version = "0.1.1"
+authors = ["Andrew Gallant <jamslam@gmail.com>"]
+description = "A small utility library for working with the Unicode character database.\n"
+homepage = "https://github.com/BurntSushi/rucd"
+documentation = "https://github.com/BurntSushi/rucd"
+readme = "README.md"
+keywords = ["unicode", "database", "character", "property"]
+license = "MIT/Apache-2.0"
+repository = "https://github.com/BurntSushi/rucd"
new file mode 100644
--- /dev/null
+++ b/third_party/rust/ucd-util/LICENSE-APACHE
@@ -0,0 +1,201 @@
+                              Apache License
+                        Version 2.0, January 2004
+                     http://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+   "License" shall mean the terms and conditions for use, reproduction,
+   and distribution as defined by Sections 1 through 9 of this document.
+
+   "Licensor" shall mean the copyright owner or entity authorized by
+   the copyright owner that is granting the License.
+
+   "Legal Entity" shall mean the union of the acting entity and all
+   other entities that control, are controlled by, or are under common
+   control with that entity. For the purposes of this definition,
+   "control" means (i) the power, direct or indirect, to cause the
+   direction or management of such entity, whether by contract or
+   otherwise, or (ii) ownership of fifty percent (50%) or more of the
+   outstanding shares, or (iii) beneficial ownership of such entity.
+
+   "You" (or "Your") shall mean an individual or Legal Entity
+   exercising permissions granted by this License.
+
+   "Source" form shall mean the preferred form for making modifications,
+   including but not limited to software source code, documentation
+   source, and configuration files.
+
+   "Object" form shall mean any form resulting from mechanical
+   transformation or translation of a Source form, including but
+   not limited to compiled object code, generated documentation,
+   and conversions to other media types.
+
+   "Work" shall mean the work of authorship, whether in Source or
+   Object form, made available under the License, as indicated by a
+   copyright notice that is included in or attached to the work
+   (an example is provided in the Appendix below).
+
+   "Derivative Works" shall mean any work, whether in Source or Object
+   form, that is based on (or derived from) the Work and for which the
+   editorial revisions, annotations, elaborations, or other modifications
+   represent, as a whole, an original work of authorship. For the purposes
+   of this License, Derivative Works shall not include works that remain
+   separable from, or merely link (or bind by name) to the interfaces of,
+   the Work and Derivative Works thereof.
+
+   "Contribution" shall mean any work of authorship, including
+   the original version of the Work and any modifications or additions
+   to that Work or Derivative Works thereof, that is intentionally
+   submitted to Licensor for inclusion in the Work by the copyright owner
+   or by an individual or Legal Entity authorized to submit on behalf of
+   the copyright owner. For the purposes of this definition, "submitted"
+   means any form of electronic, verbal, or written communication sent
+   to the Licensor or its representatives, including but not limited to
+   communication on electronic mailing lists, source code control systems,
+   and issue tracking systems that are managed by, or on behalf of, the
+   Licensor for the purpose of discussing and improving the Work, but
+   excluding communication that is conspicuously marked or otherwise
+   designated in writing by the copyright owner as "Not a Contribution."
+
+   "Contributor" shall mean Licensor and any individual or Legal Entity
+   on behalf of whom a Contribution has been received by Licensor and
+   subsequently incorporated within the Work.
+
+2. Grant of Copyright License. Subject to the terms and conditions of
+   this License, each Contributor hereby grants to You a perpetual,
+   worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+   copyright license to reproduce, prepare Derivative Works of,
+   publicly display, publicly perform, sublicense, and distribute the
+   Work and such Derivative Works in Source or Object form.
+
+3. Grant of Patent License. Subject to the terms and conditions of
+   this License, each Contributor hereby grants to You a perpetual,
+   worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+   (except as stated in this section) patent license to make, have made,
+   use, offer to sell, sell, import, and otherwise transfer the Work,
+   where such license applies only to those patent claims licensable
+   by such Contributor that are necessarily infringed by their
+   Contribution(s) alone or by combination of their Contribution(s)
+   with the Work to which such Contribution(s) was submitted. If You
+   institute patent litigation against any entity (including a
+   cross-claim or counterclaim in a lawsuit) alleging that the Work
+   or a Contribution incorporated within the Work constitutes direct
+   or contributory patent infringement, then any patent licenses
+   granted to You under this License for that Work shall terminate
+   as of the date such litigation is filed.
+
+4. Redistribution. You may reproduce and distribute copies of the
+   Work or Derivative Works thereof in any medium, with or without
+   modifications, and in Source or Object form, provided that You
+   meet the following conditions:
+
+   (a) You must give any other recipients of the Work or
+       Derivative Works a copy of this License; and
+
+   (b) You must cause any modified files to carry prominent notices
+       stating that You changed the files; and
+
+   (c) You must retain, in the Source form of any Derivative Works
+       that You distribute, all copyright, patent, trademark, and
+       attribution notices from the Source form of the Work,
+       excluding those notices that do not pertain to any part of
+       the Derivative Works; and
+
+   (d) If the Work includes a "NOTICE" text file as part of its
+       distribution, then any Derivative Works that You distribute must
+       include a readable copy of the attribution notices contained
+       within such NOTICE file, excluding those notices that do not
+       pertain to any part of the Derivative Works, in at least one
+       of the following places: within a NOTICE text file distributed
+       as part of the Derivative Works; within the Source form or
+       documentation, if provided along with the Derivative Works; or,
+       within a display generated by the Derivative Works, if and
+       wherever such third-party notices normally appear. The contents
+       of the NOTICE file are for informational purposes only and
+       do not modify the License. You may add Your own attribution
+       notices within Derivative Works that You distribute, alongside
+       or as an addendum to the NOTICE text from the Work, provided
+       that such additional attribution notices cannot be construed
+       as modifying the License.
+
+   You may add Your own copyright statement to Your modifications and
+   may provide additional or different license terms and conditions
+   for use, reproduction, or distribution of Your modifications, or
+   for any such Derivative Works as a whole, provided Your use,
+   reproduction, and distribution of the Work otherwise complies with
+   the conditions stated in this License.
+
+5. Submission of Contributions. Unless You explicitly state otherwise,
+   any Contribution intentionally submitted for inclusion in the Work
+   by You to the Licensor shall be under the terms and conditions of
+   this License, without any additional terms or conditions.
+   Notwithstanding the above, nothing herein shall supersede or modify
+   the terms of any separate license agreement you may have executed
+   with Licensor regarding such Contributions.
+
+6. Trademarks. This License does not grant permission to use the trade
+   names, trademarks, service marks, or product names of the Licensor,
+   except as required for reasonable and customary use in describing the
+   origin of the Work and reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty. Unless required by applicable law or
+   agreed to in writing, Licensor provides the Work (and each
+   Contributor provides its Contributions) on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+   implied, including, without limitation, any warranties or conditions
+   of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+   PARTICULAR PURPOSE. You are solely responsible for determining the
+   appropriateness of using or redistributing the Work and assume any
+   risks associated with Your exercise of permissions under this License.
+
+8. Limitation of Liability. In no event and under no legal theory,
+   whether in tort (including negligence), contract, or otherwise,
+   unless required by applicable law (such as deliberate and grossly
+   negligent acts) or agreed to in writing, shall any Contributor be
+   liable to You for damages, including any direct, indirect, special,
+   incidental, or consequential damages of any character arising as a
+   result of this License or out of the use or inability to use the
+   Work (including but not limited to damages for loss of goodwill,
+   work stoppage, computer failure or malfunction, or any and all
+   other commercial damages or losses), even if such Contributor
+   has been advised of the possibility of such damages.
+
+9. Accepting Warranty or Additional Liability. While redistributing
+   the Work or Derivative Works thereof, You may choose to offer,
+   and charge a fee for, acceptance of support, warranty, indemnity,
+   or other liability obligations and/or rights consistent with this
+   License. However, in accepting such obligations, You may act only
+   on Your own behalf and on Your sole responsibility, not on behalf
+   of any other Contributor, and only if You agree to indemnify,
+   defend, and hold each Contributor harmless for any liability
+   incurred by, or claims asserted against, such Contributor by reason
+   of your accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
+
+APPENDIX: How to apply the Apache License to your work.
+
+   To apply the Apache License to your work, attach the following
+   boilerplate notice, with the fields enclosed by brackets "[]"
+   replaced with your own identifying information. (Don't include
+   the brackets!)  The text should be enclosed in the appropriate
+   comment syntax for the file format. We also recommend that a
+   file or class name and description of purpose be included on the
+   same "printed page" as the copyright notice for easier
+   identification within third-party archives.
+
+Copyright [yyyy] [name of copyright owner]
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+	http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
new file mode 100644
--- /dev/null
+++ b/third_party/rust/ucd-util/LICENSE-MIT
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2015 Andrew Gallant
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
new file mode 100644
--- /dev/null
+++ b/third_party/rust/ucd-util/README.md
@@ -0,0 +1,23 @@
+ucd-util
+========
+A library for small auxiliary Unicode functions. This includes things like
+symbol or character name canonicalization, ideograph name generation and helper
+functions for searching property name and value tables.
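+
+A minimal usage sketch (illustrative only; check the documentation below for
+the exact API):
+
+```rust
+extern crate ucd_util;
+
+fn main() {
+    // Normalize a symbolic property name according to UAX44-LM3 before
+    // looking it up in a caller-supplied property table.
+    let mut name = "Line_Break".to_string();
+    ucd_util::symbolic_name_normalize(&mut name);
+    assert_eq!(name, "linebreak");
+}
+```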
+
+[![Linux build status](https://api.travis-ci.org/BurntSushi/ucd-generate.png)](https://travis-ci.org/BurntSushi/ucd-generate)
+[![](http://meritbadge.herokuapp.com/ucd-generate)](https://crates.io/crates/ucd-util)
+
+
+### Documentation
+
+https://docs.rs/ucd-util
+
+
+### License
+
+This project is licensed under either of
+ * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or
+   http://www.apache.org/licenses/LICENSE-2.0)
+ * MIT license ([LICENSE-MIT](LICENSE-MIT) or
+   http://opensource.org/licenses/MIT)
+at your option.
new file mode 100644
--- /dev/null
+++ b/third_party/rust/ucd-util/src/hangul.rs
@@ -0,0 +1,105 @@
+use unicode_tables::jamo_short_name::JAMO_SHORT_NAME;
+
+// This implementation should correspond to the algorithms described in
+// Unicode 3.12.
+
+/// A set of ranges that corresponds to the set of all Hangul syllable
+/// codepoints.
+///
+/// These ranges are defined in Unicode 4.8 Table 4-13.
+pub const RANGE_HANGUL_SYLLABLE: &'static [(u32, u32)] = &[
+    (0xAC00, 0xD7A3),
+];
+
+const S_BASE: u32 = 0xAC00;
+const L_BASE: u32 = 0x1100;
+const V_BASE: u32 = 0x1161;
+const T_BASE: u32 = 0x11A7;
+const T_COUNT: u32 = 28;
+const N_COUNT: u32 = 588;
+
+/// Return the character name of the given precomposed Hangul codepoint.
+///
+/// If the given codepoint does not correspond to a precomposed Hangul
+/// codepoint in the inclusive range `AC00..D7A3`, then this returns `None`.
+///
+/// This implements the algorithms described in Unicode 3.12 and Unicode 4.8.
+pub fn hangul_name(cp: u32) -> Option<String> {
+    let mut name = "HANGUL SYLLABLE ".to_string();
+    let (lpart, vpart, tpart) = match hangul_full_canonical_decomposition(cp) {
+        None => return None,
+        Some(triple) => triple,
+    };
+
+    name.push_str(jamo_short_name(lpart));
+    name.push_str(jamo_short_name(vpart));
+    name.push_str(tpart.map_or("", jamo_short_name));
+    Some(name)
+}
+
+/// Return the full canonical decomposition of the given precomposed Hangul
+/// codepoint.
+///
+/// If the decomposition does not have any trailing consonant, then the third
+/// part of the tuple returned is `None`.
+///
+/// If the given codepoint does not correspond to a precomposed Hangul
+/// codepoint in the inclusive range `AC00..D7A3`, then this returns `None`.
+///
+/// This implements the algorithms described in Unicode 3.12 and Unicode 4.8.
+pub fn hangul_full_canonical_decomposition(
+    cp: u32,
+) -> Option<(u32, u32, Option<u32>)> {
+    if !(0xAC00 <= cp && cp <= 0xD7A3) {
+        return None;
+    }
+
+    let s_index = cp - S_BASE;
+    let l_index = s_index / N_COUNT;
+    let v_index = (s_index % N_COUNT) / T_COUNT;
+    let t_index = s_index % T_COUNT;
+
+    let l_part = L_BASE + l_index;
+    let v_part = V_BASE + v_index;
+    let t_part =
+        if t_index == 0 {
+            None
+        } else {
+            Some(T_BASE + t_index)
+        };
+    Some((l_part, v_part, t_part))
+}
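+
+// Worked example (illustrative, not part of upstream): for U+D4DB,
+//   s_index = 0xD4DB - 0xAC00    = 10459
+//   l_index = 10459 / 588        = 17 -> 0x1100 + 17 = 0x1111
+//   v_index = (10459 % 588) / 28 = 16 -> 0x1161 + 16 = 0x1171
+//   t_index = 10459 % 28         = 15 -> 0x11A7 + 15 = 0x11B6
+// which agrees with the `canon_decomp` test below.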
+
+fn jamo_short_name(cp: u32) -> &'static str {
+    let i = JAMO_SHORT_NAME.binary_search_by_key(&cp, |p| p.0).unwrap();
+    JAMO_SHORT_NAME[i].1
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{hangul_name, hangul_full_canonical_decomposition};
+
+    #[test]
+    fn canon_decomp() {
+        assert_eq!(
+            hangul_full_canonical_decomposition(0xD4DB),
+            Some((0x1111, 0x1171, Some(0x11B6))));
+    }
+
+    #[test]
+    fn name() {
+        assert_eq!(hangul_name(0xD4DB).unwrap(), "HANGUL SYLLABLE PWILH");
+    }
+
+    #[test]
+    fn all() {
+        for cp in 0xAC00..(0xD7A3 + 1) {
+            hangul_name(cp).unwrap();
+        }
+    }
+
+    #[test]
+    fn invalid() {
+        assert!(hangul_name(0).is_none());
+    }
+}
new file mode 100644
--- /dev/null
+++ b/third_party/rust/ucd-util/src/ideograph.rs
@@ -0,0 +1,83 @@
+/// A set of ranges that corresponds to the set of all ideograph codepoints.
+///
+/// These ranges are defined in Unicode 4.8 Table 4-13.
+pub const RANGE_IDEOGRAPH: &'static [(u32, u32)] = &[
+    (0x3400, 0x4DB5),
+    (0x4E00, 0x9FD5),
+    (0x20000, 0x2A6D6),
+    (0x2A700, 0x2B734),
+    (0x2B740, 0x2B81D),
+    (0x2B820, 0x2CEA1),
+    (0x17000, 0x187EC),
+    (0xF900, 0xFA6D),
+    (0xFA70, 0xFAD9),
+    (0x2F800, 0x2FA1D),
+];
+
+/// Return the character name of the given ideograph codepoint.
+///
+/// This operation is only defined on ideographic codepoints. This includes
+/// precisely the following inclusive ranges:
+///
+/// * `3400..4DB5`
+/// * `4E00..9FD5`
+/// * `20000..2A6D6`
+/// * `2A700..2B734`
+/// * `2B740..2B81D`
+/// * `2B820..2CEA1`
+/// * `17000..187EC`
+/// * `F900..FA6D`
+/// * `FA70..FAD9`
+/// * `2F800..2FA1D`
+///
+/// If the given codepoint is not in any of the above ranges, then `None` is
+/// returned.
+///
+/// This implements the algorithm described in Unicode 4.8.
+pub fn ideograph_name(cp: u32) -> Option<String> {
+    // This match should be in sync with the `RANGE_IDEOGRAPH` constant.
+    match cp {
+        0x3400...0x4DB5
+        | 0x4E00...0x9FD5
+        | 0x20000...0x2A6D6
+        | 0x2A700...0x2B734
+        | 0x2B740...0x2B81D
+        | 0x2B820...0x2CEA1 => {
+            Some(format!("CJK UNIFIED IDEOGRAPH-{:04X}", cp))
+        }
+        0x17000...0x187EC => {
+            Some(format!("TANGUT IDEOGRAPH-{:04X}", cp))
+        }
+        0xF900...0xFA6D | 0xFA70...0xFAD9 | 0x2F800...0x2FA1D => {
+            Some(format!("CJK COMPATIBILITY IDEOGRAPH-{:04X}", cp))
+        }
+        _ => None,
+    }
+}
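+
+// Illustrative example (not part of upstream): 0x20021 falls in the
+// 20000..2A6D6 range above, so its derived name is
+// "CJK UNIFIED IDEOGRAPH-20021"; `{:04X}` pads to at least four hex digits
+// but prints all five for codepoints above U+FFFF.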
+
+#[cfg(test)]
+mod tests {
+    use super::ideograph_name;
+
+    #[test]
+    fn name() {
+        assert_eq!(
+            ideograph_name(0x4E00).unwrap(),
+            "CJK UNIFIED IDEOGRAPH-4E00");
+        assert_eq!(
+            ideograph_name(0x9FD5).unwrap(),
+            "CJK UNIFIED IDEOGRAPH-9FD5");
+        assert_eq!(
+            ideograph_name(0x17000).unwrap(),
+            "TANGUT IDEOGRAPH-17000");
+        assert_eq!(
+            ideograph_name(0xF900).unwrap(),
+            "CJK COMPATIBILITY IDEOGRAPH-F900");
+    }
+
+    #[test]
+    fn invalid() {
+        assert!(ideograph_name(0).is_none());
+    }
+}
new file mode 100644
--- /dev/null
+++ b/third_party/rust/ucd-util/src/lib.rs
@@ -0,0 +1,28 @@
+/*!
+The `ucd-util` crate contains a smattering of utility functions that implement
+various algorithms specified by Unicode. There is no specific goal for
+exhaustiveness. Instead, implementations should be added on an as-needed basis.
+
+A *current* design constraint of this crate is that it should not bring in any
+large Unicode tables. For example, to use the various property name and value
+canonicalization functions, you'll need to supply your own table, which can
+be generated using `ucd-generate`.
+*/
+
+#![deny(missing_docs)]
+
+mod hangul;
+mod ideograph;
+mod name;
+mod property;
+mod unicode_tables;
+
+pub use hangul::{
+    RANGE_HANGUL_SYLLABLE, hangul_name, hangul_full_canonical_decomposition,
+};
+pub use ideograph::{RANGE_IDEOGRAPH, ideograph_name};
+pub use name::{character_name_normalize, symbolic_name_normalize};
+pub use property::{
+    PropertyTable, PropertyValueTable, PropertyValues,
+    canonical_property_name, property_values, canonical_property_value,
+};
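+
+// Illustrative sketch (not part of upstream): as noted in the crate docs
+// above, callers supply their own sorted tables, e.g. generated with
+// `ucd-generate property-names`. From a dependent crate:
+//
+//     static MY_PROPS: ucd_util::PropertyTable =
+//         &[("alpha", "Alphabetic"), ("gc", "General_Category")];
+//     assert_eq!(ucd_util::canonical_property_name(MY_PROPS, "gc"),
+//                Some("General_Category"));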
new file mode 100644
--- /dev/null
+++ b/third_party/rust/ucd-util/src/name.rs
@@ -0,0 +1,180 @@
+/// Normalize the given character name in place according to UAX44-LM2.
+///
+/// See: http://unicode.org/reports/tr44/#UAX44-LM2
+pub fn character_name_normalize(string: &mut String) {
+    let bytes = unsafe {
+        // SAFETY: `character_name_normalize_bytes` guarantees that
+        // `bytes[..len]` is valid UTF-8.
+        string.as_mut_vec()
+    };
+    let len = character_name_normalize_bytes(bytes).len();
+    bytes.truncate(len);
+}
+
+/// Normalize the given character name in place according to UAX44-LM2.
+///
+/// The slice returned is guaranteed to be valid UTF-8 for all possible values
+/// of `slice`.
+///
+/// See: http://unicode.org/reports/tr44/#UAX44-LM2
+fn character_name_normalize_bytes(slice: &mut [u8]) -> &mut [u8] {
+    // According to Unicode 4.8, character names consist only of Latin
+    // capital letters A to Z, ASCII digits, ASCII space or ASCII hyphen.
+    // Therefore, we can do very simplistic case folding and operate on the
+    // raw bytes, since everything is ASCII. Note that we don't actually know
+    // whether `slice` is all ASCII or not, so we drop all non-ASCII bytes.
+    let mut next_write = 0;
+    let mut prev_space = true;
+    for i in 0..slice.len() {
+        // SAFETY ARGUMENT: To guarantee that the resulting slice is valid
+        // UTF-8, we ensure that the slice contains only ASCII bytes. In
+        // particular, we drop every non-ASCII byte from the normalized string.
+        let b = slice[i];
+        if b == b' ' {
+            prev_space = true;
+            continue;
+        } else if b == b'_' {
+            // Drop the underscore.
+        } else if b == b'-' {
+            let mut keep_hyphen = prev_space || slice.get(i+1) == Some(&b' ');
+            // We want to keep the hyphen only if it isn't medial, which means
+            // it has at least one adjacent space character. However, there
+            // is one exception. We need to keep the hyphen in the character
+            // (U+1180) named `HANGUL JUNGSEONG O-E`. So we check for that
+            // here.
+            let rest_e = slice[i+1..] == b"E"[..] || slice[i+1..] == b"e"[..];
+            if !keep_hyphen && rest_e {
+                keep_hyphen = slice[..next_write] == b"hanguljungseongo"[..];
+            }
+            if keep_hyphen {
+                slice[next_write] = b;
+                next_write += 1;
+            }
+        } else if b'A' <= b && b <= b'Z' {
+            slice[next_write] = b + (b'a' - b'A');
+            next_write += 1;
+        } else if b <= 0x7F {
+            slice[next_write] = b;
+            next_write += 1;
+        }
+        prev_space = false;
+    }
+    &mut slice[..next_write]
+}
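+
+// Illustrative applications of the rules above (not part of upstream):
+//   "TIBETAN MARK TSA -PHRU" -> "tibetanmarktsa-phru" (hyphen kept: adjacent space)
+//   "ZERO-WIDTH SPACE"       -> "zerowidthspace"      (medial hyphen dropped)
+//   "HANGUL JUNGSEONG O-E"   -> "hanguljungseongo-e"  (special-cased exception)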
+
+/// Normalize the given symbolic name in place according to UAX44-LM3.
+///
+/// A "symbolic name" typically corresponds to property names and property
+/// value aliases. Note, though, that it should not be applied to property
+/// string values.
+///
+/// See: http://unicode.org/reports/tr44/#UAX44-LM3
+pub fn symbolic_name_normalize(string: &mut String) {
+    let bytes = unsafe {
+        // SAFETY: `symbolic_name_normalize_bytes` guarantees that
+        // `bytes[..len]` is valid UTF-8.
+        string.as_mut_vec()
+    };
+    let len = symbolic_name_normalize_bytes(bytes).len();
+    bytes.truncate(len);
+}
+
+/// Normalize the given symbolic name in place according to UAX44-LM3.
+///
+/// A "symbolic name" typically corresponds to property names and property
+/// value aliases. Note, though, that it should not be applied to property
+/// string values.
+///
+/// The slice returned is guaranteed to be valid UTF-8 for all possible values
+/// of `slice`.
+///
+/// See: http://unicode.org/reports/tr44/#UAX44-LM3
+fn symbolic_name_normalize_bytes(slice: &mut [u8]) -> &mut [u8] {
+    // I couldn't find a place in the standard that specified that property
+    // names/aliases had a particular structure (unlike character names), but
+    // we assume that it's ASCII only and drop anything that isn't ASCII.
+    let mut start = 0;
+    if slice.len() >= 2 {
+        // Ignore any "is" prefix.
+        let starts_with_is =
+            slice[0..2] == b"is"[..]
+            || slice[0..2] == b"IS"[..]
+            || slice[0..2] == b"iS"[..]
+            || slice[0..2] == b"Is"[..];
+        if starts_with_is {
+            start = 2;
+        }
+    }
+    let mut next_write = 0;
+    for i in start..slice.len() {
+        // SAFETY ARGUMENT: To guarantee that the resulting slice is valid
+        // UTF-8, we ensure that the slice contains only ASCII bytes. In
+        // particular, we drop every non-ASCII byte from the normalized string.
+        let b = slice[i];
+        if b == b' ' || b == b'_' || b == b'-' {
+            continue;
+        } else if b'A' <= b && b <= b'Z' {
+            slice[next_write] = b + (b'a' - b'A');
+            next_write += 1;
+        } else if b <= 0x7F {
+            slice[next_write] = b;
+            next_write += 1;
+        }
+    }
+    &mut slice[..next_write]
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{
+        character_name_normalize, character_name_normalize_bytes,
+        symbolic_name_normalize, symbolic_name_normalize_bytes,
+    };
+
+    fn char_norm(s: &str) -> String {
+        let mut s = s.to_string();
+        character_name_normalize(&mut s);
+        s
+    }
+
+    fn sym_norm(s: &str) -> String {
+        let mut s = s.to_string();
+        symbolic_name_normalize(&mut s);
+        s
+    }
+
+    #[test]
+    fn char_normalize() {
+        assert_eq!(char_norm("HANGUL JUNGSEONG O-E"), "hanguljungseongo-e");
+        assert_eq!(char_norm("zero-width space"), "zerowidthspace");
+        assert_eq!(char_norm("zerowidthspace"), "zerowidthspace");
+        assert_eq!(char_norm("ZERO WIDTH SPACE"), "zerowidthspace");
+        assert_eq!(char_norm("TIBETAN MARK TSA -PHRU"), "tibetanmarktsa-phru");
+    }
+
+    #[test]
+    fn sym_normalize() {
+        assert_eq!(sym_norm("Line_Break"), "linebreak");
+        assert_eq!(sym_norm("Line-break"), "linebreak");
+        assert_eq!(sym_norm("linebreak"), "linebreak");
+        assert_eq!(sym_norm("BA"), "ba");
+        assert_eq!(sym_norm("ba"), "ba");
+        assert_eq!(sym_norm("Greek"), "greek");
+        assert_eq!(sym_norm("isGreek"), "greek");
+        assert_eq!(sym_norm("IS_Greek"), "greek");
+    }
+
+    #[test]
+    fn valid_utf8_character() {
+        let mut x = b"abc\xFFxyz".to_vec();
+        let y = character_name_normalize_bytes(&mut x);
+        assert_eq!(y, b"abcxyz");
+    }
+
+    #[test]
+    fn valid_utf8_symbolic() {
+        let mut x = b"abc\xFFxyz".to_vec();
+        let y = symbolic_name_normalize_bytes(&mut x);
+        assert_eq!(y, b"abcxyz");
+    }
+}
new file mode 100644
--- /dev/null
+++ b/third_party/rust/ucd-util/src/property.rs
@@ -0,0 +1,124 @@
+/// The type of a property name table.
+///
+/// A property name table is a sequence of sorted tuples, where the first
+/// value in each tuple is a normalized property name and the second value of
+/// each tuple is the corresponding canonical property name.
+pub type PropertyTable = &'static [(&'static str, &'static str)];
+
+/// Find the canonical property name for the given normalized property name.
+///
+/// If no such property exists, then `None` is returned.
+///
+/// The normalized property name must have been normalized according to
+/// UAX44 LM3, which can be done using `symbolic_name_normalize`.
+pub fn canonical_property_name(
+    property_table: PropertyTable,
+    normalized_property_name: &str,
+) -> Option<&'static str> {
+    property_table
+        .binary_search_by_key(&normalized_property_name, |&(n, _)| n)
+        .ok()
+        .map(|i| property_table[i].1)
+}
+
+/// Type of a property value table.
+///
+/// A property value table maps property names to a mapping of property values,
+/// where the mapping of property values is represented by a sequence of
+/// tuples. The first element of each tuple is a normalized property value
+/// while the second element of each tuple is the corresponding canonical
+/// property value.
+///
+/// Note that a property value table only includes values for properties that
+/// are catalogs, enumerations or binary properties. Properties that have
+/// string values (such as case or decomposition mappings), numeric values
+/// or are miscellaneous are not represented in this table.
+pub type PropertyValueTable = &'static [(&'static str, PropertyValues)];
+
+/// A mapping of property values for a specific property.
+///
+/// The first element of each tuple is a normalized property value while the
+/// second element of each tuple is the corresponding canonical property
+/// value.
+pub type PropertyValues = &'static [(&'static str, &'static str)];
+
+/// Find the set of possible property values for a given property.
+///
+/// The set returned is a mapping expressed as a sorted list of tuples.
+/// The first element of each tuple is a normalized property value while the
+/// second element of each tuple is the corresponding canonical property
+/// value.
+///
+/// If no such property exists, then `None` is returned.
+///
+/// The given property name must be in its canonical form, which can be
+/// found using `canonical_property_name`.
+pub fn property_values(
+    property_value_table: PropertyValueTable,
+    canonical_property_name: &str,
+) -> Option<PropertyValues> {
+    property_value_table
+        .binary_search_by_key(&canonical_property_name, |&(n, _)| n)
+        .ok()
+        .map(|i| property_value_table[i].1)
+}
+
+/// Find the canonical property value for the given normalized property
+/// value.
+///
+/// The given property values should correspond to the values for the property
+/// under question, which can be found using `property_values`.
+///
+/// If no such property value exists, then `None` is returned.
+///
+/// The normalized property value must have been normalized according to
+/// UAX44 LM3, which can be done using `symbolic_name_normalize`.
+pub fn canonical_property_value(
+    property_values: PropertyValues,
+    normalized_property_value: &str,
+) -> Option<&'static str> {
+    // This is cute. The types line up, so why not?
+    canonical_property_name(property_values, normalized_property_value)
+}
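+
+// Illustrative lookup chain (not part of upstream), tying the helpers above
+// together; `NAMES` and `VALUES` stand for caller-supplied tables like the
+// ones exercised in the tests below:
+//
+//     let canon = canonical_property_name(NAMES, "wspace");   // Some("White_Space")
+//     let vals = property_values(VALUES, canon.unwrap());     // value map
+//     let yes = canonical_property_value(vals.unwrap(), "t"); // Some("Yes")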
+
+
+#[cfg(test)]
+mod tests {
+    use unicode_tables::property_names::PROPERTY_NAMES;
+    use unicode_tables::property_values::PROPERTY_VALUES;
+
+    use super::{
+        canonical_property_name, property_values, canonical_property_value,
+    };
+
+    #[test]
+    fn canonical_property_name_1() {
+        assert_eq!(
+            canonical_property_name(PROPERTY_NAMES, "gc"),
+            Some("General_Category"));
+        assert_eq!(
+            canonical_property_name(PROPERTY_NAMES, "generalcategory"),
+            Some("General_Category"));
+        assert_eq!(
+            canonical_property_name(PROPERTY_NAMES, "g c"),
+            None);
+    }
+
+    #[test]
+    fn property_values_1() {
+        assert_eq!(
+            property_values(PROPERTY_VALUES, "White_Space"),
+            Some(&[
+                ("f", "No"), ("false", "No"), ("n", "No"), ("no", "No"),
+                ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"),
+            ][..]));
+    }
+
+    #[test]
+    fn canonical_property_value_1() {
+        let values = property_values(PROPERTY_VALUES, "White_Space").unwrap();
+        assert_eq!(canonical_property_value(values, "false"), Some("No"));
+        assert_eq!(canonical_property_value(values, "t"), Some("Yes"));
+        assert_eq!(canonical_property_value(values, "F"), None);
+    }
+}
new file mode 100644
--- /dev/null
+++ b/third_party/rust/ucd-util/src/unicode_tables/jamo_short_name.rs
@@ -0,0 +1,22 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+//  ucd-generate jamo-short-name tmp/ucd-10.0.0/
+//
+// ucd-generate is available on crates.io.
+
+pub const JAMO_SHORT_NAME: &'static [(u32, &'static str)] = &[
+  (4352, "G"), (4353, "GG"), (4354, "N"), (4355, "D"), (4356, "DD"),
+  (4357, "R"), (4358, "M"), (4359, "B"), (4360, "BB"), (4361, "S"),
+  (4362, "SS"), (4363, ""), (4364, "J"), (4365, "JJ"), (4366, "C"),
+  (4367, "K"), (4368, "T"), (4369, "P"), (4370, "H"), (4449, "A"),
+  (4450, "AE"), (4451, "YA"), (4452, "YAE"), (4453, "EO"), (4454, "E"),
+  (4455, "YEO"), (4456, "YE"), (4457, "O"), (4458, "WA"), (4459, "WAE"),
+  (4460, "OE"), (4461, "YO"), (4462, "U"), (4463, "WEO"), (4464, "WE"),
+  (4465, "WI"), (4466, "YU"), (4467, "EU"), (4468, "YI"), (4469, "I"),
+  (4520, "G"), (4521, "GG"), (4522, "GS"), (4523, "N"), (4524, "NJ"),
+  (4525, "NH"), (4526, "D"), (4527, "L"), (4528, "LG"), (4529, "LM"),
+  (4530, "LB"), (4531, "LS"), (4532, "LT"), (4533, "LP"), (4534, "LH"),
+  (4535, "M"), (4536, "B"), (4537, "BS"), (4538, "S"), (4539, "SS"),
+  (4540, "NG"), (4541, "J"), (4542, "C"), (4543, "K"), (4544, "T"),
+  (4545, "P"), (4546, "H"),
+];
new file mode 100644
--- /dev/null
+++ b/third_party/rust/ucd-util/src/unicode_tables/mod.rs
@@ -0,0 +1,5 @@
+pub mod jamo_short_name;
+#[cfg(test)]
+pub mod property_names;
+#[cfg(test)]
+pub mod property_values;
new file mode 100644
--- /dev/null
+++ b/third_party/rust/ucd-util/src/unicode_tables/property_names.rs
@@ -0,0 +1,146 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+//  ucd-generate property-names tmp/ucd-10.0.0/
+//
+// ucd-generate is available on crates.io.
+
+pub const PROPERTY_NAMES: &'static [(&'static str, &'static str)] = &[
+  ("age", "Age"), ("ahex", "ASCII_Hex_Digit"), ("alpha", "Alphabetic"),
+  ("alphabetic", "Alphabetic"), ("asciihexdigit", "ASCII_Hex_Digit"),
+  ("bc", "Bidi_Class"), ("bidic", "Bidi_Control"),
+  ("bidiclass", "Bidi_Class"), ("bidicontrol", "Bidi_Control"),
+  ("bidim", "Bidi_Mirrored"), ("bidimirrored", "Bidi_Mirrored"),
+  ("bidimirroringglyph", "Bidi_Mirroring_Glyph"),
+  ("bidipairedbracket", "Bidi_Paired_Bracket"),
+  ("bidipairedbrackettype", "Bidi_Paired_Bracket_Type"), ("blk", "Block"),
+  ("block", "Block"), ("bmg", "Bidi_Mirroring_Glyph"),
+  ("bpb", "Bidi_Paired_Bracket"), ("bpt", "Bidi_Paired_Bracket_Type"),
+  ("c", "ISO_Comment"),
+  ("canonicalcombiningclass", "Canonical_Combining_Class"),
+  ("cased", "Cased"), ("casefolding", "Case_Folding"),
+  ("caseignorable", "Case_Ignorable"), ("ccc", "Canonical_Combining_Class"),
+  ("ce", "Composition_Exclusion"), ("cf", "Case_Folding"),
+  ("changeswhencasefolded", "Changes_When_Casefolded"),
+  ("changeswhencasemapped", "Changes_When_Casemapped"),
+  ("changeswhenlowercased", "Changes_When_Lowercased"),
+  ("changeswhennfkccasefolded", "Changes_When_NFKC_Casefolded"),
+  ("changeswhentitlecased", "Changes_When_Titlecased"),
+  ("changeswhenuppercased", "Changes_When_Uppercased"),
+  ("ci", "Case_Ignorable"), ("cjkaccountingnumeric", "kAccountingNumeric"),
+  ("cjkcompatibilityvariant", "kCompatibilityVariant"),
+  ("cjkiicore", "kIICore"), ("cjkirggsource", "kIRG_GSource"),
+  ("cjkirghsource", "kIRG_HSource"), ("cjkirgjsource", "kIRG_JSource"),
+  ("cjkirgkpsource", "kIRG_KPSource"), ("cjkirgksource", "kIRG_KSource"),
+  ("cjkirgmsource", "kIRG_MSource"), ("cjkirgtsource", "kIRG_TSource"),
+  ("cjkirgusource", "kIRG_USource"), ("cjkirgvsource", "kIRG_VSource"),
+  ("cjkothernumeric", "kOtherNumeric"),
+  ("cjkprimarynumeric", "kPrimaryNumeric"), ("cjkrsunicode", "kRSUnicode"),
+  ("compex", "Full_Composition_Exclusion"),
+  ("compositionexclusion", "Composition_Exclusion"),
+  ("cwcf", "Changes_When_Casefolded"), ("cwcm", "Changes_When_Casemapped"),
+  ("cwkcf", "Changes_When_NFKC_Casefolded"),
+  ("cwl", "Changes_When_Lowercased"), ("cwt", "Changes_When_Titlecased"),
+  ("cwu", "Changes_When_Uppercased"), ("dash", "Dash"),
+  ("decompositionmapping", "Decomposition_Mapping"),
+  ("decompositiontype", "Decomposition_Type"),
+  ("defaultignorablecodepoint", "Default_Ignorable_Code_Point"),
+  ("dep", "Deprecated"), ("deprecated", "Deprecated"),
+  ("di", "Default_Ignorable_Code_Point"), ("dia", "Diacritic"),
+  ("diacritic", "Diacritic"), ("dm", "Decomposition_Mapping"),
+  ("dt", "Decomposition_Type"), ("ea", "East_Asian_Width"),
+  ("eastasianwidth", "East_Asian_Width"), ("expandsonnfc", "Expands_On_NFC"),
+  ("expandsonnfd", "Expands_On_NFD"), ("expandsonnfkc", "Expands_On_NFKC"),
+  ("expandsonnfkd", "Expands_On_NFKD"), ("ext", "Extender"),
+  ("extender", "Extender"), ("fcnfkc", "FC_NFKC_Closure"),
+  ("fcnfkcclosure", "FC_NFKC_Closure"),
+  ("fullcompositionexclusion", "Full_Composition_Exclusion"),
+  ("gc", "General_Category"), ("gcb", "Grapheme_Cluster_Break"),
+  ("generalcategory", "General_Category"), ("graphemebase", "Grapheme_Base"),
+  ("graphemeclusterbreak", "Grapheme_Cluster_Break"),
+  ("graphemeextend", "Grapheme_Extend"), ("graphemelink", "Grapheme_Link"),
+  ("grbase", "Grapheme_Base"), ("grext", "Grapheme_Extend"),
+  ("grlink", "Grapheme_Link"), ("hangulsyllabletype", "Hangul_Syllable_Type"),
+  ("hex", "Hex_Digit"), ("hexdigit", "Hex_Digit"),
+  ("hst", "Hangul_Syllable_Type"), ("hyphen", "Hyphen"),
+  ("idc", "ID_Continue"), ("idcontinue", "ID_Continue"),
+  ("ideo", "Ideographic"), ("ideographic", "Ideographic"),
+  ("ids", "ID_Start"), ("idsb", "IDS_Binary_Operator"),
+  ("idsbinaryoperator", "IDS_Binary_Operator"),
+  ("idst", "IDS_Trinary_Operator"), ("idstart", "ID_Start"),
+  ("idstrinaryoperator", "IDS_Trinary_Operator"),
+  ("indicpositionalcategory", "Indic_Positional_Category"),
+  ("indicsyllabiccategory", "Indic_Syllabic_Category"),
+  ("inpc", "Indic_Positional_Category"), ("insc", "Indic_Syllabic_Category"),
+  ("jamoshortname", "Jamo_Short_Name"), ("jg", "Joining_Group"),
+  ("joinc", "Join_Control"), ("joincontrol", "Join_Control"),
+  ("joininggroup", "Joining_Group"), ("joiningtype", "Joining_Type"),
+  ("jsn", "Jamo_Short_Name"), ("jt", "Joining_Type"),
+  ("kaccountingnumeric", "kAccountingNumeric"),
+  ("kcompatibilityvariant", "kCompatibilityVariant"), ("kiicore", "kIICore"),
+  ("kirggsource", "kIRG_GSource"), ("kirghsource", "kIRG_HSource"),
+  ("kirgjsource", "kIRG_JSource"), ("kirgkpsource", "kIRG_KPSource"),
+  ("kirgksource", "kIRG_KSource"), ("kirgmsource", "kIRG_MSource"),
+  ("kirgtsource", "kIRG_TSource"), ("kirgusource", "kIRG_USource"),
+  ("kirgvsource", "kIRG_VSource"), ("kothernumeric", "kOtherNumeric"),
+  ("kprimarynumeric", "kPrimaryNumeric"), ("krsunicode", "kRSUnicode"),
+  ("lb", "Line_Break"), ("lc", "Lowercase_Mapping"),
+  ("linebreak", "Line_Break"), ("loe", "Logical_Order_Exception"),
+  ("logicalorderexception", "Logical_Order_Exception"),
+  ("lower", "Lowercase"), ("lowercase", "Lowercase"),
+  ("lowercasemapping", "Lowercase_Mapping"), ("math", "Math"), ("na", "Name"),
+  ("na1", "Unicode_1_Name"), ("name", "Name"), ("namealias", "Name_Alias"),
+  ("nchar", "Noncharacter_Code_Point"), ("nfcqc", "NFC_Quick_Check"),
+  ("nfcquickcheck", "NFC_Quick_Check"), ("nfdqc", "NFD_Quick_Check"),
+  ("nfdquickcheck", "NFD_Quick_Check"), ("nfkccasefold", "NFKC_Casefold"),
+  ("nfkccf", "NFKC_Casefold"), ("nfkcqc", "NFKC_Quick_Check"),
+  ("nfkcquickcheck", "NFKC_Quick_Check"), ("nfkdqc", "NFKD_Quick_Check"),
+  ("nfkdquickcheck", "NFKD_Quick_Check"),
+  ("noncharactercodepoint", "Noncharacter_Code_Point"),
+  ("nt", "Numeric_Type"), ("numerictype", "Numeric_Type"),
+  ("numericvalue", "Numeric_Value"), ("nv", "Numeric_Value"),
+  ("oalpha", "Other_Alphabetic"), ("ocomment", "ISO_Comment"),
+  ("odi", "Other_Default_Ignorable_Code_Point"),
+  ("ogrext", "Other_Grapheme_Extend"), ("oidc", "Other_ID_Continue"),
+  ("oids", "Other_ID_Start"), ("olower", "Other_Lowercase"),
+  ("omath", "Other_Math"), ("otheralphabetic", "Other_Alphabetic"),
+  ("otherdefaultignorablecodepoint", "Other_Default_Ignorable_Code_Point"),
+  ("othergraphemeextend", "Other_Grapheme_Extend"),
+  ("otheridcontinue", "Other_ID_Continue"),
+  ("otheridstart", "Other_ID_Start"), ("otherlowercase", "Other_Lowercase"),
+  ("othermath", "Other_Math"), ("otheruppercase", "Other_Uppercase"),
+  ("oupper", "Other_Uppercase"), ("patsyn", "Pattern_Syntax"),
+  ("patternsyntax", "Pattern_Syntax"),
+  ("patternwhitespace", "Pattern_White_Space"),
+  ("patws", "Pattern_White_Space"), ("pcm", "Prepended_Concatenation_Mark"),
+  ("prependedconcatenationmark", "Prepended_Concatenation_Mark"),
+  ("qmark", "Quotation_Mark"), ("quotationmark", "Quotation_Mark"),
+  ("radical", "Radical"), ("regionalindicator", "Regional_Indicator"),
+  ("ri", "Regional_Indicator"), ("sb", "Sentence_Break"), ("sc", "Script"),
+  ("scf", "Simple_Case_Folding"), ("script", "Script"),
+  ("scriptextensions", "Script_Extensions"), ("scx", "Script_Extensions"),
+  ("sd", "Soft_Dotted"), ("sentencebreak", "Sentence_Break"),
+  ("sentenceterminal", "Sentence_Terminal"), ("sfc", "Simple_Case_Folding"),
+  ("simplecasefolding", "Simple_Case_Folding"),
+  ("simplelowercasemapping", "Simple_Lowercase_Mapping"),
+  ("simpletitlecasemapping", "Simple_Titlecase_Mapping"),
+  ("simpleuppercasemapping", "Simple_Uppercase_Mapping"),
+  ("slc", "Simple_Lowercase_Mapping"), ("softdotted", "Soft_Dotted"),
+  ("space", "White_Space"), ("stc", "Simple_Titlecase_Mapping"),
+  ("sterm", "Sentence_Terminal"), ("suc", "Simple_Uppercase_Mapping"),
+  ("tc", "Titlecase_Mapping"), ("term", "Terminal_Punctuation"),
+  ("terminalpunctuation", "Terminal_Punctuation"),
+  ("titlecasemapping", "Titlecase_Mapping"), ("uc", "Uppercase_Mapping"),
+  ("uideo", "Unified_Ideograph"), ("unicode1name", "Unicode_1_Name"),
+  ("unicoderadicalstroke", "kRSUnicode"),
+  ("unifiedideograph", "Unified_Ideograph"), ("upper", "Uppercase"),
+  ("uppercase", "Uppercase"), ("uppercasemapping", "Uppercase_Mapping"),
+  ("urs", "kRSUnicode"), ("variationselector", "Variation_Selector"),
+  ("verticalorientation", "Vertical_Orientation"),
+  ("vo", "Vertical_Orientation"), ("vs", "Variation_Selector"),
+  ("wb", "Word_Break"), ("whitespace", "White_Space"),
+  ("wordbreak", "Word_Break"), ("wspace", "White_Space"),
+  ("xidc", "XID_Continue"), ("xidcontinue", "XID_Continue"),
+  ("xids", "XID_Start"), ("xidstart", "XID_Start"),
+  ("xonfc", "Expands_On_NFC"), ("xonfd", "Expands_On_NFD"),
+  ("xonfkc", "Expands_On_NFKC"), ("xonfkd", "Expands_On_NFKD"),
+];
new file mode 100644
--- /dev/null
+++ b/third_party/rust/ucd-util/src/unicode_tables/property_values.rs
@@ -0,0 +1,992 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+//  ucd-generate property-values tmp/ucd-10.0.0/
+//
+// ucd-generate is available on crates.io.
+
+pub const PROPERTY_VALUES: &'static [(&'static str, &'static [(&'static str, &'static str)])] = &[
+  ("ASCII_Hex_Digit", &[("f", "No"), ("false", "No"), ("n", "No"),
+  ("no", "No"), ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"),
+  ]),
+
+  ("Age", &[("1.1", "V1_1"), ("10.0", "V10_0"), ("2.0", "V2_0"),
+  ("2.1", "V2_1"), ("3.0", "V3_0"), ("3.1", "V3_1"), ("3.2", "V3_2"),
+  ("4.0", "V4_0"), ("4.1", "V4_1"), ("5.0", "V5_0"), ("5.1", "V5_1"),
+  ("5.2", "V5_2"), ("6.0", "V6_0"), ("6.1", "V6_1"), ("6.2", "V6_2"),
+  ("6.3", "V6_3"), ("7.0", "V7_0"), ("8.0", "V8_0"), ("9.0", "V9_0"),
+  ("na", "Unassigned"), ("unassigned", "Unassigned"), ("v100", "V10_0"),
+  ("v11", "V1_1"), ("v20", "V2_0"), ("v21", "V2_1"), ("v30", "V3_0"),
+  ("v31", "V3_1"), ("v32", "V3_2"), ("v40", "V4_0"), ("v41", "V4_1"),
+  ("v50", "V5_0"), ("v51", "V5_1"), ("v52", "V5_2"), ("v60", "V6_0"),
+  ("v61", "V6_1"), ("v62", "V6_2"), ("v63", "V6_3"), ("v70", "V7_0"),
+  ("v80", "V8_0"), ("v90", "V9_0"), ]),
+
+  ("Alphabetic", &[("f", "No"), ("false", "No"), ("n", "No"), ("no", "No"),
+  ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"), ]),
+
+  ("Bidi_Class", &[("al", "Arabic_Letter"), ("an", "Arabic_Number"),
+  ("arabicletter", "Arabic_Letter"), ("arabicnumber", "Arabic_Number"),
+  ("b", "Paragraph_Separator"), ("bn", "Boundary_Neutral"),
+  ("boundaryneutral", "Boundary_Neutral"),
+  ("commonseparator", "Common_Separator"), ("cs", "Common_Separator"),
+  ("en", "European_Number"), ("es", "European_Separator"),
+  ("et", "European_Terminator"), ("europeannumber", "European_Number"),
+  ("europeanseparator", "European_Separator"),
+  ("europeanterminator", "European_Terminator"),
+  ("firststrongisolate", "First_Strong_Isolate"),
+  ("fsi", "First_Strong_Isolate"), ("l", "Left_To_Right"),
+  ("lefttoright", "Left_To_Right"),
+  ("lefttorightembedding", "Left_To_Right_Embedding"),
+  ("lefttorightisolate", "Left_To_Right_Isolate"),
+  ("lefttorightoverride", "Left_To_Right_Override"),
+  ("lre", "Left_To_Right_Embedding"), ("lri", "Left_To_Right_Isolate"),
+  ("lro", "Left_To_Right_Override"), ("nonspacingmark", "Nonspacing_Mark"),
+  ("nsm", "Nonspacing_Mark"), ("on", "Other_Neutral"),
+  ("otherneutral", "Other_Neutral"),
+  ("paragraphseparator", "Paragraph_Separator"),
+  ("pdf", "Pop_Directional_Format"), ("pdi", "Pop_Directional_Isolate"),
+  ("popdirectionalformat", "Pop_Directional_Format"),
+  ("popdirectionalisolate", "Pop_Directional_Isolate"),
+  ("r", "Right_To_Left"), ("righttoleft", "Right_To_Left"),
+  ("righttoleftembedding", "Right_To_Left_Embedding"),
+  ("righttoleftisolate", "Right_To_Left_Isolate"),
+  ("righttoleftoverride", "Right_To_Left_Override"),
+  ("rle", "Right_To_Left_Embedding"), ("rli", "Right_To_Left_Isolate"),
+  ("rlo", "Right_To_Left_Override"), ("s", "Segment_Separator"),
+  ("segmentseparator", "Segment_Separator"), ("whitespace", "White_Space"),
+  ("ws", "White_Space"), ]),
+
+  ("Bidi_Control", &[("f", "No"), ("false", "No"), ("n", "No"), ("no", "No"),
+  ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"), ]),
+
+  ("Bidi_Mirrored", &[("f", "No"), ("false", "No"), ("n", "No"), ("no", "No"),
+  ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"), ]),
+
+  ("Bidi_Paired_Bracket_Type", &[("c", "Close"), ("close", "Close"),
+  ("n", "None"), ("none", "None"), ("o", "Open"), ("open", "Open"), ]),
+
+  ("Block", &[("adlam", "Adlam"), ("aegeannumbers", "Aegean_Numbers"),
+  ("ahom", "Ahom"), ("alchemical", "Alchemical_Symbols"),
+  ("alchemicalsymbols", "Alchemical_Symbols"),
+  ("alphabeticpf", "Alphabetic_Presentation_Forms"),
+  ("alphabeticpresentationforms", "Alphabetic_Presentation_Forms"),
+  ("anatolianhieroglyphs", "Anatolian_Hieroglyphs"),
+  ("ancientgreekmusic", "Ancient_Greek_Musical_Notation"),
+  ("ancientgreekmusicalnotation", "Ancient_Greek_Musical_Notation"),
+  ("ancientgreeknumbers", "Ancient_Greek_Numbers"),
+  ("ancientsymbols", "Ancient_Symbols"), ("arabic", "Arabic"),
+  ("arabicexta", "Arabic_Extended_A"),
+  ("arabicextendeda", "Arabic_Extended_A"),
+  ("arabicmath", "Arabic_Mathematical_Alphabetic_Symbols"),
+  ("arabicmathematicalalphabeticsymbols", "Arabic_Mathematical_Alphabetic_Symbols"),
+  ("arabicpfa", "Arabic_Presentation_Forms_A"),
+  ("arabicpfb", "Arabic_Presentation_Forms_B"),
+  ("arabicpresentationformsa", "Arabic_Presentation_Forms_A"),
+  ("arabicpresentationformsb", "Arabic_Presentation_Forms_B"),
+  ("arabicsup", "Arabic_Supplement"),
+  ("arabicsupplement", "Arabic_Supplement"), ("armenian", "Armenian"),
+  ("arrows", "Arrows"), ("ascii", "Basic_Latin"), ("avestan", "Avestan"),
+  ("balinese", "Balinese"), ("bamum", "Bamum"),
+  ("bamumsup", "Bamum_Supplement"), ("bamumsupplement", "Bamum_Supplement"),
+  ("basiclatin", "Basic_Latin"), ("bassavah", "Bassa_Vah"),
+  ("batak", "Batak"), ("bengali", "Bengali"), ("bhaiksuki", "Bhaiksuki"),
+  ("blockelements", "Block_Elements"), ("bopomofo", "Bopomofo"),
+  ("bopomofoext", "Bopomofo_Extended"),
+  ("bopomofoextended", "Bopomofo_Extended"), ("boxdrawing", "Box_Drawing"),
+  ("brahmi", "Brahmi"), ("braille", "Braille_Patterns"),
+  ("braillepatterns", "Braille_Patterns"), ("buginese", "Buginese"),
+  ("buhid", "Buhid"), ("byzantinemusic", "Byzantine_Musical_Symbols"),
+  ("byzantinemusicalsymbols", "Byzantine_Musical_Symbols"),
+  ("canadiansyllabics", "Unified_Canadian_Aboriginal_Syllabics"),
+  ("carian", "Carian"), ("caucasianalbanian", "Caucasian_Albanian"),
+  ("chakma", "Chakma"), ("cham", "Cham"), ("cherokee", "Cherokee"),
+  ("cherokeesup", "Cherokee_Supplement"),
+  ("cherokeesupplement", "Cherokee_Supplement"),
+  ("cjk", "CJK_Unified_Ideographs"), ("cjkcompat", "CJK_Compatibility"),
+  ("cjkcompatforms", "CJK_Compatibility_Forms"),
+  ("cjkcompatibility", "CJK_Compatibility"),
+  ("cjkcompatibilityforms", "CJK_Compatibility_Forms"),
+  ("cjkcompatibilityideographs", "CJK_Compatibility_Ideographs"),
+  ("cjkcompatibilityideographssupplement", "CJK_Compatibility_Ideographs_Supplement"),
+  ("cjkcompatideographs", "CJK_Compatibility_Ideographs"),
+  ("cjkcompatideographssup", "CJK_Compatibility_Ideographs_Supplement"),
+  ("cjkexta", "CJK_Unified_Ideographs_Extension_A"),
+  ("cjkextb", "CJK_Unified_Ideographs_Extension_B"),
+  ("cjkextc", "CJK_Unified_Ideographs_Extension_C"),
+  ("cjkextd", "CJK_Unified_Ideographs_Extension_D"),
+  ("cjkexte", "CJK_Unified_Ideographs_Extension_E"),
+  ("cjkextf", "CJK_Unified_Ideographs_Extension_F"),
+  ("cjkradicalssup", "CJK_Radicals_Supplement"),
+  ("cjkradicalssupplement", "CJK_Radicals_Supplement"),
+  ("cjkstrokes", "CJK_Strokes"),
+  ("cjksymbols", "CJK_Symbols_And_Punctuation"),
+  ("cjksymbolsandpunctuation", "CJK_Symbols_And_Punctuation"),
+  ("cjkunifiedideographs", "CJK_Unified_Ideographs"),
+  ("cjkunifiedideographsextensiona", "CJK_Unified_Ideographs_Extension_A"),
+  ("cjkunifiedideographsextensionb", "CJK_Unified_Ideographs_Extension_B"),
+  ("cjkunifiedideographsextensionc", "CJK_Unified_Ideographs_Extension_C"),
+  ("cjkunifiedideographsextensiond", "CJK_Unified_Ideographs_Extension_D"),
+  ("cjkunifiedideographsextensione", "CJK_Unified_Ideographs_Extension_E"),
+  ("cjkunifiedideographsextensionf", "CJK_Unified_Ideographs_Extension_F"),
+  ("combiningdiacriticalmarks", "Combining_Diacritical_Marks"),
+  ("combiningdiacriticalmarksextended", "Combining_Diacritical_Marks_Extended"),
+  ("combiningdiacriticalmarksforsymbols", "Combining_Diacritical_Marks_For_Symbols"),
+  ("combiningdiacriticalmarkssupplement", "Combining_Diacritical_Marks_Supplement"),
+  ("combininghalfmarks", "Combining_Half_Marks"),
+  ("combiningmarksforsymbols", "Combining_Diacritical_Marks_For_Symbols"),
+  ("commonindicnumberforms", "Common_Indic_Number_Forms"),
+  ("compatjamo", "Hangul_Compatibility_Jamo"),
+  ("controlpictures", "Control_Pictures"), ("coptic", "Coptic"),
+  ("copticepactnumbers", "Coptic_Epact_Numbers"),
+  ("countingrod", "Counting_Rod_Numerals"),
+  ("countingrodnumerals", "Counting_Rod_Numerals"),
+  ("cuneiform", "Cuneiform"),
+  ("cuneiformnumbers", "Cuneiform_Numbers_And_Punctuation"),
+  ("cuneiformnumbersandpunctuation", "Cuneiform_Numbers_And_Punctuation"),
+  ("currencysymbols", "Currency_Symbols"),
+  ("cypriotsyllabary", "Cypriot_Syllabary"), ("cyrillic", "Cyrillic"),
+  ("cyrillicexta", "Cyrillic_Extended_A"),
+  ("cyrillicextb", "Cyrillic_Extended_B"),
+  ("cyrillicextc", "Cyrillic_Extended_C"),
+  ("cyrillicextendeda", "Cyrillic_Extended_A"),
+  ("cyrillicextendedb", "Cyrillic_Extended_B"),
+  ("cyrillicextendedc", "Cyrillic_Extended_C"),
+  ("cyrillicsup", "Cyrillic_Supplement"),
+  ("cyrillicsupplement", "Cyrillic_Supplement"),
+  ("cyrillicsupplementary", "Cyrillic_Supplement"), ("deseret", "Deseret"),
+  ("devanagari", "Devanagari"), ("devanagariext", "Devanagari_Extended"),
+  ("devanagariextended", "Devanagari_Extended"),
+  ("diacriticals", "Combining_Diacritical_Marks"),
+  ("diacriticalsext", "Combining_Diacritical_Marks_Extended"),
+  ("diacriticalsforsymbols", "Combining_Diacritical_Marks_For_Symbols"),
+  ("diacriticalssup", "Combining_Diacritical_Marks_Supplement"),
+  ("dingbats", "Dingbats"), ("domino", "Domino_Tiles"),
+  ("dominotiles", "Domino_Tiles"), ("duployan", "Duployan"),
+  ("earlydynasticcuneiform", "Early_Dynastic_Cuneiform"),
+  ("egyptianhieroglyphs", "Egyptian_Hieroglyphs"), ("elbasan", "Elbasan"),
+  ("emoticons", "Emoticons"), ("enclosedalphanum", "Enclosed_Alphanumerics"),
+  ("enclosedalphanumerics", "Enclosed_Alphanumerics"),
+  ("enclosedalphanumericsupplement", "Enclosed_Alphanumeric_Supplement"),
+  ("enclosedalphanumsup", "Enclosed_Alphanumeric_Supplement"),
+  ("enclosedcjk", "Enclosed_CJK_Letters_And_Months"),
+  ("enclosedcjklettersandmonths", "Enclosed_CJK_Letters_And_Months"),
+  ("enclosedideographicsup", "Enclosed_Ideographic_Supplement"),
+  ("enclosedideographicsupplement", "Enclosed_Ideographic_Supplement"),
+  ("ethiopic", "Ethiopic"), ("ethiopicext", "Ethiopic_Extended"),
+  ("ethiopicexta", "Ethiopic_Extended_A"),
+  ("ethiopicextended", "Ethiopic_Extended"),
+  ("ethiopicextendeda", "Ethiopic_Extended_A"),
+  ("ethiopicsup", "Ethiopic_Supplement"),
+  ("ethiopicsupplement", "Ethiopic_Supplement"),
+  ("generalpunctuation", "General_Punctuation"),
+  ("geometricshapes", "Geometric_Shapes"),
+  ("geometricshapesext", "Geometric_Shapes_Extended"),
+  ("geometricshapesextended", "Geometric_Shapes_Extended"),
+  ("georgian", "Georgian"), ("georgiansup", "Georgian_Supplement"),
+  ("georgiansupplement", "Georgian_Supplement"), ("glagolitic", "Glagolitic"),
+  ("glagoliticsup", "Glagolitic_Supplement"),
+  ("glagoliticsupplement", "Glagolitic_Supplement"), ("gothic", "Gothic"),
+  ("grantha", "Grantha"), ("greek", "Greek_And_Coptic"),
+  ("greekandcoptic", "Greek_And_Coptic"), ("greekext", "Greek_Extended"),
+  ("greekextended", "Greek_Extended"), ("gujarati", "Gujarati"),
+  ("gurmukhi", "Gurmukhi"),
+  ("halfandfullforms", "Halfwidth_And_Fullwidth_Forms"),
+  ("halfmarks", "Combining_Half_Marks"),
+  ("halfwidthandfullwidthforms", "Halfwidth_And_Fullwidth_Forms"),
+  ("hangul", "Hangul_Syllables"),
+  ("hangulcompatibilityjamo", "Hangul_Compatibility_Jamo"),
+  ("hanguljamo", "Hangul_Jamo"),
+  ("hanguljamoextendeda", "Hangul_Jamo_Extended_A"),
+  ("hanguljamoextendedb", "Hangul_Jamo_Extended_B"),
+  ("hangulsyllables", "Hangul_Syllables"), ("hanunoo", "Hanunoo"),
+  ("hatran", "Hatran"), ("hebrew", "Hebrew"),
+  ("highprivateusesurrogates", "High_Private_Use_Surrogates"),
+  ("highpusurrogates", "High_Private_Use_Surrogates"),
+  ("highsurrogates", "High_Surrogates"), ("hiragana", "Hiragana"),
+  ("idc", "Ideographic_Description_Characters"),
+  ("ideographicdescriptioncharacters", "Ideographic_Description_Characters"),
+  ("ideographicsymbols", "Ideographic_Symbols_And_Punctuation"),
+  ("ideographicsymbolsandpunctuation", "Ideographic_Symbols_And_Punctuation"),
+  ("imperialaramaic", "Imperial_Aramaic"),
+  ("indicnumberforms", "Common_Indic_Number_Forms"),
+  ("inscriptionalpahlavi", "Inscriptional_Pahlavi"),
+  ("inscriptionalparthian", "Inscriptional_Parthian"),
+  ("ipaext", "IPA_Extensions"), ("ipaextensions", "IPA_Extensions"),
+  ("jamo", "Hangul_Jamo"), ("jamoexta", "Hangul_Jamo_Extended_A"),
+  ("jamoextb", "Hangul_Jamo_Extended_B"), ("javanese", "Javanese"),
+  ("kaithi", "Kaithi"), ("kanaexta", "Kana_Extended_A"),
+  ("kanaextendeda", "Kana_Extended_A"), ("kanasup", "Kana_Supplement"),
+  ("kanasupplement", "Kana_Supplement"), ("kanbun", "Kanbun"),
+  ("kangxi", "Kangxi_Radicals"), ("kangxiradicals", "Kangxi_Radicals"),
+  ("kannada", "Kannada"), ("katakana", "Katakana"),
+  ("katakanaext", "Katakana_Phonetic_Extensions"),
+  ("katakanaphoneticextensions", "Katakana_Phonetic_Extensions"),
+  ("kayahli", "Kayah_Li"), ("kharoshthi", "Kharoshthi"), ("khmer", "Khmer"),
+  ("khmersymbols", "Khmer_Symbols"), ("khojki", "Khojki"),
+  ("khudawadi", "Khudawadi"), ("lao", "Lao"),
+  ("latin1", "Latin_1_Supplement"), ("latin1sup", "Latin_1_Supplement"),
+  ("latin1supplement", "Latin_1_Supplement"),
+  ("latinexta", "Latin_Extended_A"),
+  ("latinextadditional", "Latin_Extended_Additional"),
+  ("latinextb", "Latin_Extended_B"), ("latinextc", "Latin_Extended_C"),
+  ("latinextd", "Latin_Extended_D"), ("latinexte", "Latin_Extended_E"),
+  ("latinextendeda", "Latin_Extended_A"),
+  ("latinextendedadditional", "Latin_Extended_Additional"),
+  ("latinextendedb", "Latin_Extended_B"),
+  ("latinextendedc", "Latin_Extended_C"),
+  ("latinextendedd", "Latin_Extended_D"),
+  ("latinextendede", "Latin_Extended_E"), ("lepcha", "Lepcha"),
+  ("letterlikesymbols", "Letterlike_Symbols"), ("limbu", "Limbu"),
+  ("lineara", "Linear_A"), ("linearbideograms", "Linear_B_Ideograms"),
+  ("linearbsyllabary", "Linear_B_Syllabary"), ("lisu", "Lisu"),
+  ("lowsurrogates", "Low_Surrogates"), ("lycian", "Lycian"),
+  ("lydian", "Lydian"), ("mahajani", "Mahajani"),
+  ("mahjong", "Mahjong_Tiles"), ("mahjongtiles", "Mahjong_Tiles"),
+  ("malayalam", "Malayalam"), ("mandaic", "Mandaic"),
+  ("manichaean", "Manichaean"), ("marchen", "Marchen"),
+  ("masaramgondi", "Masaram_Gondi"),
+  ("mathalphanum", "Mathematical_Alphanumeric_Symbols"),
+  ("mathematicalalphanumericsymbols", "Mathematical_Alphanumeric_Symbols"),
+  ("mathematicaloperators", "Mathematical_Operators"),
+  ("mathoperators", "Mathematical_Operators"),
+  ("meeteimayek", "Meetei_Mayek"),
+  ("meeteimayekext", "Meetei_Mayek_Extensions"),
+  ("meeteimayekextensions", "Meetei_Mayek_Extensions"),
+  ("mendekikakui", "Mende_Kikakui"), ("meroiticcursive", "Meroitic_Cursive"),
+  ("meroitichieroglyphs", "Meroitic_Hieroglyphs"), ("miao", "Miao"),
+  ("miscarrows", "Miscellaneous_Symbols_And_Arrows"),
+  ("miscellaneousmathematicalsymbolsa", "Miscellaneous_Mathematical_Symbols_A"),
+  ("miscellaneousmathematicalsymbolsb", "Miscellaneous_Mathematical_Symbols_B"),
+  ("miscellaneoussymbols", "Miscellaneous_Symbols"),
+  ("miscellaneoussymbolsandarrows", "Miscellaneous_Symbols_And_Arrows"),
+  ("miscellaneoussymbolsandpictographs", "Miscellaneous_Symbols_And_Pictographs"),
+  ("miscellaneoustechnical", "Miscellaneous_Technical"),
+  ("miscmathsymbolsa", "Miscellaneous_Mathematical_Symbols_A"),
+  ("miscmathsymbolsb", "Miscellaneous_Mathematical_Symbols_B"),
+  ("miscpictographs", "Miscellaneous_Symbols_And_Pictographs"),
+  ("miscsymbols", "Miscellaneous_Symbols"),
+  ("misctechnical", "Miscellaneous_Technical"), ("modi", "Modi"),
+  ("modifierletters", "Spacing_Modifier_Letters"),
+  ("modifiertoneletters", "Modifier_Tone_Letters"),
+  ("mongolian", "Mongolian"), ("mongoliansup", "Mongolian_Supplement"),
+  ("mongoliansupplement", "Mongolian_Supplement"), ("mro", "Mro"),
+  ("multani", "Multani"), ("music", "Musical_Symbols"),
+  ("musicalsymbols", "Musical_Symbols"), ("myanmar", "Myanmar"),
+  ("myanmarexta", "Myanmar_Extended_A"),
+  ("myanmarextb", "Myanmar_Extended_B"),
+  ("myanmarextendeda", "Myanmar_Extended_A"),
+  ("myanmarextendedb", "Myanmar_Extended_B"), ("nabataean", "Nabataean"),
+  ("nb", "No_Block"), ("newa", "Newa"), ("newtailue", "New_Tai_Lue"),
+  ("nko", "NKo"), ("noblock", "No_Block"), ("numberforms", "Number_Forms"),
+  ("nushu", "Nushu"), ("ocr", "Optical_Character_Recognition"),
+  ("ogham", "Ogham"), ("olchiki", "Ol_Chiki"),
+  ("oldhungarian", "Old_Hungarian"), ("olditalic", "Old_Italic"),
+  ("oldnortharabian", "Old_North_Arabian"), ("oldpermic", "Old_Permic"),
+  ("oldpersian", "Old_Persian"), ("oldsoutharabian", "Old_South_Arabian"),
+  ("oldturkic", "Old_Turkic"),
+  ("opticalcharacterrecognition", "Optical_Character_Recognition"),
+  ("oriya", "Oriya"), ("ornamentaldingbats", "Ornamental_Dingbats"),
+  ("osage", "Osage"), ("osmanya", "Osmanya"), ("pahawhhmong", "Pahawh_Hmong"),
+  ("palmyrene", "Palmyrene"), ("paucinhau", "Pau_Cin_Hau"),
+  ("phagspa", "Phags_Pa"), ("phaistos", "Phaistos_Disc"),
+  ("phaistosdisc", "Phaistos_Disc"), ("phoenician", "Phoenician"),
+  ("phoneticext", "Phonetic_Extensions"),
+  ("phoneticextensions", "Phonetic_Extensions"),
+  ("phoneticextensionssupplement", "Phonetic_Extensions_Supplement"),
+  ("phoneticextsup", "Phonetic_Extensions_Supplement"),
+  ("playingcards", "Playing_Cards"), ("privateuse", "Private_Use_Area"),
+  ("privateusearea", "Private_Use_Area"),
+  ("psalterpahlavi", "Psalter_Pahlavi"), ("pua", "Private_Use_Area"),
+  ("punctuation", "General_Punctuation"), ("rejang", "Rejang"),
+  ("rumi", "Rumi_Numeral_Symbols"),
+  ("ruminumeralsymbols", "Rumi_Numeral_Symbols"), ("runic", "Runic"),
+  ("samaritan", "Samaritan"), ("saurashtra", "Saurashtra"),
+  ("sharada", "Sharada"), ("shavian", "Shavian"),
+  ("shorthandformatcontrols", "Shorthand_Format_Controls"),
+  ("siddham", "Siddham"), ("sinhala", "Sinhala"),
+  ("sinhalaarchaicnumbers", "Sinhala_Archaic_Numbers"),
+  ("smallforms", "Small_Form_Variants"),
+  ("smallformvariants", "Small_Form_Variants"),
+  ("sorasompeng", "Sora_Sompeng"), ("soyombo", "Soyombo"),
+  ("spacingmodifierletters", "Spacing_Modifier_Letters"),
+  ("specials", "Specials"), ("sundanese", "Sundanese"),
+  ("sundanesesup", "Sundanese_Supplement"),
+  ("sundanesesupplement", "Sundanese_Supplement"),
+  ("suparrowsa", "Supplemental_Arrows_A"),
+  ("suparrowsb", "Supplemental_Arrows_B"),
+  ("suparrowsc", "Supplemental_Arrows_C"),
+  ("superandsub", "Superscripts_And_Subscripts"),
+  ("superscriptsandsubscripts", "Superscripts_And_Subscripts"),
+  ("supmathoperators", "Supplemental_Mathematical_Operators"),
+  ("supplementalarrowsa", "Supplemental_Arrows_A"),
+  ("supplementalarrowsb", "Supplemental_Arrows_B"),
+  ("supplementalarrowsc", "Supplemental_Arrows_C"),
+  ("supplementalmathematicaloperators", "Supplemental_Mathematical_Operators"),
+  ("supplementalpunctuation", "Supplemental_Punctuation"),
+  ("supplementalsymbolsandpictographs", "Supplemental_Symbols_And_Pictographs"),
+  ("supplementaryprivateuseareaa", "Supplementary_Private_Use_Area_A"),
+  ("supplementaryprivateuseareab", "Supplementary_Private_Use_Area_B"),
+  ("suppuaa", "Supplementary_Private_Use_Area_A"),
+  ("suppuab", "Supplementary_Private_Use_Area_B"),
+  ("suppunctuation", "Supplemental_Punctuation"),
+  ("supsymbolsandpictographs", "Supplemental_Symbols_And_Pictographs"),
+  ("suttonsignwriting", "Sutton_SignWriting"),
+  ("sylotinagri", "Syloti_Nagri"), ("syriac", "Syriac"),
+  ("syriacsup", "Syriac_Supplement"),
+  ("syriacsupplement", "Syriac_Supplement"), ("tagalog", "Tagalog"),
+  ("tagbanwa", "Tagbanwa"), ("tags", "Tags"), ("taile", "Tai_Le"),
+  ("taitham", "Tai_Tham"), ("taiviet", "Tai_Viet"),
+  ("taixuanjing", "Tai_Xuan_Jing_Symbols"),
+  ("taixuanjingsymbols", "Tai_Xuan_Jing_Symbols"), ("takri", "Takri"),
+  ("tamil", "Tamil"), ("tangut", "Tangut"),
+  ("tangutcomponents", "Tangut_Components"), ("telugu", "Telugu"),
+  ("thaana", "Thaana"), ("thai", "Thai"), ("tibetan", "Tibetan"),
+  ("tifinagh", "Tifinagh"), ("tirhuta", "Tirhuta"),
+  ("transportandmap", "Transport_And_Map_Symbols"),
+  ("transportandmapsymbols", "Transport_And_Map_Symbols"),
+  ("ucas", "Unified_Canadian_Aboriginal_Syllabics"),
+  ("ucasext", "Unified_Canadian_Aboriginal_Syllabics_Extended"),
+  ("ugaritic", "Ugaritic"),
+  ("unifiedcanadianaboriginalsyllabics", "Unified_Canadian_Aboriginal_Syllabics"),
+  ("unifiedcanadianaboriginalsyllabicsextended", "Unified_Canadian_Aboriginal_Syllabics_Extended"),
+  ("vai", "Vai"), ("variationselectors", "Variation_Selectors"),
+  ("variationselectorssupplement", "Variation_Selectors_Supplement"),
+  ("vedicext", "Vedic_Extensions"), ("vedicextensions", "Vedic_Extensions"),
+  ("verticalforms", "Vertical_Forms"), ("vs", "Variation_Selectors"),
+  ("vssup", "Variation_Selectors_Supplement"), ("warangciti", "Warang_Citi"),
+  ("yijing", "Yijing_Hexagram_Symbols"),
+  ("yijinghexagramsymbols", "Yijing_Hexagram_Symbols"),
+  ("yiradicals", "Yi_Radicals"), ("yisyllables", "Yi_Syllables"),
+  ("zanabazarsquare", "Zanabazar_Square"), ]),
+
+  ("Canonical_Combining_Class", &[("", "Iota_Subscript"),
+  ("0", "Not_Reordered"), ("1", "Overlay"), ("10", "CCC10"),
+  ("103", "CCC103"), ("107", "CCC107"), ("11", "CCC11"), ("118", "CCC118"),
+  ("12", "CCC12"), ("122", "CCC122"), ("129", "CCC129"), ("13", "CCC13"),
+  ("130", "CCC130"), ("132", "CCC132"), ("133", "CCC133"), ("14", "CCC14"),
+  ("15", "CCC15"), ("16", "CCC16"), ("17", "CCC17"), ("18", "CCC18"),
+  ("19", "CCC19"), ("20", "CCC20"), ("200", "Attached_Below_Left"),
+  ("202", "Attached_Below"), ("21", "CCC21"), ("214", "Attached_Above"),
+  ("216", "Attached_Above_Right"), ("218", "Below_Left"), ("22", "CCC22"),
+  ("220", "Below"), ("222", "Below_Right"), ("224", "Left"), ("226", "Right"),
+  ("228", "Above_Left"), ("23", "CCC23"), ("230", "Above"),
+  ("232", "Above_Right"), ("233", "Double_Below"), ("234", "Double_Above"),
+  ("24", "CCC24"), ("240", "Iota_Subscript"), ("25", "CCC25"),
+  ("26", "CCC26"), ("27", "CCC27"), ("28", "CCC28"), ("29", "CCC29"),
+  ("30", "CCC30"), ("31", "CCC31"), ("32", "CCC32"), ("33", "CCC33"),
+  ("34", "CCC34"), ("35", "CCC35"), ("36", "CCC36"), ("7", "Nukta"),
+  ("8", "Kana_Voicing"), ("84", "CCC84"), ("9", "Virama"), ("91", "CCC91"),
+  ("a", "Above"), ("above", "Above"), ("aboveleft", "Above_Left"),
+  ("aboveright", "Above_Right"), ("al", "Above_Left"), ("ar", "Above_Right"),
+  ("ata", "Attached_Above"), ("atar", "Attached_Above_Right"),
+  ("atb", "Attached_Below"), ("atbl", "Attached_Below_Left"),
+  ("attachedabove", "Attached_Above"),
+  ("attachedaboveright", "Attached_Above_Right"),
+  ("attachedbelow", "Attached_Below"),
+  ("attachedbelowleft", "Attached_Below_Left"), ("b", "Below"),
+  ("below", "Below"), ("belowleft", "Below_Left"),
+  ("belowright", "Below_Right"), ("bl", "Below_Left"), ("br", "Below_Right"),
+  ("ccc10", "CCC10"), ("ccc103", "CCC103"), ("ccc107", "CCC107"),
+  ("ccc11", "CCC11"), ("ccc118", "CCC118"), ("ccc12", "CCC12"),
+  ("ccc122", "CCC122"), ("ccc129", "CCC129"), ("ccc13", "CCC13"),
+  ("ccc130", "CCC130"), ("ccc132", "CCC132"), ("ccc133", "CCC133"),
+  ("ccc14", "CCC14"), ("ccc15", "CCC15"), ("ccc16", "CCC16"),
+  ("ccc17", "CCC17"), ("ccc18", "CCC18"), ("ccc19", "CCC19"),
+  ("ccc20", "CCC20"), ("ccc21", "CCC21"), ("ccc22", "CCC22"),
+  ("ccc23", "CCC23"), ("ccc24", "CCC24"), ("ccc25", "CCC25"),
+  ("ccc26", "CCC26"), ("ccc27", "CCC27"), ("ccc28", "CCC28"),
+  ("ccc29", "CCC29"), ("ccc30", "CCC30"), ("ccc31", "CCC31"),
+  ("ccc32", "CCC32"), ("ccc33", "CCC33"), ("ccc34", "CCC34"),
+  ("ccc35", "CCC35"), ("ccc36", "CCC36"), ("ccc84", "CCC84"),
+  ("ccc91", "CCC91"), ("da", "Double_Above"), ("db", "Double_Below"),
+  ("doubleabove", "Double_Above"), ("doublebelow", "Double_Below"),
+  ("iotasubscript", "Iota_Subscript"), ("kanavoicing", "Kana_Voicing"),
+  ("kv", "Kana_Voicing"), ("l", "Left"), ("left", "Left"), ("nk", "Nukta"),
+  ("notreordered", "Not_Reordered"), ("nr", "Not_Reordered"),
+  ("nukta", "Nukta"), ("ov", "Overlay"), ("overlay", "Overlay"),
+  ("r", "Right"), ("right", "Right"), ("virama", "Virama"), ("vr", "Virama"),
+  ]),
+
+  ("Case_Ignorable", &[("f", "No"), ("false", "No"), ("n", "No"),
+  ("no", "No"), ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"),
+  ]),
+
+  ("Cased", &[("f", "No"), ("false", "No"), ("n", "No"), ("no", "No"),
+  ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"), ]),
+
+  ("Changes_When_Casefolded", &[("f", "No"), ("false", "No"), ("n", "No"),
+  ("no", "No"), ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"),
+  ]),
+
+  ("Changes_When_Casemapped", &[("f", "No"), ("false", "No"), ("n", "No"),
+  ("no", "No"), ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"),
+  ]),
+
+  ("Changes_When_Lowercased", &[("f", "No"), ("false", "No"), ("n", "No"),
+  ("no", "No"), ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"),
+  ]),
+
+  ("Changes_When_NFKC_Casefolded", &[("f", "No"), ("false", "No"),
+  ("n", "No"), ("no", "No"), ("t", "Yes"), ("true", "Yes"), ("y", "Yes"),
+  ("yes", "Yes"), ]),
+
+  ("Changes_When_Titlecased", &[("f", "No"), ("false", "No"), ("n", "No"),
+  ("no", "No"), ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"),
+  ]),
+
+  ("Changes_When_Uppercased", &[("f", "No"), ("false", "No"), ("n", "No"),
+  ("no", "No"), ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"),
+  ]),
+
+  ("Composition_Exclusion", &[("f", "No"), ("false", "No"), ("n", "No"),
+  ("no", "No"), ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"),
+  ]),
+
+  ("Dash", &[("f", "No"), ("false", "No"), ("n", "No"), ("no", "No"),
+  ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"), ]),
+
+  ("Decomposition_Type", &[("can", "Canonical"), ("canonical", "Canonical"),
+  ("circle", "Circle"), ("com", "Compat"), ("compat", "Compat"),
+  ("enc", "Circle"), ("fin", "Final"), ("final", "Final"), ("font", "Font"),
+  ("fra", "Fraction"), ("fraction", "Fraction"), ("init", "Initial"),
+  ("initial", "Initial"), ("med", "Medial"), ("medial", "Medial"),
+  ("nar", "Narrow"), ("narrow", "Narrow"), ("nb", "Nobreak"),
+  ("nobreak", "Nobreak"), ("none", "None"), ("o", "Isolated"),
+  ("olated", "Isolated"), ("small", "Small"), ("sml", "Small"),
+  ("sqr", "Square"), ("square", "Square"), ("sub", "Sub"), ("sup", "Super"),
+  ("super", "Super"), ("vert", "Vertical"), ("vertical", "Vertical"),
+  ("wide", "Wide"), ]),
+
+  ("Default_Ignorable_Code_Point", &[("f", "No"), ("false", "No"),
+  ("n", "No"), ("no", "No"), ("t", "Yes"), ("true", "Yes"), ("y", "Yes"),
+  ("yes", "Yes"), ]),
+
+  ("Deprecated", &[("f", "No"), ("false", "No"), ("n", "No"), ("no", "No"),
+  ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"), ]),
+
+  ("Diacritic", &[("f", "No"), ("false", "No"), ("n", "No"), ("no", "No"),
+  ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"), ]),
+
+  ("East_Asian_Width", &[("a", "Ambiguous"), ("ambiguous", "Ambiguous"),
+  ("f", "Fullwidth"), ("fullwidth", "Fullwidth"), ("h", "Halfwidth"),
+  ("halfwidth", "Halfwidth"), ("n", "Neutral"), ("na", "Narrow"),
+  ("narrow", "Narrow"), ("neutral", "Neutral"), ("w", "Wide"),
+  ("wide", "Wide"), ]),
+
+  ("Expands_On_NFC", &[("f", "No"), ("false", "No"), ("n", "No"),
+  ("no", "No"), ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"),
+  ]),
+
+  ("Expands_On_NFD", &[("f", "No"), ("false", "No"), ("n", "No"),
+  ("no", "No"), ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"),
+  ]),
+
+  ("Expands_On_NFKC", &[("f", "No"), ("false", "No"), ("n", "No"),
+  ("no", "No"), ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"),
+  ]),
+
+  ("Expands_On_NFKD", &[("f", "No"), ("false", "No"), ("n", "No"),
+  ("no", "No"), ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"),
+  ]),
+
+  ("Extender", &[("f", "No"), ("false", "No"), ("n", "No"), ("no", "No"),
+  ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"), ]),
+
+  ("Full_Composition_Exclusion", &[("f", "No"), ("false", "No"), ("n", "No"),
+  ("no", "No"), ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"),
+  ]),
+
+  ("General_Category", &[("c", "Other"), ("casedletter", "Cased_Letter"),
+  ("cc", "Control"), ("cf", "Format"),
+  ("closepunctuation", "Close_Punctuation"), ("cn", "Unassigned"),
+  ("cntrl", "Control"), ("co", "Private_Use"), ("combiningmark", "Mark"),
+  ("connectorpunctuation", "Connector_Punctuation"), ("control", "Control"),
+  ("cs", "Surrogate"), ("currencysymbol", "Currency_Symbol"),
+  ("dashpunctuation", "Dash_Punctuation"),
+  ("decimalnumber", "Decimal_Number"), ("digit", "Decimal_Number"),
+  ("enclosingmark", "Enclosing_Mark"),
+  ("finalpunctuation", "Final_Punctuation"), ("format", "Format"),
+  ("initialpunctuation", "Initial_Punctuation"), ("l", "Letter"),
+  ("lc", "Cased_Letter"), ("letter", "Letter"),
+  ("letternumber", "Letter_Number"), ("lineseparator", "Line_Separator"),
+  ("ll", "Lowercase_Letter"), ("lm", "Modifier_Letter"),
+  ("lo", "Other_Letter"), ("lowercaseletter", "Lowercase_Letter"),
+  ("lt", "Titlecase_Letter"), ("lu", "Uppercase_Letter"), ("m", "Mark"),
+  ("mark", "Mark"), ("mathsymbol", "Math_Symbol"), ("mc", "Spacing_Mark"),
+  ("me", "Enclosing_Mark"), ("mn", "Nonspacing_Mark"),
+  ("modifierletter", "Modifier_Letter"),
+  ("modifiersymbol", "Modifier_Symbol"), ("n", "Number"),
+  ("nd", "Decimal_Number"), ("nl", "Letter_Number"), ("no", "Other_Number"),
+  ("nonspacingmark", "Nonspacing_Mark"), ("number", "Number"),
+  ("openpunctuation", "Open_Punctuation"), ("other", "Other"),
+  ("otherletter", "Other_Letter"), ("othernumber", "Other_Number"),
+  ("otherpunctuation", "Other_Punctuation"), ("othersymbol", "Other_Symbol"),
+  ("p", "Punctuation"), ("paragraphseparator", "Paragraph_Separator"),
+  ("pc", "Connector_Punctuation"), ("pd", "Dash_Punctuation"),
+  ("pe", "Close_Punctuation"), ("pf", "Final_Punctuation"),
+  ("pi", "Initial_Punctuation"), ("po", "Other_Punctuation"),
+  ("privateuse", "Private_Use"), ("ps", "Open_Punctuation"),
+  ("punct", "Punctuation"), ("punctuation", "Punctuation"), ("s", "Symbol"),
+  ("sc", "Currency_Symbol"), ("separator", "Separator"),
+  ("sk", "Modifier_Symbol"), ("sm", "Math_Symbol"), ("so", "Other_Symbol"),
+  ("spaceseparator", "Space_Separator"), ("spacingmark", "Spacing_Mark"),
+  ("surrogate", "Surrogate"), ("symbol", "Symbol"),
+  ("titlecaseletter", "Titlecase_Letter"), ("unassigned", "Unassigned"),
+  ("uppercaseletter", "Uppercase_Letter"), ("z", "Separator"),
+  ("zl", "Line_Separator"), ("zp", "Paragraph_Separator"),
+  ("zs", "Space_Separator"), ]),
+
+  ("Grapheme_Base", &[("f", "No"), ("false", "No"), ("n", "No"), ("no", "No"),
+  ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"), ]),
+
+  ("Grapheme_Cluster_Break", &[("cn", "Control"), ("control", "Control"),
+  ("cr", "CR"), ("eb", "E_Base"), ("ebase", "E_Base"),
+  ("ebasegaz", "E_Base_GAZ"), ("ebg", "E_Base_GAZ"), ("em", "E_Modifier"),
+  ("emodifier", "E_Modifier"), ("ex", "Extend"), ("extend", "Extend"),
+  ("gaz", "Glue_After_Zwj"), ("glueafterzwj", "Glue_After_Zwj"), ("l", "L"),
+  ("lf", "LF"), ("lv", "LV"), ("lvt", "LVT"), ("other", "Other"),
+  ("pp", "Prepend"), ("prepend", "Prepend"),
+  ("regionalindicator", "Regional_Indicator"), ("ri", "Regional_Indicator"),
+  ("sm", "SpacingMark"), ("spacingmark", "SpacingMark"), ("t", "T"),
+  ("v", "V"), ("xx", "Other"), ("zwj", "ZWJ"), ]),
+
+  ("Grapheme_Extend", &[("f", "No"), ("false", "No"), ("n", "No"),
+  ("no", "No"), ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"),
+  ]),
+
+  ("Grapheme_Link", &[("f", "No"), ("false", "No"), ("n", "No"), ("no", "No"),
+  ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"), ]),
+
+  ("Hangul_Syllable_Type", &[("l", "Leading_Jamo"),
+  ("leadingjamo", "Leading_Jamo"), ("lv", "LV_Syllable"),
+  ("lvsyllable", "LV_Syllable"), ("lvt", "LVT_Syllable"),
+  ("lvtsyllable", "LVT_Syllable"), ("na", "Not_Applicable"),
+  ("notapplicable", "Not_Applicable"), ("t", "Trailing_Jamo"),
+  ("trailingjamo", "Trailing_Jamo"), ("v", "Vowel_Jamo"),
+  ("voweljamo", "Vowel_Jamo"), ]),
+
+  ("Hex_Digit", &[("f", "No"), ("false", "No"), ("n", "No"), ("no", "No"),
+  ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"), ]),
+
+  ("Hyphen", &[("f", "No"), ("false", "No"), ("n", "No"), ("no", "No"),
+  ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"), ]),
+
+  ("IDS_Binary_Operator", &[("f", "No"), ("false", "No"), ("n", "No"),
+  ("no", "No"), ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"),
+  ]),
+
+  ("IDS_Trinary_Operator", &[("f", "No"), ("false", "No"), ("n", "No"),
+  ("no", "No"), ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"),
+  ]),
+
+  ("ID_Continue", &[("f", "No"), ("false", "No"), ("n", "No"), ("no", "No"),
+  ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"), ]),
+
+  ("ID_Start", &[("f", "No"), ("false", "No"), ("n", "No"), ("no", "No"),
+  ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"), ]),
+
+  ("Ideographic", &[("f", "No"), ("false", "No"), ("n", "No"), ("no", "No"),
+  ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"), ]),
+
+  ("Indic_Positional_Category", &[("bottom", "Bottom"),
+  ("bottomandleft", "Bottom_And_Left"),
+  ("bottomandright", "Bottom_And_Right"), ("left", "Left"),
+  ("leftandright", "Left_And_Right"), ("na", "NA"),
+  ("overstruck", "Overstruck"), ("right", "Right"), ("top", "Top"),
+  ("topandbottom", "Top_And_Bottom"),
+  ("topandbottomandright", "Top_And_Bottom_And_Right"),
+  ("topandleft", "Top_And_Left"),
+  ("topandleftandright", "Top_And_Left_And_Right"),
+  ("topandright", "Top_And_Right"), ("visualorderleft", "Visual_Order_Left"),
+  ]),
+
+  ("Indic_Syllabic_Category", &[("avagraha", "Avagraha"), ("bindu", "Bindu"),
+  ("brahmijoiningnumber", "Brahmi_Joining_Number"),
+  ("cantillationmark", "Cantillation_Mark"), ("consonant", "Consonant"),
+  ("consonantdead", "Consonant_Dead"), ("consonantfinal", "Consonant_Final"),
+  ("consonantheadletter", "Consonant_Head_Letter"),
+  ("consonantkiller", "Consonant_Killer"),
+  ("consonantmedial", "Consonant_Medial"),
+  ("consonantplaceholder", "Consonant_Placeholder"),
+  ("consonantprecedingrepha", "Consonant_Preceding_Repha"),
+  ("consonantprefixed", "Consonant_Prefixed"),
+  ("consonantsubjoined", "Consonant_Subjoined"),
+  ("consonantsucceedingrepha", "Consonant_Succeeding_Repha"),
+  ("consonantwithstacker", "Consonant_With_Stacker"),
+  ("geminationmark", "Gemination_Mark"),
+  ("invisiblestacker", "Invisible_Stacker"), ("joiner", "Joiner"),
+  ("modifyingletter", "Modifying_Letter"), ("nonjoiner", "Non_Joiner"),
+  ("nukta", "Nukta"), ("number", "Number"), ("numberjoiner", "Number_Joiner"),
+  ("other", "Other"), ("purekiller", "Pure_Killer"),
+  ("registershifter", "Register_Shifter"),
+  ("syllablemodifier", "Syllable_Modifier"), ("toneletter", "Tone_Letter"),
+  ("tonemark", "Tone_Mark"), ("virama", "Virama"), ("visarga", "Visarga"),
+  ("vowel", "Vowel"), ("voweldependent", "Vowel_Dependent"),
+  ("vowelindependent", "Vowel_Independent"), ]),
+
+  ("Jamo_Short_Name", &[("a", "A"), ("ae", "AE"), ("b", "B"), ("bb", "BB"),
+  ("bs", "BS"), ("c", "C"), ("d", "D"), ("dd", "DD"), ("e", "E"),
+  ("eo", "EO"), ("eu", "EU"), ("g", "G"), ("gg", "GG"), ("gs", "GS"),
+  ("h", "H"), ("i", "I"), ("j", "J"), ("jj", "JJ"), ("k", "K"), ("l", "L"),
+  ("lb", "LB"), ("lg", "LG"), ("lh", "LH"), ("lm", "LM"), ("lp", "LP"),
+  ("ls", "LS"), ("lt", "LT"), ("m", "M"), ("n", "N"), ("ng", "NG"),
+  ("nh", "NH"), ("nj", "NJ"), ("o", "O"), ("oe", "OE"), ("p", "P"),
+  ("r", "R"), ("s", "S"), ("ss", "SS"), ("t", "T"), ("u", "U"), ("wa", "WA"),
+  ("wae", "WAE"), ("we", "WE"), ("weo", "WEO"), ("wi", "WI"), ("ya", "YA"),
+  ("yae", "YAE"), ("ye", "YE"), ("yeo", "YEO"), ("yi", "YI"), ("yo", "YO"),
+  ("yu", "YU"), ]),
+
+  ("Join_Control", &[("f", "No"), ("false", "No"), ("n", "No"), ("no", "No"),
+  ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"), ]),
+
+  ("Joining_Group", &[("africanfeh", "African_Feh"),
+  ("africannoon", "African_Noon"), ("africanqaf", "African_Qaf"),
+  ("ain", "Ain"), ("alaph", "Alaph"), ("alef", "Alef"), ("beh", "Beh"),
+  ("beth", "Beth"), ("burushaskiyehbarree", "Burushaski_Yeh_Barree"),
+  ("dal", "Dal"), ("dalathrish", "Dalath_Rish"), ("e", "E"),
+  ("farsiyeh", "Farsi_Yeh"), ("fe", "Fe"), ("feh", "Feh"),
+  ("finalsemkath", "Final_Semkath"), ("gaf", "Gaf"), ("gamal", "Gamal"),
+  ("hah", "Hah"), ("hamzaonhehgoal", "Hamza_On_Heh_Goal"), ("he", "He"),
+  ("heh", "Heh"), ("hehgoal", "Heh_Goal"), ("heth", "Heth"), ("kaf", "Kaf"),
+  ("kaph", "Kaph"), ("khaph", "Khaph"), ("knottedheh", "Knotted_Heh"),
+  ("lam", "Lam"), ("lamadh", "Lamadh"), ("malayalambha", "Malayalam_Bha"),
+  ("malayalamja", "Malayalam_Ja"), ("malayalamlla", "Malayalam_Lla"),
+  ("malayalamllla", "Malayalam_Llla"), ("malayalamnga", "Malayalam_Nga"),
+  ("malayalamnna", "Malayalam_Nna"), ("malayalamnnna", "Malayalam_Nnna"),
+  ("malayalamnya", "Malayalam_Nya"), ("malayalamra", "Malayalam_Ra"),
+  ("malayalamssa", "Malayalam_Ssa"), ("malayalamtta", "Malayalam_Tta"),
+  ("manichaeanaleph", "Manichaean_Aleph"),
+  ("manichaeanayin", "Manichaean_Ayin"),
+  ("manichaeanbeth", "Manichaean_Beth"),
+  ("manichaeandaleth", "Manichaean_Daleth"),
+  ("manichaeandhamedh", "Manichaean_Dhamedh"),
+  ("manichaeanfive", "Manichaean_Five"),
+  ("manichaeangimel", "Manichaean_Gimel"),
+  ("manichaeanheth", "Manichaean_Heth"),
+  ("manichaeanhundred", "Manichaean_Hundred"),
+  ("manichaeankaph", "Manichaean_Kaph"),
+  ("manichaeanlamedh", "Manichaean_Lamedh"),
+  ("manichaeanmem", "Manichaean_Mem"), ("manichaeannun", "Manichaean_Nun"),
+  ("manichaeanone", "Manichaean_One"), ("manichaeanpe", "Manichaean_Pe"),
+  ("manichaeanqoph", "Manichaean_Qoph"),
+  ("manichaeanresh", "Manichaean_Resh"),
+  ("manichaeansadhe", "Manichaean_Sadhe"),
+  ("manichaeansamekh", "Manichaean_Samekh"),
+  ("manichaeantaw", "Manichaean_Taw"), ("manichaeanten", "Manichaean_Ten"),
+  ("manichaeanteth", "Manichaean_Teth"),
+  ("manichaeanthamedh", "Manichaean_Thamedh"),
+  ("manichaeantwenty", "Manichaean_Twenty"),
+  ("manichaeanwaw", "Manichaean_Waw"), ("manichaeanyodh", "Manichaean_Yodh"),
+  ("manichaeanzayin", "Manichaean_Zayin"), ("meem", "Meem"), ("mim", "Mim"),
+  ("nojoininggroup", "No_Joining_Group"), ("noon", "Noon"), ("nun", "Nun"),
+  ("nya", "Nya"), ("pe", "Pe"), ("qaf", "Qaf"), ("qaph", "Qaph"),
+  ("reh", "Reh"), ("reversedpe", "Reversed_Pe"),
+  ("rohingyayeh", "Rohingya_Yeh"), ("sad", "Sad"), ("sadhe", "Sadhe"),
+  ("seen", "Seen"), ("semkath", "Semkath"), ("shin", "Shin"),
+  ("straightwaw", "Straight_Waw"), ("swashkaf", "Swash_Kaf"),
+  ("syriacwaw", "Syriac_Waw"), ("tah", "Tah"), ("taw", "Taw"),
+  ("tehmarbuta", "Teh_Marbuta"), ("tehmarbutagoal", "Hamza_On_Heh_Goal"),
+  ("teth", "Teth"), ("waw", "Waw"), ("yeh", "Yeh"),
+  ("yehbarree", "Yeh_Barree"), ("yehwithtail", "Yeh_With_Tail"),
+  ("yudh", "Yudh"), ("yudhhe", "Yudh_He"), ("zain", "Zain"),
+  ("zhain", "Zhain"), ]),
+
+  ("Joining_Type", &[("c", "Join_Causing"), ("d", "Dual_Joining"),
+  ("dualjoining", "Dual_Joining"), ("joincausing", "Join_Causing"),
+  ("l", "Left_Joining"), ("leftjoining", "Left_Joining"),
+  ("nonjoining", "Non_Joining"), ("r", "Right_Joining"),
+  ("rightjoining", "Right_Joining"), ("t", "Transparent"),
+  ("transparent", "Transparent"), ("u", "Non_Joining"), ]),
+
+  ("Line_Break", &[("", "Infix_Numeric"), ("ai", "Ambiguous"),
+  ("al", "Alphabetic"), ("alphabetic", "Alphabetic"),
+  ("ambiguous", "Ambiguous"), ("b2", "Break_Both"), ("ba", "Break_After"),
+  ("bb", "Break_Before"), ("bk", "Mandatory_Break"),
+  ("breakafter", "Break_After"), ("breakbefore", "Break_Before"),
+  ("breakboth", "Break_Both"), ("breaksymbols", "Break_Symbols"),
+  ("carriagereturn", "Carriage_Return"), ("cb", "Contingent_Break"),
+  ("cj", "Conditional_Japanese_Starter"), ("cl", "Close_Punctuation"),
+  ("closeparenthesis", "Close_Parenthesis"),
+  ("closepunctuation", "Close_Punctuation"), ("cm", "Combining_Mark"),
+  ("combiningmark", "Combining_Mark"), ("complexcontext", "Complex_Context"),
+  ("conditionaljapanesestarter", "Conditional_Japanese_Starter"),
+  ("contingentbreak", "Contingent_Break"), ("cp", "Close_Parenthesis"),
+  ("cr", "Carriage_Return"), ("eb", "E_Base"), ("ebase", "E_Base"),
+  ("em", "E_Modifier"), ("emodifier", "E_Modifier"), ("ex", "Exclamation"),
+  ("exclamation", "Exclamation"), ("gl", "Glue"), ("glue", "Glue"),
+  ("h2", "H2"), ("h3", "H3"), ("hebrewletter", "Hebrew_Letter"),
+  ("hl", "Hebrew_Letter"), ("hy", "Hyphen"), ("hyphen", "Hyphen"),
+  ("id", "Ideographic"), ("ideographic", "Ideographic"),
+  ("in", "Inseparable"), ("infixnumeric", "Infix_Numeric"),
+  ("inseparable", "Inseparable"), ("inseperable", "Inseparable"),
+  ("jl", "JL"), ("jt", "JT"), ("jv", "JV"), ("lf", "Line_Feed"),
+  ("linefeed", "Line_Feed"), ("mandatorybreak", "Mandatory_Break"),
+  ("nextline", "Next_Line"), ("nl", "Next_Line"),
+  ("nonstarter", "Nonstarter"), ("ns", "Nonstarter"), ("nu", "Numeric"),
+  ("numeric", "Numeric"), ("op", "Open_Punctuation"),
+  ("openpunctuation", "Open_Punctuation"), ("po", "Postfix_Numeric"),
+  ("postfixnumeric", "Postfix_Numeric"), ("pr", "Prefix_Numeric"),
+  ("prefixnumeric", "Prefix_Numeric"), ("qu", "Quotation"),
+  ("quotation", "Quotation"), ("regionalindicator", "Regional_Indicator"),
+  ("ri", "Regional_Indicator"), ("sa", "Complex_Context"),
+  ("sg", "Surrogate"), ("sp", "Space"), ("space", "Space"),
+  ("surrogate", "Surrogate"), ("sy", "Break_Symbols"), ("unknown", "Unknown"),
+  ("wj", "Word_Joiner"), ("wordjoiner", "Word_Joiner"), ("xx", "Unknown"),
+  ("zw", "ZWSpace"), ("zwj", "ZWJ"), ("zwspace", "ZWSpace"), ]),
+
+  ("Logical_Order_Exception", &[("f", "No"), ("false", "No"), ("n", "No"),
+  ("no", "No"), ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"),
+  ]),
+
+  ("Lowercase", &[("f", "No"), ("false", "No"), ("n", "No"), ("no", "No"),
+  ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"), ]),
+
+  ("Math", &[("f", "No"), ("false", "No"), ("n", "No"), ("no", "No"),
+  ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"), ]),
+
+  ("NFC_Quick_Check", &[("m", "Maybe"), ("maybe", "Maybe"), ("n", "No"),
+  ("no", "No"), ("y", "Yes"), ("yes", "Yes"), ]),
+
+  ("NFD_Quick_Check", &[("n", "No"), ("no", "No"), ("y", "Yes"),
+  ("yes", "Yes"), ]),
+
+  ("NFKC_Quick_Check", &[("m", "Maybe"), ("maybe", "Maybe"), ("n", "No"),
+  ("no", "No"), ("y", "Yes"), ("yes", "Yes"), ]),
+
+  ("NFKD_Quick_Check", &[("n", "No"), ("no", "No"), ("y", "Yes"),
+  ("yes", "Yes"), ]),
+
+  ("Noncharacter_Code_Point", &[("f", "No"), ("false", "No"), ("n", "No"),
+  ("no", "No"), ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"),
+  ]),
+
+  ("Numeric_Type", &[("de", "Decimal"), ("decimal", "Decimal"),
+  ("di", "Digit"), ("digit", "Digit"), ("none", "None"), ("nu", "Numeric"),
+  ("numeric", "Numeric"), ]),
+
+  ("Other_Alphabetic", &[("f", "No"), ("false", "No"), ("n", "No"),
+  ("no", "No"), ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"),
+  ]),
+
+  ("Other_Default_Ignorable_Code_Point", &[("f", "No"), ("false", "No"),
+  ("n", "No"), ("no", "No"), ("t", "Yes"), ("true", "Yes"), ("y", "Yes"),
+  ("yes", "Yes"), ]),
+
+  ("Other_Grapheme_Extend", &[("f", "No"), ("false", "No"), ("n", "No"),
+  ("no", "No"), ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"),
+  ]),
+
+  ("Other_ID_Continue", &[("f", "No"), ("false", "No"), ("n", "No"),
+  ("no", "No"), ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"),
+  ]),
+
+  ("Other_ID_Start", &[("f", "No"), ("false", "No"), ("n", "No"),
+  ("no", "No"), ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"),
+  ]),
+
+  ("Other_Lowercase", &[("f", "No"), ("false", "No"), ("n", "No"),
+  ("no", "No"), ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"),
+  ]),
+
+  ("Other_Math", &[("f", "No"), ("false", "No"), ("n", "No"), ("no", "No"),
+  ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"), ]),
+
+  ("Other_Uppercase", &[("f", "No"), ("false", "No"), ("n", "No"),
+  ("no", "No"), ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"),
+  ]),
+
+  ("Pattern_Syntax", &[("f", "No"), ("false", "No"), ("n", "No"),
+  ("no", "No"), ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"),
+  ]),
+
+  ("Pattern_White_Space", &[("f", "No"), ("false", "No"), ("n", "No"),
+  ("no", "No"), ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"),
+  ]),
+
+  ("Prepended_Concatenation_Mark", &[("f", "No"), ("false", "No"),
+  ("n", "No"), ("no", "No"), ("t", "Yes"), ("true", "Yes"), ("y", "Yes"),
+  ("yes", "Yes"), ]),
+
+  ("Quotation_Mark", &[("f", "No"), ("false", "No"), ("n", "No"),
+  ("no", "No"), ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"),
+  ]),
+
+  ("Radical", &[("f", "No"), ("false", "No"), ("n", "No"), ("no", "No"),
+  ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"), ]),
+
+  ("Regional_Indicator", &[("f", "No"), ("false", "No"), ("n", "No"),
+  ("no", "No"), ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"),
+  ]),
+
+  ("Script", &[("adlam", "Adlam"), ("adlm", "Adlam"),
+  ("aghb", "Caucasian_Albanian"), ("ahom", "Ahom"),
+  ("anatolianhieroglyphs", "Anatolian_Hieroglyphs"), ("arab", "Arabic"),
+  ("arabic", "Arabic"), ("armenian", "Armenian"),
+  ("armi", "Imperial_Aramaic"), ("armn", "Armenian"), ("avestan", "Avestan"),
+  ("avst", "Avestan"), ("bali", "Balinese"), ("balinese", "Balinese"),
+  ("bamu", "Bamum"), ("bamum", "Bamum"), ("bass", "Bassa_Vah"),
+  ("bassavah", "Bassa_Vah"), ("batak", "Batak"), ("batk", "Batak"),
+  ("beng", "Bengali"), ("bengali", "Bengali"), ("bhaiksuki", "Bhaiksuki"),
+  ("bhks", "Bhaiksuki"), ("bopo", "Bopomofo"), ("bopomofo", "Bopomofo"),
+  ("brah", "Brahmi"), ("brahmi", "Brahmi"), ("brai", "Braille"),
+  ("braille", "Braille"), ("bugi", "Buginese"), ("buginese", "Buginese"),
+  ("buhd", "Buhid"), ("buhid", "Buhid"), ("cakm", "Chakma"),
+  ("canadianaboriginal", "Canadian_Aboriginal"),
+  ("cans", "Canadian_Aboriginal"), ("cari", "Carian"), ("carian", "Carian"),
+  ("caucasianalbanian", "Caucasian_Albanian"), ("chakma", "Chakma"),
+  ("cham", "Cham"), ("cher", "Cherokee"), ("cherokee", "Cherokee"),
+  ("common", "Common"), ("copt", "Coptic"), ("coptic", "Coptic"),
+  ("cprt", "Cypriot"), ("cuneiform", "Cuneiform"), ("cypriot", "Cypriot"),
+  ("cyrillic", "Cyrillic"), ("cyrl", "Cyrillic"), ("deseret", "Deseret"),
+  ("deva", "Devanagari"), ("devanagari", "Devanagari"), ("dsrt", "Deseret"),
+  ("dupl", "Duployan"), ("duployan", "Duployan"),
+  ("egyp", "Egyptian_Hieroglyphs"),
+  ("egyptianhieroglyphs", "Egyptian_Hieroglyphs"), ("elba", "Elbasan"),
+  ("elbasan", "Elbasan"), ("ethi", "Ethiopic"), ("ethiopic", "Ethiopic"),
+  ("geor", "Georgian"), ("georgian", "Georgian"), ("glag", "Glagolitic"),
+  ("glagolitic", "Glagolitic"), ("gonm", "Masaram_Gondi"), ("goth", "Gothic"),
+  ("gothic", "Gothic"), ("gran", "Grantha"), ("grantha", "Grantha"),
+  ("greek", "Greek"), ("grek", "Greek"), ("gujarati", "Gujarati"),
+  ("gujr", "Gujarati"), ("gurmukhi", "Gurmukhi"), ("guru", "Gurmukhi"),
+  ("han", "Han"), ("hang", "Hangul"), ("hangul", "Hangul"), ("hani", "Han"),
+  ("hano", "Hanunoo"), ("hanunoo", "Hanunoo"), ("hatr", "Hatran"),
+  ("hatran", "Hatran"), ("hebr", "Hebrew"), ("hebrew", "Hebrew"),
+  ("hira", "Hiragana"), ("hiragana", "Hiragana"),
+  ("hluw", "Anatolian_Hieroglyphs"), ("hmng", "Pahawh_Hmong"),
+  ("hrkt", "Katakana_Or_Hiragana"), ("hung", "Old_Hungarian"),
+  ("imperialaramaic", "Imperial_Aramaic"), ("inherited", "Inherited"),
+  ("inscriptionalpahlavi", "Inscriptional_Pahlavi"),
+  ("inscriptionalparthian", "Inscriptional_Parthian"), ("ital", "Old_Italic"),
+  ("java", "Javanese"), ("javanese", "Javanese"), ("kaithi", "Kaithi"),
+  ("kali", "Kayah_Li"), ("kana", "Katakana"), ("kannada", "Kannada"),
+  ("katakana", "Katakana"), ("katakanaorhiragana", "Katakana_Or_Hiragana"),
+  ("kayahli", "Kayah_Li"), ("khar", "Kharoshthi"),
+  ("kharoshthi", "Kharoshthi"), ("khmer", "Khmer"), ("khmr", "Khmer"),
+  ("khoj", "Khojki"), ("khojki", "Khojki"), ("khudawadi", "Khudawadi"),
+  ("knda", "Kannada"), ("kthi", "Kaithi"), ("lana", "Tai_Tham"),
+  ("lao", "Lao"), ("laoo", "Lao"), ("latin", "Latin"), ("latn", "Latin"),
+  ("lepc", "Lepcha"), ("lepcha", "Lepcha"), ("limb", "Limbu"),
+  ("limbu", "Limbu"), ("lina", "Linear_A"), ("linb", "Linear_B"),
+  ("lineara", "Linear_A"), ("linearb", "Linear_B"), ("lisu", "Lisu"),
+  ("lyci", "Lycian"), ("lycian", "Lycian"), ("lydi", "Lydian"),
+  ("lydian", "Lydian"), ("mahajani", "Mahajani"), ("mahj", "Mahajani"),
+  ("malayalam", "Malayalam"), ("mand", "Mandaic"), ("mandaic", "Mandaic"),
+  ("mani", "Manichaean"), ("manichaean", "Manichaean"), ("marc", "Marchen"),
+  ("marchen", "Marchen"), ("masaramgondi", "Masaram_Gondi"),
+  ("meeteimayek", "Meetei_Mayek"), ("mend", "Mende_Kikakui"),
+  ("mendekikakui", "Mende_Kikakui"), ("merc", "Meroitic_Cursive"),
+  ("mero", "Meroitic_Hieroglyphs"), ("meroiticcursive", "Meroitic_Cursive"),
+  ("meroitichieroglyphs", "Meroitic_Hieroglyphs"), ("miao", "Miao"),
+  ("mlym", "Malayalam"), ("modi", "Modi"), ("mong", "Mongolian"),
+  ("mongolian", "Mongolian"), ("mro", "Mro"), ("mroo", "Mro"),
+  ("mtei", "Meetei_Mayek"), ("mult", "Multani"), ("multani", "Multani"),
+  ("myanmar", "Myanmar"), ("mymr", "Myanmar"), ("nabataean", "Nabataean"),
+  ("narb", "Old_North_Arabian"), ("nbat", "Nabataean"), ("newa", "Newa"),
+  ("newtailue", "New_Tai_Lue"), ("nko", "Nko"), ("nkoo", "Nko"),
+  ("nshu", "Nushu"), ("nushu", "Nushu"), ("ogam", "Ogham"),
+  ("ogham", "Ogham"), ("olchiki", "Ol_Chiki"), ("olck", "Ol_Chiki"),
+  ("oldhungarian", "Old_Hungarian"), ("olditalic", "Old_Italic"),
+  ("oldnortharabian", "Old_North_Arabian"), ("oldpermic", "Old_Permic"),
+  ("oldpersian", "Old_Persian"), ("oldsoutharabian", "Old_South_Arabian"),
+  ("oldturkic", "Old_Turkic"), ("oriya", "Oriya"), ("orkh", "Old_Turkic"),
+  ("orya", "Oriya"), ("osage", "Osage"), ("osge", "Osage"),
+  ("osma", "Osmanya"), ("osmanya", "Osmanya"),
+  ("pahawhhmong", "Pahawh_Hmong"), ("palm", "Palmyrene"),
+  ("palmyrene", "Palmyrene"), ("pauc", "Pau_Cin_Hau"),
+  ("paucinhau", "Pau_Cin_Hau"), ("perm", "Old_Permic"), ("phag", "Phags_Pa"),
+  ("phagspa", "Phags_Pa"), ("phli", "Inscriptional_Pahlavi"),
+  ("phlp", "Psalter_Pahlavi"), ("phnx", "Phoenician"),
+  ("phoenician", "Phoenician"), ("plrd", "Miao"),
+  ("prti", "Inscriptional_Parthian"), ("psalterpahlavi", "Psalter_Pahlavi"),
+  ("qaac", "Coptic"), ("qaai", "Inherited"), ("rejang", "Rejang"),
+  ("rjng", "Rejang"), ("runic", "Runic"), ("runr", "Runic"),
+  ("samaritan", "Samaritan"), ("samr", "Samaritan"),
+  ("sarb", "Old_South_Arabian"), ("saur", "Saurashtra"),
+  ("saurashtra", "Saurashtra"), ("sgnw", "SignWriting"),
+  ("sharada", "Sharada"), ("shavian", "Shavian"), ("shaw", "Shavian"),
+  ("shrd", "Sharada"), ("sidd", "Siddham"), ("siddham", "Siddham"),
+  ("signwriting", "SignWriting"), ("sind", "Khudawadi"), ("sinh", "Sinhala"),
+  ("sinhala", "Sinhala"), ("sora", "Sora_Sompeng"),
+  ("sorasompeng", "Sora_Sompeng"), ("soyo", "Soyombo"),
+  ("soyombo", "Soyombo"), ("sund", "Sundanese"), ("sundanese", "Sundanese"),
+  ("sylo", "Syloti_Nagri"), ("sylotinagri", "Syloti_Nagri"),
+  ("syrc", "Syriac"), ("syriac", "Syriac"), ("tagalog", "Tagalog"),
+  ("tagb", "Tagbanwa"), ("tagbanwa", "Tagbanwa"), ("taile", "Tai_Le"),
+  ("taitham", "Tai_Tham"), ("taiviet", "Tai_Viet"), ("takr", "Takri"),
+  ("takri", "Takri"), ("tale", "Tai_Le"), ("talu", "New_Tai_Lue"),
+  ("tamil", "Tamil"), ("taml", "Tamil"), ("tang", "Tangut"),
+  ("tangut", "Tangut"), ("tavt", "Tai_Viet"), ("telu", "Telugu"),
+  ("telugu", "Telugu"), ("tfng", "Tifinagh"), ("tglg", "Tagalog"),
+  ("thaa", "Thaana"), ("thaana", "Thaana"), ("thai", "Thai"),
+  ("tibetan", "Tibetan"), ("tibt", "Tibetan"), ("tifinagh", "Tifinagh"),
+  ("tirh", "Tirhuta"), ("tirhuta", "Tirhuta"), ("ugar", "Ugaritic"),
+  ("ugaritic", "Ugaritic"), ("unknown", "Unknown"), ("vai", "Vai"),
+  ("vaii", "Vai"), ("wara", "Warang_Citi"), ("warangciti", "Warang_Citi"),
+  ("xpeo", "Old_Persian"), ("xsux", "Cuneiform"), ("yi", "Yi"),
+  ("yiii", "Yi"), ("zanabazarsquare", "Zanabazar_Square"),
+  ("zanb", "Zanabazar_Square"), ("zinh", "Inherited"), ("zyyy", "Common"),
+  ("zzzz", "Unknown"), ]),
+
+  ("Sentence_Break", &[("at", "ATerm"), ("aterm", "ATerm"), ("cl", "Close"),
+  ("close", "Close"), ("cr", "CR"), ("ex", "Extend"), ("extend", "Extend"),
+  ("fo", "Format"), ("format", "Format"), ("le", "OLetter"), ("lf", "LF"),
+  ("lo", "Lower"), ("lower", "Lower"), ("nu", "Numeric"),
+  ("numeric", "Numeric"), ("oletter", "OLetter"), ("other", "Other"),
+  ("sc", "SContinue"), ("scontinue", "SContinue"), ("se", "Sep"),
+  ("sep", "Sep"), ("sp", "Sp"), ("st", "STerm"), ("sterm", "STerm"),
+  ("up", "Upper"), ("upper", "Upper"), ("xx", "Other"), ]),
+
+  ("Sentence_Terminal", &[("f", "No"), ("false", "No"), ("n", "No"),
+  ("no", "No"), ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"),
+  ]),
+
+  ("Soft_Dotted", &[("f", "No"), ("false", "No"), ("n", "No"), ("no", "No"),
+  ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"), ]),
+
+  ("Terminal_Punctuation", &[("f", "No"), ("false", "No"), ("n", "No"),
+  ("no", "No"), ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"),
+  ]),
+
+  ("Unified_Ideograph", &[("f", "No"), ("false", "No"), ("n", "No"),
+  ("no", "No"), ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"),
+  ]),
+
+  ("Uppercase", &[("f", "No"), ("false", "No"), ("n", "No"), ("no", "No"),
+  ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"), ]),
+
+  ("Variation_Selector", &[("f", "No"), ("false", "No"), ("n", "No"),
+  ("no", "No"), ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"),
+  ]),
+
+  ("Vertical_Orientation", &[("r", "Rotated"), ("rotated", "Rotated"),
+  ("tr", "Transformed_Rotated"),
+  ("transformedrotated", "Transformed_Rotated"),
+  ("transformedupright", "Transformed_Upright"),
+  ("tu", "Transformed_Upright"), ("u", "Upright"), ("upright", "Upright"), ]),
+
+  ("White_Space", &[("f", "No"), ("false", "No"), ("n", "No"), ("no", "No"),
+  ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"), ]),
+
+  ("Word_Break", &[("aletter", "ALetter"), ("cr", "CR"),
+  ("doublequote", "Double_Quote"), ("dq", "Double_Quote"), ("eb", "E_Base"),
+  ("ebase", "E_Base"), ("ebasegaz", "E_Base_GAZ"), ("ebg", "E_Base_GAZ"),
+  ("em", "E_Modifier"), ("emodifier", "E_Modifier"), ("ex", "ExtendNumLet"),
+  ("extend", "Extend"), ("extendnumlet", "ExtendNumLet"), ("fo", "Format"),
+  ("format", "Format"), ("gaz", "Glue_After_Zwj"),
+  ("glueafterzwj", "Glue_After_Zwj"), ("hebrewletter", "Hebrew_Letter"),
+  ("hl", "Hebrew_Letter"), ("ka", "Katakana"), ("katakana", "Katakana"),
+  ("le", "ALetter"), ("lf", "LF"), ("mb", "MidNumLet"),
+  ("midletter", "MidLetter"), ("midnum", "MidNum"),
+  ("midnumlet", "MidNumLet"), ("ml", "MidLetter"), ("mn", "MidNum"),
+  ("newline", "Newline"), ("nl", "Newline"), ("nu", "Numeric"),
+  ("numeric", "Numeric"), ("other", "Other"),
+  ("regionalindicator", "Regional_Indicator"), ("ri", "Regional_Indicator"),
+  ("singlequote", "Single_Quote"), ("sq", "Single_Quote"), ("xx", "Other"),
+  ("zwj", "ZWJ"), ]),
+
+  ("XID_Continue", &[("f", "No"), ("false", "No"), ("n", "No"), ("no", "No"),
+  ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"), ]),
+
+  ("XID_Start", &[("f", "No"), ("false", "No"), ("n", "No"), ("no", "No"),
+  ("t", "Yes"), ("true", "Yes"), ("y", "Yes"), ("yes", "Yes"), ]),
+];
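
Editorial note (not part of the vendored file): the tables above map loosely written property-value names to their canonical Unicode forms, with each key pre-normalized (lowercased, with spaces, hyphens and underscores dropped, which is also why a stripped "is" prefix can leave keys like "" or "olated") and the pairs kept in sorted order so a lookup can binary-search on the key. The sketch below shows one way such a table could be consulted; `normalize` and `canonical` are hypothetical helper names and the tiny `SCRIPTS` table is a stand-in, not the crate's API.

    /// Loose normalization in the spirit of UAX44-LM3: lowercase and drop
    /// whitespace, hyphens and underscores. (The generated tables appear to
    /// additionally fold an initial "is" prefix; that step is omitted here.)
    fn normalize(name: &str) -> String {
        name.chars()
            .filter(|c| !c.is_whitespace() && *c != '-' && *c != '_')
            .flat_map(|c| c.to_lowercase())
            .collect()
    }

    /// Resolve a loosely written name to its canonical form by binary-searching
    /// a (normalized key, canonical name) table sorted on the key.
    fn canonical<'a>(table: &'a [(&'a str, &'a str)], name: &str) -> Option<&'a str> {
        let key = normalize(name);
        table
            .binary_search_by(|&(k, _)| k.cmp(key.as_str()))
            .ok()
            .map(|i| table[i].1)
    }

    fn main() {
        // A small stand-in table in the same shape as the generated ones.
        const SCRIPTS: &[(&str, &str)] = &[
            ("arab", "Arabic"),
            ("arabic", "Arabic"),
            ("latin", "Latin"),
            ("latn", "Latin"),
        ];
        assert_eq!(canonical(SCRIPTS, "La-tin"), Some("Latin"));
        assert_eq!(canonical(SCRIPTS, "ARAB"), Some("Arabic"));
        assert_eq!(canonical(SCRIPTS, "nope"), None);
    }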