Bug 1402247 addendum - Unconditionally use unaligned loads and stores and unroll the loop by two when converting between Latin1 and UTF-16. draft
authorHenri Sivonen <hsivonen@hsivonen.fi>
Mon, 09 Apr 2018 14:20:44 +0300
changeset 779162 042ad6c05c461f8e3679d76ab91f73ba781bf6c4
parent 779161 f11eb8e8d4c1bf8ef1a6a452c654d2a0c5dc148f
child 808082 f9b161b2bf2c29f2f71d4d38cb05f75155706532
child 808103 f83cc54917ca7c6f498d92f68fc21ec68cd073f0
push id105677
push userbmo:hsivonen@hsivonen.fi
push dateMon, 09 Apr 2018 11:22:51 +0000
bugs1402247
milestone61.0a1
Bug 1402247 addendum - Unconditionally use unaligned loads and stores and unroll the loop by two when converting between Latin1 and UTF-16. MozReview-Commit-ID: GbcUJc0Hvem
third_party/rust/encoding_rs/.cargo-checksum.json
third_party/rust/encoding_rs/src/ascii.rs
--- a/third_party/rust/encoding_rs/.cargo-checksum.json
+++ b/third_party/rust/encoding_rs/.cargo-checksum.json
@@ -1,1 +1,1 @@
-{"files":{".travis.yml":"dc509cc3b8f44fbdf1d806f533c3f005afaf0fd77cd266b38cb69bab3e4ea136","CONTRIBUTING.md":"6dac812ad206dbeb43b32ae01062fb79684fb01f9ee778c1c166852adc77d4c9","COPYRIGHT":"20d4fff11cca11529df3f02096fbe8ffe350219cdb07cdedea34e6a762866da5","Cargo.toml":"114f3399a97af04c9e2f8514448ccac81aac9ce7b333ec1594e733aad0c92e87","Ideas.md":"7fbeddb0f8ba7b233673ee705997adc2fddb1636a17fe662532b35ef2810a51d","LICENSE-APACHE":"cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30","LICENSE-MIT":"74aa8b6d04c36bb640ee81187a3f24a2fa94e36d4c1d4f2ca164c3784ae87a83","README.md":"f60b9e7ff6d62f6cd580cbd386a039fda2d7407821da984fbe3cdb9c4a64f5d3","generate-encoding-data.py":"8a0a5162098d355e4df63532819769fd6626a66a0aa93f2762e315d6147aa0a5","rustfmt.toml":"c01c06dfbdfcf30730535aab911d69068febb921e2faef9571ceeb6a5c2a3eab","src/ascii.rs":"0fd4833571df22b0bb98e230c07b4ff733284c5b58b7b21a50f4f68c683ee706","src/big5.rs":"780ae537353f899a5772a9e7d062441041276e1eb1506a013e4280c5cda6bb93","src/data.rs":"412c842c698c3ce1cec4a27ab19ca275372ac28940ac49cdf3e0dad71a2c2812","src/euc_jp.rs":"feda0ade5e1c3e4abd7637c59373b977662007990fd164ea7db1acc502ba3534","src/euc_kr.rs":"2699c52055882e34ba4e12d072b8161c635840f3620075ca3f10986aec0e8d3b","src/gb18030.rs":"6a4d5ff9a89cdf1d89de78cd309f01385435dd9a4ffee182e13df2675cf57600","src/handles.rs":"c07a3738e43e8aae11108a30d34067c31ddc5d22074b85ef393f00abcc1f4e01","src/iso_2022_jp.rs":"1f780c3ff72f1a867d6c5782135cd01427eca6d74f0dd6cb23c1406b5163af1a","src/lib.rs":"b53cfe7009dcba83724ac2affa1f3fdd675451a33742ceb9f030eb83e702305f","src/macros.rs":"9ab30e7194f61f268cd7d899cabb06ff9ca7717663926fd583b20334f49ac8d3","src/mem.rs":"326003897f0efefa257210f4e698a2a039e7e9d2fe16e0fc9341b51a68ce1dff","src/replacement.rs":"782f03f04d110e9a0656262bf4296aa0ab8199e196cb63239c30d9649996caa4","src/shift_jis.rs":"84df4ff58b60e0827d6c0c7049f2cf19033f2b9e25a9186bcfb0bbb05e87b380","src/simd_funcs.rs":"76c4abc881f2dd91f8e936b059152fa4ee5056af0af59356fbf105436ddd673f","src/single_byte.rs":"b3fadb4fa1e66b00efc12b8850b3076580a8cd73c9ed810a19421fd3ade9bbf1","src/test_data/big5_in.txt":"4c5a8691f8dc717311889c63894026d2fb62725a86c4208ca274a9cc8d42a503","src/test_data/big5_in_ref.txt":"99d399e17750cf9c7cf30bb253dbfe35b81c4fcbdead93cfa48b1429213473c7","src/test_data/big5_out.txt":"6193ca97c297aa20e09396038d18e938bb7ea331c26f0f2454097296723a0b13","src/test_data/big5_out_ref.txt":"36567691f557df144f6cc520015a87038dfa156f296fcf103b56ae9a718be1fc","src/test_data/euc_kr_in.txt":"c86a7224f3215fa0d04e685622a752fdc72763e8ae076230c7fd62de57ec4074","src/test_data/euc_kr_in_ref.txt":"1f419f4ca47d708b54c73c461545a022ae2e20498fdbf8005a483d752a204883","src/test_data/euc_kr_out.txt":"e7f32e026f70be1e1b58e0047baf7d3d2c520269c4f9b9992e158b4decb0a1a3","src/test_data/euc_kr_out_ref.txt":"c9907857980b20b8e9e3b584482ed6567a2be6185d72237b6322f0404944924e","src/test_data/gb18030_in.txt":"ab7231b2d3e9afacdbd7d7f3b9e5361a7ff9f7e1cfdb4f3bd905b9362b309e53","src/test_data/gb18030_in_ref.txt":"dc5069421adca2043c55f5012b55a76fdff651d22e6e699fd0978f8d5706815c","src/test_data/gb18030_out.txt":"f0208d527f5ca63de7d9a0323be8d5cf12d8a104b2943d92c2701f0c3364dac1","src/test_data/gb18030_out_ref.txt":"6819fe47627e4ea01027003fc514b9f21a1322e732d7f1fb92cc6c5455bc6c07","src/test_data/iso_2022_jp_in.txt":"cd24bbdcb1834e25db54646fbf4c41560a13dc7540f6be3dba4f5d97d44513af","src/test_data/iso_2022_jp_in_ref.txt":"3dc4e6a5e06471942d086b16c9440945e78415f6f3f47e43717e4bc2eac2cdf5","src/test_data/iso_2022_jp_out.txt":"9b6f015329dda6c3f9ee5ce6dbd6fa9c89acc21283e886836c78b8d833480c21","src/test_data/iso_2022_jp_out_ref.txt":"78cb260093a20116ad9a42f43b05d1848c5ab100b6b9a850749809e943884b35","src/test_data/jis0208_in.txt":"6df3030553ffb0a6615bb33dc8ea9dca6d9623a9028e2ffec754ce3c3da824cc","src/test_data/jis0208_in_ref.txt":"3dc4e6a5e06471942d086b16c9440945e78415f6f3f47e43717e4bc2eac2cdf5","src/test_data/jis0208_out.txt":"4ec24477e1675ce750733bdc3c5add1cd27b6bd4ce1f09289564646e9654e857","src/test_data/jis0208_out_ref.txt":"c3e1cef5032b2b1d93a406f31ff940c4e2dfe8859b8b17ca2761fee7a75a0e48","src/test_data/jis0212_in.txt":"c011f0dd72bd7c8cd922df9374ef8d2769a77190514c77f6c62b415852eeb9fe","src/test_data/jis0212_in_ref.txt":"7d9458b3d2f73e7092a7f505c08ce1d233dde18aa679fbcf9889256239cc9e06","src/test_data/shift_jis_in.txt":"02e389ccef0dd2122e63f503899402cb7f797912c2444cc80ab93131116c5524","src/test_data/shift_jis_in_ref.txt":"512f985950ca902e643c88682dba9708b7c38d3c5ec2925168ab00ac94ab19f9","src/test_data/shift_jis_out.txt":"5fbc44da7bf639bf6cfe0fa1fd3eba7102b88f81919c9ea991302712f69426fb","src/test_data/shift_jis_out_ref.txt":"466322c6fed8286c64582731755290c2296508efdd258826e6279686649b481f","src/test_labels_names.rs":"c962c7aeac3d9ef2aca70c9e21983b231d4cf998cb06879374b0401e5149d1da","src/testing.rs":"16da398fdab694283d24556932ff7fce893e22cf66a180795a830541f4ddd94b","src/utf_16.rs":"1d2c40857c946f6eecf724efc60a196865b4d84a59b08b42fbe4576fa8308fd0","src/utf_8.rs":"dc7df98c65a23607071b699243aec75a461510ee0617abba289df1ebe781c08b","src/utf_8_core.rs":"bbc010dbdfed0f5e7c48a1ab0772eaf2e27711b789bb82f71a678f2240158a65","src/variant.rs":"93dfec2dcfc9fd9711bb55d48177f4a0e9479c7fbd055f08db3853338569da83","src/x_user_defined.rs":"84d054eec249dd676452585f8eb13dc851095021ed6e1f8c79e952c6d81751df"},"package":"98fd0f24d1fb71a4a6b9330c8ca04cbd4e7cc5d846b54ca74ff376bc7c9f798d"}
\ No newline at end of file
+{"files":{".travis.yml":"dc509cc3b8f44fbdf1d806f533c3f005afaf0fd77cd266b38cb69bab3e4ea136","CONTRIBUTING.md":"6dac812ad206dbeb43b32ae01062fb79684fb01f9ee778c1c166852adc77d4c9","COPYRIGHT":"20d4fff11cca11529df3f02096fbe8ffe350219cdb07cdedea34e6a762866da5","Cargo.toml":"114f3399a97af04c9e2f8514448ccac81aac9ce7b333ec1594e733aad0c92e87","Ideas.md":"7fbeddb0f8ba7b233673ee705997adc2fddb1636a17fe662532b35ef2810a51d","LICENSE-APACHE":"cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30","LICENSE-MIT":"74aa8b6d04c36bb640ee81187a3f24a2fa94e36d4c1d4f2ca164c3784ae87a83","README.md":"f60b9e7ff6d62f6cd580cbd386a039fda2d7407821da984fbe3cdb9c4a64f5d3","generate-encoding-data.py":"8a0a5162098d355e4df63532819769fd6626a66a0aa93f2762e315d6147aa0a5","rustfmt.toml":"c01c06dfbdfcf30730535aab911d69068febb921e2faef9571ceeb6a5c2a3eab","src/ascii.rs":"801d0e47dd2694f2248ba1d8cb661c778fe1201d225c54b30d2eea33cfb2662c","src/big5.rs":"780ae537353f899a5772a9e7d062441041276e1eb1506a013e4280c5cda6bb93","src/data.rs":"412c842c698c3ce1cec4a27ab19ca275372ac28940ac49cdf3e0dad71a2c2812","src/euc_jp.rs":"feda0ade5e1c3e4abd7637c59373b977662007990fd164ea7db1acc502ba3534","src/euc_kr.rs":"2699c52055882e34ba4e12d072b8161c635840f3620075ca3f10986aec0e8d3b","src/gb18030.rs":"6a4d5ff9a89cdf1d89de78cd309f01385435dd9a4ffee182e13df2675cf57600","src/handles.rs":"c07a3738e43e8aae11108a30d34067c31ddc5d22074b85ef393f00abcc1f4e01","src/iso_2022_jp.rs":"1f780c3ff72f1a867d6c5782135cd01427eca6d74f0dd6cb23c1406b5163af1a","src/lib.rs":"b53cfe7009dcba83724ac2affa1f3fdd675451a33742ceb9f030eb83e702305f","src/macros.rs":"9ab30e7194f61f268cd7d899cabb06ff9ca7717663926fd583b20334f49ac8d3","src/mem.rs":"326003897f0efefa257210f4e698a2a039e7e9d2fe16e0fc9341b51a68ce1dff","src/replacement.rs":"782f03f04d110e9a0656262bf4296aa0ab8199e196cb63239c30d9649996caa4","src/shift_jis.rs":"84df4ff58b60e0827d6c0c7049f2cf19033f2b9e25a9186bcfb0bbb05e87b380","src/simd_funcs.rs":"76c4abc881f2dd91f8e936b059152fa4ee5056af0af59356fbf105436ddd673f","src/single_byte.rs":"b3fadb4fa1e66b00efc12b8850b3076580a8cd73c9ed810a19421fd3ade9bbf1","src/test_data/big5_in.txt":"4c5a8691f8dc717311889c63894026d2fb62725a86c4208ca274a9cc8d42a503","src/test_data/big5_in_ref.txt":"99d399e17750cf9c7cf30bb253dbfe35b81c4fcbdead93cfa48b1429213473c7","src/test_data/big5_out.txt":"6193ca97c297aa20e09396038d18e938bb7ea331c26f0f2454097296723a0b13","src/test_data/big5_out_ref.txt":"36567691f557df144f6cc520015a87038dfa156f296fcf103b56ae9a718be1fc","src/test_data/euc_kr_in.txt":"c86a7224f3215fa0d04e685622a752fdc72763e8ae076230c7fd62de57ec4074","src/test_data/euc_kr_in_ref.txt":"1f419f4ca47d708b54c73c461545a022ae2e20498fdbf8005a483d752a204883","src/test_data/euc_kr_out.txt":"e7f32e026f70be1e1b58e0047baf7d3d2c520269c4f9b9992e158b4decb0a1a3","src/test_data/euc_kr_out_ref.txt":"c9907857980b20b8e9e3b584482ed6567a2be6185d72237b6322f0404944924e","src/test_data/gb18030_in.txt":"ab7231b2d3e9afacdbd7d7f3b9e5361a7ff9f7e1cfdb4f3bd905b9362b309e53","src/test_data/gb18030_in_ref.txt":"dc5069421adca2043c55f5012b55a76fdff651d22e6e699fd0978f8d5706815c","src/test_data/gb18030_out.txt":"f0208d527f5ca63de7d9a0323be8d5cf12d8a104b2943d92c2701f0c3364dac1","src/test_data/gb18030_out_ref.txt":"6819fe47627e4ea01027003fc514b9f21a1322e732d7f1fb92cc6c5455bc6c07","src/test_data/iso_2022_jp_in.txt":"cd24bbdcb1834e25db54646fbf4c41560a13dc7540f6be3dba4f5d97d44513af","src/test_data/iso_2022_jp_in_ref.txt":"3dc4e6a5e06471942d086b16c9440945e78415f6f3f47e43717e4bc2eac2cdf5","src/test_data/iso_2022_jp_out.txt":"9b6f015329dda6c3f9ee5ce6dbd6fa9c89acc21283e886836c78b8d833480c21","src/test_data/iso_2022_jp_out_ref.txt":"78cb260093a20116ad9a42f43b05d1848c5ab100b6b9a850749809e943884b35","src/test_data/jis0208_in.txt":"6df3030553ffb0a6615bb33dc8ea9dca6d9623a9028e2ffec754ce3c3da824cc","src/test_data/jis0208_in_ref.txt":"3dc4e6a5e06471942d086b16c9440945e78415f6f3f47e43717e4bc2eac2cdf5","src/test_data/jis0208_out.txt":"4ec24477e1675ce750733bdc3c5add1cd27b6bd4ce1f09289564646e9654e857","src/test_data/jis0208_out_ref.txt":"c3e1cef5032b2b1d93a406f31ff940c4e2dfe8859b8b17ca2761fee7a75a0e48","src/test_data/jis0212_in.txt":"c011f0dd72bd7c8cd922df9374ef8d2769a77190514c77f6c62b415852eeb9fe","src/test_data/jis0212_in_ref.txt":"7d9458b3d2f73e7092a7f505c08ce1d233dde18aa679fbcf9889256239cc9e06","src/test_data/shift_jis_in.txt":"02e389ccef0dd2122e63f503899402cb7f797912c2444cc80ab93131116c5524","src/test_data/shift_jis_in_ref.txt":"512f985950ca902e643c88682dba9708b7c38d3c5ec2925168ab00ac94ab19f9","src/test_data/shift_jis_out.txt":"5fbc44da7bf639bf6cfe0fa1fd3eba7102b88f81919c9ea991302712f69426fb","src/test_data/shift_jis_out_ref.txt":"466322c6fed8286c64582731755290c2296508efdd258826e6279686649b481f","src/test_labels_names.rs":"c962c7aeac3d9ef2aca70c9e21983b231d4cf998cb06879374b0401e5149d1da","src/testing.rs":"16da398fdab694283d24556932ff7fce893e22cf66a180795a830541f4ddd94b","src/utf_16.rs":"1d2c40857c946f6eecf724efc60a196865b4d84a59b08b42fbe4576fa8308fd0","src/utf_8.rs":"dc7df98c65a23607071b699243aec75a461510ee0617abba289df1ebe781c08b","src/utf_8_core.rs":"bbc010dbdfed0f5e7c48a1ab0772eaf2e27711b789bb82f71a678f2240158a65","src/variant.rs":"93dfec2dcfc9fd9711bb55d48177f4a0e9479c7fbd055f08db3853338569da83","src/x_user_defined.rs":"84d054eec249dd676452585f8eb13dc851095021ed6e1f8c79e952c6d81751df"},"package":"98fd0f24d1fb71a4a6b9330c8ca04cbd4e7cc5d846b54ca74ff376bc7c9f798d"}
--- a/third_party/rust/encoding_rs/src/ascii.rs
+++ b/third_party/rust/encoding_rs/src/ascii.rs
@@ -16,17 +16,18 @@
 // numbers of the actual ARMv7 CPU I have access to, because (thermal?)
 // throttling kept interfering. Since Raspberry Pi 3 (ARMv8 core but running
 // ARMv7 code) produced reproducible performance numbers, that's the ARM
 // computer that this code ended up being optimized for in the ALU case.
 // Less popular CPU architectures simply get the approach that was chosen based
 // on Raspberry Pi 3 measurements. The UTF-16 and UTF-8 ALU cases take
 // different approaches based on benchmarking on Raspberry Pi 3.
 
-#[cfg(all(feature = "simd-accel", any(target_feature = "sse2", all(target_endian = "little", target_arch = "aarch64"))))]
+#[cfg(all(feature = "simd-accel",
+          any(target_feature = "sse2", all(target_endian = "little", target_arch = "aarch64"))))]
 use simd_funcs::*;
 
 // `as` truncates, so works on 32-bit, too.
 #[allow(dead_code)]
 pub const ASCII_MASK: usize = 0x80808080_80808080u64 as usize;
 
 // `as` truncates, so works on 32-bit, too.
 #[allow(dead_code)]
@@ -637,18 +638,18 @@ cfg_if! {
         pack_simd_stride!(pack_stride_both_aligned, load8_aligned, store16_aligned);
         pack_simd_stride!(pack_stride_src_aligned, load8_aligned, store16_unaligned);
         pack_simd_stride!(pack_stride_dst_aligned, load8_unaligned, store16_aligned);
         pack_simd_stride!(pack_stride_neither_aligned, load8_unaligned, store16_unaligned);
 
         ascii_simd_check_align!(ascii_to_ascii, u8, u8, ascii_to_ascii_stride_both_aligned, ascii_to_ascii_stride_src_aligned, ascii_to_ascii_stride_dst_aligned, ascii_to_ascii_stride_neither_aligned);
         ascii_simd_check_align!(ascii_to_basic_latin, u8, u16, ascii_to_basic_latin_stride_both_aligned, ascii_to_basic_latin_stride_src_aligned, ascii_to_basic_latin_stride_dst_aligned, ascii_to_basic_latin_stride_neither_aligned);
         ascii_simd_check_align!(basic_latin_to_ascii, u16, u8, basic_latin_to_ascii_stride_both_aligned, basic_latin_to_ascii_stride_src_aligned, basic_latin_to_ascii_stride_dst_aligned, basic_latin_to_ascii_stride_neither_aligned);
-        latin1_simd_check_align!(unpack_latin1, u8, u16, unpack_stride_both_aligned, unpack_stride_src_aligned, unpack_stride_dst_aligned, unpack_stride_neither_aligned);
-        latin1_simd_check_align!(pack_latin1, u16, u8, pack_stride_both_aligned, pack_stride_src_aligned, pack_stride_dst_aligned, pack_stride_neither_aligned);
+        latin1_simd_unalign!(unpack_latin1, u8, u16, unpack_stride_neither_aligned);
+        latin1_simd_unalign!(pack_latin1, u16, u8, pack_stride_neither_aligned);
     } else if #[cfg(all(target_endian = "little", target_pointer_width = "64"))] {
         // Aligned ALU word, little-endian, 64-bit
 
         pub const STRIDE_SIZE: usize = 16;
 
         pub const ALIGNMENT: usize = 8;
 
         pub const ALIGNMENT_MASK: usize = 7;