Bug 1275474 - Part 1. nsIUnicodeNormalizer should use ICU library if turned on. r?jfkthame draft
author Makoto Kato <m_kato@ga2.so-net.ne.jp>
Mon, 16 May 2016 17:17:47 +0900
changeset 370596 686a7ecc81d3d8113c838689d076d9c6fe321e54
parent 370474 5511d54a3f172c1d68f98cc55dce4de1d0ba1b51
child 370597 2a7944cf50d8380a1c5693a8aa2613d0ef231f22
push id 19119
push user m_kato@ga2.so-net.ne.jp
push date Wed, 25 May 2016 04:47:19 +0000
reviewers jfkthame
bugs 1275474, 1261900
milestone 49.0a1
Bug 1275474 - Part 1. nsIUnicodeNormalizer should use ICU library if turned on. r?jfkthame Since bug 1261900, Compose and DecomposeNonRecursively are no longer used when ICU is turned on, so they shouldn't be used with ICU. MozReview-Commit-ID: C30jJu31W7G
intl/unicharutil/moz.build
intl/unicharutil/nsUnicodeNormalizer.cpp
--- a/intl/unicharutil/moz.build
+++ b/intl/unicharutil/moz.build
@@ -22,15 +22,30 @@ EXPORTS += [
     'nsUnicodeNormalizer.h',
 ]
 
 UNIFIED_SOURCES += [
     'nsCaseConversionImp2.cpp',
     'nsCategoryImp.cpp',
     'nsEntityConverter.cpp',
     'nsSaveAsCharset.cpp',
-    'nsUnicodeNormalizer.cpp',
 ]
 
+if CONFIG['ENABLE_INTL_API']:
+    SOURCES += [
+        'nsUnicodeNormalizer.cpp',
+    ]
+    SOURCES['nsUnicodeNormalizer.cpp'].flags += CONFIG['MOZ_ICU_CFLAGS']
+    LOCAL_INCLUDES += CONFIG['MOZ_ICU_INCLUDES']
+    if CONFIG['_MSC_VER']:
+        # Temporary hack to support building with VS2015: suppress warning
+        # C4577 ('noexcept' used with no exception handling mode specified;
+        # termination on exception is not guaranteed. Specify /EHsc).
+        SOURCES['nsUnicodeNormalizer.cpp'].flags += ['-wd4577']
+else:
+    UNIFIED_SOURCES += [
+        'nsUnicodeNormalizer.cpp',
+    ]
+
 FINAL_LIBRARY = 'xul'
 
 if CONFIG['GNU_CXX']:
     CXXFLAGS += ['-Wno-error=shadow']
--- a/intl/unicharutil/nsUnicodeNormalizer.cpp
+++ b/intl/unicharutil/nsUnicodeNormalizer.cpp
@@ -46,35 +46,71 @@
  *    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  *    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  *    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  *    WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  *    OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  *    ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
  */
 
-#include <string.h>
-
-#include "nsMemory.h"
 #include "nsUnicodeNormalizer.h"
 #include "nsString.h"
+#if ENABLE_INTL_API
+#include "unicode/unorm2.h"
+#else
+#include "nsMemory.h"
 #include "mozilla/BinarySearch.h"
+#include <string.h>
+#endif
 
 NS_IMPL_ISUPPORTS(nsUnicodeNormalizer, nsIUnicodeNormalizer)
 
 
 nsUnicodeNormalizer::nsUnicodeNormalizer()
 {
 }
 
 nsUnicodeNormalizer::~nsUnicodeNormalizer()
 {
 }
 
+#if ENABLE_INTL_API
+// Normalizes aSrc with aNormalizer into aDest; returns false on ICU failure,
+// leaving aDest empty. ICU writes only after SetLength(), within the string.
+static bool
+NormalizeString(const UNormalizer2* aNormalizer, const nsAString& aSrc,
+                nsAString& aDest)
+{
+  const int32_t srcLength = aSrc.Length();
 
+  // Guess a capacity that covers most results so one ICU pass is enough; on
+  // overflow ICU reports the size it needs and a single retry uses that.
+  int32_t capacity = srcLength + (srcLength >> 8) + 8;
+  for (int attempt = 0; attempt < 2; ++attempt) {
+    aDest.SetLength(capacity);
+    UErrorCode error = U_ZERO_ERROR;
+    const int32_t length =
+      unorm2_normalize(aNormalizer,
+                       reinterpret_cast<const UChar*>(aSrc.BeginReading()),
+                       srcLength,
+                       reinterpret_cast<UChar*>(aDest.BeginWriting()),
+                       capacity, &error);
+    if (U_SUCCESS(error)) {
+      aDest.SetLength(length);
+      return true;
+    }
+    if (error != U_BUFFER_OVERFLOW_ERROR) {
+      break;
+    }
+    capacity = length;
+  }
+  aDest.Truncate();
+  return false;
+}
+#else
 
 #define END_BIT		0x80000000
 
 
 /*
  * Some constants for Hangul decomposition/composition.
  * These things were taken from unicode book. 
  */
@@ -645,50 +681,110 @@ workbuf_removevoid(workbuf_t *wb) {
 				wb->cclass[j] = wb->cclass[i];
 			}
 			j++;
 		}
 	}
 	wb->cur -= last - j;
 	wb->last = j;
 }
+#endif // ENABLE_INTL_API
 
 nsresult  
 nsUnicodeNormalizer::NormalizeUnicodeNFD( const nsAString& aSrc, nsAString& aDest)
 {
+#if ENABLE_INTL_API
+  // ICU build: obtain ICU's NFD normalizer and run aSrc through it. A failure
+  // at either step is reported to the caller as NS_ERROR_FAILURE.
+  UErrorCode status = U_ZERO_ERROR;
+  const UNormalizer2* nfd = unorm2_getNFDInstance(&status);
+  if (U_FAILURE(status) || !NormalizeString(nfd, aSrc, aDest)) {
+    return NS_ERROR_FAILURE;
+  }
+  return NS_OK;
+#else
+  // Non-ICU build: delegate to mdn_normalize.
   return mdn_normalize(false, false, aSrc, aDest);
+#endif
 }
 
 nsresult  
 nsUnicodeNormalizer::NormalizeUnicodeNFC( const nsAString& aSrc, nsAString& aDest)
 {
+#if ENABLE_INTL_API
+  // ICU build: obtain ICU's NFC normalizer and run aSrc through it. Both
+  // failure paths return NS_ERROR_FAILURE, matching NFD, NFKD and NFKC.
+  UErrorCode status = U_ZERO_ERROR;
+  const UNormalizer2* nfc = unorm2_getNFCInstance(&status);
+  if (U_FAILURE(status) || !NormalizeString(nfc, aSrc, aDest)) {
+    return NS_ERROR_FAILURE;
+  }
+  return NS_OK;
+#else
+  // Non-ICU build: delegate to mdn_normalize.
   return mdn_normalize(true, false, aSrc, aDest);
+#endif
 }
 
 nsresult  
 nsUnicodeNormalizer::NormalizeUnicodeNFKD( const nsAString& aSrc, nsAString& aDest)
 {
+#if ENABLE_INTL_API
+  // ICU build: obtain ICU's NFKD normalizer and run aSrc through it. A failure
+  // at either step is reported to the caller as NS_ERROR_FAILURE.
+  UErrorCode status = U_ZERO_ERROR;
+  const UNormalizer2* nfkd = unorm2_getNFKDInstance(&status);
+  if (U_FAILURE(status) || !NormalizeString(nfkd, aSrc, aDest)) {
+    return NS_ERROR_FAILURE;
+  }
+  return NS_OK;
+#else
+  // Non-ICU build: delegate to mdn_normalize.
   return mdn_normalize(false, true, aSrc, aDest);
+#endif
 }
 
 nsresult  
 nsUnicodeNormalizer::NormalizeUnicodeNFKC( const nsAString& aSrc, nsAString& aDest)
 {
+#if ENABLE_INTL_API
+  // ICU build: obtain ICU's NFKC normalizer and run aSrc through it. A failure
+  // at either step is reported to the caller as NS_ERROR_FAILURE.
+  UErrorCode status = U_ZERO_ERROR;
+  const UNormalizer2* nfkc = unorm2_getNFKCInstance(&status);
+  if (U_FAILURE(status) || !NormalizeString(nfkc, aSrc, aDest)) {
+    return NS_ERROR_FAILURE;
+  }
+  return NS_OK;
+#else
+  // Non-ICU build: delegate to mdn_normalize.
   return mdn_normalize(true, true, aSrc, aDest);
+#endif
 }
 
 bool
 nsUnicodeNormalizer::Compose(uint32_t a, uint32_t b, uint32_t *ab)
 {
+#if ENABLE_INTL_API
+  // Not expected to be called in ICU builds: assert and report failure.
+  NS_ASSERTION(false, "Use ICU API instead");
+  return false;
+#else
   return mdn__unicode_compose(a, b, ab) == NS_OK;
+#endif
 }
 
 bool
 nsUnicodeNormalizer::DecomposeNonRecursively(uint32_t c, uint32_t *c1, uint32_t *c2)
 {
+#if ENABLE_INTL_API
+  // no longer used when turned on ICU
+  NS_ASSERTION(false, "Use ICU API instead");
+  return false;
+#else
   // We can't use mdn__unicode_decompose here, because that does a recursive
   // decomposition that may yield more than two characters, but the harfbuzz
   // callback wants just a single-step decomp that is guaranteed to produce
   // no more than two characters. So we do a low-level lookup in the table
   // of decomp sequences.
   const uint32_t *seq;
   uint32_t seqidx = decompose_char(c, &seq);
   if (seqidx == 0 || ((seqidx & DECOMP_COMPAT) != 0)) {
@@ -696,9 +792,10 @@ nsUnicodeNormalizer::DecomposeNonRecursi
   }
   *c1 = *seq & ~END_BIT;
   if (*seq & END_BIT) {
     *c2 = 0;
   } else {
     *c2 = *++seq & ~END_BIT;
   }
   return true;
+#endif
 }