author Makoto Kato <m_kato@ga2.so-net.ne.jp>

Mon, 16 May 2016 17:17:47 +0900

changeset 370596 686a7ecc81d3d8113c838689d076d9c6fe321e54

parent 370474 5511d54a3f172c1d68f98cc55dce4de1d0ba1b51

child 370597 2a7944cf50d8380a1c5693a8aa2613d0ef231f22

push id 19119

push user m_kato@ga2.so-net.ne.jp

push date Wed, 25 May 2016 04:47:19 +0000

reviewers jfkthame

bugs 1275474, 1261900

milestone 49.0a1

intl/unicharutil/moz.build file | annotate | diff | comparison | revisions

intl/unicharutil/nsUnicodeNormalizer.cpp file | annotate | diff | comparison | revisions
--- a/intl/unicharutil/moz.build
+++ b/intl/unicharutil/moz.build
@@ -22,15 +22,30 @@ EXPORTS += [
     'nsUnicodeNormalizer.h',
 ]
 
 UNIFIED_SOURCES += [
     'nsCaseConversionImp2.cpp',
     'nsCategoryImp.cpp',
     'nsEntityConverter.cpp',
     'nsSaveAsCharset.cpp',
-    'nsUnicodeNormalizer.cpp',
 ]
 
+if CONFIG['ENABLE_INTL_API']:  # ICU-backed build of the normalizer
+    SOURCES += [
+        'nsUnicodeNormalizer.cpp',
+    ]
+    SOURCES['nsUnicodeNormalizer.cpp'].flags += CONFIG['MOZ_ICU_CFLAGS']  # ICU flags for this file only
+    LOCAL_INCLUDES += CONFIG['MOZ_ICU_INCLUDES']
+    if CONFIG['_MSC_VER']:
+        # Temporary hack to support building with VS2015: -wd4577 silences
+        # MSVC warning C4577, "'noexcept' used with no exception handling mode
+        # specified; termination on exception is not guaranteed. Specify /EHsc".
+        SOURCES['nsUnicodeNormalizer.cpp'].flags += ['-wd4577']
+else:  # no ICU: the file stays in UNIFIED_SOURCES, as it was before this change
+    UNIFIED_SOURCES += [
+        'nsUnicodeNormalizer.cpp',
+    ]
+
 FINAL_LIBRARY = 'xul'
 
 if CONFIG['GNU_CXX']:
     CXXFLAGS += ['-Wno-error=shadow']
--- a/intl/unicharutil/nsUnicodeNormalizer.cpp
+++ b/intl/unicharutil/nsUnicodeNormalizer.cpp
@@ -46,35 +46,71 @@
  *    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  *    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  *    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  *    WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  *    OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  *    ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
  */
 
-#include <string.h>
-
-#include "nsMemory.h"
 #include "nsUnicodeNormalizer.h"
 #include "nsString.h"
+#if ENABLE_INTL_API
+#include "unicode/unorm2.h"
+#else
+#include "nsMemory.h"
 #include "mozilla/BinarySearch.h"
+#include <string.h>
+#endif
 
 NS_IMPL_ISUPPORTS(nsUnicodeNormalizer, nsIUnicodeNormalizer)
 
 
 nsUnicodeNormalizer::nsUnicodeNormalizer()
 {
 }
 
 nsUnicodeNormalizer::~nsUnicodeNormalizer()
 {
 }
 
+#if ENABLE_INTL_API
+// Normalizes aSrc with aNormalizer into aDest; returns false if ICU reports a failure.
+static bool
+NormalizeString(const UNormalizer2* aNormalizer, const nsAString& aSrc,
+                nsAString& aDest)
+{
+  UErrorCode error = U_ZERO_ERROR;
 
+  int32_t length =  // pass 1: null destination, only the returned length is used
+    unorm2_normalize(aNormalizer,
+                     reinterpret_cast<const UChar*>(aSrc.BeginReading()),
+                     aSrc.Length(),
+                     nullptr,
+                     0,
+                     &error);
+  if (error != U_BUFFER_OVERFLOW_ERROR && U_FAILURE(error)) {  // overflow is tolerated here
+    return false;
+  }
+  aDest.SetCapacity(length);
+  error = U_ZERO_ERROR;  // clear whatever pass 1 reported before calling ICU again
+  length =  // pass 2: write into aDest's buffer, sized by pass 1
+    unorm2_normalize(aNormalizer,
+                     reinterpret_cast<const UChar*>(aSrc.BeginReading()),
+                     aSrc.Length(),
+                     reinterpret_cast<UChar*>(aDest.BeginWriting()),
+                     length,
+                     &error);
+  if (U_FAILURE(error)) {
+    return false;
+  }
+  aDest.SetLength(length);
+  return true;
+}
+#else
 
 #define END_BIT		0x80000000
 
 
 /*
  * Some constants for Hangul decomposition/composition.
  * These things were taken from unicode book. 
  */
@@ -645,50 +681,110 @@ workbuf_removevoid(workbuf_t *wb) {
 				wb->cclass[j] = wb->cclass[i];
 			}
 			j++;
 		}
 	}
 	wb->cur -= last - j;
 	wb->last = j;
 }
+#endif // ENABLE_INTL_API
 
 nsresult  
 nsUnicodeNormalizer::NormalizeUnicodeNFD( const nsAString& aSrc, nsAString& aDest)
 {
+#if ENABLE_INTL_API
+  // ICU build: obtain ICU's NFD normalizer, then normalize aSrc into aDest.
+  UErrorCode status = U_ZERO_ERROR;
+  const UNormalizer2* nfd = unorm2_getNFDInstance(&status);
+  if (U_FAILURE(status)) {
+    return NS_ERROR_FAILURE;
+  }
+  // NormalizeString() yields a bool; translate it into an nsresult.
+  const bool ok = NormalizeString(nfd, aSrc, aDest);
+  return ok ? NS_OK : NS_ERROR_FAILURE;
+#else
   return mdn_normalize(false, false, aSrc, aDest);
+#endif
 }
 
 nsresult  
 nsUnicodeNormalizer::NormalizeUnicodeNFC( const nsAString& aSrc, nsAString& aDest)
 {
+#if ENABLE_INTL_API
+  // ICU build: normalize aSrc to NFC into aDest using ICU's NFC normalizer.
+  UErrorCode error = U_ZERO_ERROR;
+  const UNormalizer2* normalizer = unorm2_getNFCInstance(&error);
+  if (U_FAILURE(error)) {
+    // Same code the NFD/NFKD/NFKC variants return for this failure.
+    return NS_ERROR_FAILURE;
+  }
+  // NormalizeString() only reports success/failure; map that onto nsresult.
+  return NormalizeString(normalizer, aSrc, aDest) ? NS_OK : NS_ERROR_FAILURE;
+#else
   return mdn_normalize(true, false, aSrc, aDest);
+#endif
 }
 
 nsresult  
 nsUnicodeNormalizer::NormalizeUnicodeNFKD( const nsAString& aSrc, nsAString& aDest)
 {
+#if ENABLE_INTL_API
+  // ICU build: obtain ICU's NFKD normalizer, then normalize aSrc into aDest.
+  UErrorCode status = U_ZERO_ERROR;
+  const UNormalizer2* nfkd = unorm2_getNFKDInstance(&status);
+  if (U_FAILURE(status)) {
+    return NS_ERROR_FAILURE;
+  }
+  // NormalizeString() yields a bool; translate it into an nsresult.
+  const bool ok = NormalizeString(nfkd, aSrc, aDest);
+  return ok ? NS_OK : NS_ERROR_FAILURE;
+#else
   return mdn_normalize(false, true, aSrc, aDest);
+#endif
 }
 
 nsresult  
 nsUnicodeNormalizer::NormalizeUnicodeNFKC( const nsAString& aSrc, nsAString& aDest)
 {
+#if ENABLE_INTL_API
+  // ICU build: obtain ICU's NFKC normalizer, then normalize aSrc into aDest.
+  UErrorCode status = U_ZERO_ERROR;
+  const UNormalizer2* nfkc = unorm2_getNFKCInstance(&status);
+  if (U_FAILURE(status)) {
+    return NS_ERROR_FAILURE;
+  }
+  // NormalizeString() yields a bool; translate it into an nsresult.
+  const bool ok = NormalizeString(nfkc, aSrc, aDest);
+  return ok ? NS_OK : NS_ERROR_FAILURE;
+#else
   return mdn_normalize(true, true, aSrc, aDest);
+#endif
 }
 
 bool
 nsUnicodeNormalizer::Compose(uint32_t a, uint32_t b, uint32_t *ab)
 {
+#if ENABLE_INTL_API
+  // Unused in ICU builds: callers are expected to use the ICU API instead.
+  NS_ASSERTION(false, "Use ICU API instead");
+  return false;
+#else
   return mdn__unicode_compose(a, b, ab) == NS_OK;
+#endif
 }
 
 bool
 nsUnicodeNormalizer::DecomposeNonRecursively(uint32_t c, uint32_t *c1, uint32_t *c2)
 {
+#if ENABLE_INTL_API
+  // no longer used when turned on ICU
+  NS_ASSERTION(false, "Use ICU API instead");
+  return false;
+#else
   // We can't use mdn__unicode_decompose here, because that does a recursive
   // decomposition that may yield more than two characters, but the harfbuzz
   // callback wants just a single-step decomp that is guaranteed to produce
   // no more than two characters. So we do a low-level lookup in the table
   // of decomp sequences.
   const uint32_t *seq;
   uint32_t seqidx = decompose_char(c, &seq);
   if (seqidx == 0 || ((seqidx & DECOMP_COMPAT) != 0)) {
@@ -696,9 +792,10 @@ nsUnicodeNormalizer::DecomposeNonRecursi
   }
   *c1 = *seq & ~END_BIT;
   if (*seq & END_BIT) {
     *c2 = 0;
   } else {
     *c2 = *++seq & ~END_BIT;
   }
   return true;
+#endif
 }
author	Makoto Kato <m_kato@ga2.so-net.ne.jp>
	Mon, 16 May 2016 17:17:47 +0900
changeset 370596	686a7ecc81d3d8113c838689d076d9c6fe321e54
parent 370474	5511d54a3f172c1d68f98cc55dce4de1d0ba1b51
child 370597	2a7944cf50d8380a1c5693a8aa2613d0ef231f22
push id	19119
push user	m_kato@ga2.so-net.ne.jp
push date	Wed, 25 May 2016 04:47:19 +0000
reviewers	jfkthame
bugs	1275474, 1261900
milestone	49.0a1
intl/unicharutil/moz.build		file \| annotate \| diff \| comparison \| revisions
intl/unicharutil/nsUnicodeNormalizer.cpp		file \| annotate \| diff \| comparison \| revisions