Bug 1428771 - Make UCS2_CHAR_IS_BIDI check for lead surrogates corresponding to U+1E800...U+1EFFF and rename to UTF16_CODE_UNIT_IS_BIDI. r?jfkthame draft
author Henri Sivonen <hsivonen@hsivonen.fi>
Mon, 08 Jan 2018 16:18:32 +0200
changeset 717560 a73a9928adff184d868e1b87c0c3f6c01aab64c5
parent 717272 cd1f40908daa09bbea00503001f6297e61f3bf29
child 717564 f0633541b5dce569868b8dff3e1290f676f85008
push id 94730
push user bmo:hsivonen@hsivonen.fi
push date Tue, 09 Jan 2018 08:49:00 +0000
reviewers jfkthame
bugs 1428771
milestone 59.0a1
Bug 1428771 - Make UCS2_CHAR_IS_BIDI check for lead surrogates corresponding to U+1E800...U+1EFFF and rename to UTF16_CODE_UNIT_IS_BIDI. r?jfkthame
MozReview-Commit-ID: 9ZKF6SaN79n
browser/base/content/browser.js
intl/unicharutil/util/nsBidiUtils.h
layout/xul/nsTextBoxFrame.cpp
widget/cocoa/TextInputHandler.mm
--- a/browser/base/content/browser.js
+++ b/browser/base/content/browser.js
@@ -7479,21 +7479,21 @@ var gIdentityHandler = {
         tooltip = gNavigatorBundle.getFormattedString("identity.identified.verifier",
                                                       [iData.caOrg]);
         icon_label = iData.subjectOrg;
         if (iData.country)
           icon_country_label = "(" + iData.country + ")";
 
         // If the organization name starts with an RTL character, then
         // swap the positions of the organization and country code labels.
-        // The Unicode ranges reflect the definition of the UCS2_CHAR_IS_BIDI
+        // The Unicode ranges reflect the definition of the UTF16_CODE_UNIT_IS_BIDI
         // macro in intl/unicharutil/util/nsBidiUtils.h. When bug 218823 gets
         // fixed, this test should be replaced by one adhering to the
         // Unicode Bidirectional Algorithm proper (at the paragraph level).
-        icon_labels_dir = /^[\u0590-\u08ff\ufb1d-\ufdff\ufe70-\ufefc]/.test(icon_label) ?
+        icon_labels_dir = /^[\u0590-\u08ff\ufb1d-\ufdff\ufe70-\ufefc\ud802\ud803\ud83a\ud83b]/.test(icon_label) ?
                           "rtl" : "ltr";
       }
     } else if (this._pageExtensionPolicy) {
       this._identityBox.className = "extensionPage";
       let extensionName = this._pageExtensionPolicy.name;
       icon_label = gNavigatorBundle.getFormattedString(
         "identity.extension.label", [extensionName]);
     } else if (this._uriHasHost && this._isSecure) {
--- a/intl/unicharutil/util/nsBidiUtils.h
+++ b/intl/unicharutil/util/nsBidiUtils.h
@@ -258,15 +258,23 @@ typedef enum nsCharType nsCharType;
  *  http://www.unicode.org/roadmaps/
  */
 
 #define IS_IN_BMP_RTL_BLOCK(c) ((0x590 <= (c)) && ((c) <= 0x8ff))
 #define IS_RTL_PRESENTATION_FORM(c) (((0xfb1d <= (c)) && ((c) <= 0xfdff)) || \
                                      ((0xfe70 <= (c)) && ((c) <= 0xfefc)))
 #define IS_IN_SMP_RTL_BLOCK(c) (((0x10800 <= (c)) && ((c) <= 0x10fff)) || \
                                 ((0x1e800 <= (c)) && ((c) <= 0x1eFFF)))
-#define UCS2_CHAR_IS_BIDI(c) ((IS_IN_BMP_RTL_BLOCK(c)) || \
-                              (IS_RTL_PRESENTATION_FORM(c)) || \
-                              (c) == 0xD802 || (c) == 0xD803)
+// Due to the supplementary-plane RTL blocks being identifiable from the
+// high surrogate without examining the low surrogate, it is correct to
+// use this by-code-unit check on potentially astral text without doing
+// the math to decode surrogate pairs into code points. However, an unpaired
+// high surrogate from one of these RTL ranges then counts as RTL even
+// though the REPLACEMENT CHARACTER that would replace it is not
+// RTL.
+#define UTF16_CODE_UNIT_IS_BIDI(c) ((IS_IN_BMP_RTL_BLOCK(c)) || \
+                                    (IS_RTL_PRESENTATION_FORM(c)) || \
+                                    (c) == 0xD802 || (c) == 0xD803 || \
+                                    (c) == 0xD83A || (c) == 0xD83B)
 #define UTF32_CHAR_IS_BIDI(c)  ((IS_IN_BMP_RTL_BLOCK(c)) || \
                                (IS_RTL_PRESENTATION_FORM(c)) || \
                                (IS_IN_SMP_RTL_BLOCK(c)))
 #endif  /* nsBidiUtils_h__ */
--- a/layout/xul/nsTextBoxFrame.cpp
+++ b/layout/xul/nsTextBoxFrame.cpp
@@ -696,17 +696,17 @@ nsTextBoxFrame::CalculateTitleForWidth(g
                 ptrdiff_t length = nextPos - pos;
                 charWidth = nsLayoutUtils::AppUnitWidthOfString(pos, length,
                                                                 *fm,
                                                                 drawTarget);
                 if (totalWidth + charWidth > aWidth) {
                     break;
                 }
 
-                if (UCS2_CHAR_IS_BIDI(*pos)) {
+                if (UTF16_CODE_UNIT_IS_BIDI(*pos)) {
                     AddStateBits(NS_FRAME_IS_BIDI);
                 }
                 pos = nextPos;
                 totalWidth += charWidth;
             }
 
             if (pos == dataBegin) {
                 return titleWidth;
@@ -733,17 +733,17 @@ nsTextBoxFrame::CalculateTitleForWidth(g
                 ptrdiff_t length = prevPos - pos;
                 charWidth = nsLayoutUtils::AppUnitWidthOfString(pos, length,
                                                                 *fm,
                                                                 drawTarget);
                 if (totalWidth + charWidth > aWidth) {
                     break;
                 }
 
-                if (UCS2_CHAR_IS_BIDI(*pos)) {
+                if (UTF16_CODE_UNIT_IS_BIDI(*pos)) {
                     AddStateBits(NS_FRAME_IS_BIDI);
                 }
                 prevPos = pos;
                 totalWidth += charWidth;
             }
 
             if (prevPos == dataEnd) {
                 return titleWidth;
@@ -785,17 +785,17 @@ nsTextBoxFrame::CalculateTitleForWidth(g
                 length = pos - leftPos;
                 charWidth = nsLayoutUtils::AppUnitWidthOfString(leftPos, length,
                                                                 *fm,
                                                                 drawTarget);
                 if (totalWidth + charWidth > aWidth) {
                     break;
                 }
 
-                if (UCS2_CHAR_IS_BIDI(*leftPos)) {
+                if (UTF16_CODE_UNIT_IS_BIDI(*leftPos)) {
                     AddStateBits(NS_FRAME_IS_BIDI);
                 }
 
                 leftString.Append(leftPos, length);
                 leftPos = pos;
                 totalWidth += charWidth;
 
                 if (leftPos >= rightPos) {
@@ -807,17 +807,17 @@ nsTextBoxFrame::CalculateTitleForWidth(g
                 length = rightPos - pos;
                 charWidth = nsLayoutUtils::AppUnitWidthOfString(pos, length,
                                                                 *fm,
                                                                 drawTarget);
                 if (totalWidth + charWidth > aWidth) {
                     break;
                 }
 
-                if (UCS2_CHAR_IS_BIDI(*pos)) {
+                if (UTF16_CODE_UNIT_IS_BIDI(*pos)) {
                     AddStateBits(NS_FRAME_IS_BIDI);
                 }
 
                 rightString.Insert(pos, 0, length);
                 rightPos = pos;
                 totalWidth += charWidth;
             }
 
--- a/widget/cocoa/TextInputHandler.mm
+++ b/widget/cocoa/TextInputHandler.mm
@@ -739,17 +739,17 @@ bool
 TISInputSourceWrapper::IsForRTLLanguage()
 {
   if (mIsRTL < 0) {
     // Get the input character of the 'A' key of ANSI keyboard layout.
     nsAutoString str;
     bool ret = TranslateToString(kVK_ANSI_A, 0, eKbdType_ANSI, str);
     NS_ENSURE_TRUE(ret, ret);
     char16_t ch = str.IsEmpty() ? char16_t(0) : str.CharAt(0);
-    mIsRTL = UCS2_CHAR_IS_BIDI(ch);
+    mIsRTL = UTF16_CODE_UNIT_IS_BIDI(ch);
   }
   return mIsRTL != 0;
 }
 
 bool
 TISInputSourceWrapper::IsInitializedByCurrentInputSource()
 {
   return mInputSource == ::TISCopyCurrentKeyboardInputSource();
@@ -3292,22 +3292,22 @@ IMEInputHandler::GetRangeCount(NSAttribu
   NS_OBJC_BEGIN_TRY_ABORT_BLOCK_RETURN;
 
   // Iterate through aAttrString for the NSUnderlineStyleAttributeName and
   // count the different segments adjusting limitRange as we go.
   uint32_t count = 0;
   NSRange effectiveRange;
   NSRange limitRange = NSMakeRange(0, [aAttrString length]);
   while (limitRange.length > 0) {
-    [aAttrString  attribute:NSUnderlineStyleAttributeName 
-                    atIndex:limitRange.location 
+    [aAttrString  attribute:NSUnderlineStyleAttributeName
+                    atIndex:limitRange.location
       longestEffectiveRange:&effectiveRange
                     inRange:limitRange];
     limitRange =
-      NSMakeRange(NSMaxRange(effectiveRange), 
+      NSMakeRange(NSMaxRange(effectiveRange),
                   NSMaxRange(limitRange) - NSMaxRange(effectiveRange));
     count++;
   }
 
   MOZ_LOG(gLog, LogLevel::Info,
     ("%p IMEInputHandler::GetRangeCount, aAttrString=\"%s\", count=%u",
      this, GetCharacters([aAttrString string]), count));
 
@@ -3353,17 +3353,17 @@ IMEInputHandler::CreateTextRangeArray(NS
 
     MOZ_LOG(gLog, LogLevel::Info,
       ("%p IMEInputHandler::CreateTextRangeArray, "
        "range={ mStartOffset=%u, mEndOffset=%u, mRangeType=%s }",
        this, range.mStartOffset, range.mEndOffset,
        ToChar(range.mRangeType)));
 
     limitRange =
-      NSMakeRange(NSMaxRange(effectiveRange), 
+      NSMakeRange(NSMaxRange(effectiveRange),
                   NSMaxRange(limitRange) - NSMaxRange(effectiveRange));
   }
 
   // Get current caret position.
   TextRange range;
   range.mStartOffset = aSelectedRange.location + aSelectedRange.length;
   range.mEndOffset = range.mStartOffset;
   range.mRangeType = TextRangeType::eCaret;