Bug 1400006 - Extend language negotiation in LocaleService to support looking for the best likelySubtag for the locale with region stripped. r?jfkthame draft
authorZibi Braniecki <zbraniecki@mozilla.com>
Thu, 14 Sep 2017 15:21:33 -0700
changeset 666422 ae9aaab9f914b4028add496f4f25068b2d31d571
parent 666274 ffe6cc09ccf38cca6f0e727837bbc6cb722d1e71
child 732107 88f81084b10e1b90e44eccd5f2da3068af863b8e
push id80409
push userbwerth@mozilla.com
push dateMon, 18 Sep 2017 19:07:18 +0000
reviewersjfkthame
bugs1400006
milestone57.0a1
Bug 1400006 - Extend language negotiation in LocaleService to support looking for the best likelySubtag for the locale with region stripped. r?jfkthame Add additional logic to our language negotation to do apply likelySubtags when a direct match is not available. Currently, if the user specifies the locale with region, and we do not have a direct for that region, we pick all locales for the same language and other regions in no order. The example of where it returns suboptimal results: 1) Requested locale "en-CA" 2) Available locales ["en-ZA", "en-GB", "en-US"] 3) Negotiated locales ["en-ZA", "en-GB", "en-US"] This would not happen, if the user requested a generic "de", "en" etc.: 1) Requested locale "en" 2) Available locales ["en-ZA", "en-GB", "en-US"] 3) Negotiated locales ["en-US", "en-ZA", "en-GB"] because after not finding a direct match, we would use likelySubtags to extend "en" to "en-Latn-US" and then find the priority match in "en-US". This patch extends this logic to "en-US" or "de-LU" by adding a step which strips the region tag and then applies likelySubtag on the result. This means that in absence of direct match the following fallbacks would happen: "de-LU" -> "de-DE" "es-CL" -> "es-ES" "en-CA" -> "en-US" This does not affect languages that use multiple scripts, so ar, sr and zh are not affected. MozReview-Commit-ID: BR1WrgXSf6a
intl/locale/LocaleService.cpp
intl/locale/LocaleService.h
intl/locale/tests/unit/test_localeService_negotiateLanguages.js
--- a/intl/locale/LocaleService.cpp
+++ b/intl/locale/LocaleService.cpp
@@ -345,17 +345,17 @@ LocaleService::OnLocalesChanged()
             case LangNegStrategy::Filtering: \
               break; \
           }
 
 /**
  * This is the raw algorithm for language negotiation based roughly
  * on RFC4647 language filtering, with changes from LDML language matching.
  *
- * The exact algorithm is custom, and consist of 5 level strategy:
+ * The exact algorithm is custom, and consists of a 6 level strategy:
  *
  * 1) Attempt to find an exact match for each requested locale in available
  *    locales.
  *    Example: ['en-US'] * ['en-US'] = ['en-US']
  *
  * 2) Attempt to match a requested locale to an available locale treated
  *    as a locale range.
  *    Example: ['en-US'] * ['en'] = ['en']
@@ -368,17 +368,24 @@ LocaleService::OnLocalesChanged()
  *               ^^
  *               |-- ICU likelySubtags expands it to 'en-Latn-US'
  *
  * 4) Attempt to look up for a different variant of the same locale.
  *    Example: ['ja-JP-win'] * ['ja-JP-mac'] = ['ja-JP-mac']
  *               ^^^^^^^^^
  *               |----------- replace variant with range: 'ja-JP-*'
  *
- * 5) Attempt to look up for a different region of the same locale.
+ * 5) Attempt to look up for a maximized version of the requested locale,
+ *    stripped of the region code.
+ *    Example: ['en-CA'] * ['en-ZA', 'en-US'] = ['en-US', 'en-ZA']
+ *               ^^^^^
+ *               |----------- look for likelySubtag of 'en': 'en-Latn-US'
+ *
+ *
+ * 6) Attempt to look up for a different region of the same locale.
  *    Example: ['en-GB'] * ['en-AU'] = ['en-AU']
  *               ^^^^^
  *               |----- replace region with range: 'en-*'
  *
  * It uses one of the strategies described in LocaleService.h.
  */
 void
 LocaleService::FilterMatches(const nsTArray<nsCString>& aRequested,
@@ -454,17 +461,25 @@ LocaleService::FilterMatches(const nsTAr
     }
 
     // 4) Try to match against a variant as a range
     requestedLocale.SetVariantRange();
     if (findRangeMatches(requestedLocale)) {
       HANDLE_STRATEGY;
     }
 
-    // 5) Try to match against a region as a range
+    // 5) Try to match against the likely subtag without region
+    if (requestedLocale.AddLikelySubtagsWithoutRegion()) {
+      if (findRangeMatches(requestedLocale)) {
+        HANDLE_STRATEGY;
+      }
+    }
+
+
+    // 6) Try to match against a region as a range
     requestedLocale.SetRegionRange();
     if (findRangeMatches(requestedLocale)) {
       HANDLE_STRATEGY;
     }
   }
 }
 
 bool
@@ -817,38 +832,63 @@ void
 LocaleService::Locale::SetRegionRange()
 {
   mRegion.AssignLiteral("*");
 }
 
 bool
 LocaleService::Locale::AddLikelySubtags()
 {
+  return AddLikelySubtagsForLocale(mLocaleStr);
+}
+
+bool
+LocaleService::Locale::AddLikelySubtagsWithoutRegion()
+{
+  nsAutoCString locale(mLanguage);
+
+  if (!mScript.IsEmpty()) {
+    locale.Append("-");
+    locale.Append(mScript);
+  }
+
+  // We don't add variant here because likelySubtag doesn't care about it.
+
+  return AddLikelySubtagsForLocale(locale);
+}
+
+bool
+LocaleService::Locale::AddLikelySubtagsForLocale(const nsACString& aLocale)
+{
 #ifdef ENABLE_INTL_API
   const int32_t kLocaleMax = 160;
   char maxLocale[kLocaleMax];
+  nsAutoCString locale(aLocale);
 
   UErrorCode status = U_ZERO_ERROR;
-  uloc_addLikelySubtags(mLocaleStr.get(), maxLocale, kLocaleMax, &status);
+  uloc_addLikelySubtags(locale.get(), maxLocale, kLocaleMax, &status);
 
   if (U_FAILURE(status)) {
     return false;
   }
 
   nsDependentCString maxLocStr(maxLocale);
   Locale loc = Locale(maxLocStr, false);
 
   if (loc == *this) {
     return false;
   }
 
   mLanguage = loc.mLanguage;
   mScript = loc.mScript;
   mRegion = loc.mRegion;
-  mVariant = loc.mVariant;
+
+  // We don't update variant from likelySubtag since it's not going to
+  // provide it and we want to preserve the range
+
   return true;
 #else
   return false;
 #endif
 }
 
 NS_IMETHODIMP
 LocaleService::GetRequestedLocales(uint32_t* aCount, char*** aOutArray)
--- a/intl/locale/LocaleService.h
+++ b/intl/locale/LocaleService.h
@@ -266,17 +266,19 @@ private:
     Locale(const nsCString& aLocale, bool aRange);
 
     bool Matches(const Locale& aLocale) const;
     bool LanguageMatches(const Locale& aLocale) const;
 
     void SetVariantRange();
     void SetRegionRange();
 
-    bool AddLikelySubtags(); // returns false if nothing changed
+    // returns false if nothing changed
+    bool AddLikelySubtags();
+    bool AddLikelySubtagsWithoutRegion();
 
     const nsCString& AsString() const {
       return mLocaleStr;
     }
 
     bool operator== (const Locale& aOther) {
       const auto& cmp = nsCaseInsensitiveCStringComparator();
       return mLanguage.Equals(aOther.mLanguage, cmp) &&
@@ -286,16 +288,18 @@ private:
     }
 
   private:
     const nsCString& mLocaleStr;
     nsCString mLanguage;
     nsCString mScript;
     nsCString mRegion;
     nsCString mVariant;
+
+    bool AddLikelySubtagsForLocale(const nsACString& aLocale);
   };
 
   void FilterMatches(const nsTArray<nsCString>& aRequested,
                      const nsTArray<nsCString>& aAvailable,
                      LangNegStrategy aStrategy,
                      nsTArray<nsCString>& aRetVal);
 
   void NegotiateAppLocales(nsTArray<nsCString>& aRetVal);
--- a/intl/locale/tests/unit/test_localeService_negotiateLanguages.js
+++ b/intl/locale/tests/unit/test_localeService_negotiateLanguages.js
@@ -30,16 +30,19 @@ const data = {
       [["fr"], ["fr-CA", "fr-FR"], ["fr-FR", "fr-CA"]],
       [["az-IR"], ["az-Latn", "az-Arab"], ["az-Arab"]],
       [["sr-RU"], ["sr-Cyrl", "sr-Latn"], ["sr-Latn"]],
       [["sr"], ["sr-Latn", "sr-Cyrl"], ["sr-Cyrl"]],
       [["zh-GB"], ["zh-Hans", "zh-Hant"], ["zh-Hant"]],
       [["sr", "ru"], ["sr-Latn", "ru"], ["ru"]],
       [["sr-RU"], ["sr-Latn-RO", "sr-Cyrl"], ["sr-Latn-RO"]],
     ],
+    "should match likelySubtag region over other regions": [
+      [["en-CA"], ["en-ZA", "en-GB", "en-US"], ["en-US", "en-ZA", "en-GB"]],
+    ],
     "should match on a requested locale as a range": [
       [["en-*-US"], ["en-US"], ["en-US"]],
       [["en-Latn-US-*"], ["en-Latn-US"], ["en-Latn-US"]],
       [["en-*-US-*"], ["en-US"], ["en-US"]],
     ],
     "should match cross-region": [
       [["en"], ["en-US"], ["en-US"]],
       [["en-US"], ["en-GB"], ["en-GB"]],