--- a/intl/locale/LocaleService.cpp
+++ b/intl/locale/LocaleService.cpp
@@ -1,20 +1,25 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "LocaleService.h"
+#include <algorithm> // find_if()
#include "mozilla/ClearOnShutdown.h"
#include "mozilla/Services.h"
#include "nsIObserverService.h"
#include "nsIToolkitChromeRegistry.h"
+#ifdef ENABLE_INTL_API
+#include "unicode/uloc.h"
+#endif
+
using namespace mozilla::intl;
NS_IMPL_ISUPPORTS(LocaleService, mozILocaleService)
mozilla::StaticRefPtr<LocaleService> LocaleService::sInstance;
/**
* This function performs the actual language negotiation for the API.
@@ -72,16 +77,180 @@ LocaleService::Refresh()
mAppLocales = Move(newLocales);
nsCOMPtr<nsIObserverService> obs = mozilla::services::GetObserverService();
if (obs) {
obs->NotifyObservers(nullptr, "intl:app-locales-changed", nullptr);
}
}
}
+// After trying each step of the negotiation algorithm for each requested locale,
+// if a match was found we use this macro to decide whether to return immediately,
+// skip to the next requested locale, or continue searching for additional matches,
+// according to the desired negotiation strategy.
+//
+// Requirements at the expansion site (the macro expands to a bare switch):
+// - a variable named `aStrategy` of type LangNegStrategy must be in scope;
+// - Lookup expands to `return;`, so the enclosing function must return void;
+// - Matching expands to `continue;`, so the macro must be expanded inside the
+// loop over requested locales (the `continue` applies to that loop);
+// - Filtering expands to `break;`, which only leaves the switch itself, so
+// execution carries on with whatever statement follows the macro.
+#define HANDLE_STRATEGY \
+ switch (aStrategy) { \
+ case LangNegStrategy::Lookup: \
+ return; \
+ case LangNegStrategy::Matching: \
+ continue; \
+ case LangNegStrategy::Filtering: \
+ break; \
+ }
+
+/**
+ * This is the raw algorithm for language negotiation, based roughly
+ * on RFC4647 language filtering, with changes from LDML language matching.
+ *
+ * The exact algorithm is custom, and consists of a five-step strategy that
+ * is applied, in order, to each requested locale:
+ *
+ * 1) Attempt to find an exact match for the requested locale in the
+ *    available locales.
+ *    Example: ['en-US'] * ['en-US'] = ['en-US']
+ *
+ * 2) Attempt to match the requested locale to an available locale treated
+ *    as a locale range.
+ *    Example: ['en-US'] * ['en'] = ['en']
+ *                           ^^
+ *                           |-- becomes 'en-*-*-*'
+ *
+ * 3) Attempt to use the maximized version of the requested locale, to
+ *    find the best match in available locales.
+ *    Example: ['en'] * ['en-GB', 'en-US'] = ['en-US']
+ *               ^^
+ *               |-- ICU likelySubtags expands it to 'en-Latn-US'
+ *
+ * 4) Attempt to look up a different variant of the same locale.
+ *    Example: ['ja-JP-win'] * ['ja-JP-mac'] = ['ja-JP-mac']
+ *               ^^^^^^^^^
+ *               |----------- replace variant with range: 'ja-JP-*'
+ *
+ * 5) Attempt to look up a different region of the same locale.
+ *    Example: ['en-GB'] * ['en-AU'] = ['en-AU']
+ *               ^^^^^
+ *               |----- replace region with range: 'en-*'
+ *
+ * Steps 4 and 5 operate on the same Locale object as step 3, so when the
+ * maximization in step 3 succeeded they widen the maximized locale.
+ *
+ * It uses one of the strategies described in LocaleService.h.
+ *
+ * @param aRequested  Requested locales, in priority order.
+ * @param aAvailable  Locales that may be selected; each is treated as a range.
+ * @param aStrategy   Negotiation strategy; see HANDLE_STRATEGY for how each
+ *                    value steers the loop after a match.
+ * @param aRetVal     Receives the selected available locales, in the order
+ *                    they were matched. Entries are appended; the array is
+ *                    not cleared first.
+ */
+void
+LocaleService::FilterMatches(const nsTArray<nsCString>& aRequested,
+                             const nsTArray<nsCString>& aAvailable,
+                             LangNegStrategy aStrategy,
+                             nsTArray<nsCString>& aRetVal)
+{
+  // Local copy of the list of available locales, in Locale form for flexible
+  // matching. We will remove entries from this list as they get appended to
+  // aRetVal, so that no available locale will be found more than once.
+  AutoTArray<Locale, 100> availLocales;
+  for (auto& avail : aAvailable) {
+    availLocales.AppendElement(Locale(avail, true));
+  }
+
+  // Helper to erase an entry from availLocales once we have copied it to
+  // the result list. Returns an iterator pointing to the entry that was
+  // immediately after the one that was erased (or availLocales.end() if
+  // the target was the last in the array).
+  auto eraseFromAvail = [&](nsTArray<Locale>::iterator aIter) {
+    nsTArray<Locale>::size_type index = aIter - availLocales.begin();
+    availLocales.RemoveElementAt(index);
+    return availLocales.begin() + index;
+  };
+
+  for (auto& requested : aRequested) {
+
+    // 1) Try to find a simple (case-insensitive) string match for the request.
+    auto matchesExactly = [&](const Locale& aLoc) {
+      return requested.Equals(aLoc.AsString(),
+                              nsCaseInsensitiveCStringComparator());
+    };
+    auto match = std::find_if(availLocales.begin(), availLocales.end(),
+                              matchesExactly);
+    if (match != availLocales.end()) {
+      aRetVal.AppendElement(match->AsString());
+      eraseFromAvail(match);
+      // Consult the strategy only when *this* requested locale produced a
+      // match. Keying the decision on "aRetVal is non-empty" instead would,
+      // under the Matching strategy, skip steps 2-5 for every requested
+      // locale that follows the first successful one.
+      HANDLE_STRATEGY;
+    }
+
+    // 2) Try to match against the available locales treated as ranges.
+    // Appends every matching available locale (only the first one unless the
+    // strategy is Filtering) and reports whether anything was appended.
+    auto findRangeMatches = [&](const Locale& aReq) {
+      auto matchesRange = [&](const Locale& aLoc) {
+        return aReq.Matches(aLoc);
+      };
+      bool foundMatch = false;
+      auto match = availLocales.begin();
+      while ((match = std::find_if(match, availLocales.end(),
+                                   matchesRange)) != availLocales.end()) {
+        aRetVal.AppendElement(match->AsString());
+        // Resume the search from the element that followed the erased one.
+        match = eraseFromAvail(match);
+        foundMatch = true;
+        if (aStrategy != LangNegStrategy::Filtering) {
+          return true; // we only want the first match
+        }
+      }
+      return foundMatch;
+    };
+
+    Locale requestedLocale = Locale(requested, false);
+    if (findRangeMatches(requestedLocale)) {
+      HANDLE_STRATEGY;
+    }
+
+    // 3) Try to match against a maximized version of the requested locale
+    if (requestedLocale.AddLikelySubtags()) {
+      if (findRangeMatches(requestedLocale)) {
+        HANDLE_STRATEGY;
+      }
+    }
+
+    // 4) Try to match against a variant as a range
+    requestedLocale.SetVariantRange();
+    if (findRangeMatches(requestedLocale)) {
+      HANDLE_STRATEGY;
+    }
+
+    // 5) Try to match against a region as a range
+    requestedLocale.SetRegionRange();
+    if (findRangeMatches(requestedLocale)) {
+      HANDLE_STRATEGY;
+    }
+  }
+}
+
+/**
+ * Runs FilterMatches() and then applies the default-locale rules.
+ *
+ * - Lookup: a default locale is mandatory (returns false without touching
+ *   aRetVal when it is empty); the default is appended only when filtering
+ *   produced no match at all.
+ * - Any other strategy: the default, when set, is appended unless the result
+ *   already contains it.
+ *
+ * @return false only for the Lookup-without-default case, true otherwise.
+ */
+bool
+LocaleService::NegotiateLanguages(const nsTArray<nsCString>& aRequested,
+                                  const nsTArray<nsCString>& aAvailable,
+                                  const nsACString& aDefaultLocale,
+                                  LangNegStrategy aStrategy,
+                                  nsTArray<nsCString>& aRetVal)
+{
+  const bool isLookup = (aStrategy == LangNegStrategy::Lookup);
+  const bool haveDefault = !aDefaultLocale.IsEmpty();
+
+  // Lookup needs a default locale to fall back on; refuse to run without one.
+  if (isLookup && !haveDefault) {
+    return false;
+  }
+
+  FilterMatches(aRequested, aAvailable, aStrategy, aRetVal);
+
+  // Decide whether the default locale still has to be added to the result.
+  bool appendDefault;
+  if (isLookup) {
+    // Lookup falls back to the default only when nothing matched.
+    appendDefault = aRetVal.IsEmpty();
+  } else {
+    // Other strategies add the default when it is set and not yet present.
+    appendDefault = haveDefault && !aRetVal.Contains(aDefaultLocale);
+  }
+
+  if (appendDefault) {
+    aRetVal.AppendElement(aDefaultLocale);
+  }
+  return true;
+}
+
+
/**
* mozILocaleService methods
*/
NS_IMETHODIMP
LocaleService::GetAppLocales(uint32_t* aCount, char*** aOutArray)
{
if (mAppLocales.IsEmpty()) {
ReadAppLocales(mAppLocales);
@@ -101,8 +270,203 @@ NS_IMETHODIMP
LocaleService::GetAppLocale(nsACString& aRetVal)
{
if (mAppLocales.IsEmpty()) {
ReadAppLocales(mAppLocales);
}
aRetVal = mAppLocales[0];
return NS_OK;
}
+
+// Converts the integer strategy code used by the XPCOM interface into the
+// LangNegStrategy enum: 1 is Matching, 2 is Lookup, and every other value
+// maps to Filtering.
+static LocaleService::LangNegStrategy
+ToLangNegStrategy(int32_t aStrategy)
+{
+  if (aStrategy == 1) {
+    return LocaleService::LangNegStrategy::Matching;
+  }
+  if (aStrategy == 2) {
+    return LocaleService::LangNegStrategy::Lookup;
+  }
+  return LocaleService::LangNegStrategy::Filtering;
+}
+
+/**
+ * mozILocaleService entry point for language negotiation.
+ *
+ * Copies the raw C-string arrays into nsCString arrays (silently dropping
+ * entries that are null, empty, or contain characters other than ASCII
+ * alphanumerics, '-', '_' and '*'), runs the nsTArray-based
+ * NegotiateLanguages() overload, and returns the result as a newly
+ * allocated array of newly allocated C strings.
+ *
+ * @param aRequested       Array of aRequestedCount requested locale strings.
+ * @param aAvailable       Array of aAvailableCount available locale strings.
+ * @param aDefaultLocale   Default locale; it is passed through without the
+ *                         character validation applied to the arrays.
+ * @param aStrategy        Integer strategy code in [0, 2]; converted with
+ *                         ToLangNegStrategy().
+ * @param aCount           Out: number of entries written to *aRetVal.
+ * @param aRetVal          Out: array allocated with moz_xmalloc whose
+ *                         entries are allocated with moz_xstrdup.
+ *                         NOTE(review): presumably the caller takes
+ *                         ownership and frees both - confirm against the
+ *                         IDL contract.
+ *
+ * @return NS_ERROR_INVALID_ARG when aStrategy is out of range or when the
+ *         negotiation itself rejects the arguments; NS_OK otherwise. The
+ *         out parameters are written only on success.
+ */
+NS_IMETHODIMP
+LocaleService::NegotiateLanguages(const char** aRequested,
+                                  const char** aAvailable,
+                                  const char* aDefaultLocale,
+                                  int32_t aStrategy,
+                                  uint32_t aRequestedCount,
+                                  uint32_t aAvailableCount,
+                                  uint32_t* aCount, char*** aRetVal)
+{
+  if (aStrategy < 0 || aStrategy > 2) {
+    return NS_ERROR_INVALID_ARG;
+  }
+
+  // Check that the given string is non-null, non-empty, and contains only
+  // ASCII characters valid in tags or ranges (i.e. alphanumerics, plus '-',
+  // '_' and the '*' wildcard).
+  auto validTagChars = [](const char* s) {
+    // A null element must be rejected here: dereferencing it would crash.
+    if (!s || !*s) {
+      return false;
+    }
+    for (; *s; ++s) {
+      // The cast keeps isalnum() away from negative char values.
+      if (!isalnum(static_cast<unsigned char>(*s)) &&
+          *s != '-' && *s != '_' && *s != '*') {
+        return false;
+      }
+    }
+    return true;
+  };
+
+  AutoTArray<nsCString, 100> requestedLocales;
+  for (uint32_t i = 0; i < aRequestedCount; i++) {
+    if (!validTagChars(aRequested[i])) {
+      continue;
+    }
+    requestedLocales.AppendElement(aRequested[i]);
+  }
+
+  AutoTArray<nsCString, 100> availableLocales;
+  for (uint32_t i = 0; i < aAvailableCount; i++) {
+    if (!validTagChars(aAvailable[i])) {
+      continue;
+    }
+    availableLocales.AppendElement(aAvailable[i]);
+  }
+
+  nsAutoCString defaultLocale(aDefaultLocale);
+
+  LangNegStrategy strategy = ToLangNegStrategy(aStrategy);
+
+  AutoTArray<nsCString, 100> supportedLocales;
+  bool result = NegotiateLanguages(requestedLocales, availableLocales,
+                                   defaultLocale, strategy, supportedLocales);
+
+  if (!result) {
+    return NS_ERROR_INVALID_ARG;
+  }
+
+  *aRetVal =
+    static_cast<char**>(moz_xmalloc(sizeof(char*) * supportedLocales.Length()));
+
+  // *aCount doubles as the write index while the output array is filled.
+  *aCount = 0;
+  for (const auto& supported : supportedLocales) {
+    (*aRetVal)[(*aCount)++] = moz_xstrdup(supported.get());
+  }
+
+  return NS_OK;
+}
+
+// Parses a locale string into language / script / region / variant subtags.
+//
+// The original string is kept verbatim in mLocaleStr. For parsing, '_' is
+// treated the same as '-', and subtags are assigned purely by position and
+// length:
+// position 0: language, if it is "*" or 2-3 characters long;
+// position 1: script, if it is "*" or 4 characters long; otherwise the same
+// part is tried as a region;
+// position 2: region, if it is "*" or 2 characters long;
+// position 3: variant, if it is "*" or 3 characters long.
+// A part that fails its check is dropped (the field stays empty) but still
+// uses up its position; parts past position 3 are ignored.
+//
+// When aRange is true, every subtag that is still empty after parsing is set
+// to the "*" wildcard.
+LocaleService::Locale::Locale(const nsCString& aLocale, bool aRange)
+ : mLocaleStr(aLocale)
+{
+ // Index of the subtag slot the next part is expected to fill.
+ int32_t partNum = 0;
+
+ // Work on a copy so that mLocaleStr keeps the caller's original spelling.
+ nsAutoCString normLocale(aLocale);
+ normLocale.ReplaceChar('_', '-');
+
+ for (const nsCSubstring& part : normLocale.Split('-')) {
+ switch (partNum) {
+ case 0:
+ if (part.EqualsLiteral("*") ||
+ part.Length() == 2 || part.Length() == 3) {
+ mLanguage.Assign(part);
+ }
+ break;
+ case 1:
+ // A "*" in this position is always taken as the script.
+ if (part.EqualsLiteral("*") || part.Length() == 4) {
+ mScript.Assign(part);
+ break;
+ }
+
+ // Not a script: skip the script slot and fall through so that this same
+ // part is examined as a region.
+ partNum++;
+ MOZ_FALLTHROUGH;
+ case 2:
+ if (part.EqualsLiteral("*") || part.Length() == 2) {
+ mRegion.Assign(part);
+ }
+ break;
+ case 3:
+ if (part.EqualsLiteral("*") || part.Length() == 3) {
+ mVariant.Assign(part);
+ }
+ break;
+ }
+ partNum++;
+ }
+
+ // In range mode, unspecified subtags match anything.
+ if (aRange) {
+ if (mLanguage.IsEmpty()) {
+ mLanguage.Assign(NS_LITERAL_CSTRING("*"));
+ }
+ if (mScript.IsEmpty()) {
+ mScript.Assign(NS_LITERAL_CSTRING("*"));
+ }
+ if (mRegion.IsEmpty()) {
+ mRegion.Assign(NS_LITERAL_CSTRING("*"));
+ }
+ if (mVariant.IsEmpty()) {
+ mVariant.Assign(NS_LITERAL_CSTRING("*"));
+ }
+ }
+}
+
+// Returns true when every subtag of this locale is compatible with the
+// corresponding subtag of aLocale. Two subtags are compatible when either of
+// them is the "*" wildcard or when they are equal ignoring case.
+bool
+LocaleService::Locale::Matches(const LocaleService::Locale& aLocale) const
+{
+  auto compatible = [](const nsCString& aLeft,
+                       const nsCString& aRight) -> bool {
+    if (aLeft.EqualsLiteral("*") || aRight.EqualsLiteral("*")) {
+      return true;
+    }
+    return aLeft.Equals(aRight, nsCaseInsensitiveCStringComparator());
+  };
+
+  // Check the subtags from most to least significant, stopping at the first
+  // one that is incompatible.
+  if (!compatible(mLanguage, aLocale.mLanguage)) {
+    return false;
+  }
+  if (!compatible(mScript, aLocale.mScript)) {
+    return false;
+  }
+  if (!compatible(mRegion, aLocale.mRegion)) {
+    return false;
+  }
+  return compatible(mVariant, aLocale.mVariant);
+}
+
+// Turns the variant subtag into the "*" wildcard so that Matches() accepts
+// any variant.
+void
+LocaleService::Locale::SetVariantRange()
+{
+  mVariant.Assign(NS_LITERAL_CSTRING("*"));
+}
+
+// Turns the region subtag into the "*" wildcard so that Matches() accepts
+// any region.
+void
+LocaleService::Locale::SetRegionRange()
+{
+  mRegion.Assign(NS_LITERAL_CSTRING("*"));
+}
+
+// Replaces this locale's subtags with those of its maximized form, computed
+// by ICU's uloc_addLikelySubtags() from the original locale string
+// (mLocaleStr). mLocaleStr itself is left unchanged.
+//
+// Returns true when the subtags were updated. Returns false when ICU reports
+// an error, when the maximized name does not fit (terminated) in the local
+// buffer, when the maximized locale compares equal to this one, or when the
+// build has no ENABLE_INTL_API.
+bool
+LocaleService::Locale::AddLikelySubtags()
+{
+#ifdef ENABLE_INTL_API
+  const int32_t kLocaleMax = 160;
+  char maxLocale[kLocaleMax];
+
+  UErrorCode status = U_ZERO_ERROR;
+  uloc_addLikelySubtags(mLocaleStr.get(), maxLocale, kLocaleMax, &status);
+
+  // U_STRING_NOT_TERMINATED_WARNING is a warning, not a failure, so
+  // U_FAILURE() lets it through; it means the output filled the buffer
+  // exactly and has no trailing NUL, which makes it unsafe to read as a
+  // C string below.
+  if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) {
+    return false;
+  }
+
+  nsDependentCString maxLocStr(maxLocale);
+  Locale loc = Locale(maxLocStr, false);
+
+  // Nothing to do if maximizing did not change anything.
+  if (loc == *this) {
+    return false;
+  }
+
+  mLanguage = loc.mLanguage;
+  mScript = loc.mScript;
+  mRegion = loc.mRegion;
+  mVariant = loc.mVariant;
+  return true;
+#else
+  return false;
+#endif
+}
new file mode 100644
--- /dev/null
+++ b/intl/locale/tests/unit/test_localeService_negotiateLanguages.js
@@ -0,0 +1,143 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// The locale service under test.
+const localeService =
+ Components.classes["@mozilla.org/intl/localeservice;1"]
+ .getService(Components.interfaces.mozILocaleService);
+
+// Test cases, grouped first by a strategy label and then by a descriptive
+// group name. Both labels are only used for organisation; run_test() does not
+// pass them to the service.
+//
+// Each case is an array read by run_test() as follows:
+// [requested, available, expected]
+// [requested, available, defaultLocale, expected]
+// [requested, available, defaultLocale, strategy, expected]
+// The last element is always the expected result. The default locale is taken
+// from index 2 only when the case has more than three elements, and the
+// strategy from index 3 only when it has more than four; otherwise
+// `undefined` is passed for them.
+const data = {
+ "filtering": {
+ "exact match": [
+ [["en"], ["en"], ["en"]],
+ [["en-US"], ["en-US"], ["en-US"]],
+ [["en-Latn-US"], ["en-Latn-US"], ["en-Latn-US"]],
+ [["en-Latn-US-mac"], ["en-Latn-US-mac"], ["en-Latn-US-mac"]],
+ [["fr-FR"], ["de", "it", "fr-FR"], ["fr-FR"]],
+ [["fr", "pl", "de-DE"], ["pl", "en-US", "de-DE"], ["pl", "de-DE"]],
+ ],
+ "available as range": [
+ [["en-US"], ["en"], ["en"]],
+ [["en-Latn-US"], ["en-US"], ["en-US"]],
+ [["en-US-mac"], ["en-US"], ["en-US"]],
+ [["fr-CA", "de-DE"], ["fr", "it", "de"], ["fr", "de"]],
+ [["ja-JP-mac"], ["ja"], ["ja"]],
+ [["en-Latn-GB", "en-Latn-IN"], ["en-IN", "en-GB"], ["en-GB", "en-IN"]],
+ ],
+ "should match on likely subtag": [
+ [["en"], ["en-GB", "de", "en-US"], ["en-US", "en-GB"]],
+ [["en"], ["en-Latn-GB", "de", "en-Latn-US"], ["en-Latn-US", "en-Latn-GB"]],
+ [["fr"], ["fr-CA", "fr-FR"], ["fr-FR", "fr-CA"]],
+ [["az-IR"], ["az-Latn", "az-Arab"], ["az-Arab"]],
+ [["sr-RU"], ["sr-Cyrl", "sr-Latn"], ["sr-Latn"]],
+ [["sr"], ["sr-Latn", "sr-Cyrl"], ["sr-Cyrl"]],
+ [["zh-GB"], ["zh-Hans", "zh-Hant"], ["zh-Hant"]],
+ [["sr", "ru"], ["sr-Latn", "ru"], ["ru"]],
+ [["sr-RU"], ["sr-Latn-RO", "sr-Cyrl"], ["sr-Latn-RO"]],
+ ],
+ "should match on a requested locale as a range": [
+ [["en-*-US"], ["en-US"], ["en-US"]],
+ [["en-Latn-US-*"], ["en-Latn-US"], ["en-Latn-US"]],
+ [["en-*-US-*"], ["en-US"], ["en-US"]],
+ ],
+ "should match cross-region": [
+ [["en"], ["en-US"], ["en-US"]],
+ [["en-US"], ["en-GB"], ["en-GB"]],
+ [["en-Latn-US"], ["en-Latn-GB"], ["en-Latn-GB"]],
+ // This is a cross-region check, because the requested Locale
+ // is really lang: en, script: *, region: undefined
+ [["en-*"], ["en-US"], ["en-US"]],
+ ],
+ "should match cross-variant": [
+ [["en-US-mac"], ["en-US-win"], ["en-US-win"]],
+ ],
+ "should prioritize properly": [
+ // exact match first
+ [["en-US"], ["en-US-mac", "en", "en-US"], ["en-US", "en", "en-US-mac"]],
+ // available as range second
+ [["en-Latn-US"], ["en-GB", "en-US"], ["en-US", "en-GB"]],
+ // likely subtags third
+ [["en"], ["en-Cyrl-US", "en-Latn-US"], ["en-Latn-US"]],
+ // variant range fourth
+ [["en-US-mac"], ["en-US-win", "en-GB-mac"], ["en-US-win", "en-GB-mac"]],
+ // regional range fifth
+ [["en-US-mac"], ["en-GB-win"], ["en-GB-win"]],
+ ],
+ "should prioritize properly (extra tests)": [
+ [["en-US"], ["en-GB", "en"], ["en", "en-GB"]],
+ ],
+ "should handle default locale properly": [
+ [["fr"], ["de", "it"], []],
+ [["fr"], ["de", "it"], "en-US", ["en-US"]],
+ [["fr"], ["de", "en-US"], "en-US", ["en-US"]],
+ [["fr", "de-DE"], ["de-DE", "fr-CA"], "en-US", ["fr-CA", "de-DE", "en-US"]],
+ ],
+ "should handle all matches on the 1st higher than any on the 2nd": [
+ [["fr-CA-mac", "de-DE"], ["de-DE", "fr-FR-win"], ["fr-FR-win", "de-DE"]],
+ ],
+ "should handle cases and underscores": [
+ [["fr_FR"], ["fr-FR"], ["fr-FR"]],
+ [["fr_fr"], ["fr-fr"], ["fr-fr"]],
+ [["fr_Fr"], ["fr-fR"], ["fr-fR"]],
+ [["fr_lAtN_fr"], ["fr-Latn-FR"], ["fr-Latn-FR"]],
+ [["fr_FR"], ["fr_FR"], ["fr_FR"]],
+ [["fr-FR"], ["fr_FR"], ["fr_FR"]],
+ [["fr_Cyrl_FR_mac"], ["fr_Cyrl_fr-mac"], ["fr_Cyrl_fr-mac"]],
+ ],
+ "should not crash on invalid input": [
+ [null, ["fr-FR"], []],
+ [undefined, ["fr-FR"], []],
+ [2, ["fr-FR"], []],
+ ["fr-FR", ["fr-FR"], []],
+ [["fr-FR"], null, []],
+ [["fr-FR"], undefined, []],
+ [["fr-FR"], 2, []],
+ [["fr-FR"], "fr-FR", []],
+ [["2"], ["ąóżł"], []],
+ [[[]], ["fr-FR"], []],
+ [[[]], [[2]], []],
+ ],
+ },
+ "matching": {
+ "should match only one per requested": [
+ [
+ ["fr", "en"],
+ ["en-US", "fr-FR", "en", "fr"], null,
+ localeService.langNegStrategyMatching, ["fr", "en"]
+ ],
+ ]
+ },
+ "lookup": {
+ "should match only one": [
+ [
+ ["fr-FR", "en"],
+ ["en-US", "fr-FR", "en", "fr"], 'en-US',
+ localeService.langNegStrategyLookup, ["fr-FR"]
+ ],
+ ]
+ }
+};
+
+function run_test()
+{
+
+ const nl = localeService.negotiateLanguages;
+
+ const json = JSON.stringify;
+ for (const strategy in data) {
+ for (const groupName in data[strategy]) {
+ const group = data[strategy][groupName];
+ for (const test of group) {
+ const requested = test[0];
+ const available = test[1];
+ const defaultLocale = test.length > 3 ? test[2] : undefined;
+ const strategy = test.length > 4 ? test[3] : undefined;
+ const supported = test[test.length - 1];
+
+ const result = nl(test[0], test[1], defaultLocale, strategy);
+ deepEqual(result, supported,
+ `\nExpected ${json(requested)} * ${json(available)} = ${json(supported)}.\n`);
+ }
+ }
+ }
+}