Bug 1372456 - Convert ISO 3166 code to UN M49 region code draft
author Thomas Nguyen <tnguyen@mozilla.com>
Mon, 31 Jul 2017 15:26:39 +0800
changeset 618313 3b62681375a9a8bc4e27241f34b48b4a8a97e1ac
parent 616205 f1693d664f8e8ee4c79801630c181c28095cad56
child 640036 c2c678f3e3766989c5ae1f6892d5b436d04587bf
push id 71297
push user bmo:tnguyen@mozilla.com
push date Mon, 31 Jul 2017 07:46:57 +0000
bugs 1372456
milestone 56.0a1
Bug 1372456 - Convert ISO 3166 code to UN M49 region code MozReview-Commit-ID: EmLM7sq6p1L
toolkit/components/url-classifier/nsUrlClassifierUtils.cpp
toolkit/components/url-classifier/nsUrlClassifierUtils.h
--- a/toolkit/components/url-classifier/nsUrlClassifierUtils.cpp
+++ b/toolkit/components/url-classifier/nsUrlClassifierUtils.cpp
@@ -11,16 +11,306 @@
 #include "plbase64.h"
 #include "nsPrintfCString.h"
 #include "safebrowsing.pb.h"
 #include "mozilla/Sprintf.h"
 #include "mozilla/Mutex.h"
 
 #define DEFAULT_PROTOCOL_VERSION "2.2"
 
+// Lookup table from ISO 3166 alpha-2 country code (lowercase) to UN M.49
+// region code. M.49 supranational regions are listed in
+// https://en.wikipedia.org/wiki/UN_M.49
+// Rows MUST stay sorted by isoCode in strcmp() order: LookupM49RegionCode()
+// binary-searches this table. Commented-out rows are country codes with no
+// region mapping here; looking them up yields "".
+static const struct {
+  const char* isoCode;  // ISO 3166 alpha-2 code, lowercase (search key)
+  const char* m49Code;  // UN M.49 region code as a decimal string
+} ISO_M49_MAP_TABLE[] = {
+  { "ad", "39" },
+  { "ae", "145" },
+  { "af", "34" },
+  { "ag", "29" },
+  { "ai", "29" },
+  { "al", "39" },
+  { "am", "145" },
+  { "ao", "17" },
+  // { "aq", "" },
+  { "ar", "5" },
+  { "as", "61" },
+  { "at", "155" },
+  { "au", "53" },
+  { "aw", "29" },
+  { "ax", "154" },
+  { "az", "145" },
+  { "ba", "39" },
+  { "bb", "29" },
+  { "bd", "34" },
+  { "be", "155" },
+  { "bf", "11" },
+  { "bg", "151" },
+  { "bh", "145" },
+  { "bi", "14" },
+  { "bj", "11" },
+  { "bl", "29" },
+  { "bm", "21" },
+  { "bn", "35" },
+  { "bo", "5" },
+  { "bq", "29" },
+  { "br", "5" },
+  { "bs", "29" },
+  { "bt", "34" },
+  // { "bv", "" },
+  { "bw", "18" },
+  { "by", "151" },
+  { "bz", "13" },
+  { "ca", "21" },
+  // { "cc", "" },
+  { "cd", "17" },
+  { "cf", "17" },
+  { "cg", "17" },
+  { "ch", "155" },
+  { "ci", "11" },
+  { "ck", "61" },
+  { "cl", "5" },
+  { "cm", "17" },
+  { "cn", "30" },
+  { "co", "5" },
+  { "cr", "13" },
+  { "cu", "29" },
+  { "cv", "11" },
+  { "cw", "29" },
+  // { "cx", "" },
+  { "cy", "145" },
+  { "cz", "151" },
+  { "de", "155" },
+  { "dj", "14" },
+  { "dk", "154" },
+  { "dm", "29" },
+  { "do", "29" },
+  { "dz", "15" },
+  { "ec", "5" },
+  { "ee", "154" },
+  { "eg", "15" },
+  { "eh", "15" },
+  { "er", "14" },
+  { "es", "39" },
+  { "et", "14" },
+  { "fi", "154" },
+  { "fj", "54" },
+  { "fk", "5" },
+  { "fm", "57" },
+  { "fo", "154" },
+  { "fr", "155" },
+  { "ga", "17" },
+  { "gb", "154" },
+  { "gd", "29" },
+  { "ge", "145" },
+  { "gf", "5" },
+  { "gg", "154" },
+  { "gh", "11" },
+  { "gi", "39" },
+  { "gl", "21" },
+  { "gm", "11" },
+  { "gn", "11" },
+  { "gp", "29" },
+  { "gq", "17" },
+  { "gr", "39" },
+  // { "gs", "" },
+  { "gt", "13" },
+  { "gu", "57" },
+  { "gw", "11" },
+  { "gy", "5" },
+  { "hk", "30" },
+  // { "hm", "" },
+  { "hn", "13" },
+  { "hr", "39" },
+  { "ht", "29" },
+  { "hu", "151" },
+  { "id", "35" },
+  { "ie", "154" },
+  { "il", "145" },
+  { "im", "154" },
+  { "in", "34" },
+  // { "io", "" },
+  { "iq", "145" },
+  { "ir", "34" },
+  { "is", "154" },
+  { "it", "39" },
+  { "je", "154" },
+  { "jm", "29" },
+  { "jo", "145" },
+  { "jp", "30" },
+  { "ke", "14" },
+  { "kg", "143" },
+  { "kh", "35" },
+  { "ki", "57" },
+  { "km", "14" },
+  { "kn", "29" },
+  { "kp", "30" },
+  { "kr", "30" },
+  { "kw", "145" },
+  { "ky", "29" },
+  { "kz", "143" },
+  { "la", "35" },
+  { "lb", "145" },
+  { "lc", "29" },
+  { "li", "155" },
+  { "lk", "34" },
+  { "lr", "11" },
+  { "ls", "18" },
+  { "lt", "154" },
+  { "lu", "155" },
+  { "lv", "154" },
+  { "ly", "15" },
+  { "ma", "15" },
+  { "mc", "155" },
+  { "md", "151" },
+  { "me", "39" },
+  { "mf", "29" },
+  { "mg", "14" },
+  { "mh", "57" },
+  { "mk", "39" },
+  { "ml", "11" },
+  { "mm", "35" },
+  { "mn", "30" },
+  { "mo", "30" },
+  { "mp", "57" },
+  { "mq", "29" },
+  { "mr", "11" },
+  { "ms", "29" },
+  { "mt", "39" },
+  { "mu", "14" },
+  { "mv", "34" },
+  { "mw", "14" },
+  { "mx", "13" },
+  { "my", "35" },
+  { "mz", "14" },
+  { "na", "18" },
+  { "nc", "54" },
+  { "ne", "11" },
+  { "nf", "53" },
+  { "ng", "11" },
+  { "ni", "13" },
+  { "nl", "155" },
+  { "no", "154" },
+  { "np", "34" },
+  { "nr", "57" },
+  { "nu", "61" },
+  { "nz", "53" },
+  { "om", "145" },
+  { "pa", "13" },
+  { "pe", "5" },
+  { "pf", "61" },
+  { "pg", "54" },
+  { "ph", "35" },
+  { "pk", "34" },
+  { "pl", "151" },
+  { "pm", "21" },
+  { "pn", "61" },
+  { "pr", "29" },
+  { "ps", "145" },
+  { "pt", "39" },
+  { "pw", "57" },
+  { "py", "5" },
+  { "qa", "145" },
+  { "re", "14" },
+  { "ro", "151" },
+  { "rs", "39" },
+  { "ru", "151" },
+  { "rw", "14" },
+  { "sa", "145" },
+  { "sb", "54" },
+  { "sc", "14" },
+  { "sd", "15" },
+  { "se", "154" },
+  { "sg", "35" },
+  { "sh", "11" },
+  { "si", "39" },
+  { "sj", "154" },
+  { "sk", "151" },
+  { "sl", "11" },
+  { "sm", "39" },
+  { "sn", "11" },
+  { "so", "14" },
+  { "sr", "5" },
+  { "ss", "14" },
+  { "st", "17" },
+  { "sv", "13" },
+  { "sx", "29" },
+  { "sy", "145" },
+  { "sz", "18" },
+  { "tc", "29" },
+  { "td", "17" },
+  // { "tf", "" },
+  { "tg", "11" },
+  { "th", "35" },
+  { "tj", "143" },
+  { "tk", "61" },
+  { "tl", "35" },
+  { "tm", "143" },
+  { "tn", "15" },
+  { "to", "61" },
+  { "tr", "145" },
+  { "tt", "29" },
+  { "tv", "61" },
+  { "tw", "30" },
+  { "tz", "14" },
+  { "ua", "151" },
+  { "ug", "14" },
+  // { "um", "" },
+  { "us", "21" },
+  { "uy", "5" },
+  { "uz", "143" },
+  { "va", "39" },
+  { "vc", "29" },
+  { "ve", "5" },
+  { "vg", "29" },
+  { "vi", "29" },
+  { "vn", "35" },
+  { "vu", "54" },
+  { "wf", "61" },
+  { "ws", "61" },
+  { "ye", "145" },
+  { "yt", "14" },
+  { "za", "18" },
+  { "zm", "14" },
+  { "zw", "14" },
+};
+
+/**
+ * Map an ISO 3166 alpha-2 country code to its UN M.49 region code.
+ *
+ * @param aISOCode country code to look up. It is compared byte-wise with
+ *                 strcmp() against the lowercase keys of ISO_M49_MAP_TABLE,
+ *                 so it has to be lowercase already.
+ * @return the m49Code string of the matching table row, or "" when aISOCode
+ *         is empty or has no row. The result is never null.
+ */
+static const char*
+LookupM49RegionCode(const nsCString& aISOCode)
+{
+  if (aISOCode.IsEmpty()) {
+    return "";
+  }
+
+  const char* const key = aISOCode.get();
+
+  // Fast path: "us" is the default region, so answer it without searching.
+  if (strcmp(key, "us") == 0) {
+    return "21";
+  }
+
+  // Binary search over the half-open row interval [first, last).
+  uint32_t first = 0;
+  uint32_t last = ArrayLength(ISO_M49_MAP_TABLE);
+
+  while (first < last) {
+    const uint32_t probe = first + (last - first) / 2;
+    const int32_t order = strcmp(key, ISO_M49_MAP_TABLE[probe].isoCode);
+    if (order == 0) {
+      return ISO_M49_MAP_TABLE[probe].m49Code;
+    }
+    if (order > 0) {
+      // The key sorts after the probed row: continue in the upper half.
+      first = probe + 1;
+    } else {
+      // The key sorts before the probed row: continue in the lower half.
+      last = probe;
+    }
+  }
+
+  // No row for this country code.
+  return "";
+}
+
 static char int_to_hex_digit(int32_t i)
 {
   NS_ASSERTION((i >= 0) && (i <= 15), "int too big in int_to_hex_digit");
   return static_cast<char>(((i < 10) ? (i + '0') : ((i - 10) + 'A')));
 }
 
 static bool
 IsDecimal(const nsACString & num)
@@ -168,16 +458,27 @@ nsUrlClassifierUtils::Init()
   nsCOMPtr<nsIObserverService> observerService =
       mozilla::services::GetObserverService();
   if (!observerService)
     return NS_ERROR_FAILURE;
 
   observerService->AddObserver(this, "xpcom-shutdown-threads", false);
   Preferences::AddStrongObserver(this, "browser.safebrowsing");
 
+  // Derive the coarse UN M.49 region from the ISO 3166 country code stored in
+  // the "browser.search.countryCode" pref. The lookup table keys are
+  // lowercase, so normalize the pref value before looking it up.
+  // XXX When there is no country code in the preference, we have to fetch it
+  // manually:
+  // https://mozilla.github.io/ichnaea/api/region.html#api-region-latest
+  nsAutoCString countryCode;
+  Preferences::GetCString("browser.search.countryCode", &countryCode);
+  ToLowerCase(countryCode);
+  mRegionCode = LookupM49RegionCode(countryCode);
+
+  // Get notified through Observe() when the country code pref changes.
+  // NOTE(review): the "xpcom-shutdown-threads" handler only removes the
+  // "browser.safebrowsing" observer; confirm this one gets removed as well.
+  Preferences::AddStrongObserver(this, "browser.search.countryCode");
+
   return NS_OK;
 }
 
 NS_IMPL_ISUPPORTS(nsUrlClassifierUtils,
                   nsIUrlClassifierUtils,
                   nsIObserver)
 
 /////////////////////////////////////////////////////////////////////////////
@@ -488,18 +789,25 @@ nsUrlClassifierUtils::ParseFindFullHashR
 //////////////////////////////////////////////////////////
 // nsIObserver
 
 NS_IMETHODIMP
 nsUrlClassifierUtils::Observe(nsISupports *aSubject, const char *aTopic,
                               const char16_t *aData)
 {
   if (0 == strcmp(aTopic, NS_PREFBRANCH_PREFCHANGE_TOPIC_ID)) {
-    MutexAutoLock lock(mProviderDictLock);
-    return ReadProvidersFromPrefs(mProviderDict);
+    // Inside this branch aTopic is always the generic pref-changed topic, so
+    // it cannot tell the observed prefs apart. The name of the pref that
+    // changed is delivered in aData (UTF-16); compare that instead.
+    if (aData && NS_ConvertUTF16toUTF8(aData).EqualsLiteral(
+                   "browser.search.countryCode")) {
+      // Country code changed: recompute the cached M.49 region code. The
+      // lookup table keys are lowercase, so normalize the value first.
+      nsAutoCString countryCode;
+      Preferences::GetCString("browser.search.countryCode", &countryCode);
+      ToLowerCase(countryCode);
+      mRegionCode = LookupM49RegionCode(countryCode);
+    } else {
+      // Any other observed pref change: reload the provider dictionary
+      // while holding its lock.
+      MutexAutoLock lock(mProviderDictLock);
+      return ReadProvidersFromPrefs(mProviderDict);
+    }
   }
 
   if (0 == strcmp(aTopic, "xpcom-shutdown-threads")) {
     nsCOMPtr<nsIPrefBranch> prefs = do_GetService(NS_PREFSERVICE_CONTRACTID);
     NS_ENSURE_TRUE(prefs, NS_ERROR_FAILURE);
     return prefs->RemoveObserver("browser.safebrowsing", this);
   }
 
--- a/toolkit/components/url-classifier/nsUrlClassifierUtils.h
+++ b/toolkit/components/url-classifier/nsUrlClassifierUtils.h
@@ -54,14 +54,21 @@ private:
 
   // Function to tell if we should encode a character.
   bool ShouldURLEscape(const unsigned char c) const;
 
   void CleanupHostname(const nsACString & host, nsACString & _retval);
 
   nsresult ReadProvidersFromPrefs(ProviderDictType& aDict);
 
+  // The ISO 3166 country code stored in preferences is too specific: users
+  // in small countries could be tracked by it. UN M.49 lets us send the
+  // coarser region/sub-region code instead of the country.
+  // NOTE(review): no definition of this method is visible in this change;
+  // confirm it is implemented before anything calls it.
+  void GetM49RegionCode(nsACString& aRegionCode);
+
   // The provider lookup table and its mutex.
   ProviderDictType mProviderDict;
   mozilla::Mutex mProviderDictLock;
+
+  // UN M.49 region code looked up from the "browser.search.countryCode"
+  // pref. Points at a string literal (not owned); "" means no known region.
+  // Default-initialized so it is never read uninitialized when Init() returns
+  // early, before the lookup has run.
+  const char* mRegionCode = "";
 };
 
 #endif // nsUrlClassifierUtils_h_