Bug 1348751 - [Form Autofill] A utility library for handling full name and separated names, r=MattN draft
author Luke Chang <lchang@mozilla.com>
Wed, 12 Apr 2017 20:05:54 +0800
changeset 569202 cb7ae59ee46021615357078604ea2e42d69c0421
parent 566378 dd530a59750adcaa0d48fa4f69b0cdb52715852a
child 626135 14f8384a7dd2d57f42a56d3c855f988a1aa7cfac
push id 56095
push user bmo:lchang@mozilla.com
push date Thu, 27 Apr 2017 05:36:04 +0000
reviewers MattN
bugs 1348751
milestone 55.0a1
Bug 1348751 - [Form Autofill] A utility library for handling full name and separated names, r=MattN MozReview-Commit-ID: 3rcuxbFHKOq
browser/extensions/formautofill/FormAutofillNameUtils.jsm
browser/extensions/formautofill/ProfileStorage.jsm
browser/extensions/formautofill/content/nameReferences.js
browser/extensions/formautofill/test/unit/test_isCJKName.js
browser/extensions/formautofill/test/unit/test_nameUtils.js
browser/extensions/formautofill/test/unit/xpcshell.ini
toolkit/content/license.html
new file mode 100644
--- /dev/null
+++ b/browser/extensions/formautofill/FormAutofillNameUtils.jsm
@@ -0,0 +1,280 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+"use strict";
+
+const {classes: Cc, interfaces: Ci, utils: Cu, results: Cr} = Components;
+
+// Cu.import loads jsm files based on ISO-Latin-1 for now (see bug 530257).
+// However, the references about name parts include multi-byte characters.
+// Thus, we use |loadSubScript| to load the references instead.
+const NAME_REFERENCES = "chrome://formautofill/content/nameReferences.js";
+
+this.EXPORTED_SYMBOLS = ["FormAutofillNameUtils"];
+
+// FormAutofillNameUtils is initially translated from
+// https://cs.chromium.org/chromium/src/components/autofill/core/browser/autofill_data_util.cc?rcl=b861deff77abecff11ae6a9f6946e9cc844b9817
+var FormAutofillNameUtils = {
+  // Will be loaded from NAME_REFERENCES.
+  NAME_PREFIXES: [],
+  NAME_SUFFIXES: [],
+  FAMILY_NAME_PREFIXES: [],
+  COMMON_CJK_MULTI_CHAR_SURNAMES: [],
+  KOREAN_MULTI_CHAR_SURNAMES: [],
+
+  // The whitespace definition based on
+  // https://cs.chromium.org/chromium/src/base/strings/string_util_constants.cc?l=9&rcl=b861deff77abecff11ae6a9f6946e9cc844b9817
+  WHITESPACE: [
+    "\u0009", // CHARACTER TABULATION
+    "\u000A", // LINE FEED (LF)
+    "\u000B", // LINE TABULATION
+    "\u000C", // FORM FEED (FF)
+    "\u000D", // CARRIAGE RETURN (CR)
+    "\u0020", // SPACE
+    "\u0085", // NEXT LINE (NEL)
+    "\u00A0", // NO-BREAK SPACE
+    "\u1680", // OGHAM SPACE MARK
+    "\u2000", // EN QUAD
+    "\u2001", // EM QUAD
+    "\u2002", // EN SPACE
+    "\u2003", // EM SPACE
+    "\u2004", // THREE-PER-EM SPACE
+    "\u2005", // FOUR-PER-EM SPACE
+    "\u2006", // SIX-PER-EM SPACE
+    "\u2007", // FIGURE SPACE
+    "\u2008", // PUNCTUATION SPACE
+    "\u2009", // THIN SPACE
+    "\u200A", // HAIR SPACE
+    "\u2028", // LINE SEPARATOR
+    "\u2029", // PARAGRAPH SEPARATOR
+    "\u202F", // NARROW NO-BREAK SPACE
+    "\u205F", // MEDIUM MATHEMATICAL SPACE
+    "\u3000", // IDEOGRAPHIC SPACE
+  ],
+
+  // The middle dot is used as a separator for foreign names in Japanese.
+  MIDDLE_DOT: [
+    "\u30FB", // KATAKANA MIDDLE DOT
+    "\u00B7", // A (common?) typo for "KATAKANA MIDDLE DOT"
+  ],
+
+  // The Unicode range is based on Wiki:
+  // https://en.wikipedia.org/wiki/CJK_Unified_Ideographs
+  // https://en.wikipedia.org/wiki/Hangul
+  // https://en.wikipedia.org/wiki/Japanese_writing_system
+  CJK_RANGE: [
+    "\u1100-\u11FF", // Hangul Jamo
+    "\u3040-\u309F", // Hiragana
+    "\u30A0-\u30FF", // Katakana
+    "\u3105-\u312C", // Bopomofo
+    "\u3130-\u318F", // Hangul Compatibility Jamo
+    "\u31F0-\u31FF", // Katakana Phonetic Extensions
+    "\u3200-\u32FF", // Enclosed CJK Letters and Months
+    "\u3400-\u4DBF", // CJK unified ideographs Extension A
+    "\u4E00-\u9FFF", // CJK Unified Ideographs
+    "\uA960-\uA97F", // Hangul Jamo Extended-A
+    "\uAC00-\uD7AF", // Hangul Syllables
+    "\uD7B0-\uD7FF", // Hangul Jamo Extended-B
+    "\uFF00-\uFFEF", // Halfwidth and Fullwidth Forms
+  ],
+
+  HANGUL_RANGE: [
+    "\u1100-\u11FF", // Hangul Jamo
+    "\u3130-\u318F", // Hangul Compatibility Jamo
+    "\uA960-\uA97F", // Hangul Jamo Extended-A
+    "\uAC00-\uD7AF", // Hangul Syllables
+    "\uD7B0-\uD7FF", // Hangul Jamo Extended-B
+  ],
+
+  _dataLoaded: false,
+
+  // Returns true if |set| contains |token|, modulo a final period.
+  _containsString(set, token) {
+    let target = token.replace(/\.$/, "").toLowerCase();
+    return set.includes(target);
+  },
+
+  // Removes common name prefixes from |name_tokens|.
+  _stripPrefixes(nameTokens) {
+    for (let i in nameTokens) {
+      if (!this._containsString(this.NAME_PREFIXES, nameTokens[i])) {
+        return nameTokens.slice(i);
+      }
+    }
+    return [];
+  },
+
+  // Removes common name suffixes from |name_tokens|.
+  _stripSuffixes(nameTokens) {
+    for (let i = nameTokens.length - 1; i >= 0; i--) {
+      if (!this._containsString(this.NAME_SUFFIXES, nameTokens[i])) {
+        return nameTokens.slice(0, i + 1);
+      }
+    }
+    return [];
+  },
+
+  _isCJKName(name) {
+    // The name is considered to be a CJK name if it is only CJK characters,
+    // spaces, and "middle dot" separators, with at least one CJK character, and
+    // no more than 2 words.
+    //
+    // Chinese and Japanese names are usually spelled out using the Han
+    // characters (logographs), which constitute the "CJK Unified Ideographs"
+    // block in Unicode, also referred to as Unihan. Korean names are usually
+    // spelled out in the Korean alphabet (Hangul), although they do have a Han
+    // equivalent as well.
+
+    let previousWasCJK = false;
+    let wordCount = 0;
+
+    for (let c of name) {
+      let isMiddleDot = this.MIDDLE_DOT.includes(c);
+      let isCJK = !isMiddleDot && this.reCJK.test(c);
+      if (!isCJK && !isMiddleDot && !this.WHITESPACE.includes(c)) {
+        return false;
+      }
+      if (isCJK && !previousWasCJK) {
+        wordCount++;
+      }
+      previousWasCJK = isCJK;
+    }
+
+    return wordCount > 0 && wordCount < 3;
+  },
+
+  // Tries to split a Chinese, Japanese, or Korean name into its given name &
+  // surname parts. If splitting did not work for whatever reason, returns null.
+  _splitCJKName(nameTokens) {
+    // The convention for CJK languages is to put the surname (last name) first,
+    // and the given name (first name) second. In a continuous text, there is
+    // normally no space between the two parts of the name. When entering their
+    // name into a field, though, some people add a space to disambiguate. CJK
+    // names (almost) never have a middle name.
+
+    let reHangulName = new RegExp(
+      "^[" + this.HANGUL_RANGE.join("") + this.WHITESPACE.join("") + "]+$", "u");
+    let nameParts = {
+      given: "",
+      middle: "",
+      family: "",
+    };
+
+    if (nameTokens.length == 1) {
+      // There is no space between the surname and given name. Try to infer
+      // where to separate between the two. Most Chinese and Korean surnames
+      // have only one character, but there are a few that have 2. If the name
+      // does not start with a surname from a known list, default to one
+      // character.
+      let name = nameTokens[0];
+      let isKorean = reHangulName.test(name);
+      let surnameLength = 0;
+
+      // 4-character Korean names are more likely to be 2/2 than 1/3, so use
+      // the full list of Korean 2-char surnames. (instead of only the common
+      // ones)
+      let multiCharSurnames = (isKorean && name.length > 3) ?
+        this.KOREAN_MULTI_CHAR_SURNAMES :
+        this.COMMON_CJK_MULTI_CHAR_SURNAMES;
+
+      // Default to 1 character if the surname is not in the list.
+      surnameLength =
+        multiCharSurnames.some(surname => name.startsWith(surname)) ? 2 : 1;
+
+      nameParts.family = name.substr(0, surnameLength);
+      nameParts.given = name.substr(surnameLength);
+    } else if (nameTokens.length == 2) {
+      // The user entered a space between the two name parts. This makes our job
+      // easier. Family name first, given name second.
+      nameParts.family = nameTokens[0];
+      nameParts.given = nameTokens[1];
+    } else {
+      return null;
+    }
+
+    return nameParts;
+  },
+
+  init() {
+    if (this._dataLoaded) {
+      return;
+    }
+    let sandbox = {};
+    let scriptLoader = Cc["@mozilla.org/moz/jssubscript-loader;1"]
+                         .getService(Ci.mozIJSSubScriptLoader);
+    scriptLoader.loadSubScript(NAME_REFERENCES, sandbox, "utf-8");
+    Object.assign(this, sandbox.nameReferences);
+    this._dataLoaded = true;
+
+    this.reCJK = new RegExp("[" + this.CJK_RANGE.join("") + "]", "u");
+  },
+
+  splitName(name) {
+    let nameTokens = name.trim().split(/[ ,\u3000\u30FB\u00B7]+/);
+    let nameParts = {
+      given: "",
+      middle: "",
+      family: "",
+    };
+
+    nameTokens = this._stripPrefixes(nameTokens);
+
+    if (this._isCJKName(name)) {
+      let parts = this._splitCJKName(nameTokens);
+      if (parts) {
+        return parts;
+      }
+    }
+
+    // Don't assume "Ma" is a suffix in John Ma.
+    if (nameTokens.length > 2) {
+      nameTokens = this._stripSuffixes(nameTokens);
+    }
+
+    if (!nameTokens.length) {
+      // Bad things have happened; just assume the whole thing is a given name.
+      nameParts.given = name;
+      return nameParts;
+    }
+
+    // Only one token, assume given name.
+    if (nameTokens.length == 1) {
+      nameParts.given = nameTokens[0];
+      return nameParts;
+    }
+
+    // 2 or more tokens. Grab the family, which is the last word plus any
+    // recognizable family prefixes.
+    let familyTokens = [nameTokens.pop()];
+    while (nameTokens.length) {
+      let lastToken = nameTokens[nameTokens.length - 1];
+      if (!this._containsString(this.FAMILY_NAME_PREFIXES, lastToken)) {
+        break;
+      }
+      familyTokens.unshift(lastToken);
+      nameTokens.pop();
+    }
+    nameParts.family = familyTokens.join(" ");
+
+    // Take the last remaining token as the middle name (if there are at least 2
+    // tokens).
+    if (nameTokens.length >= 2) {
+      nameParts.middle = nameTokens.pop();
+    }
+
+    // Remainder is given name.
+    nameParts.given = nameTokens.join(" ");
+
+    return nameParts;
+  },
+
+  joinNameParts({given, middle, family}) {
+    if (this._isCJKName(given) && this._isCJKName(family) && middle == "") {
+      return family + given;
+    }
+    return [given, middle, family].filter(part => part && part.length).join(" ");
+  },
+};
+
+FormAutofillNameUtils.init();
--- a/browser/extensions/formautofill/ProfileStorage.jsm
+++ b/browser/extensions/formautofill/ProfileStorage.jsm
@@ -46,16 +46,18 @@ const {classes: Cc, interfaces: Ci, util
 Cu.import("resource://gre/modules/XPCOMUtils.jsm");
 Cu.import("resource://gre/modules/Services.jsm");
 Cu.import("resource://gre/modules/Task.jsm");
 
 Cu.import("resource://formautofill/FormAutofillUtils.jsm");
 
 XPCOMUtils.defineLazyModuleGetter(this, "JSONFile",
                                   "resource://gre/modules/JSONFile.jsm");
+XPCOMUtils.defineLazyModuleGetter(this, "FormAutofillNameUtils",
+                                  "resource://formautofill/FormAutofillNameUtils.jsm");
 
 XPCOMUtils.defineLazyServiceGetter(this, "gUUIDGenerator",
                                    "@mozilla.org/uuid-generator;1",
                                    "nsIUUIDGenerator");
 
 this.log = null;
 FormAutofillUtils.defineLazyLogGetter(this, this.EXPORTED_SYMBOLS[0]);
 
new file mode 100644
--- /dev/null
+++ b/browser/extensions/formautofill/content/nameReferences.js
@@ -0,0 +1,144 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* exported nameReferences */
+
+"use strict";
+
+// The data below is initially copied from
+// https://cs.chromium.org/chromium/src/components/autofill/core/browser/autofill_data_util.cc?rcl=b861deff77abecff11ae6a9f6946e9cc844b9817
+// Reference lists used by FormAutofillNameUtils. FormAutofillNameUtils.init()
+// loads this script with loadSubScript (as UTF-8) and copies every property of
+// |nameReferences| onto the FormAutofillNameUtils object via Object.assign.
+var nameReferences = {
+  // Tokens removed from the front of a name by _stripPrefixes. Lookups go
+  // through _containsString, which lower-cases the token and drops one
+  // trailing period, so entries are lower-case and have no final period.
+  NAME_PREFIXES: [
+    "1lt",
+    "1st",
+    "2lt",
+    "2nd",
+    "3rd",
+    "admiral",
+    "capt",
+    "captain",
+    "col",
+    "cpt",
+    "dr",
+    "gen",
+    "general",
+    "lcdr",
+    "lt",
+    "ltc",
+    "ltg",
+    "ltjg",
+    "maj",
+    "major",
+    "mg",
+    "mr",
+    "mrs",
+    "ms",
+    "pastor",
+    "prof",
+    "rep",
+    "reverend",
+    "rev",
+    "sen",
+    "st",
+  ],
+
+  // Tokens removed from the end of a name by _stripSuffixes (splitName only
+  // does so when more than two tokens remain). Looked up through
+  // _containsString as well; only a final period is ignored, which is why
+  // abbreviations appear both with and without interior periods.
+  NAME_SUFFIXES: [
+    "b.a",
+    "ba",
+    "d.d.s",
+    "dds",
+    "i",
+    "ii",
+    "iii",
+    "iv",
+    "ix",
+    "jr",
+    "m.a",
+    "m.d",
+    "ma",
+    "md",
+    "ms",
+    "ph.d",
+    "phd",
+    "sr",
+    "v",
+    "vi",
+    "vii",
+    "viii",
+    "x",
+  ],
+
+  // Words that splitName attaches to the family name when they directly
+  // precede the last token, e.g. "Milhouse Van Houten" yields the family name
+  // "Van Houten".
+  FAMILY_NAME_PREFIXES: [
+    "d'",
+    "de",
+    "del",
+    "der",
+    "di",
+    "la",
+    "le",
+    "mc",
+    "san",
+    "st",
+    "ter",
+    "van",
+    "von",
+  ],
+
+  // The common and non-ambiguous CJK surnames (last names) that have more than
+  // one character. _splitCJKName checks whether an unspaced name starts with
+  // one of these to decide between a 1- and a 2-character family name.
+  COMMON_CJK_MULTI_CHAR_SURNAMES: [
+    // Korean, taken from the list of surnames:
+    // https://ko.wikipedia.org/wiki/%ED%95%9C%EA%B5%AD%EC%9D%98_%EC%84%B1%EC%94%A8_%EB%AA%A9%EB%A1%9D
+    "남궁",
+    "사공",
+    "서문",
+    "선우",
+    "제갈",
+    "황보",
+    "독고",
+    "망절",
+
+    // Chinese, taken from the top 10 Chinese 2-character surnames:
+    // https://zh.wikipedia.org/wiki/%E8%A4%87%E5%A7%93#.E5.B8.B8.E8.A6.8B.E7.9A.84.E8.A4.87.E5.A7.93
+    // Simplified Chinese (mostly mainland China)
+    "欧阳",
+    "令狐",
+    "皇甫",
+    "上官",
+    "司徒",
+    "诸葛",
+    "司马",
+    "宇文",
+    "呼延",
+    "端木",
+    // Traditional Chinese (mostly Taiwan)
+    "張簡",
+    "歐陽",
+    "諸葛",
+    "申屠",
+    "尉遲",
+    "司馬",
+    "軒轅",
+    "夏侯",
+  ],
+
+  // All Korean surnames that have more than one character, even the
+  // rare/ambiguous ones. _splitCJKName consults this list instead of the
+  // common one only for all-Hangul names whose length is greater than 3.
+  KOREAN_MULTI_CHAR_SURNAMES: [
+    "강전",
+    "남궁",
+    "독고",
+    "동방",
+    "망절",
+    "사공",
+    "서문",
+    "선우",
+    "소봉",
+    "어금",
+    "장곡",
+    "제갈",
+    "황목",
+    "황보",
+  ],
+};
new file mode 100644
--- /dev/null
+++ b/browser/extensions/formautofill/test/unit/test_isCJKName.js
@@ -0,0 +1,76 @@
+/**
+ * Tests the "isCJKName" function of FormAutofillNameUtils object.
+ */
+
+"use strict";
+
+Cu.import("resource://gre/modules/Task.jsm");
+Cu.import("resource://formautofill/FormAutofillNameUtils.jsm");
+
+// Test cases are initially copied from
+// https://cs.chromium.org/chromium/src/components/autofill/core/browser/autofill_data_util_unittest.cc
+const TESTCASES = [
+  {
+    // Non-CJK language with only ASCII characters.
+    fullName: "Homer Jay Simpson",
+    expectedResult: false,
+  },
+  {
+    // Non-CJK language with some ASCII characters.
+    fullName: "Éloïse Paré",
+    expectedResult: false,
+  },
+  {
+    // Non-CJK language with no ASCII characters.
+    fullName: "Σωκράτης",
+    expectedResult: false,
+  },
+  {
+    // (Simplified) Chinese name, Unihan.
+    fullName: "刘翔",
+    expectedResult: true,
+  },
+  {
+    // (Simplified) Chinese name, Unihan, with an ASCII space.
+    fullName: "成 龙",
+    expectedResult: true,
+  },
+  {
+    // Korean name, Hangul.
+    fullName: "송지효",
+    expectedResult: true,
+  },
+  {
+    // Korean name, Hangul, with an 'IDEOGRAPHIC SPACE' (U+3000).
+    fullName: "김 종국",
+    expectedResult: true,
+  },
+  {
+    // Japanese name, Unihan.
+    fullName: "山田貴洋",
+    expectedResult: true,
+  },
+  {
+    // Japanese name, Katakana, with a 'KATAKANA MIDDLE DOT' (U+30FB).
+    fullName: "ビル・ゲイツ",
+    expectedResult: true,
+  },
+  {
+    // Japanese name, Katakana, with a 'MIDDLE DOT' (U+00B7) (likely a typo).
+    fullName: "ビル·ゲイツ",
+    expectedResult: true,
+  },
+  {
+    // CJK names don't have a middle name, so a 3-part name is bogus to us.
+    fullName: "반 기 문",
+    expectedResult: false,
+  },
+];
+
+add_task(function* test_isCJKName() {
+  TESTCASES.forEach(testcase => {
+    do_print("Starting testcase: " + testcase.fullName);
+    let result = FormAutofillNameUtils._isCJKName(testcase.fullName);
+    do_check_eq(result, testcase.expectedResult);
+  });
+});
new file mode 100644
--- /dev/null
+++ b/browser/extensions/formautofill/test/unit/test_nameUtils.js
@@ -0,0 +1,285 @@
+/**
+ * Tests FormAutofillNameUtils object.
+ */
+
+"use strict";
+
+Cu.import("resource://gre/modules/Task.jsm");
+Cu.import("resource://formautofill/FormAutofillNameUtils.jsm");
+
+// Test cases initially copied from
+// https://cs.chromium.org/chromium/src/components/autofill/core/browser/autofill_data_util_unittest.cc
+const TESTCASES = [
+  {
+    description: "Full name including given, middle and family names",
+    fullName: "Homer Jay Simpson",
+    nameParts: {
+      given: "Homer",
+      middle: "Jay",
+      family: "Simpson",
+    },
+  },
+  {
+    description: "No middle name",
+    fullName: "Moe Szyslak",
+    nameParts: {
+      given: "Moe",
+      middle: "",
+      family: "Szyslak",
+    },
+  },
+  {
+    description: "Common name prefixes removed",
+    fullName: "Reverend Timothy Lovejoy",
+    nameParts: {
+      given: "Timothy",
+      middle: "",
+      family: "Lovejoy",
+    },
+    expectedFullName: "Timothy Lovejoy",
+  },
+  {
+    description: "Common name suffixes removed",
+    fullName: "John Frink Phd",
+    nameParts: {
+      given: "John",
+      middle: "",
+      family: "Frink",
+    },
+    expectedFullName: "John Frink",
+  },
+  {
+    description: "Exception to the name suffix removal",
+    fullName: "John Ma",
+    nameParts: {
+      given: "John",
+      middle: "",
+      family: "Ma",
+    },
+  },
+  {
+    description: "Common family name prefixes not considered a middle name",
+    fullName: "Milhouse Van Houten",
+    nameParts: {
+      given: "Milhouse",
+      middle: "",
+      family: "Van Houten",
+    },
+  },
+
+  // CJK names have reverse order (surname goes first, given name goes second).
+  {
+    description: "Chinese name, Unihan",
+    fullName: "孫 德明",
+    nameParts: {
+      given: "德明",
+      middle: "",
+      family: "孫",
+    },
+    expectedFullName: "孫德明",
+  },
+  {
+    description: "Chinese name, Unihan, \"IDEOGRAPHIC SPACE\"",
+    fullName: "孫 德明",
+    nameParts: {
+      given: "德明",
+      middle: "",
+      family: "孫",
+    },
+    expectedFullName: "孫德明",
+  },
+  {
+    description: "Korean name, Hangul",
+    fullName: "홍 길동",
+    nameParts: {
+      given: "길동",
+      middle: "",
+      family: "홍",
+    },
+    expectedFullName: "홍길동",
+  },
+  {
+    description: "Japanese name, Unihan",
+    fullName: "山田 貴洋",
+    nameParts: {
+      given: "貴洋",
+      middle: "",
+      family: "山田",
+    },
+    expectedFullName: "山田貴洋",
+  },
+
+  // In Japanese, foreign names use 'KATAKANA MIDDLE DOT' (U+30FB) as a
+  // separator. There is no consensus for the ordering. For now, we use the same
+  // ordering as regular Japanese names ("last・first").
+  {
+    description: "Foreign name in Japanese, Katakana",
+    fullName: "ゲイツ・ビル",
+    nameParts: {
+      given: "ビル",
+      middle: "",
+      family: "ゲイツ",
+    },
+    expectedFullName: "ゲイツビル",
+  },
+
+  // 'KATAKANA MIDDLE DOT' is occasionally typoed as 'MIDDLE DOT' (U+00B7).
+  {
+    description: "Foreign name in Japanese, Katakana",
+    fullName: "ゲイツ·ビル",
+    nameParts: {
+      given: "ビル",
+      middle: "",
+      family: "ゲイツ",
+    },
+    expectedFullName: "ゲイツビル",
+  },
+
+  // CJK names don't usually have a space in the middle, but most of the time,
+  // the surname is only one character (in Chinese & Korean).
+  {
+    description: "Korean name, Hangul",
+    fullName: "최성훈",
+    nameParts: {
+      given: "성훈",
+      middle: "",
+      family: "최",
+    },
+  },
+  {
+    description: "(Simplified) Chinese name, Unihan",
+    fullName: "刘翔",
+    nameParts: {
+      given: "翔",
+      middle: "",
+      family: "刘",
+    },
+  },
+  {
+    description: "(Traditional) Chinese name, Unihan",
+    fullName: "劉翔",
+    nameParts: {
+      given: "翔",
+      middle: "",
+      family: "劉",
+    },
+  },
+
+  // There are a few exceptions. Occasionally, the surname has two characters.
+  {
+    description: "Korean name, Hangul",
+    fullName: "남궁도",
+    nameParts: {
+      given: "도",
+      middle: "",
+      family: "남궁",
+    },
+  },
+  {
+    description: "Korean name, Hangul",
+    fullName: "황보혜정",
+    nameParts: {
+      given: "혜정",
+      middle: "",
+      family: "황보",
+    },
+  },
+  {
+    description: "(Traditional) Chinese name, Unihan",
+    fullName: "歐陽靖",
+    nameParts: {
+      given: "靖",
+      middle: "",
+      family: "歐陽",
+    },
+  },
+
+  // In Korean, some 2-character surnames are rare/ambiguous, like "강전": "강"
+  // is a common surname, and "전" can be part of a given name. In those cases,
+  // we assume it's 1/2 for 3-character names, or 2/2 for 4-character names.
+  {
+    description: "Korean name, Hangul",
+    fullName: "강전희",
+    nameParts: {
+      given: "전희",
+      middle: "",
+      family: "강",
+    },
+  },
+  {
+    description: "Korean name, Hangul",
+    fullName: "황목치승",
+    nameParts: {
+      given: "치승",
+      middle: "",
+      family: "황목",
+    },
+  },
+
+  // It occasionally happens that a full name is 2 characters, 1/1.
+  {
+    description: "Korean name, Hangul",
+    fullName: "이도",
+    nameParts: {
+      given: "도",
+      middle: "",
+      family: "이",
+    },
+  },
+  {
+    description: "Korean name, Hangul",
+    fullName: "孫文",
+    nameParts: {
+      given: "文",
+      middle: "",
+      family: "孫",
+    },
+  },
+
+  // These are not CJK names for us; they're just bogus.
+  {
+    description: "Bogus",
+    fullName: "Homer シンプソン",
+    nameParts: {
+      given: "Homer",
+      middle: "",
+      family: "シンプソン",
+    },
+  },
+  {
+    description: "Bogus",
+    fullName: "ホーマー Simpson",
+    nameParts: {
+      given: "ホーマー",
+      middle: "",
+      family: "Simpson",
+    },
+  },
+  {
+    description: "CJK has a middle-name, too unusual",
+    fullName: "반 기 문",
+    nameParts: {
+      given: "반",
+      middle: "기",
+      family: "문",
+    },
+  },
+];
+
+add_task(function* test_splitName() {
+  TESTCASES.forEach(testcase => {
+    if (testcase.fullName) {
+      do_print("Starting testcase: " + testcase.description);
+      let nameParts = FormAutofillNameUtils.splitName(testcase.fullName);
+      Assert.deepEqual(nameParts, testcase.nameParts);
+    }
+  });
+});
+
+add_task(function* test_joinName() {
+  TESTCASES.forEach(testcase => {
+    do_print("Starting testcase: " + testcase.description);
+    let name = FormAutofillNameUtils.joinNameParts(testcase.nameParts);
+    do_check_eq(name, testcase.expectedFullName || testcase.fullName);
+  });
+});
--- a/browser/extensions/formautofill/test/unit/xpcshell.ini
+++ b/browser/extensions/formautofill/test/unit/xpcshell.ini
@@ -3,14 +3,15 @@ firefox-appdir = browser
 head = head.js
 support-files =
 
 [test_autofillFormFields.js]
 [test_collectFormFields.js]
 [test_enabledStatus.js]
 [test_findLabelElements.js]
 [test_getFormInputDetails.js]
+[test_isCJKName.js]
 [test_markAsAutofillField.js]
+[test_nameUtils.js]
 [test_onFormSubmitted.js]
 [test_profileAutocompleteResult.js]
 [test_profileStorage.js]
 [test_savedFieldNames.js]
-
--- a/toolkit/content/license.html
+++ b/toolkit/content/license.html
@@ -2718,16 +2718,18 @@ WITH THE USE OR PERFORMANCE OF THIS SOFT
 
 
     <hr>
 
     <h1><a id="chromium"></a>Chromium License</h1>
 
     <p>This license applies to parts of the code in:</p>
     <ul>
+        <li><span class="path">browser/extensions/formautofill/content/nameReferences.js</span></li>
+        <li><span class="path">browser/extensions/formautofill/FormAutofillNameUtils.jsm</span></li>
         <li><span class="path">browser/extensions/mortar/host/common/opengles2-utils.jsm</span></li>
         <li><span class="path">editor/libeditor/EditorEventListener.cpp</span></li>
         <li><span class="path">security/sandbox/</span></li>
         <li><span class="path">widget/cocoa/GfxInfo.mm</span></li>
     </ul>
     <p>and also some files in these directories:</p>
     <ul>
         <li><span class="path">browser/extensions/mortar/ppapi/</span></li>