Bug 1423204 - Improve the address line parser to detect address-line* more correctly. r=lchang draft
authorSean Lee <selee@mozilla.com>
Sun, 07 Jan 2018 15:42:41 +0800
changeset 717062 2ddd5355c004cd0c02dbd4875475ac4a91ef99df
parent 717028 739484451a6399c7f156a0d960335606aa6c1221
child 745140 89d7ad921e0c1e200c1adb788f732649170c120c
push id94550
push userbmo:selee@mozilla.com
push dateMon, 08 Jan 2018 06:54:42 +0000
reviewerslchang
bugs1423204
milestone59.0a1
Bug 1423204 - Improve the address line parser to detect address-line* more correctly. r=lchang MozReview-Commit-ID: JHqKSnBZsVV
browser/extensions/formautofill/FormAutofillHeuristics.jsm
browser/extensions/formautofill/content/heuristicsRegexp.js
browser/extensions/formautofill/test/fixtures/autocomplete_basic.html
browser/extensions/formautofill/test/unit/heuristics/test_basic.js
browser/extensions/formautofill/test/unit/test_getAdaptedProfiles.js
--- a/browser/extensions/formautofill/FormAutofillHeuristics.jsm
+++ b/browser/extensions/formautofill/FormAutofillHeuristics.jsm
@@ -529,27 +529,66 @@ this.FormAutofillHeuristics = {
    * @param {FieldScanner} fieldScanner
    *        The current parsing status for all elements
    * @returns {boolean}
    *          Return true if there is any field can be recognized in the parser,
    *          otherwise false.
    */
   _parseAddressFields(fieldScanner) {
     let parsedFields = false;
-    let addressLines = ["address-line1", "address-line2", "address-line3"];
-    for (let i = 0; !fieldScanner.parsingFinished && i < addressLines.length; i++) {
+    const addressLines = ["address-line1", "address-line2", "address-line3"];
+
+    // TODO: These address-line* regexps only cover the numbered variants
+    // (e.g. `address1`, `addr2`), and they are a subset of the regexps in
+    // `heuristicsRegexp.js`. We need a better way to keep them consistent.
+    const addressLineRegexps = {
+      "address-line1": new RegExp(
+        "address[_-]?line(1|one)|address1|addr1" +
+        "|addrline1|address_1" + // Extra rules by Firefox
+        "|indirizzo1" + // it-IT
+        "|住所1" + // ja-JP
+        "|地址1" + // zh-CN
+        "|주소.?1", // ko-KR
+        "iu"
+      ),
+      "address-line2": new RegExp(
+        "address[_-]?line(2|two)|address2|addr2" +
+        "|addrline2|address_2" + // Extra rules by Firefox
+        "|indirizzo2" + // it-IT
+        "|住所2" + // ja-JP
+        "|地址2" + // zh-CN
+        "|주소.?2", // ko-KR
+        "iu"
+      ),
+      "address-line3": new RegExp(
+        "address[_-]?line(3|three)|address3|addr3" +
+        "|addrline3|address_3" + // Extra rules by Firefox
+        "|indirizzo3" + // it-IT
+        "|住所3" + // ja-JP
+        "|地址3" + // zh-CN
+        "|주소.?3", // ko-KR
+        "iu"
+      ),
+    };
+    while (!fieldScanner.parsingFinished) {
       let detail = fieldScanner.getFieldDetailByIndex(fieldScanner.parsingIndex);
-      if (!detail || !addressLines.includes(detail.fieldName)) {
-        // When the field is not related to any address-line[1-3] fields, it
-        // means the parsing process can be terminated.
+      if (!detail || !addressLines.includes(detail.fieldName) || detail._reason == "autocomplete") {
+        // When the field is not one of the address-line[1-3] fields, or its
+        // name was determined by the autocomplete attribute, the parsing
+        // process can be terminated.
         break;
       }
-      fieldScanner.updateFieldName(fieldScanner.parsingIndex, addressLines[i]);
+      const elem = detail.elementWeakRef.get();
+      for (let regexp of Object.keys(addressLineRegexps)) {
+        if (this._matchRegexp(elem, addressLineRegexps[regexp])) {
+          fieldScanner.updateFieldName(fieldScanner.parsingIndex, regexp);
+          parsedFields = true;
+        }
+      }
       fieldScanner.parsingIndex++;
-      parsedFields = true;
     }
 
     return parsedFields;
   },
 
   /**
    * Try to look for expiration date fields and revise the field names if needed.
    *
--- a/browser/extensions/formautofill/content/heuristicsRegexp.js
+++ b/browser/extensions/formautofill/content/heuristicsRegexp.js
@@ -61,44 +61,47 @@ var HeuristicsRegExp = {
       "iu"
     ),
     "street-address": new RegExp(
       "streetaddress|street-address",
       "iu"
     ),
     "address-line1": new RegExp(
       "^address$|address[_-]?line(one)?|address1|addr1|street" +
+      "|addrline1|address_1" + // Extra rules by Firefox
       "|(?:shipping|billing)address$" +
       "|strasse|straße|hausnummer|housenumber" + // de-DE
       "|house.?name" + // en-GB
       "|direccion|dirección" + // es
       "|adresse" + // fr-FR
       "|indirizzo" + // it-IT
       "|^住所$|住所1" + // ja-JP
       "|morada|endereço" + // pt-BR, pt-PT
       "|Адрес" + // ru
       "|地址" + // zh-CN
       "|^주소.?$|주소.?1", // ko-KR
       "iu"
     ),
     "address-line2": new RegExp(
       "address[_-]?line(2|two)|address2|addr2|street|suite|unit" +
+      "|addrline2|address_2" + // Extra rules by Firefox
       "|adresszusatz|ergänzende.?angaben" + // de-DE
       "|direccion2|colonia|adicional" + // es
       "|addresssuppl|complementnom|appartement" + // fr-FR
       "|indirizzo2" + // it-IT
       "|住所2" + // ja-JP
       "|complemento|addrcomplement" + // pt-BR, pt-PT
       "|Улица" + // ru
       "|地址2" + // zh-CN
       "|주소.?2", // ko-KR
       "iu"
     ),
     "address-line3": new RegExp(
       "address[_-]?line(3|three)|address3|addr3|street|suite|unit" +
+      "|addrline3|address_3" + // Extra rules by Firefox
       "|adresszusatz|ergänzende.?angaben" + // de-DE
       "|direccion3|colonia|adicional" + // es
       "|addresssuppl|complementnom|appartement" + // fr-FR
       "|indirizzo3" + // it-IT
       "|住所3" + // ja-JP
       "|complemento|addrcomplement" + // pt-BR, pt-PT
       "|Улица" + // ru
       "|地址3" + // zh-CN
--- a/browser/extensions/formautofill/test/fixtures/autocomplete_basic.html
+++ b/browser/extensions/formautofill/test/fixtures/autocomplete_basic.html
@@ -35,11 +35,19 @@
     <p><label>cc-name <input type="text" id="B_cc-name" autocomplete="cc-name" /></label></p>
     <p><label>cc-exp-month <input type="text" id="B_cc-exp-month" autocomplete="cc-exp-month" /></label></p>
     <p><label>cc-exp-year <input type="text" id="B_cc-exp-year" autocomplete="cc-exp-year" /></label></p>
     <hr>
     <p><input type="submit" /></p>
     <p><button type="reset">Reset</button></p>
   </form>
 
+  <form id="formC">
+    <p><label><input type="text" name="someprefixAddrLine1" /></label></p>
+    <p><label>City: <input type="text" name="address-level2" /></label></p>
+    <p><label><input type="text" name="someprefixAddrLine2" /></label></p>
+    <p><label>Organization: <input type="text" name="organization" /></label></p>
+    <p><label><input type="text" name="someprefixAddrLine3" /></label></p>
+  </form>
+
 </body>
 </html>
 
--- a/browser/extensions/formautofill/test/unit/heuristics/test_basic.js
+++ b/browser/extensions/formautofill/test/unit/heuristics/test_basic.js
@@ -27,12 +27,19 @@ runHeuristicsTest([
         {"section": "", "addressType": "", "contactType": "", "fieldName": "country"},
         {"section": "", "addressType": "", "contactType": "", "fieldName": "tel"},
         {"section": "", "addressType": "", "contactType": "", "fieldName": "email"},
         {"section": "", "addressType": "", "contactType": "", "fieldName": "cc-number"},
         {"section": "", "addressType": "", "contactType": "", "fieldName": "cc-name"},
         {"section": "", "addressType": "", "contactType": "", "fieldName": "cc-exp-month"},
         {"section": "", "addressType": "", "contactType": "", "fieldName": "cc-exp-year"},
       ]],
+      [[
+        {"section": "", "addressType": "", "contactType": "", "fieldName": "address-line1"},
+        {"section": "", "addressType": "", "contactType": "", "fieldName": "address-level2"},
+        {"section": "", "addressType": "", "contactType": "", "fieldName": "address-line2"},
+        {"section": "", "addressType": "", "contactType": "", "fieldName": "organization"},
+        {"section": "", "addressType": "", "contactType": "", "fieldName": "address-line3"},
+      ]],
     ],
   },
 ], "../../fixtures/");
 
--- a/browser/extensions/formautofill/test/unit/test_getAdaptedProfiles.js
+++ b/browser/extensions/formautofill/test/unit/test_getAdaptedProfiles.js
@@ -115,29 +115,56 @@ const TESTCASES = [
       "address-line3": "line3",
       "address-level1": "CA",
       "country": "US",
       "tel": "+19876543210",
       "tel-national": "9876543210",
     }],
   },
   {
-    description: "Address form with street-address, address-line[1, 3]",
+    description: "Address form with street-address, address-line[1, 3]" +
+                 ", determined by autocomplete attr",
     document: `<form>
                <input id="street-addr" autocomplete="street-address">
                <input id="line1" autocomplete="address-line1">
                <input id="line3" autocomplete="address-line3">
                </form>`,
     profileData: [Object.assign({}, DEFAULT_ADDRESS_RECORD)],
     expectedResult: [{
       "guid": "123",
       "street-address": "2 Harrison St line2 line3",
       "-moz-street-address-one-line": "2 Harrison St line2 line3",
-      "address-line1": "2 Harrison St",
-      "address-line2": "line2 line3",
+      // Since the form is missing an address-line2 field, the value of
+      // address-line1 should contain the line2 value as well.
+      "address-line1": "2 Harrison St line2",
+      "address-line2": "line2",
+      "address-line3": "line3",
+      "address-level1": "CA",
+      "country": "US",
+      "tel": "+19876543210",
+      "tel-national": "9876543210",
+    }],
+  },
+  {
+    description: "Address form with street-address, address-line[1, 3]" +
+                 ", determined by heuristics",
+    document: `<form>
+               <input id="street-address">
+               <input id="address-line1">
+               <input id="address-line3">
+               </form>`,
+    profileData: [Object.assign({}, DEFAULT_ADDRESS_RECORD)],
+    expectedResult: [{
+      "guid": "123",
+      "street-address": "2 Harrison St line2 line3",
+      "-moz-street-address-one-line": "2 Harrison St line2 line3",
+      // Since the form is missing an address-line2 field, the value of
+      // address-line1 should contain the line2 value as well.
+      "address-line1": "2 Harrison St line2",
+      "address-line2": "line2",
       "address-line3": "line3",
       "address-level1": "CA",
       "country": "US",
       "tel": "+19876543210",
       "tel-national": "9876543210",
     }],
   },
   {