Bug 1392947 - Add more credit card expiration date matching patterns to enhance prediction. r=lchang, seanlee draft
author: Ray Lin <ralin@mozilla.com>
Wed, 15 Nov 2017 17:33:01 +0800
changeset 700528 0ea7e4a1948f834dc8c7ca16ab3b8fdb336ad92e
parent 700338 dd08f8b19cc32da161811abb2f7093e0f5392e69
child 740917 a7a615042cadd01d2f9b026eaed03d9949e35215
push id: 89881
push user: bmo:ralin@mozilla.com
push date: Mon, 20 Nov 2017 09:54:38 +0000
reviewers: lchang, seanlee
bugs: 1392947
milestone: 59.0a1
Bug 1392947 - Add more credit card expiration date matching patterns to enhance prediction. r=lchang, seanlee MozReview-Commit-ID: 3WyU6wSC8wD
browser/extensions/formautofill/FormAutofillHeuristics.jsm
browser/extensions/formautofill/test/unit/heuristics/third_party/test_QVC.js
browser/extensions/formautofill/test/unit/heuristics/third_party/test_Sears.js
browser/extensions/formautofill/test/unit/test_extractLabelStrings.js
--- a/browser/extensions/formautofill/FormAutofillHeuristics.jsm
+++ b/browser/extensions/formautofill/FormAutofillHeuristics.jsm
@@ -190,30 +190,37 @@ this.LabelUtils = {
 
   // A map object, whose keys are the id's of form fields and each value is an
   // array consisting of label elements correponding to the id.
   // @type {Map<string, array>}
   _mappedLabels: null,
 
   // An array consisting of label elements whose correponding form field doesn't
   // have an id attribute.
-  // @type {Array.<HTMLLabelElement>}
+  // @type {Array<HTMLLabelElement>}
   _unmappedLabels: null,
 
+  // A weak map from each label element to the strings extracted from it.
+  // @type {WeakMap<HTMLLabelElement, array>}
+  _labelStrings: null,
+
   /**
    * Extract all strings of an element's children to an array.
    * "element.textContent" is a string which is merged of all children nodes,
    * and this function provides an array of the strings contains in an element.
    *
    * @param  {Object} element
    *         A DOM element to be extracted.
    * @returns {Array}
    *          All strings in an element.
    */
   extractLabelStrings(element) {
+    if (this._labelStrings.has(element)) {
+      return this._labelStrings.get(element);
+    }
     let strings = [];
     let _extractLabelStrings = (el) => {
       if (this.EXCLUDED_TAGS.includes(el.tagName)) {
         return;
       }
 
       if (el.nodeType == Ci.nsIDOMNode.TEXT_NODE || el.childNodes.length == 0) {
         let trimmedText = el.textContent.trim();
@@ -227,16 +234,17 @@ this.LabelUtils = {
         let nodeType = node.nodeType;
         if (nodeType != Ci.nsIDOMNode.ELEMENT_NODE && nodeType != Ci.nsIDOMNode.TEXT_NODE) {
           continue;
         }
         _extractLabelStrings(node);
       }
     };
     _extractLabelStrings(element);
+    this._labelStrings.set(element, strings);
     return strings;
   },
 
   generateLabelMap(doc) {
     let mappedLabels = new Map();
     let unmappedLabels = [];
 
     for (let label of doc.querySelectorAll("label")) {
@@ -257,21 +265,23 @@ this.LabelUtils = {
         }
       } else {
         unmappedLabels.push(label);
       }
     }
 
     this._mappedLabels = mappedLabels;
     this._unmappedLabels = unmappedLabels;
+    this._labelStrings = new WeakMap();
   },
 
   clearLabelMap() {
     this._mappedLabels = null;
     this._unmappedLabels = null;
+    this._labelStrings = null; // NOTE(review): extractLabelStrings() reads _labelStrings without a null check, so it throws until generateLabelMap() runs again.
   },
 
   findLabelElements(element) {
     if (!this._mappedLabels) {
       this.generateLabelMap(element.ownerDocument);
     }
 
     let id = element.id;
@@ -367,17 +377,17 @@ this.FormAutofillHeuristics = {
     let matchingResult;
 
     const GRAMMARS = this.PHONE_FIELD_GRAMMARS;
     for (let i = 0; i < GRAMMARS.length; i++) {
       let detailStart = fieldScanner.parsingIndex;
       let ruleStart = i;
       for (; i < GRAMMARS.length && GRAMMARS[i][0] && fieldScanner.elementExisting(detailStart); i++, detailStart++) {
         let detail = fieldScanner.getFieldDetailByIndex(detailStart);
-        if (!detail || GRAMMARS[i][0] != detail.fieldName || detail._reason == "autocomplete") {
+        if (!detail || GRAMMARS[i][0] != detail.fieldName || (detail._reason && detail._reason == "autocomplete")) {
           break;
         }
         let element = detail.elementWeakRef.get();
         if (!element) {
           break;
         }
         if (GRAMMARS[i][2] && (!element.maxLength || GRAMMARS[i][2] < element.maxLength)) {
           break;
@@ -470,18 +480,19 @@ this.FormAutofillHeuristics = {
       return false;
     }
 
     const savedIndex = fieldScanner.parsingIndex;
     const monthAndYearFieldNames = ["cc-exp-month", "cc-exp-year"];
     const detail = fieldScanner.getFieldDetailByIndex(fieldScanner.parsingIndex);
     const element = detail.elementWeakRef.get();
 
-    // Skip the uninteresting fields
-    if (!detail || !["cc-exp", ...monthAndYearFieldNames].includes(detail.fieldName)) {
+    // Respect the autocomplete attribute and skip the uninteresting fields.
+    if (!detail || (detail._reason && detail._reason == "autocomplete") ||
+        !["cc-exp", ...monthAndYearFieldNames].includes(detail.fieldName)) {
       return false;
     }
 
     // If the input type is a month picker, then assume it's cc-exp.
     if (element.type == "month") {
       fieldScanner.updateFieldName(fieldScanner.parsingIndex, "cc-exp");
       fieldScanner.parsingIndex++;
 
@@ -526,25 +537,53 @@ this.FormAutofillHeuristics = {
           fieldScanner.updateFieldName(fieldScanner.parsingIndex, "cc-exp-year");
           fieldScanner.parsingIndex++;
           return true;
         }
       }
     }
     fieldScanner.parsingIndex = savedIndex;
 
-    // If no possible regular expiration fields are detected in current parsing window
-    // fallback to "cc-exp" as there's no such case that cc-exp-month or cc-exp-year
-    // presents alone.
-    // TODO: bug 1392947 - We should eventually remove this fallback, since we don't
-    // want to mess up deduplication if meanwhile a birthday was fallback to cc-exp
-    // that preceding the actual expiration fields.
-    fieldScanner.updateFieldName(fieldScanner.parsingIndex, "cc-exp");
+    // Look for MM and/or YY(YY).
+    if (this._matchRegexp(element, /^mm$/ig)) {
+      fieldScanner.updateFieldName(fieldScanner.parsingIndex, "cc-exp-month");
+      fieldScanner.parsingIndex++;
+      if (!fieldScanner.parsingFinished) {
+        const nextDetail = fieldScanner.getFieldDetailByIndex(fieldScanner.parsingIndex);
+        const nextElement = nextDetail.elementWeakRef.get();
+        if (this._matchRegexp(nextElement, /^(yy|yyyy)$/)) {
+          fieldScanner.updateFieldName(fieldScanner.parsingIndex, "cc-exp-year");
+          fieldScanner.parsingIndex++;
+
+          return true;
+        }
+      }
+    }
+    fieldScanner.parsingIndex = savedIndex;
+
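+    // Look for a cc-exp with 2-digit or 4-digit year. NOTE(review): in these regexp literals "\\s", "\\n" and "\\r" match a literal backslash plus that letter, not whitespace or line breaks -- confirm this is intended.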
+    if (this._matchRegexp(element, /(?:exp.*date[^y\\n\\r]*|mm\\s*[-/]?\\s*)yy(?:[^y]|$)/ig) ||
+        this._matchRegexp(element, /(?:exp.*date[^y\\n\\r]*|mm\\s*[-/]?\\s*)yyyy(?:[^y]|$)/ig)) {
+      fieldScanner.updateFieldName(fieldScanner.parsingIndex, "cc-exp");
+      fieldScanner.parsingIndex++;
+      return true;
+    }
+    fieldScanner.parsingIndex = savedIndex;
+
+    // Match general cc-exp regexp at last.
+    if (this._findMatchedFieldName(element, ["cc-exp"])) {
+      fieldScanner.updateFieldName(fieldScanner.parsingIndex, "cc-exp");
+      fieldScanner.parsingIndex++;
+      return true;
+    }
+    fieldScanner.parsingIndex = savedIndex;
+
+    // Set current field name to null as it failed to match any patterns.
+    fieldScanner.updateFieldName(fieldScanner.parsingIndex, null);
     fieldScanner.parsingIndex++;
-
     return true;
   },
 
   /**
    * This function should provide all field details of a form. The details
    * contain the autocomplete info (e.g. fieldName, section, etc).
    *
    * `allowDuplicates` is used for the xpcshell-test purpose currently because
@@ -599,17 +638,17 @@ this.FormAutofillHeuristics = {
     }
     this._regexpList[this._regExpTableHashValue(b0, b1, b2)] = regexps;
   },
 
   _getRegExpListCache(b0, b1, b2) {
     if (!this._regexpList) {
       return null;
     }
-    return this._regexpList[this._regExpTableHashValue(b0, b1, b2)];
+    return this._regexpList[this._regExpTableHashValue(b0, b1, b2)] || null; // A missing entry yields null, not undefined.
   },
 
   _getRegExpList(isAutoCompleteOff, elementTagName) {
     let isSelectElem = elementTagName == "SELECT";
     let regExpListCache = this._getRegExpListCache(
       isAutoCompleteOff,
       FormAutofillUtils.isAutofillCreditCardsAvailable,
       isSelectElem
@@ -694,17 +733,16 @@ this.FormAutofillHeuristics = {
         addressType: "",
         contactType: "",
       };
     }
 
     return null;
   },
 
-
   /**
    * @typedef ElementStrings
    * @type {object}
    * @yield {string} id - element id.
    * @yield {string} name - element name.
    * @yield {Array<string>} labels - extracted labels.
    */
 
@@ -754,16 +792,34 @@ this.FormAutofillHeuristics = {
           return regexp;
         }
       }
     }
 
     return null;
   },
 
+  /**
+   * Determine whether the regexp matches any string produced for the element
+   * by _getElementStrings(); stops at the first string that matches.
+   * @param {HTMLElement} element - element whose strings are tested.
+   * @param {RegExp} regexp - pattern applied to each string via test().
+   *        NOTE(review): callers pass /g regexps; test() leaves lastIndex set after a match, so do not reuse one across calls.
+   * @returns {boolean} true if any string matches, otherwise false.
+   */
+  _matchRegexp(element, regexp) {
+    const elemStrings = this._getElementStrings(element);
+    for (const str of elemStrings) {
+      if (regexp.test(str)) {
+        return true;
+      }
+    }
+    return false;
+  },
+
 /**
  * Phone field grammars - first matched grammar will be parsed. Grammars are
  * separated by { REGEX_SEPARATOR, FIELD_NONE, 0 }. Suffix and extension are
  * parsed separately unless they are necessary parts of the match.
  * The following notation is used to describe the patterns:
  * <cc> - country code field.
  * <ac> - area code field.
  * <phone> - phone or prefix.
--- a/browser/extensions/formautofill/test/unit/heuristics/third_party/test_QVC.js
+++ b/browser/extensions/formautofill/test/unit/heuristics/third_party/test_QVC.js
@@ -9,23 +9,18 @@ runHeuristicsTest([
       [
 //      {"section": "", "addressType": "", "contactType": "", "fieldName": "tel"}, // ac-off
         {"section": "", "addressType": "", "contactType": "", "fieldName": "email"},
 //      {"section": "", "addressType": "", "contactType": "", "fieldName": "bday-month"}, // select
 //      {"section": "", "addressType": "", "contactType": "", "fieldName": "bday-day"}, // select
 //      {"section": "", "addressType": "", "contactType": "", "fieldName": "bday-year"},
 //      {"section": "", "addressType": "", "contactType": "", "fieldName": "cc-type"},
 
-        // FIXME: bug 1392947 - this is a compound cc-exp field rather than the
-        // separated ones below. the birthday fields are misdetected as
-        // cc-exp-year and cc-exp-month.
+        {"section": "", "addressType": "", "contactType": "", "fieldName": "cc-number"},
         {"section": "", "addressType": "", "contactType": "", "fieldName": "cc-exp"},
-//      {"section": "", "addressType": "", "contactType": "", "fieldName": "cc-exp-year"},
-        {"section": "", "addressType": "", "contactType": "", "fieldName": "cc-number"},
-//      {"section": "", "addressType": "", "contactType": "", "fieldName": "cc-exp"},
 
 //      {"section": "", "addressType": "", "contactType": "", "fieldName": "cc-csc"},
       ],
       [
         {"section": "", "addressType": "", "contactType": "", "fieldName": "email"},
       ],
     ],
   }, {
@@ -34,23 +29,18 @@ runHeuristicsTest([
       [
 //      {"section": "", "addressType": "", "contactType": "", "fieldName": "tel"}, // ac-off
         {"section": "", "addressType": "", "contactType": "", "fieldName": "email"},
 //      {"section": "", "addressType": "", "contactType": "", "fieldName": "bday-month"}, // select
 //      {"section": "", "addressType": "", "contactType": "", "fieldName": "bday-day"}, // select
 //      {"section": "", "addressType": "", "contactType": "", "fieldName": "bday-year"}, // select
 //      {"section": "", "addressType": "", "contactType": "", "fieldName": "cc-type"}, // select
 
-        // FIXME: bug 1392947 - this is a compound cc-exp field rather than the
-        // separated ones below. the birthday fields are misdetected as
-        // cc-exp-year and cc-exp-month.
-        {"section": "", "addressType": "", "contactType": "", "fieldName": "cc-exp"}, // select
-//      {"section": "", "addressType": "", "contactType": "", "fieldName": "cc-exp-year"},
         {"section": "", "addressType": "", "contactType": "", "fieldName": "cc-number"}, // ac-off
-//      {"section": "", "addressType": "", "contactType": "", "fieldName": "cc-exp"},
+        {"section": "", "addressType": "", "contactType": "", "fieldName": "cc-exp"},
 
 //      {"section": "", "addressType": "", "contactType": "", "fieldName": "cc-csc"},
       ],
       [
         {"section": "", "addressType": "", "contactType": "", "fieldName": "email"},
       ],
     ],
   }, {
--- a/browser/extensions/formautofill/test/unit/heuristics/third_party/test_Sears.js
+++ b/browser/extensions/formautofill/test/unit/heuristics/third_party/test_Sears.js
@@ -78,18 +78,16 @@ runHeuristicsTest([
         // FIXME: bug 1392950 - the bank routing number should not be detected
         // as cc-number.
         {"section": "", "addressType": "", "contactType": "", "fieldName": "cc-number"},
 
         // FIXME: bug 1392934 - this should be detected as address-level1 since
         // it's for Driver's license or state identification.
         {"section": "", "addressType": "", "contactType": "", "fieldName": "address-level1"},
 
-        // FIXME: bug 1392947 - this is for birthday actually.
-        {"section": "", "addressType": "", "contactType": "", "fieldName": "cc-exp"},
 //      {"section": "", "addressType": "", "contactType": "", "fieldName": "bday-month"},
 //      {"section": "", "addressType": "", "contactType": "", "fieldName": "bday-day"},
 //      {"section": "", "addressType": "", "contactType": "", "fieldName": "bday-year"},
       ],
       [
         {"section": "", "addressType": "", "contactType": "", "fieldName": "email"},
       ],
     ],
--- a/browser/extensions/formautofill/test/unit/test_extractLabelStrings.js
+++ b/browser/extensions/formautofill/test/unit/test_extractLabelStrings.js
@@ -49,16 +49,17 @@ const TESTCASES = [
     expectedStrings: ["label type C", "inner div prefix", "test C-1",
       "test C-2", "inner div postfix"],
   },
 ];
 
 TESTCASES.forEach(testcase => {
   add_task(async function() {
     do_print("Starting testcase: " + testcase.description);
+    LabelUtils._labelStrings = new WeakMap();
 
     let doc = MockDocument.createTestDocument(
       "http://localhost:8080/test/", testcase.document);
 
     let element = doc.getElementById(testcase.inputId);
     let strings = LabelUtils.extractLabelStrings(element);
 
     Assert.deepEqual(strings, testcase.expectedStrings);