Bug 1283803 - part2 : modify vtt parsing algorithm. draft
author Alastor Wu <alwu@mozilla.com>
Tue, 19 Jul 2016 16:30:51 +0800
changeset 389451 5614ff3654265e06ceb68713b28777cf18fe8ed1
parent 389450 6b6e518a3c388f9cca0cb2ae870a8baabf52b51c
child 389452 3cbf04dbd22445f0900ce1ae0941ee6bf3f19c9a
push id 23406
push user alwu@mozilla.com
push date Tue, 19 Jul 2016 09:08:14 +0000
bugs 1283803
milestone 50.0a1
Bug 1283803 - part2 : modify vtt parsing algorithm. MozReview-Commit-ID: HYt1rr0YRPn
dom/media/webvtt/vtt.jsm
testing/web-platform/meta/webvtt/webvtt-file-format-parsing/webvtt-file-parsing/001.html.ini
--- a/dom/media/webvtt/vtt.jsm
+++ b/dom/media/webvtt/vtt.jsm
@@ -1264,16 +1264,52 @@ this.EXPORTED_SYMBOLS = ["WebVTT"];
         }
         if (buffer[pos] === '\n') {
           ++pos;
         }
         self.buffer = buffer.substr(pos);
         return line;
       }
 
+      function createCueIfNeeded() {
+        // Allocate a blank VTTCue(0, 0, "") only when no cue is in progress.
+        if (self.cue) { return; }
+        self.cue = new self.window.VTTCue(0, 0, "");
+      }
+
+      // Parse an optional cue identifier from `input`, then switch to the "CUE" state.
+      // Returns true if `input` was consumed as the identifier line, false otherwise.
+      function parseCueIdentifier(input) {
+        if (maybeIsTimeStampFormat(input)) {
+          self.state = "CUE";
+          return false; // Not an identifier; the caller reuses this line in "CUE".
+        }

+        createCueIfNeeded();
+        // TODO : ensure the cue identifier is unique among all cue identifiers.
+        self.cue.id = containsTimeDirectionSymbol(input) ? "" : input;
+        self.state = "CUE";
+        return true;
+      }
+
+      // Parse cue timings and settings from `input` into self.cue, then expect cue text.
+      // See spec, https://w3c.github.io/webvtt/#collect-webvtt-cue-timings-and-settings
+      function parseCueMayThrow(input) {
+        try {
+          createCueIfNeeded();
+          parseCue(input, self.cue, self.regionList);
+          self.state = "CUETEXT";
+        } catch (e) {
+          self.reportOrThrowError(e);
+          // On a parse error, drop this cue; "BADCUE" discards its remaining lines.
+          self.cue = null;
+          self.state = "BADCUE";
+        }
+      }
+
       // 3.4 WebVTT region and WebVTT region settings syntax
       function parseRegion(input) {
         var settings = new Settings();
 
         parseOptions(input, function (k, v) {
           switch (k) {
           case "id":
             settings.set(k, v);
@@ -1386,102 +1422,65 @@ this.EXPORTED_SYMBOLS = ["WebVTT"];
       // 5.1 WebVTT file parsing.
       try {
         if (self.state === "INITIAL") {
           parseSignature();
           self.state = "HEADER";
         }
 
         var line;
-        var alreadyCollectedLine = false;
+        if (self.state === "HEADER") {
+          line = parseHeader();
+        }
+
         while (self.buffer) {
-          // We can't parse a line until we have the full line.
-          if (!/\r\n|\n/.test(self.buffer)) {
-            return this;
-          }
-
-          if (!alreadyCollectedLine) {
+          if (!line) {
             line = collectNextLine();
-          } else {
-            alreadyCollectedLine = false;
           }
 
           switch (self.state) {
-          case "HEADER":
-            // 13-18 - Allow a header (metadata) under the WEBVTT line.
-            if (/:/.test(line)) {
-              parseHeader(line);
-            } else if (!line) {
-              // An empty line terminates the header and starts the body (cues).
-              self.state = "ID";
-            }
-            continue;
-          case "NOTE":
-            // Ignore NOTE blocks.
-            if (!line) {
-              self.state = "ID";
-            }
-            continue;
           case "ID":
-            // Check for the start of NOTE blocks.
-            if (/^NOTE($|[ \t])/.test(line)) {
-              self.state = "NOTE";
+            // Ignore NOTE and line terminator
+            if (/^NOTE($|[ \t])/.test(line) || !line) {
               break;
             }
-            // 19-29 - Allow any number of line terminators, then initialize new cue values.
-            if (!line) {
+            // If there is no cue identifier, keep the line and reuse this line
+            // in next iteration.
+            if (!parseCueIdentifier(line)) {
               continue;
             }
-            self.cue = new self.window.VTTCue(0, 0, "");
-            self.state = "CUE";
-            // 30-39 - Check if self line contains an optional identifier or timing data.
-            if (line.indexOf("-->") === -1) {
-              self.cue.id = line;
-              continue;
-            }
-            // Process line as start of a cue.
-            /*falls through*/
+            break;
           case "CUE":
-            // 40 - Collect cue timings and settings.
-            try {
-              parseCue(line, self.cue, self.regionList);
-            } catch (e) {
-              self.reportOrThrowError(e);
-              // In case of an error ignore rest of the cue.
-              self.cue = null;
-              self.state = "BADCUE";
-              continue;
-            }
-            self.state = "CUETEXT";
-            continue;
+            parseCueMayThrow(line);
+            break;
           case "CUETEXT":
-            var hasSubstring = line.indexOf("-->") !== -1;
-            // 34 - If we have an empty line then report the cue.
-            // 35 - If we have the special substring '-->' then report the cue,
-            // but do not collect the line as we need to process the current
-            // one as a new cue.
-            if (!line || hasSubstring && (alreadyCollectedLine = true)) {
+            // Report the cue when we get (1) an empty line or (2) a line containing "-->".
+            if (!line || containsTimeDirectionSymbol(line)) {
               // We are done parsing self cue.
               self.oncue && self.oncue(self.cue);
               self.cue = null;
               self.state = "ID";
+              // Keep the line and reuse this line in next iteration.
               continue;
             }
             if (self.cue.text) {
               self.cue.text += "\n";
             }
             self.cue.text += line;
-            continue;
+            break;
           case "BADCUE": // BADCUE
             // 54-62 - Collect and discard the remaining cue.
             if (!line) {
               self.state = "ID";
             }
-            continue;
+            break;
           }
+          // The line was already parsed, empty it to ensure we can get the
+          // new line in next iteration.
+          line = null;
         }
       } catch (e) {
         self.reportOrThrowError(e);
 
         // If we are currently parsing a cue, report what we have.
         if (self.state === "CUETEXT" && self.cue && self.oncue) {
           self.oncue(self.cue);
         }
--- a/testing/web-platform/meta/webvtt/webvtt-file-format-parsing/webvtt-file-parsing/001.html.ini
+++ b/testing/web-platform/meta/webvtt/webvtt-file-format-parsing/webvtt-file-parsing/001.html.ini
@@ -1,16 +1,4 @@
 [001.html]
   type: testharness
   [WebVTT parser tests, nulls.vtt]
-    expected: FAIL
-
-  [WebVTT parser tests, header-timings.vtt]
-    expected: FAIL
-
-  [WebVTT parser tests, header-space.vtt]
-    expected: FAIL
-
-  [WebVTT parser tests, header-tab.vtt]
-    expected: FAIL
-
-  [WebVTT parser tests, arrows.vtt]
     expected: FAIL
\ No newline at end of file