Bug 1283803 - part1 : modify function parsingHeader. draft
author Alastor Wu <alwu@mozilla.com>
Tue, 19 Jul 2016 16:30:46 +0800
changeset 389450 6b6e518a3c388f9cca0cb2ae870a8baabf52b51c
parent 389281 feaaf1af1065257b9178faca8b67eed9657b4a17
child 389451 5614ff3654265e06ceb68713b28777cf18fe8ed1
push id 23406
push user alwu@mozilla.com
push date Tue, 19 Jul 2016 09:08:14 +0000
bugs 1283803
milestone 50.0a1
Bug 1283803 - part1 : modify function parsingHeader. MozReview-Commit-ID: Fujoh7ysVPy
dom/media/webvtt/vtt.jsm
--- a/dom/media/webvtt/vtt.jsm
+++ b/dom/media/webvtt/vtt.jsm
@@ -254,16 +254,28 @@ this.EXPORTED_SYMBOLS = ["WebVTT"];
     skipWhitespace();
     cue.endTime = consumeTimeStamp();     // (5) collect cue end time
 
     // 4.1 WebVTT cue settings list.
     skipWhitespace();
     consumeCueSettings(input, cue);
   }
 
+  function onlyContainsWhiteSpaces(input) {
+    return /^[ \f\n\r\t]+$/.test(input);
+  }
+
+  function containsTimeDirectionSymbol(input) {
+    return input.indexOf("-->") !== -1;
+  }
+
+  function maybeIsTimeStampFormat(input) {
+    return /^\s*(\d+:)?(\d{2}):(\d{2})\.(\d+)\s*-->\s*(\d+:)?(\d{2}):(\d{2})\.(\d+)\s*/.test(input);
+  }
+
   var ESCAPE = {
     "&amp;": "&",
     "&lt;": "<",
     "&gt;": ">",
     "&lrm;": "\u200e",
     "&rlm;": "\u200f",
     "&nbsp;": "\u00a0"
   };
@@ -1320,26 +1332,60 @@ this.EXPORTED_SYMBOLS = ["WebVTT"];
       // WebVTT parser algorithm step1 - step9.
       function parseSignature(input) {
         let signature = collectNextLine();
         if (!/^WEBVTT([ \t].*)?$/.test(signature)) {
           throw new ParsingError(ParsingError.Errors.BadSignature);
         }
       }
 
-      // 3.2 WebVTT metadata header syntax
-      function parseHeader(input) {
-        parseOptions(input, function (k, v) {
-          switch (k) {
-          case "Region":
-            // 3.3 WebVTT region metadata header syntax
-            parseRegion(v);
+      // Parsing the region and style information.
+      // See spec, https://w3c.github.io/webvtt/#collect-a-webvtt-block
+      //
+      // There are several things that may appear in the header:
+      //   1. Region or Style setting
+      //   2. Garbage (meaningless string)
+      //   3. Empty line
+      //   4. Cue's timestamp
+      // Case 4 happens when there is no blank line between the header and the
+      // cue blocks. In that case, we preserve the line and return it so the
+      // next parsing phase can consume it.
+      function parseHeader() {
+        let line = null;
+        while (self.buffer && self.state === "HEADER") {
+          line = collectNextLine();
+
+          if (/^REGION|^STYLE/i.test(line)) {
+            parseOptions(line, function (k, v) {
+              switch (k.toUpperCase()) {
+              case "REGION":
+                parseRegion(v);
+                break;
+              case "STYLE":
+                // TODO : not supported yet.
+                break;
+              }
+            }, ":");
+          } else if (maybeIsTimeStampFormat(line)) {
+            self.state = "CUE";
+            break;
+          } else if (!line ||
+                     onlyContainsWhiteSpaces(line) ||
+                     containsTimeDirectionSymbol(line)) {
+            // An empty line, a whitespace-only line, or a line containing "-->".
             break;
           }
-        }, /:/);
+        }
+
+        // Header parsing ended without encountering a cue timestamp.
+        if (self.state === "HEADER") {
+          self.state = "ID";
+          line = null
+        }
+        return line;
       }
 
       // 5.1 WebVTT file parsing.
       try {
         if (self.state === "INITIAL") {
           parseSignature();
           self.state = "HEADER";
         }