// Forked from Open-CT/openct-tasks (701 lines, 19 KiB, JavaScript).
// Any copyright is dedicated to the Public Domain.
// http://creativecommons.org/publicdomain/zero/1.0/

// Not intended to be fast, but if you can make it faster, please help out!

/**
 * Line-oriented parser for a complete WebVTT document.
 *
 * Construct with `new WebVTTParser()` and call `parse(input, mode)`.
 * Cue timing lines are delegated to WebVTTCueTimingsAndSettingsParser and
 * cue payloads to WebVTTCueTextParser.
 */
var WebVTTParser = function() {
  /**
   * Parses the text of a WebVTT file.
   *
   * @param {string} input - The whole file as one string.
   * @param {string} [mode] - Forwarded unchanged to WebVTTCueTextParser.
   * @returns {{cues: Array, errors: Array, time: number}}
   *   `cues`: parsed cue objects ordered by start time ascending, ties broken
   *   by end time descending. `errors`: `{message, line, col}` records in
   *   the order they were found (`line` is 1-based). `time`: elapsed
   *   milliseconds measured with Date.now().
   */
  this.parse = function(input, mode) {
    var NEWLINE = /\r\n|\r|\n/,
        startTime = Date.now(),
        linePos = 0,
        // The WebVTT parsing algorithm replaces every U+0000 NULL with
        // U+FFFD REPLACEMENT CHARACTER before anything else happens.
        lines = input.replace(/\0/g, "\uFFFD").split(NEWLINE),
        alreadyCollected = false,
        cues = [],
        errors = []

    // Records an error against the line currently being examined.
    function err(message, col) {
      errors.push({message:message, line:linePos+1, col:col})
    }

    var line = lines[linePos],
        lineLength = line.length,
        signature = "WEBVTT",
        bom = 0,
        signature_length = signature.length

    /* Byte order mark */
    if(line[0] === "\ufeff") {
      bom = 1
      signature_length += 1
    }

    /* SIGNATURE */
    // Valid when the line is exactly the signature (after an optional BOM),
    // or the signature followed by a space or a tab.
    if(
      lineLength < signature_length ||
      line.indexOf(signature) !== 0+bom ||
      lineLength > signature_length &&
      line[signature_length] !== " " &&
      line[signature_length] !== "\t"
    ) {
      err("No valid signature. (File needs to start with \"WEBVTT\".)")
    }

    linePos++

    /* HEADER */
    // Every non-blank line directly after the signature is reported. A line
    // containing "-->" ends the header and is handed to the cue loop as-is.
    while(lines[linePos] !== "" && lines[linePos] !== undefined) {
      err("No blank line after the signature.")
      if(lines[linePos].indexOf("-->") !== -1) {
        alreadyCollected = true
        break
      }
      linePos++
    }

    /* CUE LOOP */
    while(lines[linePos] !== undefined) {
      var cue

      // Skip blank lines, unless an earlier step already stopped on a
      // timing line that still has to be consumed.
      while(!alreadyCollected && lines[linePos] === "") {
        linePos++
      }
      if(!alreadyCollected && lines[linePos] === undefined)
        break

      /* CUE CREATION */
      cue = {
        id:"",
        startTime:0,
        endTime:0,
        pauseOnExit:false,
        direction:"horizontal",
        snapToLines:true,
        linePosition:"auto",
        textPosition:50,
        size:100,
        alignment:"middle",
        text:"",
        tree:null
      }

      var parseTimings = true

      if(lines[linePos].indexOf("-->") === -1) {
        cue.id = lines[linePos]

        /* COMMENTS
           Not part of the specification's parser as these would just be ignored. However,
           we want them to be conforming and not get "Cue identifier cannot be standalone".
         */
        if(/^NOTE($|[ \t])/.test(cue.id)) { // .startsWith fails in Chrome
          linePos++
          while(lines[linePos] !== "" && lines[linePos] !== undefined) {
            if(lines[linePos].indexOf("-->") !== -1)
              err("Cannot have timestamp in a comment.")
            linePos++
          }
          continue
        }

        linePos++

        if(lines[linePos] === "" || lines[linePos] === undefined) {
          err("Cue identifier cannot be standalone.")
          continue
        }

        if(lines[linePos].indexOf("-->") === -1) {
          // No timing line follows the identifier: keep the default timings
          // and treat the following lines as cue text.
          parseTimings = false
          err("Cue identifier needs to be followed by timestamp.")
        }
      }

      /* TIMINGS */
      alreadyCollected = false
      var timings = new WebVTTCueTimingsAndSettingsParser(lines[linePos], err)
      var previousCueStart = 0
      if(cues.length > 0) {
        previousCueStart = cues[cues.length-1].startTime
      }
      if(parseTimings && !timings.parse(cue, previousCueStart)) {
        /* BAD CUE */
        cue = null
        linePos++

        /* BAD CUE LOOP */
        // Discard the rest of this cue; stop early on the next timing line.
        while(lines[linePos] !== "" && lines[linePos] !== undefined) {
          if(lines[linePos].indexOf("-->") !== -1) {
            alreadyCollected = true
            break
          }
          linePos++
        }
        continue
      }
      linePos++

      /* CUE TEXT LOOP */
      while(lines[linePos] !== "" && lines[linePos] !== undefined) {
        if(lines[linePos].indexOf("-->") !== -1) {
          err("Blank line missing before cue.")
          alreadyCollected = true
          break
        }
        if(cue.text !== "")
          cue.text += "\n"
        cue.text += lines[linePos]
        linePos++
      }

      /* CUE TEXT PROCESSING */
      var cuetextparser = new WebVTTCueTextParser(cue.text, err, mode)
      cue.tree = cuetextparser.parse(cue.startTime, cue.endTime)
      cues.push(cue)
    }

    // Start time ascending; for equal starts the longer cue comes first.
    cues.sort(function(a, b) {
      if(a.startTime < b.startTime)
        return -1
      if(a.startTime > b.startTime)
        return 1
      if(a.endTime > b.endTime)
        return -1
      if(a.endTime < b.endTime)
        return 1
      return 0
    })

    /* END */
    return {cues:cues, errors:errors, time:Date.now()-startTime}
  }
}

/**
 * Parser for one cue timing line ("start --> end settings...") or for a
 * single standalone timestamp.
 *
 * @param {string} line - The text to parse.
 * @param {function(string, number)} errorHandler - Called with
 *   (message, column) for every problem found; column is the 1-based
 *   position of the parser when the error was raised.
 */
var WebVTTCueTimingsAndSettingsParser = function(line, errorHandler) {
  var SPACE = /[\u0020\t\f]/,
      NOSPACE = /[^\u0020\t\f]/,
      pos = 0,
      err = function(message) {
        errorHandler(message, pos+1)
      }

  // True when a character exists at the current position and it is not
  // whitespace. The explicit end-of-line check matters: RegExp test()
  // stringifies its argument, so testing `undefined` would match the text
  // "undefined" and report a missing-whitespace error at end of line.
  function atNonSpace() {
    return line[pos] !== undefined && NOSPACE.test(line[pos])
  }

  // Advances past every consecutive character matching `pattern`.
  function skip(pattern) {
    while(
      line[pos] !== undefined &&
      pattern.test(line[pos])
    ) {
      pos++
    }
  }

  // Consumes and returns the run of characters matching `pattern`.
  function collect(pattern) {
    var str = ""
    while(
      line[pos] !== undefined &&
      pattern.test(line[pos])
    ) {
      str += line[pos]
      pos++
    }
    return str
  }

  /* http://dev.w3.org/html5/webvtt/#collect-a-webvtt-timestamp */
  // Returns the timestamp at the current position in seconds, or undefined
  // (after reporting an error) when it is malformed. The numbered comments
  // refer to steps of the algorithm linked above.
  function timestamp() {
    var units = "minutes",
        val1,
        val2,
        val3,
        val4
    // 3
    if(line[pos] === undefined) {
      err("No timestamp found.")
      return
    }
    // 4
    if(!/\d/.test(line[pos])) {
      err("Timestamp must start with a character in the range 0-9.")
      return
    }
    // 5-7
    val1 = collect(/\d/)
    if(val1.length > 2 || parseInt(val1, 10) > 59) {
      // More than two digits or a value above 59 can only be an hours field.
      units = "hours"
    }
    // 8
    if(line[pos] !== ":") {
      err("No time unit separator found.")
      return
    }
    pos++
    // 9-11
    val2 = collect(/\d/)
    if(val2.length !== 2) {
      err("Must be exactly two digits.")
      return
    }
    // 12
    if(units === "hours" || line[pos] === ":") {
      if(line[pos] !== ":") {
        err("No seconds found or minutes is greater than 59.")
        return
      }
      pos++
      val3 = collect(/\d/)
      if(val3.length !== 2) {
        err("Must be exactly two digits.")
        return
      }
    } else {
      // mm:ss form: shift the fields so val1/val2/val3 are always h/m/s.
      val3 = val2
      val2 = val1
      val1 = "0"
    }
    // 13
    if(line[pos] !== ".") {
      err("No decimal separator (\".\") found.")
      return
    }
    pos++
    // 14-16
    val4 = collect(/\d/)
    if(val4.length !== 3) {
      err("Milliseconds must be given in three digits.")
      return
    }
    // 17
    if(parseInt(val2, 10) > 59) {
      err("You cannot have more than 59 minutes.")
      return
    }
    if(parseInt(val3, 10) > 59) {
      err("You cannot have more than 59 seconds.")
      return
    }
    return parseInt(val1, 10) * 60 * 60 + parseInt(val2, 10) * 60 + parseInt(val3, 10) + parseInt(val4, 10) / 1000
  }

  /* http://dev.w3.org/html5/webvtt/#parse-the-webvtt-settings */
  // Applies whitespace-separated "name:value" settings from `input` to
  // `cue`. An invalid setting is reported and skipped; a setting with an
  // empty value is reported and stops processing of the remaining settings.
  function parseSettings(input, cue) {
    var settings = input.split(SPACE),
        seen = []
    for(var i = 0; i < settings.length; i++) {
      if(settings[i] === "")
        continue

      var index = settings[i].indexOf(":")
      if(index === -1) {
        // Without a colon there is no name/value pair to split.
        err("Invalid setting.")
        continue
      }
      var setting = settings[i].slice(0, index),
          value = settings[i].slice(index + 1)

      if(seen.indexOf(setting) !== -1) {
        err("Duplicate setting.")
      }
      seen.push(setting)

      if(value === "") {
        err("No value for setting defined.")
        return
      }

      if(setting === "vertical") { // writing direction
        if(value !== "rl" && value !== "lr") {
          err("Writing direction can only be set to 'rl' or 'lr'.")
          continue
        }
        cue.direction = value
      } else if(setting === "line") { // line position
        if(!/\d/.test(value)) {
          err("Line position takes a number or percentage.")
          continue
        }
        if(value.indexOf("-", 1) !== -1) {
          err("Line position can only have '-' at the start.")
          continue
        }
        if(value.indexOf("%") !== -1 && value.indexOf("%") !== value.length-1) {
          err("Line position can only have '%' at the end.")
          continue
        }
        if(value[0] === "-" && value[value.length-1] === "%") {
          err("Line position cannot be a negative percentage.")
          continue
        }
        if(value[value.length-1] === "%") {
          if(parseInt(value, 10) > 100) {
            err("Line position cannot be >100%.")
            continue
          }
          // A percentage turns off snapping to lines.
          cue.snapToLines = false
        }
        cue.linePosition = parseInt(value, 10)
      } else if(setting === "position") { // text position
        if(value[value.length-1] !== "%") {
          err("Text position must be a percentage.")
          continue
        }
        if(parseInt(value, 10) > 100) {
          err("Position cannot be >100%.")
          continue
        }
        cue.textPosition = parseInt(value, 10)
      } else if(setting === "size") { // size
        if(value[value.length-1] !== "%") {
          err("Size must be a percentage.")
          continue
        }
        if(parseInt(value, 10) > 100) {
          err("Size cannot be >100%.")
          continue
        }
        cue.size = parseInt(value, 10)
      } else if(setting === "align") { // alignment
        var alignValues = ["start", "middle", "end", "left", "right"]
        if(alignValues.indexOf(value) === -1) {
          err("Alignment can only be set to one of " + alignValues.join(", ") + ".")
          continue
        }
        cue.alignment = value
      } else {
        err("Invalid setting.")
      }
    }
  }

  /**
   * Parses the whole line as "start --> end [settings]" and writes
   * startTime, endTime and any recognised settings onto `cue`.
   *
   * @param {Object} cue - Cue object to fill in.
   * @param {number} previousCueStart - Start time of the previous cue;
   *   an earlier start here is reported but does not fail the parse.
   * @returns {(boolean|undefined)} true when both timestamps and the
   *   "-->" separator were parsed; undefined when the line is unusable.
   */
  this.parse = function(cue, previousCueStart) {
    skip(SPACE)
    cue.startTime = timestamp()
    if(cue.startTime === undefined) {
      return
    }
    if(cue.startTime < previousCueStart) {
      err("Start timestamp is not greater than or equal to start timestamp of previous cue.")
    }
    if(atNonSpace()) {
      err("Timestamp not separated from '-->' by whitespace.")
    }
    skip(SPACE)
    // 6-8
    if(line[pos] !== "-") {
      err("No valid timestamp separator found.")
      return
    }
    pos++
    if(line[pos] !== "-") {
      err("No valid timestamp separator found.")
      return
    }
    pos++
    if(line[pos] !== ">") {
      err("No valid timestamp separator found.")
      return
    }
    pos++
    if(atNonSpace()) {
      err("'-->' not separated from timestamp by whitespace.")
    }
    skip(SPACE)
    cue.endTime = timestamp()
    if(cue.endTime === undefined) {
      return
    }
    if(cue.endTime <= cue.startTime) {
      err("End timestamp is not greater than start timestamp.")
    }

    skip(SPACE)
    parseSettings(line.substring(pos), cue)
    return true
  }

  /**
   * Parses the whole line as one timestamp with nothing after it.
   *
   * @returns {(number|undefined)} The time in seconds, or undefined when
   *   the timestamp is malformed or followed by extra characters.
   */
  this.parseTimestamp = function() {
    var ts = timestamp()
    if(line[pos] !== undefined) {
      err("Timestamp must not have trailing characters.")
      return
    }
    return ts
  }
}

/**
 * Parser that turns the text payload of one cue into a node tree.
 *
 * @param {string} line - The cue text (may contain "\n").
 * @param {function(string, number)} errorHandler - Called with
 *   (message, column) for every problem found; column is the 1-based
 *   position of the parser when the error was raised.
 * @param {string} [mode] - "metadata" suppresses all error reporting;
 *   "chapters" additionally reports every tag and timestamp as an error.
 */
var WebVTTCueTextParser = function(line, errorHandler, mode) {
  var pos = 0,
      // Escape names (without the trailing ";") and what they expand to.
      ESCAPES = {
        "&amp": "&",
        "&lt": "<",
        "&gt": ">",
        "&lrm": "\u200e",
        "&rlm": "\u200f",
        "&nbsp": "\u00A0"
      },
      err = function(message) {
        if(mode === "metadata")
          return
        errorHandler(message, pos+1)
      }

  /**
   * Parses the cue text given to the constructor.
   *
   * @param {number} cueStart - Cue start time; timestamp tags must be later.
   * @param {number} cueEnd - Cue end time; timestamp tags must be earlier.
   * @returns {{children: Array}} Root of the tree. Child nodes are
   *   `{type:"text", value}`, `{type:"timestamp", value}` or
   *   `{type:"object", name, classes, children}` (with `value` holding the
   *   annotation for <v> and <lang>); every child has a `parent` link.
   */
  this.parse = function(cueStart, cueEnd) {
    var result = {children:[]},
        current = result,
        timestamps = []

    // Appends a new element node under `current` and descends into it.
    function attach(token) {
      current.children.push({type:"object", name:token[1], classes:token[2], children:[], parent:current})
      current = current.children[current.children.length-1]
    }

    // True when `current` or any of its ancestors is an element `name`.
    function inScope(name) {
      var node = current
      while(node) {
        if(node.name === name)
          return true
        node = node.parent
      }
      return
    }

    while(line[pos] !== undefined) {
      var token = nextToken()
      if(token[0] === "text") {
        current.children.push({type:"text", value:token[1], parent:current})
      } else if(token[0] === "start tag") {
        if(mode === "chapters")
          err("Start tags not allowed in chapter title text.")
        var name = token[1]
        if(name !== "v" && name !== "lang" && token[3] !== "") {
          err("Only <v> and <lang> can have an annotation.")
        }
        if(
          name === "c" ||
          name === "i" ||
          name === "b" ||
          name === "u" ||
          name === "ruby"
        ) {
          attach(token)
        } else if(name === "rt" && current.name === "ruby") {
          attach(token)
        } else if(name === "v") {
          if(inScope("v")) {
            err("<v> cannot be nested inside itself.")
          }
          attach(token)
          current.value = token[3] // annotation
          if(!token[3]) {
            err("<v> requires an annotation.")
          }
        } else if(name === "lang") {
          attach(token)
          current.value = token[3] // language
        } else {
          err("Incorrect start tag.")
        }
      } else if(token[0] === "end tag") {
        if(mode === "chapters")
          err("End tags not allowed in chapter title text.")
        // XXX check <ruby> content
        if(token[1] === current.name) {
          current = current.parent
        } else if(token[1] === "ruby" && current.name === "rt") {
          // </ruby> closes an open <rt> together with its <ruby>.
          current = current.parent.parent
        } else {
          err("Incorrect end tag.")
        }
      } else if(token[0] === "timestamp") {
        if(mode === "chapters")
          err("Timestamp not allowed in chapter title text.")
        var timings = new WebVTTCueTimingsAndSettingsParser(token[1], err),
            timestamp = timings.parseTimestamp()
        if(timestamp !== undefined) {
          if(timestamp <= cueStart || timestamp >= cueEnd) {
            err("Timestamp must be between start timestamp and end timestamp.")
          }
          if(timestamps.length > 0 && timestamps[timestamps.length-1] >= timestamp) {
            err("Timestamp must be greater than any previous timestamp.")
          }
          current.children.push({type:"timestamp", value:timestamp, parent:current})
          timestamps.push(timestamp)
        }
      }
    }
    // Anything still open at end of text lacks its end tag; only <v> may
    // be left unclosed without an error.
    while(current.parent) {
      if(current.name !== "v") {
        err("Required end tag missing.")
      }
      current = current.parent
    }
    return result
  }

  // Tokenizer. Consumes characters from `pos` and returns one of
  //   ["text", value]
  //   ["start tag", name, classes, annotation]
  //   ["end tag", name]
  //   ["timestamp", text]
  // End of input is seen as `c === undefined` and terminates every state.
  function nextToken() {
    var state = "data",
        result = "",
        buffer = "",
        classes = []
    while(line[pos-1] !== undefined || pos === 0) {
      var c = line[pos]
      if(state === "data") {
        if(c === "&") {
          buffer = c
          state = "escape"
        } else if(c === "<" && result === "") {
          state = "tag"
        } else if(c === "<" || c === undefined) {
          return ["text", result]
        } else {
          result += c
        }
      } else if(state === "escape") {
        if(c === "&") {
          err("Incorrect escape.")
          result += buffer
          buffer = c
        } else if(c !== undefined && /[abglmnsprt]/.test(c)) {
          // The undefined check is required: test() would stringify
          // undefined to "undefined", whose "n" matches the class.
          buffer += c
        } else if(c === ";") {
          if(Object.prototype.hasOwnProperty.call(ESCAPES, buffer)) {
            result += ESCAPES[buffer]
          } else {
            err("Incorrect escape.")
            result += buffer + ";"
          }
          state = "data"
        } else if(c === "<" || c === undefined) {
          err("Incorrect escape.")
          result += buffer
          return ["text", result]
        } else {
          err("Incorrect escape.")
          result += buffer + c
          state = "data"
        }
      } else if(state === "tag") {
        if(c === "\t" || c === "\n" || c === "\f" || c === " ") {
          state = "start tag annotation"
        } else if(c === ".") {
          state = "start tag class"
        } else if(c === "/") {
          state = "end tag"
        } else if(/\d/.test(c)) {
          result = c
          state = "timestamp tag"
        } else if(c === ">" || c === undefined) {
          if(c === ">") {
            pos++
          }
          return ["start tag", "", [], ""]
        } else {
          result = c
          state = "start tag"
        }
      } else if(state === "start tag") {
        if(c === "\t" || c === "\f" || c === " ") {
          state = "start tag annotation"
        } else if(c === "\n") {
          buffer = c
          state = "start tag annotation"
        } else if(c === ".") {
          state = "start tag class"
        } else if(c === ">" || c === undefined) {
          if(c === ">") {
            pos++
          }
          return ["start tag", result, [], ""]
        } else {
          result += c
        }
      } else if(state === "start tag class") {
        if(c === "\t" || c === "\f" || c === " ") {
          classes.push(buffer)
          buffer = ""
          state = "start tag annotation"
        } else if(c === "\n") {
          classes.push(buffer)
          buffer = c
          state = "start tag annotation"
        } else if(c === ".") {
          classes.push(buffer)
          buffer = ""
        } else if(c === ">" || c === undefined) {
          if(c === ">") {
            pos++
          }
          classes.push(buffer)
          return ["start tag", result, classes, ""]
        } else {
          buffer += c
        }
      } else if(state === "start tag annotation") {
        if(c === ">" || c === undefined) {
          if(c === ">") {
            pos++
          }
          // Trim the annotation and collapse whitespace runs to one space.
          buffer = buffer.split(/[\u0020\t\f\r\n]+/).filter(function(item) { if(item) return true }).join(" ")
          return ["start tag", result, classes, buffer]
        } else {
          buffer += c
        }
      } else if(state === "end tag") {
        if(c === ">" || c === undefined) {
          if(c === ">") {
            pos++
          }
          return ["end tag", result]
        } else {
          result += c
        }
      } else if(state === "timestamp tag") {
        if(c === ">" || c === undefined) {
          if(c === ">") {
            pos++
          }
          return ["timestamp", result]
        } else {
          result += c
        }
      } else {
        err("Never happens.") // The joke is it might.
      }
      // 8
      pos++
    }
  }
}

/**
 * Serializer that turns parsed cues back into a compact text form.
 *
 * The output is not a WebVTT file: each cue is written as
 * "<startTime> <endTime>", a newline, the serialized cue tree, and a blank
 * line. Text node values are written as-is (no re-escaping).
 */
var WebVTTSerializer = function() {
  // Serializes an array of tree nodes. Text nodes contribute their value,
  // element nodes become <name.class... annotation>children</name>, and
  // any other node type (timestamps) becomes <value>.
  function serializeTree(tree) {
    var result = ""
    for(var i = 0; i < tree.length; i++) {
      var node = tree[i]
      if(node.type === "text") {
        result += node.value
      } else if(node.type === "object") {
        result += "<" + node.name
        if(node.classes) {
          for(var y = 0; y < node.classes.length; y++) {
            result += "." + node.classes[y]
          }
        }
        if(node.value) {
          result += " " + node.value
        }
        result += ">"
        if(node.children)
          result += serializeTree(node.children)
        result += "</" + node.name + ">"
      } else {
        result += "<" + node.value + ">"
      }
    }
    return result
  }

  // Serializes one cue: timing line, cue tree, then an empty line.
  function serializeCue(cue) {
    return cue.startTime + " " + cue.endTime + "\n" + serializeTree(cue.tree.children) + "\n\n"
  }

  /**
   * Serializes a list of cues.
   *
   * @param {Array} cues - Cue objects with `startTime`, `endTime` and a
   *   `tree` whose `children` array holds the cue's nodes.
   * @returns {string} The concatenated serialization; "" for an empty list.
   */
  this.serialize = function(cues) {
    var result = ""
    for(var i = 0; i < cues.length; i++) {
      result += serializeCue(cues[i])
    }
    return result
  }
}