190 lines
5.9 KiB
JavaScript
190 lines
5.9 KiB
JavaScript
import Stream, { DEFAULT_ENCODING, getEncoding } from './text_decoder_index.js'
|
|
import { end_of_stream, finished, codePointsToString } from './text_decoder_utils.js'
|
|
import { decoders } from './table.js'
|
|
|
|
// 8.1 Interface TextDecoder
|
|
|
|
class TextDecoder {
|
|
/**
|
|
* @param {string=} label The label of the encoding; defaults to 'utf-8'.
|
|
* @param {Object=} options
|
|
*/
|
|
constructor(label = DEFAULT_ENCODING, options = {}) {
|
|
// A TextDecoder object has an associated encoding, decoder,
|
|
// stream, ignore BOM flag (initially unset), BOM seen flag
|
|
// (initially unset), error mode (initially replacement), and do
|
|
// not flush flag (initially unset).
|
|
|
|
/** @private */
|
|
this._encoding = null
|
|
/** @private @type {?Decoder} */
|
|
this._decoder = null
|
|
/** @private @type {boolean} */
|
|
this._ignoreBOM = false
|
|
/** @private @type {boolean} */
|
|
this._BOMseen = false
|
|
/** @private @type {string} */
|
|
this._error_mode = 'replacement'
|
|
/** @private @type {boolean} */
|
|
this._do_not_flush = false
|
|
|
|
|
|
// 1. Let encoding be the result of getting an encoding from
|
|
// label.
|
|
const encoding = getEncoding(label)
|
|
|
|
// 2. If encoding is failure or replacement, throw a RangeError.
|
|
if (encoding === null || encoding.name == 'replacement')
|
|
throw RangeError('Unknown encoding: ' + label)
|
|
if (!decoders[encoding.name]) {
|
|
throw Error('Decoder not present.' +
|
|
' Did you forget to include encoding-indexes.js first?')
|
|
}
|
|
|
|
// 4. Set dec's encoding to encoding.
|
|
this._encoding = encoding
|
|
|
|
// 5. If options's fatal member is true, set dec's error mode to
|
|
// fatal.
|
|
if (options['fatal'])
|
|
this._error_mode = 'fatal'
|
|
|
|
// 6. If options's ignoreBOM member is true, set dec's ignore BOM
|
|
// flag.
|
|
if (options['ignoreBOM'])
|
|
this._ignoreBOM = true
|
|
}
|
|
|
|
get encoding() {
|
|
return this._encoding.name.toLowerCase()
|
|
}
|
|
get fatal() {
|
|
return this._error_mode === 'fatal'
|
|
}
|
|
get ignoreBOM() {
|
|
return this._ignoreBOM
|
|
}
|
|
/**
|
|
* @param {BufferSource=} input The buffer of bytes to decode.
|
|
* @param {Object=} options
|
|
* @return The decoded string.
|
|
*/
|
|
decode(input, options = {}) {
|
|
let bytes
|
|
if (typeof input === 'object' && input instanceof ArrayBuffer) {
|
|
bytes = new Uint8Array(input)
|
|
} else if (typeof input === 'object' && 'buffer' in input &&
|
|
input.buffer instanceof ArrayBuffer) {
|
|
bytes = new Uint8Array(input.buffer,
|
|
input.byteOffset,
|
|
input.byteLength)
|
|
} else {
|
|
bytes = new Uint8Array(0)
|
|
}
|
|
|
|
// 1. If the do not flush flag is unset, set decoder to a new
|
|
// encoding's decoder, set stream to a new stream, and unset the
|
|
// BOM seen flag.
|
|
if (!this._do_not_flush) {
|
|
this._decoder = decoders[this._encoding.name]({
|
|
fatal: this._error_mode === 'fatal' })
|
|
this._BOMseen = false
|
|
}
|
|
|
|
// 2. If options's stream is true, set the do not flush flag, and
|
|
// unset the do not flush flag otherwise.
|
|
this._do_not_flush = Boolean(options['stream'])
|
|
|
|
// 3. If input is given, push a copy of input to stream.
|
|
// TODO: Align with spec algorithm - maintain stream on instance.
|
|
const input_stream = new Stream(bytes)
|
|
|
|
// 4. Let output be a new stream.
|
|
const output = []
|
|
|
|
/** @type {?(number|!Array.<number>)} */
|
|
let result
|
|
|
|
// 5. While true:
|
|
while (true) {
|
|
// 1. Let token be the result of reading from stream.
|
|
const token = input_stream.read()
|
|
|
|
// 2. If token is end-of-stream and the do not flush flag is
|
|
// set, return output, serialized.
|
|
// TODO: Align with spec algorithm.
|
|
if (token === end_of_stream)
|
|
break
|
|
|
|
// 3. Otherwise, run these subsubsteps:
|
|
|
|
// 1. Let result be the result of processing token for decoder,
|
|
// stream, output, and error mode.
|
|
result = this._decoder.handler(input_stream, token)
|
|
|
|
// 2. If result is finished, return output, serialized.
|
|
if (result === finished)
|
|
break
|
|
|
|
if (result !== null) {
|
|
if (Array.isArray(result))
|
|
output.push.apply(output, /**@type {!Array.<number>}*/(result))
|
|
else
|
|
output.push(result)
|
|
}
|
|
|
|
// 3. Otherwise, if result is error, throw a TypeError.
|
|
// (Thrown in handler)
|
|
|
|
// 4. Otherwise, do nothing.
|
|
}
|
|
// TODO: Align with spec algorithm.
|
|
if (!this._do_not_flush) {
|
|
do {
|
|
result = this._decoder.handler(input_stream, input_stream.read())
|
|
if (result === finished)
|
|
break
|
|
if (result === null)
|
|
continue
|
|
if (Array.isArray(result))
|
|
output.push.apply(output, /**@type {!Array.<number>}*/(result))
|
|
else
|
|
output.push(result)
|
|
} while (!input_stream.endOfStream())
|
|
this._decoder = null
|
|
}
|
|
|
|
return this.serializeStream(output)
|
|
}
|
|
// A TextDecoder object also has an associated serialize stream
|
|
// algorithm...
|
|
/**
|
|
* @param {!Array.<number>} stream
|
|
*/
|
|
serializeStream(stream) {
|
|
// 1. Let token be the result of reading from stream.
|
|
// (Done in-place on array, rather than as a stream)
|
|
|
|
// 2. If encoding is UTF-8, UTF-16BE, or UTF-16LE, and ignore
|
|
// BOM flag and BOM seen flag are unset, run these subsubsteps:
|
|
if (['UTF-8', 'UTF-16LE', 'UTF-16BE'].includes(this._encoding.name) &&
|
|
!this._ignoreBOM && !this._BOMseen) {
|
|
if (stream.length > 0 && stream[0] === 0xFEFF) {
|
|
// 1. If token is U+FEFF, set BOM seen flag.
|
|
this._BOMseen = true
|
|
stream.shift()
|
|
} else if (stream.length > 0) {
|
|
// 2. Otherwise, if token is not end-of-stream, set BOM seen
|
|
// flag and append token to stream.
|
|
this._BOMseen = true
|
|
} else {
|
|
// 3. Otherwise, if token is not end-of-stream, append token
|
|
// to output.
|
|
// (no-op)
|
|
}
|
|
}
|
|
// 4. Otherwise, return output.
|
|
return codePointsToString(stream)
|
|
}
|
|
}
|
|
export {TextDecoder} |