441 lines
14 KiB
JavaScript
441 lines
14 KiB
JavaScript
|
import { inRange, decoderError, encoderError, isASCIICodePoint,
|
|||
|
end_of_stream, finished, floor } from './text_decoder_utils.js'
|
|||
|
import index, { indexCodePointFor, indexPointerFor } from './text_decoder_indexes.js'
|
|||
|
|
|||
|
// 13.2 iso-2022-jp
|
|||
|
|
|||
|
// 13.2.1 iso-2022-jp decoder
|
|||
|
/**
 * Streaming decoder for the iso-2022-jp encoding (Encoding Standard,
 * section 13.2.1).
 *
 * The decoder is a byte-at-a-time state machine. Escape sequences
 * (0x1B followed by two bytes) switch between the ASCII, Roman,
 * Katakana and two-byte jis0208 modes; every other byte is interpreted
 * according to the mode that is currently selected.
 *
 * @implements {Decoder}
 */
export class ISO2022JPDecoder {
  /**
   * @param {{fatal: boolean}} options Decoder options. `fatal` is stored
   *     and forwarded to `decoderError` for every malformed sequence.
   */
  constructor(options) {
    const { fatal } = options
    this.fatal = fatal

    /** @enum */
    this.states = {
      ASCII: 0,
      Roman: 1,
      Katakana: 2,
      LeadByte: 3,
      TrailByte: 4,
      EscapeStart: 5,
      Escape: 6,
    }

    // iso-2022-jp's decoder has an associated iso-2022-jp decoder
    // state (initially ASCII), iso-2022-jp decoder output state
    // (initially ASCII), iso-2022-jp lead (initially 0x00), and
    // iso-2022-jp output flag (initially unset).
    this.iso2022jp_decoder_state = this.states.ASCII
    // The output state remembers the mode selected by the last valid
    // escape sequence; it is what the decoder falls back to when a
    // later escape sequence turns out to be malformed.
    this.iso2022jp_decoder_output_state = this.states.ASCII
    this.iso2022jp_lead = 0x00
    this.iso2022jp_output_flag = false
  }

  /**
   * Consumes one byte and advances the state machine.
   *
   * @param {Stream} stream The stream of bytes being decoded. Only its
   *     `prepend` method is used, to push bytes back for re-processing.
   * @param {number} bite The next byte read from the stream, or
   *     `end_of_stream`.
   * @return {?number} `null` to signal "continue" (no output yet), a
   *     code point, `finished` at end of stream, or whatever
   *     `decoderError(this.fatal)` yields for malformed input
   *     (NOTE(review): presumably throws when `fatal` is set and returns
   *     a replacement code point otherwise - confirm in
   *     text_decoder_utils.js).
   */
  handler(stream, bite) {
    // switching on iso-2022-jp decoder state:
    switch (this.iso2022jp_decoder_state) {
      default:
      case this.states.ASCII:
        // ASCII
        // Based on byte:

        // 0x1B
        if (bite === 0x1B) {
          // Set iso-2022-jp decoder state to escape start and return
          // continue.
          this.iso2022jp_decoder_state = this.states.EscapeStart
          return null
        }

        // 0x00 to 0x7F, excluding 0x0E, 0x0F, and 0x1B
        if (inRange(bite, 0x00, 0x7F) && bite !== 0x0E
            && bite !== 0x0F && bite !== 0x1B) {
          // Unset the iso-2022-jp output flag and return a code point
          // whose value is byte.
          this.iso2022jp_output_flag = false
          return bite
        }

        // end-of-stream
        if (bite === end_of_stream) {
          // Return finished.
          return finished
        }

        // Otherwise
        // Unset the iso-2022-jp output flag and return error.
        this.iso2022jp_output_flag = false
        return decoderError(this.fatal)

      case this.states.Roman:
        // Roman
        // Based on byte:

        // 0x1B
        if (bite === 0x1B) {
          // Set iso-2022-jp decoder state to escape start and return
          // continue.
          this.iso2022jp_decoder_state = this.states.EscapeStart
          return null
        }

        // 0x5C
        if (bite === 0x5C) {
          // Unset the iso-2022-jp output flag and return code point
          // U+00A5.
          this.iso2022jp_output_flag = false
          return 0x00A5
        }

        // 0x7E
        if (bite === 0x7E) {
          // Unset the iso-2022-jp output flag and return code point
          // U+203E.
          this.iso2022jp_output_flag = false
          return 0x203E
        }

        // 0x00 to 0x7F, excluding 0x0E, 0x0F, 0x1B, 0x5C, and 0x7E
        if (inRange(bite, 0x00, 0x7F) && bite !== 0x0E && bite !== 0x0F
            && bite !== 0x1B && bite !== 0x5C && bite !== 0x7E) {
          // Unset the iso-2022-jp output flag and return a code point
          // whose value is byte.
          this.iso2022jp_output_flag = false
          return bite
        }

        // end-of-stream
        if (bite === end_of_stream) {
          // Return finished.
          return finished
        }

        // Otherwise
        // Unset the iso-2022-jp output flag and return error.
        this.iso2022jp_output_flag = false
        return decoderError(this.fatal)

      case this.states.Katakana:
        // Katakana
        // Based on byte:

        // 0x1B
        if (bite === 0x1B) {
          // Set iso-2022-jp decoder state to escape start and return
          // continue.
          this.iso2022jp_decoder_state = this.states.EscapeStart
          return null
        }

        // 0x21 to 0x5F
        if (inRange(bite, 0x21, 0x5F)) {
          // Unset the iso-2022-jp output flag and return a code point
          // whose value is 0xFF61 - 0x21 + byte.
          this.iso2022jp_output_flag = false
          return 0xFF61 - 0x21 + bite
        }

        // end-of-stream
        if (bite === end_of_stream) {
          // Return finished.
          return finished
        }

        // Otherwise
        // Unset the iso-2022-jp output flag and return error.
        this.iso2022jp_output_flag = false
        return decoderError(this.fatal)

      case this.states.LeadByte:
        // Lead byte
        // Based on byte:

        // 0x1B
        if (bite === 0x1B) {
          // Set iso-2022-jp decoder state to escape start and return
          // continue.
          this.iso2022jp_decoder_state = this.states.EscapeStart
          return null
        }

        // 0x21 to 0x7E
        if (inRange(bite, 0x21, 0x7E)) {
          // Unset the iso-2022-jp output flag, set iso-2022-jp lead
          // to byte, iso-2022-jp decoder state to trail byte, and
          // return continue.
          this.iso2022jp_output_flag = false
          this.iso2022jp_lead = bite
          this.iso2022jp_decoder_state = this.states.TrailByte
          return null
        }

        // end-of-stream
        if (bite === end_of_stream) {
          // Return finished.
          return finished
        }

        // Otherwise
        // Unset the iso-2022-jp output flag and return error.
        this.iso2022jp_output_flag = false
        return decoderError(this.fatal)

      case this.states.TrailByte:
        // Trail byte
        // Based on byte:

        // 0x1B
        if (bite === 0x1B) {
          // Set iso-2022-jp decoder state to escape start and return
          // error: the pending lead byte is dropped because an escape
          // arrived where its trail byte was expected.
          this.iso2022jp_decoder_state = this.states.EscapeStart
          return decoderError(this.fatal)
        }

        // 0x21 to 0x7E
        if (inRange(bite, 0x21, 0x7E)) {
          // 1. Set the iso-2022-jp decoder state to lead byte.
          this.iso2022jp_decoder_state = this.states.LeadByte

          // 2. Let pointer be (iso-2022-jp lead - 0x21) * 94 + byte - 0x21.
          const pointer = (this.iso2022jp_lead - 0x21) * 94 + bite - 0x21

          // 3. Let code point be the index code point for pointer in
          // index jis0208.
          const code_point = indexCodePointFor(pointer, index('jis0208'))

          // 4. If code point is null, return error.
          if (code_point === null) {
            return decoderError(this.fatal)
          }

          // 5. Return a code point whose value is code point.
          return code_point
        }

        // end-of-stream
        if (bite === end_of_stream) {
          // Set the iso-2022-jp decoder state to lead byte, prepend
          // byte to stream, and return error.
          this.iso2022jp_decoder_state = this.states.LeadByte
          stream.prepend(bite)
          return decoderError(this.fatal)
        }

        // Otherwise
        // Set iso-2022-jp decoder state to lead byte and return
        // error.
        this.iso2022jp_decoder_state = this.states.LeadByte
        return decoderError(this.fatal)

      case this.states.EscapeStart:
        // Escape start

        // 1. If byte is either 0x24 or 0x28, set iso-2022-jp lead to
        // byte, iso-2022-jp decoder state to escape, and return
        // continue.
        if (bite === 0x24 || bite === 0x28) {
          this.iso2022jp_lead = bite
          this.iso2022jp_decoder_state = this.states.Escape
          return null
        }

        // 2. Prepend byte to stream.
        stream.prepend(bite)

        // 3. Unset the iso-2022-jp output flag, set iso-2022-jp
        // decoder state to iso-2022-jp decoder output state, and
        // return error.
        this.iso2022jp_output_flag = false
        this.iso2022jp_decoder_state = this.iso2022jp_decoder_output_state
        return decoderError(this.fatal)

      case this.states.Escape: {
        // Escape

        // 1. Let lead be iso-2022-jp lead and set iso-2022-jp lead to
        // 0x00.
        const lead = this.iso2022jp_lead
        this.iso2022jp_lead = 0x00

        // 2. Let state be null.
        let state = null

        // 3. If lead is 0x28 and byte is 0x42, set state to ASCII.
        if (lead === 0x28 && bite === 0x42) {
          state = this.states.ASCII
        }

        // 4. If lead is 0x28 and byte is 0x4A, set state to Roman.
        if (lead === 0x28 && bite === 0x4A) {
          state = this.states.Roman
        }

        // 5. If lead is 0x28 and byte is 0x49, set state to Katakana.
        if (lead === 0x28 && bite === 0x49) {
          state = this.states.Katakana
        }

        // 6. If lead is 0x24 and byte is either 0x40 or 0x42, set
        // state to lead byte.
        if (lead === 0x24 && (bite === 0x40 || bite === 0x42)) {
          state = this.states.LeadByte
        }

        // 7. If state is non-null, run these substeps:
        if (state !== null) {
          // 1. Set iso-2022-jp decoder state and iso-2022-jp decoder
          // output state to state. Both must be updated: the output
          // state is what a later malformed escape falls back to.
          this.iso2022jp_decoder_state = state
          this.iso2022jp_decoder_output_state = state

          // 2. Let output flag be the iso-2022-jp output flag.
          const output_flag = this.iso2022jp_output_flag

          // 3. Set the iso-2022-jp output flag.
          this.iso2022jp_output_flag = true

          // 4. Return continue, if output flag is unset, and error
          // otherwise.
          return !output_flag ? null : decoderError(this.fatal)
        }

        // 8. Prepend lead and byte to stream.
        stream.prepend([lead, bite])

        // 9. Unset the iso-2022-jp output flag, set iso-2022-jp
        // decoder state to iso-2022-jp decoder output state and
        // return error.
        this.iso2022jp_output_flag = false
        this.iso2022jp_decoder_state = this.iso2022jp_decoder_output_state
        return decoderError(this.fatal)
      }
    }
  }
}
|
|||
|
|
|||
|
// 13.2.2 iso-2022-jp encoder
|
|||
|
/**
 * Streaming encoder for the iso-2022-jp encoding (Encoding Standard,
 * section 13.2.2).
 *
 * The encoder tracks which character set is currently selected (ASCII,
 * Roman or jis0208). When a code point needs a different set, it pushes
 * the code point back onto the stream, emits the three-byte escape
 * sequence that selects the set, and encodes the code point on the next
 * call.
 *
 * @implements {Encoder}
 */
export class ISO2022JPEncoder {
  constructor() {
    // iso-2022-jp's encoder has an associated iso-2022-jp encoder
    // state which is one of ASCII, Roman, and jis0208 (initially
    // ASCII).
    /** @enum */
    this.states = {
      ASCII: 0,
      Roman: 1,
      jis0208: 2,
    }
    this.iso2022jp_state = this.states.ASCII
  }

  /**
   * Encodes one code point.
   *
   * @param {Stream} stream Input stream. Only its `prepend` method is
   *     used, to push a code point back when an escape sequence has to
   *     be emitted first.
   * @param {number} code_point Next code point read from the stream, or
   *     `end_of_stream`.
   * @return {(number|!Array.<number>)} A single byte, an array of bytes
   *     (escape sequence or lead/trail pair), `finished`, or whatever
   *     `encoderError(...)` yields for an unencodable code point
   *     (NOTE(review): its behaviour is defined in text_decoder_utils.js
   *     and is not visible here).
   */
  handler(stream, code_point) {
    const { ASCII, Roman, jis0208 } = this.states

    // 1. If code point is end-of-stream and iso-2022-jp encoder
    // state is not ASCII, prepend code point to stream, set
    // iso-2022-jp encoder state to ASCII, and return three bytes
    // 0x1B 0x28 0x42.
    if (code_point === end_of_stream && this.iso2022jp_state !== ASCII) {
      stream.prepend(code_point)
      this.iso2022jp_state = ASCII
      return [0x1B, 0x28, 0x42]
    }

    // 2. If code point is end-of-stream and iso-2022-jp encoder
    // state is ASCII, return finished.
    if (code_point === end_of_stream && this.iso2022jp_state === ASCII) {
      return finished
    }

    // 3. If ISO-2022-JP encoder state is ASCII or Roman, and code
    // point is U+000E, U+000F, or U+001B, return error with U+FFFD.
    if ((this.iso2022jp_state === ASCII || this.iso2022jp_state === Roman) &&
        (code_point === 0x000E || code_point === 0x000F ||
         code_point === 0x001B)) {
      return encoderError(0xFFFD)
    }

    // 4. If iso-2022-jp encoder state is ASCII and code point is an
    // ASCII code point, return a byte whose value is code point.
    if (this.iso2022jp_state === ASCII && isASCIICodePoint(code_point)) {
      return code_point
    }

    // 5. If iso-2022-jp encoder state is Roman and code point is an
    // ASCII code point, excluding U+005C and U+007E, or is U+00A5
    // or U+203E, run these substeps:
    if (this.iso2022jp_state === Roman &&
        ((isASCIICodePoint(code_point) &&
          code_point !== 0x005C && code_point !== 0x007E) ||
         (code_point === 0x00A5 || code_point === 0x203E))) {
      // 1. If code point is an ASCII code point, return a byte
      // whose value is code point.
      if (isASCIICodePoint(code_point)) {
        return code_point
      }

      // 2. If code point is U+00A5, return byte 0x5C.
      if (code_point === 0x00A5) {
        return 0x5C
      }

      // 3. If code point is U+203E, return byte 0x7E.
      if (code_point === 0x203E) {
        return 0x7E
      }
    }

    // 6. If code point is an ASCII code point, and iso-2022-jp
    // encoder state is not ASCII, prepend code point to stream, set
    // iso-2022-jp encoder state to ASCII, and return three bytes
    // 0x1B 0x28 0x42.
    if (isASCIICodePoint(code_point) && this.iso2022jp_state !== ASCII) {
      stream.prepend(code_point)
      this.iso2022jp_state = ASCII
      return [0x1B, 0x28, 0x42]
    }

    // 7. If code point is either U+00A5 or U+203E, and iso-2022-jp
    // encoder state is not Roman, prepend code point to stream, set
    // iso-2022-jp encoder state to Roman, and return three bytes
    // 0x1B 0x28 0x4A.
    if ((code_point === 0x00A5 || code_point === 0x203E) &&
        this.iso2022jp_state !== Roman) {
      stream.prepend(code_point)
      this.iso2022jp_state = Roman
      return [0x1B, 0x28, 0x4A]
    }

    // 8. If code point is U+2212, set it to U+FF0D.
    if (code_point === 0x2212) {
      code_point = 0xFF0D
    }

    // 9. Let pointer be the index pointer for code point in index
    // jis0208.
    const pointer = indexPointerFor(code_point, index('jis0208'))

    // 10. If pointer is null, return error with code point.
    if (pointer === null) {
      return encoderError(code_point)
    }

    // 11. If iso-2022-jp encoder state is not jis0208, prepend code
    // point to stream, set iso-2022-jp encoder state to jis0208,
    // and return three bytes 0x1B 0x24 0x42.
    if (this.iso2022jp_state !== jis0208) {
      stream.prepend(code_point)
      this.iso2022jp_state = jis0208
      return [0x1B, 0x24, 0x42]
    }

    // 12. Let lead be floor(pointer / 94) + 0x21.
    const lead = floor(pointer / 94) + 0x21

    // 13. Let trail be pointer % 94 + 0x21.
    const trail = pointer % 94 + 0x21

    // 14. Return two bytes whose values are lead and trail.
    return [lead, trail]
  }
}
|