6/18/ds/18js/lib/iso-2022-jp.js

441 lines
14 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { inRange, decoderError, encoderError, isASCIICodePoint,
end_of_stream, finished, floor } from './text_decoder_utils.js'
import index, { indexCodePointFor, indexPointerFor } from './text_decoder_indexes.js'
// 13.2 iso-2022-jp
// 13.2.1 iso-2022-jp decoder
/**
* @implements {Decoder}
*/
export class ISO2022JPDecoder {
constructor(options) {
const { fatal } = options
this.fatal = fatal
/** @enum */
this.states = {
ASCII: 0,
Roman: 1,
Katakana: 2,
LeadByte: 3,
TrailByte: 4,
EscapeStart: 5,
Escape: 6,
}
// iso-2022-jp's decoder has an associated iso-2022-jp decoder
// state (initially ASCII), iso-2022-jp decoder output state
// (initially ASCII), iso-2022-jp lead (initially 0x00), and
// iso-2022-jp output flag (initially unset).
this.iso2022jp_decoder_state = this.states.ASCII
this.iso2022jp_decoder_output_state = this.states.ASCII,
this.iso2022jp_lead = 0x00
this.iso2022jp_output_flag = false
}
/**
* @param {Stream} stream The stream of bytes being decoded.
* @param {number} bite The next byte read from the stream.
*/
handler(stream, bite) {
// switching on iso-2022-jp decoder state:
switch (this.iso2022jp_decoder_state) {
default:
case this.states.ASCII:
// ASCII
// Based on byte:
// 0x1B
if (bite === 0x1B) {
// Set iso-2022-jp decoder state to escape start and return
// continue.
this.iso2022jp_decoder_state = this.states.EscapeStart
return null
}
// 0x00 to 0x7F, excluding 0x0E, 0x0F, and 0x1B
if (inRange(bite, 0x00, 0x7F) && bite !== 0x0E
&& bite !== 0x0F && bite !== 0x1B) {
// Unset the iso-2022-jp output flag and return a code point
// whose value is byte.
this.iso2022jp_output_flag = false
return bite
}
// end-of-stream
if (bite === end_of_stream) {
// Return finished.
return finished
}
// Otherwise
// Unset the iso-2022-jp output flag and return error.
this.iso2022jp_output_flag = false
return decoderError(this.fatal)
case this.states.Roman:
// Roman
// Based on byte:
// 0x1B
if (bite === 0x1B) {
// Set iso-2022-jp decoder state to escape start and return
// continue.
this.iso2022jp_decoder_state = this.states.EscapeStart
return null
}
// 0x5C
if (bite === 0x5C) {
// Unset the iso-2022-jp output flag and return code point
// U+00A5.
this.iso2022jp_output_flag = false
return 0x00A5
}
// 0x7E
if (bite === 0x7E) {
// Unset the iso-2022-jp output flag and return code point
// U+203E.
this.iso2022jp_output_flag = false
return 0x203E
}
// 0x00 to 0x7F, excluding 0x0E, 0x0F, 0x1B, 0x5C, and 0x7E
if (inRange(bite, 0x00, 0x7F) && bite !== 0x0E && bite !== 0x0F
&& bite !== 0x1B && bite !== 0x5C && bite !== 0x7E) {
// Unset the iso-2022-jp output flag and return a code point
// whose value is byte.
this.iso2022jp_output_flag = false
return bite
}
// end-of-stream
if (bite === end_of_stream) {
// Return finished.
return finished
}
// Otherwise
// Unset the iso-2022-jp output flag and return error.
this.iso2022jp_output_flag = false
return decoderError(this.fatal)
case this.states.Katakana:
// Katakana
// Based on byte:
// 0x1B
if (bite === 0x1B) {
// Set iso-2022-jp decoder state to escape start and return
// continue.
this.iso2022jp_decoder_state = this.states.EscapeStart
return null
}
// 0x21 to 0x5F
if (inRange(bite, 0x21, 0x5F)) {
// Unset the iso-2022-jp output flag and return a code point
// whose value is 0xFF61 0x21 + byte.
this.iso2022jp_output_flag = false
return 0xFF61 - 0x21 + bite
}
// end-of-stream
if (bite === end_of_stream) {
// Return finished.
return finished
}
// Otherwise
// Unset the iso-2022-jp output flag and return error.
this.iso2022jp_output_flag = false
return decoderError(this.fatal)
case this.states.LeadByte:
// Lead byte
// Based on byte:
// 0x1B
if (bite === 0x1B) {
// Set iso-2022-jp decoder state to escape start and return
// continue.
this.iso2022jp_decoder_state = this.states.EscapeStart
return null
}
// 0x21 to 0x7E
if (inRange(bite, 0x21, 0x7E)) {
// Unset the iso-2022-jp output flag, set iso-2022-jp lead
// to byte, iso-2022-jp decoder state to trail byte, and
// return continue.
this.iso2022jp_output_flag = false
this.iso2022jp_lead = bite
this.iso2022jp_decoder_state = this.states.TrailByte
return null
}
// end-of-stream
if (bite === end_of_stream) {
// Return finished.
return finished
}
// Otherwise
// Unset the iso-2022-jp output flag and return error.
this.iso2022jp_output_flag = false
return decoderError(this.fatal)
case this.states.TrailByte:
// Trail byte
// Based on byte:
// 0x1B
if (bite === 0x1B) {
// Set iso-2022-jp decoder state to escape start and return
// continue.
this.iso2022jp_decoder_state = this.states.EscapeStart
return decoderError(this.fatal)
}
// 0x21 to 0x7E
if (inRange(bite, 0x21, 0x7E)) {
// 1. Set the iso-2022-jp decoder state to lead byte.
this.iso2022jp_decoder_state = this.states.LeadByte
// 2. Let pointer be (iso-2022-jp lead 0x21) × 94 + byte 0x21.
const pointer = (this.iso2022jp_lead - 0x21) * 94 + bite - 0x21
// 3. Let code point be the index code point for pointer in
// index jis0208.
const code_point = indexCodePointFor(pointer, index('jis0208'))
// 4. If code point is null, return error.
if (code_point === null)
return decoderError(this.fatal)
// 5. Return a code point whose value is code point.
return code_point
}
// end-of-stream
if (bite === end_of_stream) {
// Set the iso-2022-jp decoder state to lead byte, prepend
// byte to stream, and return error.
this.iso2022jp_decoder_state = this.states.LeadByte
stream.prepend(bite)
return decoderError(this.fatal)
}
// Otherwise
// Set iso-2022-jp decoder state to lead byte and return
// error.
this.iso2022jp_decoder_state = this.states.LeadByte
return decoderError(this.fatal)
case this.states.EscapeStart:
// Escape start
// 1. If byte is either 0x24 or 0x28, set iso-2022-jp lead to
// byte, iso-2022-jp decoder state to escape, and return
// continue.
if (bite === 0x24 || bite === 0x28) {
this.iso2022jp_lead = bite
this.iso2022jp_decoder_state = this.states.Escape
return null
}
// 2. Prepend byte to stream.
stream.prepend(bite)
// 3. Unset the iso-2022-jp output flag, set iso-2022-jp
// decoder state to iso-2022-jp decoder output state, and
// return error.
this.iso2022jp_output_flag = false
this.iso2022jp_decoder_state = this.iso2022jp_decoder_output_state
return decoderError(this.fatal)
case this.states.Escape: {
// Escape
// 1. Let lead be iso-2022-jp lead and set iso-2022-jp lead to
// 0x00.
const lead = this.iso2022jp_lead
this.iso2022jp_lead = 0x00
// 2. Let state be null.
let state = null
// 3. If lead is 0x28 and byte is 0x42, set state to ASCII.
if (lead === 0x28 && bite === 0x42)
state = this.states.ASCII
// 4. If lead is 0x28 and byte is 0x4A, set state to Roman.
if (lead === 0x28 && bite === 0x4A)
state = this.states.Roman
// 5. If lead is 0x28 and byte is 0x49, set state to Katakana.
if (lead === 0x28 && bite === 0x49)
state = this.states.Katakana
// 6. If lead is 0x24 and byte is either 0x40 or 0x42, set
// state to lead byte.
if (lead === 0x24 && (bite === 0x40 || bite === 0x42))
state = this.states.LeadByte
// 7. If state is non-null, run these substeps:
if (state !== null) {
// 1. Set iso-2022-jp decoder state and iso-2022-jp decoder
// output state to this.states.
this.iso2022jp_decoder_state = this.iso2022jp_decoder_state = state
// 2. Let output flag be the iso-2022-jp output flag.
const output_flag = this.iso2022jp_output_flag
// 3. Set the iso-2022-jp output flag.
this.iso2022jp_output_flag = true
// 4. Return continue, if output flag is unset, and error
// otherwise.
return !output_flag ? null : decoderError(this.fatal)
}
// 8. Prepend lead and byte to stream.
stream.prepend([lead, bite])
// 9. Unset the iso-2022-jp output flag, set iso-2022-jp
// decoder state to iso-2022-jp decoder output state and
// return error.
this.iso2022jp_output_flag = false
this.iso2022jp_decoder_state = this.iso2022jp_decoder_output_state
return decoderError(this.fatal)
}
}
}
}
// 13.2.2 iso-2022-jp encoder
/**
* @implements {Encoder}
*/
export class ISO2022JPEncoder {
constructor() {
// iso-2022-jp's encoder has an associated iso-2022-jp encoder
// state which is one of ASCII, Roman, and jis0208 (initially
// ASCII).
/** @enum */
this.states = {
ASCII: 0,
Roman: 1,
jis0208: 2,
}
this.iso2022jp_state = this.states.ASCII
}
/**
* @param {Stream} stream Input stream.
* @param {number} code_point Next code point read from the stream.
*/
handler(stream, code_point) {
// 1. If code point is end-of-stream and iso-2022-jp encoder
// state is not ASCII, prepend code point to stream, set
// iso-2022-jp encoder state to ASCII, and return three bytes
// 0x1B 0x28 0x42.
if (code_point === end_of_stream &&
this.iso2022jp_state !== this.states.ASCII) {
stream.prepend(code_point)
this.iso2022jp_state = this.states.ASCII
return [0x1B, 0x28, 0x42]
}
// 2. If code point is end-of-stream and iso-2022-jp encoder
// state is ASCII, return finished.
if (code_point === end_of_stream && this.iso2022jp_state === this.states.ASCII)
return finished
// 3. If ISO-2022-JP encoder state is ASCII or Roman, and code
// point is U+000E, U+000F, or U+001B, return error with U+FFFD.
if ((this.iso2022jp_state === this.states.ASCII ||
this.iso2022jp_state === this.states.Roman) &&
(code_point === 0x000E || code_point === 0x000F ||
code_point === 0x001B)) {
return encoderError(0xFFFD)
}
// 4. If iso-2022-jp encoder state is ASCII and code point is an
// ASCII code point, return a byte whose value is code point.
if (this.iso2022jp_state === this.states.ASCII &&
isASCIICodePoint(code_point))
return code_point
// 5. If iso-2022-jp encoder state is Roman and code point is an
// ASCII code point, excluding U+005C and U+007E, or is U+00A5
// or U+203E, run these substeps:
if (this.iso2022jp_state === this.states.Roman &&
((isASCIICodePoint(code_point) &&
code_point !== 0x005C && code_point !== 0x007E) ||
(code_point == 0x00A5 || code_point == 0x203E))) {
// 1. If code point is an ASCII code point, return a byte
// whose value is code point.
if (isASCIICodePoint(code_point))
return code_point
// 2. If code point is U+00A5, return byte 0x5C.
if (code_point === 0x00A5)
return 0x5C
// 3. If code point is U+203E, return byte 0x7E.
if (code_point === 0x203E)
return 0x7E
}
// 6. If code point is an ASCII code point, and iso-2022-jp
// encoder state is not ASCII, prepend code point to stream, set
// iso-2022-jp encoder state to ASCII, and return three bytes
// 0x1B 0x28 0x42.
if (isASCIICodePoint(code_point) &&
this.iso2022jp_state !== this.states.ASCII) {
stream.prepend(code_point)
this.iso2022jp_state = this.states.ASCII
return [0x1B, 0x28, 0x42]
}
// 7. If code point is either U+00A5 or U+203E, and iso-2022-jp
// encoder state is not Roman, prepend code point to stream, set
// iso-2022-jp encoder state to Roman, and return three bytes
// 0x1B 0x28 0x4A.
if ((code_point === 0x00A5 || code_point === 0x203E) &&
this.iso2022jp_state !== this.states.Roman) {
stream.prepend(code_point)
this.iso2022jp_state = this.states.Roman
return [0x1B, 0x28, 0x4A]
}
// 8. If code point is U+2212, set it to U+FF0D.
if (code_point === 0x2212)
code_point = 0xFF0D
// 9. Let pointer be the index pointer for code point in index
// jis0208.
const pointer = indexPointerFor(code_point, index('jis0208'))
// 10. If pointer is null, return error with code point.
if (pointer === null)
return encoderError(code_point)
// 11. If iso-2022-jp encoder state is not jis0208, prepend code
// point to stream, set iso-2022-jp encoder state to jis0208,
// and return three bytes 0x1B 0x24 0x42.
if (this.iso2022jp_state !== this.states.jis0208) {
stream.prepend(code_point)
this.iso2022jp_state = this.states.jis0208
return [0x1B, 0x24, 0x42]
}
// 12. Let lead be floor(pointer / 94) + 0x21.
const lead = floor(pointer / 94) + 0x21
// 13. Let trail be pointer % 94 + 0x21.
const trail = pointer % 94 + 0x21
// 14. Return two bytes whose values are lead and trail.
return [lead, trail]
}
}