// Copyright 2014 Joshua Bell. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // If we're in node require encoding-indexes and attach it to the global. /** * @fileoverview Global |this| required for resolving indexes in node. * @suppress {globalThis} */ if (typeof module !== "undefined" && module.exports) { this["encoding-indexes"] = require("./encoding-indexes.js")["encoding-indexes"]; } (function(global) { 'use strict'; // // Utilities // /** * @param {number} a The number to test. * @param {number} min The minimum value in the range, inclusive. * @param {number} max The maximum value in the range, inclusive. * @return {boolean} True if a >= min and a <= max. */ function inRange(a, min, max) { return min <= a && a <= max; } /** * @param {number} n The numerator. * @param {number} d The denominator. * @return {number} The result of the integer division of n by d. */ function div(n, d) { return Math.floor(n / d); } /** * @param {*} o * @return {Object} */ function ToDictionary(o) { if (o === undefined) return {}; if (o === Object(o)) return o; throw TypeError('Could not convert argument to dictionary'); } /** * @param {string} string Input string of UTF-16 code units. * @return {!Array.} Code points. */ function stringToCodePoints(string) { // http://heycam.github.io/webidl/#dfn-obtain-unicode // 1. Let S be the DOMString value. var s = String(string); // 2. Let n be the length of S. var n = s.length; // 3. Initialize i to 0. var i = 0; // 4. Initialize U to be an empty sequence of Unicode characters. var u = []; // 5. While i < n: while (i < n) { // 1. Let c be the code unit in S at index i. var c = s.charCodeAt(i); // 2. Depending on the value of c: // c < 0xD800 or c > 0xDFFF if (c < 0xD800 || c > 0xDFFF) { // Append to U the Unicode character with code point c. u.push(c); } // 0xDC00 ≤ c ≤ 0xDFFF else if (0xDC00 <= c && c <= 0xDFFF) { // Append to U a U+FFFD REPLACEMENT CHARACTER. u.push(0xFFFD); } // 0xD800 ≤ c ≤ 0xDBFF else if (0xD800 <= c && c <= 0xDBFF) { // 1. If i = n−1, then append to U a U+FFFD REPLACEMENT // CHARACTER. if (i === n - 1) { u.push(0xFFFD); } // 2. Otherwise, i < n−1: else { // 1. Let d be the code unit in S at index i+1. var d = string.charCodeAt(i + 1); // 2. If 0xDC00 ≤ d ≤ 0xDFFF, then: if (0xDC00 <= d && d <= 0xDFFF) { // 1. Let a be c & 0x3FF. var a = c & 0x3FF; // 2. Let b be d & 0x3FF. var b = d & 0x3FF; // 3. Append to U the Unicode character with code point // 2^16+2^10*a+b. u.push(0x10000 + (a << 10) + b); // 4. Set i to i+1. i += 1; } // 3. Otherwise, d < 0xDC00 or d > 0xDFFF. Append to U a // U+FFFD REPLACEMENT CHARACTER. else { u.push(0xFFFD); } } } // 3. Set i to i+1. i += 1; } // 6. Return U. return u; } /** * @param {!Array.} code_points Array of code points. * @return {string} string String of UTF-16 code units. */ function codePointsToString(code_points) { var s = ''; for (var i = 0; i < code_points.length; ++i) { var cp = code_points[i]; if (cp <= 0xFFFF) { s += String.fromCharCode(cp); } else { cp -= 0x10000; s += String.fromCharCode((cp >> 10) + 0xD800, (cp & 0x3FF) + 0xDC00); } } return s; } // // Implementation of Encoding specification // http://dvcs.w3.org/hg/encoding/raw-file/tip/Overview.html // // // 3. Terminology // /** * End-of-stream is a special token that signifies no more tokens * are in the stream. * @const */ var end_of_stream = -1; /** * A stream represents an ordered sequence of tokens. * * @constructor * @param {!(Array.|Uint8Array)} tokens Array of tokens that provide the * stream. */ function Stream(tokens) { /** @type {!Array.} */ this.tokens = [].slice.call(tokens); } Stream.prototype = { /** * @return {boolean} True if end-of-stream has been hit. */ endOfStream: function() { return !this.tokens.length; }, /** * When a token is read from a stream, the first token in the * stream must be returned and subsequently removed, and * end-of-stream must be returned otherwise. * * @return {number} Get the next token from the stream, or * end_of_stream. */ read: function() { if (!this.tokens.length) return end_of_stream; return this.tokens.shift(); }, /** * When one or more tokens are prepended to a stream, those tokens * must be inserted, in given order, before the first token in the * stream. * * @param {(number|!Array.)} token The token(s) to prepend to the stream. */ prepend: function(token) { if (Array.isArray(token)) { var tokens = /**@type {!Array.}*/(token); while (tokens.length) this.tokens.unshift(tokens.pop()); } else { this.tokens.unshift(token); } }, /** * When one or more tokens are pushed to a stream, those tokens * must be inserted, in given order, after the last token in the * stream. * * @param {(number|!Array.)} token The tokens(s) to prepend to the stream. */ push: function(token) { if (Array.isArray(token)) { var tokens = /**@type {!Array.}*/(token); while (tokens.length) this.tokens.push(tokens.shift()); } else { this.tokens.push(token); } } }; // // 4. Encodings // // 4.1 Encoders and decoders /** @const */ var finished = -1; /** * @param {boolean} fatal If true, decoding errors raise an exception. * @param {number=} opt_code_point Override the standard fallback code point. * @return {number} The code point to insert on a decoding error. */ function decoderError(fatal, opt_code_point) { if (fatal) throw TypeError('Decoder error'); return opt_code_point || 0xFFFD; } /** * @param {number} code_point The code point that could not be encoded. * @return {number} Always throws, no value is actually returned. */ function encoderError(code_point) { throw TypeError('The code point ' + code_point + ' could not be encoded.'); } /** @interface */ function Decoder() {} Decoder.prototype = { /** * @param {Stream} stream The stream of bytes being decoded. * @param {number} bite The next byte read from the stream. * @return {?(number|!Array.)} The next code point(s) * decoded, or null if not enough data exists in the input * stream to decode a complete code point, or |finished|. */ handler: function(stream, bite) {} }; /** @interface */ function Encoder() {} Encoder.prototype = { /** * @param {Stream} stream The stream of code points being encoded. * @param {number} code_point Next code point read from the stream. * @return {(number|!Array.)} Byte(s) to emit, or |finished|. */ handler: function(stream, code_point) {} }; // 4.2 Names and labels // TODO: Define @typedef for Encoding: {name:string,labels:Array.} // https://github.com/google/closure-compiler/issues/247 /** * @param {string} label The encoding label. * @return {?{name:string,labels:Array.}} */ function getEncoding(label) { // 1. Remove any leading and trailing ASCII whitespace from label. label = String(label).trim().toLowerCase(); // 2. If label is an ASCII case-insensitive match for any of the // labels listed in the table below, return the corresponding // encoding, and failure otherwise. if (Object.prototype.hasOwnProperty.call(label_to_encoding, label)) { return label_to_encoding[label]; } return null; } /** * Encodings table: http://encoding.spec.whatwg.org/encodings.json * @const * @type {!Array.<{ * heading: string, * encodings: Array.<{name:string,labels:Array.}> * }>} */ var encodings = [ { "encodings": [ { "labels": [ "unicode-1-1-utf-8", "utf-8", "utf8" ], "name": "utf-8" } ], "heading": "The Encoding" }, { "encodings": [ { "labels": [ "866", "cp866", "csibm866", "ibm866" ], "name": "ibm866" }, { "labels": [ "csisolatin2", "iso-8859-2", "iso-ir-101", "iso8859-2", "iso88592", "iso_8859-2", "iso_8859-2:1987", "l2", "latin2" ], "name": "iso-8859-2" }, { "labels": [ "csisolatin3", "iso-8859-3", "iso-ir-109", "iso8859-3", "iso88593", "iso_8859-3", "iso_8859-3:1988", "l3", "latin3" ], "name": "iso-8859-3" }, { "labels": [ "csisolatin4", "iso-8859-4", "iso-ir-110", "iso8859-4", "iso88594", "iso_8859-4", "iso_8859-4:1988", "l4", "latin4" ], "name": "iso-8859-4" }, { "labels": [ "csisolatincyrillic", "cyrillic", "iso-8859-5", "iso-ir-144", "iso8859-5", "iso88595", "iso_8859-5", "iso_8859-5:1988" ], "name": "iso-8859-5" }, { "labels": [ "arabic", "asmo-708", "csiso88596e", "csiso88596i", "csisolatinarabic", "ecma-114", "iso-8859-6", "iso-8859-6-e", "iso-8859-6-i", "iso-ir-127", "iso8859-6", "iso88596", "iso_8859-6", "iso_8859-6:1987" ], "name": "iso-8859-6" }, { "labels": [ "csisolatingreek", "ecma-118", "elot_928", "greek", "greek8", "iso-8859-7", "iso-ir-126", "iso8859-7", "iso88597", "iso_8859-7", "iso_8859-7:1987", "sun_eu_greek" ], "name": "iso-8859-7" }, { "labels": [ "csiso88598e", "csisolatinhebrew", "hebrew", "iso-8859-8", "iso-8859-8-e", "iso-ir-138", "iso8859-8", "iso88598", "iso_8859-8", "iso_8859-8:1988", "visual" ], "name": "iso-8859-8" }, { "labels": [ "csiso88598i", "iso-8859-8-i", "logical" ], "name": "iso-8859-8-i" }, { "labels": [ "csisolatin6", "iso-8859-10", "iso-ir-157", "iso8859-10", "iso885910", "l6", "latin6" ], "name": "iso-8859-10" }, { "labels": [ "iso-8859-13", "iso8859-13", "iso885913" ], "name": "iso-8859-13" }, { "labels": [ "iso-8859-14", "iso8859-14", "iso885914" ], "name": "iso-8859-14" }, { "labels": [ "csisolatin9", "iso-8859-15", "iso8859-15", "iso885915", "iso_8859-15", "l9" ], "name": "iso-8859-15" }, { "labels": [ "iso-8859-16" ], "name": "iso-8859-16" }, { "labels": [ "cskoi8r", "koi", "koi8", "koi8-r", "koi8_r" ], "name": "koi8-r" }, { "labels": [ "koi8-u" ], "name": "koi8-u" }, { "labels": [ "csmacintosh", "mac", "macintosh", "x-mac-roman" ], "name": "macintosh" }, { "labels": [ "dos-874", "iso-8859-11", "iso8859-11", "iso885911", "tis-620", "windows-874" ], "name": "windows-874" }, { "labels": [ "cp1250", "windows-1250", "x-cp1250" ], "name": "windows-1250" }, { "labels": [ "cp1251", "windows-1251", "x-cp1251" ], "name": "windows-1251" }, { "labels": [ "ansi_x3.4-1968", "ascii", "cp1252", "cp819", "csisolatin1", "ibm819", "iso-8859-1", "iso-ir-100", "iso8859-1", "iso88591", "iso_8859-1", "iso_8859-1:1987", "l1", "latin1", "us-ascii", "windows-1252", "x-cp1252" ], "name": "windows-1252" }, { "labels": [ "cp1253", "windows-1253", "x-cp1253" ], "name": "windows-1253" }, { "labels": [ "cp1254", "csisolatin5", "iso-8859-9", "iso-ir-148", "iso8859-9", "iso88599", "iso_8859-9", "iso_8859-9:1989", "l5", "latin5", "windows-1254", "x-cp1254" ], "name": "windows-1254" }, { "labels": [ "cp1255", "windows-1255", "x-cp1255" ], "name": "windows-1255" }, { "labels": [ "cp1256", "windows-1256", "x-cp1256" ], "name": "windows-1256" }, { "labels": [ "cp1257", "windows-1257", "x-cp1257" ], "name": "windows-1257" }, { "labels": [ "cp1258", "windows-1258", "x-cp1258" ], "name": "windows-1258" }, { "labels": [ "x-mac-cyrillic", "x-mac-ukrainian" ], "name": "x-mac-cyrillic" } ], "heading": "Legacy single-byte encodings" }, { "encodings": [ { "labels": [ "chinese", "csgb2312", "csiso58gb231280", "gb2312", "gb_2312", "gb_2312-80", "gbk", "iso-ir-58", "x-gbk" ], "name": "gbk" }, { "labels": [ "gb18030" ], "name": "gb18030" } ], "heading": "Legacy multi-byte Chinese (simplified) encodings" }, { "encodings": [ { "labels": [ "big5", "big5-hkscs", "cn-big5", "csbig5", "x-x-big5" ], "name": "big5" } ], "heading": "Legacy multi-byte Chinese (traditional) encodings" }, { "encodings": [ { "labels": [ "cseucpkdfmtjapanese", "euc-jp", "x-euc-jp" ], "name": "euc-jp" }, { "labels": [ "csiso2022jp", "iso-2022-jp" ], "name": "iso-2022-jp" }, { "labels": [ "csshiftjis", "ms_kanji", "shift-jis", "shift_jis", "sjis", "windows-31j", "x-sjis" ], "name": "shift_jis" } ], "heading": "Legacy multi-byte Japanese encodings" }, { "encodings": [ { "labels": [ "cseuckr", "csksc56011987", "euc-kr", "iso-ir-149", "korean", "ks_c_5601-1987", "ks_c_5601-1989", "ksc5601", "ksc_5601", "windows-949" ], "name": "euc-kr" } ], "heading": "Legacy multi-byte Korean encodings" }, { "encodings": [ { "labels": [ "csiso2022kr", "hz-gb-2312", "iso-2022-cn", "iso-2022-cn-ext", "iso-2022-kr" ], "name": "replacement" }, { "labels": [ "utf-16be" ], "name": "utf-16be" }, { "labels": [ "utf-16", "utf-16le" ], "name": "utf-16le" }, { "labels": [ "x-user-defined" ], "name": "x-user-defined" } ], "heading": "Legacy miscellaneous encodings" } ]; // Label to encoding registry. /** @type {Object.}>} */ var label_to_encoding = {}; encodings.forEach(function(category) { category.encodings.forEach(function(encoding) { encoding.labels.forEach(function(label) { label_to_encoding[label] = encoding; }); }); }); // Registry of of encoder/decoder factories, by encoding name. /** @type {Object.} */ var encoders = {}; /** @type {Object.} */ var decoders = {}; // // 5. Indexes // /** * @param {number} pointer The |pointer| to search for. * @param {(!Array.|undefined)} index The |index| to search within. * @return {?number} The code point corresponding to |pointer| in |index|, * or null if |code point| is not in |index|. */ function indexCodePointFor(pointer, index) { if (!index) return null; return index[pointer] || null; } /** * @param {number} code_point The |code point| to search for. * @param {!Array.} index The |index| to search within. * @return {?number} The first pointer corresponding to |code point| in * |index|, or null if |code point| is not in |index|. */ function indexPointerFor(code_point, index) { var pointer = index.indexOf(code_point); return pointer === -1 ? null : pointer; } /** * @param {string} name Name of the index. * @return {(!Array.|!Array.>)} * */ function index(name) { if (!('encoding-indexes' in global)) { throw Error("Indexes missing." + " Did you forget to include encoding-indexes.js?"); } return global['encoding-indexes'][name]; } /** * @param {number} pointer The |pointer| to search for in the gb18030 index. * @return {?number} The code point corresponding to |pointer| in |index|, * or null if |code point| is not in the gb18030 index. */ function indexGB18030RangesCodePointFor(pointer) { // 1. If pointer is greater than 39419 and less than 189000, or // pointer is greater than 1237575, return null. if ((pointer > 39419 && pointer < 189000) || (pointer > 1237575)) return null; // 2. Let offset be the last pointer in index gb18030 ranges that // is equal to or less than pointer and let code point offset be // its corresponding code point. var offset = 0; var code_point_offset = 0; var idx = index('gb18030'); var i; for (i = 0; i < idx.length; ++i) { /** @type {!Array.} */ var entry = idx[i]; if (entry[0] <= pointer) { offset = entry[0]; code_point_offset = entry[1]; } else { break; } } // 3. Return a code point whose value is code point offset + // pointer − offset. return code_point_offset + pointer - offset; } /** * @param {number} code_point The |code point| to locate in the gb18030 index. * @return {number} The first pointer corresponding to |code point| in the * gb18030 index. */ function indexGB18030RangesPointerFor(code_point) { // 1. Let offset be the last code point in index gb18030 ranges // that is equal to or less than code point and let pointer offset // be its corresponding pointer. var offset = 0; var pointer_offset = 0; var idx = index('gb18030'); var i; for (i = 0; i < idx.length; ++i) { /** @type {!Array.} */ var entry = idx[i]; if (entry[1] <= code_point) { offset = entry[1]; pointer_offset = entry[0]; } else { break; } } // 2. Return a pointer whose value is pointer offset + code point // − offset. return pointer_offset + code_point - offset; } /** * @param {number} code_point The |code_point| to search for in the shift_jis index. * @return {?number} The code point corresponding to |pointer| in |index|, * or null if |code point| is not in the shift_jis index. */ function indexShiftJISPointerFor(code_point) { // 1. Let index be index jis0208 excluding all pointers in the // range 8272 to 8835. var pointer = indexPointerFor(code_point, index('jis0208')); if (pointer === null || inRange(pointer, 8272, 8835)) return null; // 2. Return the index pointer for code point in index. return pointer; } // // 7. API // /** @const */ var DEFAULT_ENCODING = 'utf-8'; // 7.1 Interface TextDecoder /** * @constructor * @param {string=} encoding The label of the encoding; * defaults to 'utf-8'. * @param {Object=} options */ function TextDecoder(encoding, options) { if (!(this instanceof TextDecoder)) { return new TextDecoder(encoding, options); } encoding = encoding !== undefined ? String(encoding) : DEFAULT_ENCODING; options = ToDictionary(options); /** @private */ this._encoding = getEncoding(encoding); if (this._encoding === null || this._encoding.name === 'replacement') throw RangeError('Unknown encoding: ' + encoding); if (!decoders[this._encoding.name]) { throw Error('Decoder not present.' + ' Did you forget to include encoding-indexes.js?'); } /** @private @type {boolean} */ this._streaming = false; /** @private @type {boolean} */ this._BOMseen = false; /** @private @type {?Decoder} */ this._decoder = null; /** @private @type {boolean} */ this._fatal = Boolean(options['fatal']); /** @private @type {boolean} */ this._ignoreBOM = Boolean(options['ignoreBOM']); if (Object.defineProperty) { Object.defineProperty(this, 'encoding', {value: this._encoding.name}); Object.defineProperty(this, 'fatal', {value: this._fatal}); Object.defineProperty(this, 'ignoreBOM', {value: this._ignoreBOM}); } else { this.encoding = this._encoding.name; this.fatal = this._fatal; this.ignoreBOM = this._ignoreBOM; } return this; } TextDecoder.prototype = { /** * @param {ArrayBufferView=} input The buffer of bytes to decode. * @param {Object=} options * @return {string} The decoded string. */ decode: function decode(input, options) { var bytes; if (typeof input === 'object' && input instanceof ArrayBuffer) { bytes = new Uint8Array(input); } else if (typeof input === 'object' && 'buffer' in input && input.buffer instanceof ArrayBuffer) { bytes = new Uint8Array(input.buffer, input.byteOffset, input.byteLength); } else { bytes = new Uint8Array(0); } options = ToDictionary(options); if (!this._streaming) { this._decoder = decoders[this._encoding.name]({fatal: this._fatal}); this._BOMseen = false; } this._streaming = Boolean(options['stream']); var input_stream = new Stream(bytes); var code_points = []; /** @type {?(number|!Array.)} */ var result; while (!input_stream.endOfStream()) { result = this._decoder.handler(input_stream, input_stream.read()); if (result === finished) break; if (result === null) continue; if (Array.isArray(result)) code_points.push.apply(code_points, /**@type {!Array.}*/(result)); else code_points.push(result); } if (!this._streaming) { do { result = this._decoder.handler(input_stream, input_stream.read()); if (result === finished) break; if (result === null) continue; if (Array.isArray(result)) code_points.push.apply(code_points, /**@type {!Array.}*/(result)); else code_points.push(result); } while (!input_stream.endOfStream()); this._decoder = null; } if (code_points.length) { // If encoding is one of utf-8, utf-16be, and utf-16le, and // ignore BOM flag and BOM seen flag are unset, run these // subsubsteps: if (['utf-8', 'utf-16le', 'utf-16be'].indexOf(this.encoding) !== -1 && !this._ignoreBOM && !this._BOMseen) { // If token is U+FEFF, set BOM seen flag. if (code_points[0] === 0xFEFF) { this._BOMseen = true; code_points.shift(); } else { // Otherwise, if token is not end-of-stream, set BOM seen // flag and append token to output. this._BOMseen = true; } } } return codePointsToString(code_points); } }; // 7.2 Interface TextEncoder /** * @constructor * @param {string=} encoding The label of the encoding; * defaults to 'utf-8'. * @param {Object=} options */ function TextEncoder(encoding, options) { if (!(this instanceof TextEncoder)) return new TextEncoder(encoding, options); encoding = encoding !== undefined ? String(encoding) : DEFAULT_ENCODING; options = ToDictionary(options); /** @private */ this._encoding = getEncoding(encoding); if (this._encoding === null || this._encoding.name === 'replacement') throw RangeError('Unknown encoding: ' + encoding); var allowLegacyEncoding = Boolean(options['NONSTANDARD_allowLegacyEncoding']); var isLegacyEncoding = (this._encoding.name !== 'utf-8' && this._encoding.name !== 'utf-16le' && this._encoding.name !== 'utf-16be'); if (this._encoding === null || (isLegacyEncoding && !allowLegacyEncoding)) throw RangeError('Unknown encoding: ' + encoding); if (!encoders[this._encoding.name]) { throw Error('Encoder not present.' + ' Did you forget to include encoding-indexes.js?'); } /** @private @type {boolean} */ this._streaming = false; /** @private @type {?Encoder} */ this._encoder = null; /** @private @type {{fatal: boolean}} */ this._options = {fatal: Boolean(options['fatal'])}; if (Object.defineProperty) Object.defineProperty(this, 'encoding', {value: this._encoding.name}); else this.encoding = this._encoding.name; return this; } TextEncoder.prototype = { /** * @param {string=} opt_string The string to encode. * @param {Object=} options * @return {Uint8Array} Encoded bytes, as a Uint8Array. */ encode: function encode(opt_string, options) { opt_string = opt_string ? String(opt_string) : ''; options = ToDictionary(options); // NOTE: This option is nonstandard. None of the encodings // permitted for encoding (i.e. UTF-8, UTF-16) are stateful, // so streaming is not necessary. if (!this._streaming) this._encoder = encoders[this._encoding.name](this._options); this._streaming = Boolean(options['stream']); var bytes = []; var input_stream = new Stream(stringToCodePoints(opt_string)); /** @type {?(number|!Array.)} */ var result; while (!input_stream.endOfStream()) { result = this._encoder.handler(input_stream, input_stream.read()); if (result === finished) break; if (Array.isArray(result)) bytes.push.apply(bytes, /**@type {!Array.}*/(result)); else bytes.push(result); } if (!this._streaming) { while (true) { result = this._encoder.handler(input_stream, input_stream.read()); if (result === finished) break; if (Array.isArray(result)) bytes.push.apply(bytes, /**@type {!Array.}*/(result)); else bytes.push(result); } this._encoder = null; } return new Uint8Array(bytes); } }; // // 8. The encoding // // 8.1 utf-8 /** * @constructor * @implements {Decoder} * @param {{fatal: boolean}} options */ function UTF8Decoder(options) { var fatal = options.fatal; // utf-8's decoder's has an associated utf-8 code point, utf-8 // bytes seen, and utf-8 bytes needed (all initially 0), a utf-8 // lower boundary (initially 0x80), and a utf-8 upper boundary // (initially 0xBF). var /** @type {number} */ utf8_code_point = 0, /** @type {number} */ utf8_bytes_seen = 0, /** @type {number} */ utf8_bytes_needed = 0, /** @type {number} */ utf8_lower_boundary = 0x80, /** @type {number} */ utf8_upper_boundary = 0xBF; /** * @param {Stream} stream The stream of bytes being decoded. * @param {number} bite The next byte read from the stream. * @return {?(number|!Array.)} The next code point(s) * decoded, or null if not enough data exists in the input * stream to decode a complete code point. */ this.handler = function(stream, bite) { // 1. If byte is end-of-stream and utf-8 bytes needed is not 0, // set utf-8 bytes needed to 0 and return error. if (bite === end_of_stream && utf8_bytes_needed !== 0) { utf8_bytes_needed = 0; return decoderError(fatal); } // 2. If byte is end-of-stream, return finished. if (bite === end_of_stream) return finished; // 3. If utf-8 bytes needed is 0, based on byte: if (utf8_bytes_needed === 0) { // 0x00 to 0x7F if (inRange(bite, 0x00, 0x7F)) { // Return a code point whose value is byte. return bite; } // 0xC2 to 0xDF if (inRange(bite, 0xC2, 0xDF)) { // Set utf-8 bytes needed to 1 and utf-8 code point to byte // − 0xC0. utf8_bytes_needed = 1; utf8_code_point = bite - 0xC0; } // 0xE0 to 0xEF else if (inRange(bite, 0xE0, 0xEF)) { // 1. If byte is 0xE0, set utf-8 lower boundary to 0xA0. if (bite === 0xE0) utf8_lower_boundary = 0xA0; // 2. If byte is 0xED, set utf-8 upper boundary to 0x9F. if (bite === 0xED) utf8_upper_boundary = 0x9F; // 3. Set utf-8 bytes needed to 2 and utf-8 code point to // byte − 0xE0. utf8_bytes_needed = 2; utf8_code_point = bite - 0xE0; } // 0xF0 to 0xF4 else if (inRange(bite, 0xF0, 0xF4)) { // 1. If byte is 0xF0, set utf-8 lower boundary to 0x90. if (bite === 0xF0) utf8_lower_boundary = 0x90; // 2. If byte is 0xF4, set utf-8 upper boundary to 0x8F. if (bite === 0xF4) utf8_upper_boundary = 0x8F; // 3. Set utf-8 bytes needed to 3 and utf-8 code point to // byte − 0xF0. utf8_bytes_needed = 3; utf8_code_point = bite - 0xF0; } // Otherwise else { // Return error. return decoderError(fatal); } // Then (byte is in the range 0xC2 to 0xF4) set utf-8 code // point to utf-8 code point << (6 × utf-8 bytes needed) and // return continue. utf8_code_point = utf8_code_point << (6 * utf8_bytes_needed); return null; } // 4. If byte is not in the range utf-8 lower boundary to utf-8 // upper boundary, run these substeps: if (!inRange(bite, utf8_lower_boundary, utf8_upper_boundary)) { // 1. Set utf-8 code point, utf-8 bytes needed, and utf-8 // bytes seen to 0, set utf-8 lower boundary to 0x80, and set // utf-8 upper boundary to 0xBF. utf8_code_point = utf8_bytes_needed = utf8_bytes_seen = 0; utf8_lower_boundary = 0x80; utf8_upper_boundary = 0xBF; // 2. Prepend byte to stream. stream.prepend(bite); // 3. Return error. return decoderError(fatal); } // 5. Set utf-8 lower boundary to 0x80 and utf-8 upper boundary // to 0xBF. utf8_lower_boundary = 0x80; utf8_upper_boundary = 0xBF; // 6. Increase utf-8 bytes seen by one and set utf-8 code point // to utf-8 code point + (byte − 0x80) << (6 × (utf-8 bytes // needed − utf-8 bytes seen)). utf8_bytes_seen += 1; utf8_code_point += (bite - 0x80) << (6 * (utf8_bytes_needed - utf8_bytes_seen)); // 7. If utf-8 bytes seen is not equal to utf-8 bytes needed, // continue. if (utf8_bytes_seen !== utf8_bytes_needed) return null; // 8. Let code point be utf-8 code point. var code_point = utf8_code_point; // 9. Set utf-8 code point, utf-8 bytes needed, and utf-8 bytes // seen to 0. utf8_code_point = utf8_bytes_needed = utf8_bytes_seen = 0; // 10. Return a code point whose value is code point. return code_point; }; } /** * @constructor * @implements {Encoder} * @param {{fatal: boolean}} options */ function UTF8Encoder(options) { var fatal = options.fatal; /** * @param {Stream} stream Input stream. * @param {number} code_point Next code point read from the stream. * @return {(number|!Array.)} Byte(s) to emit. */ this.handler = function(stream, code_point) { // 1. If code point is end-of-stream, return finished. if (code_point === end_of_stream) return finished; // 2. If code point is in the range U+0000 to U+007F, return a // byte whose value is code point. if (inRange(code_point, 0x0000, 0x007f)) return code_point; // 3. Set count and offset based on the range code point is in: var count, offset; // U+0080 to U+07FF: 1 and 0xC0 if (inRange(code_point, 0x0080, 0x07FF)) { count = 1; offset = 0xC0; } // U+0800 to U+FFFF: 2 and 0xE0 else if (inRange(code_point, 0x0800, 0xFFFF)) { count = 2; offset = 0xE0; } // U+10000 to U+10FFFF: 3 and 0xF0 else if (inRange(code_point, 0x10000, 0x10FFFF)) { count = 3; offset = 0xF0; } // 4.Let bytes be a byte sequence whose first byte is (code // point >> (6 × count)) + offset. var bytes = [(code_point >> (6 * count)) + offset]; // 5. Run these substeps while count is greater than 0: while (count > 0) { // 1. Set temp to code point >> (6 × (count − 1)). var temp = code_point >> (6 * (count - 1)); // 2. Append to bytes 0x80 | (temp & 0x3F). bytes.push(0x80 | (temp & 0x3F)); // 3. Decrease count by one. count -= 1; } // 6. Return bytes bytes, in order. return bytes; }; } /** @param {{fatal: boolean}} options */ encoders['utf-8'] = function(options) { return new UTF8Encoder(options); }; /** @param {{fatal: boolean}} options */ decoders['utf-8'] = function(options) { return new UTF8Decoder(options); }; // // 9. Legacy single-byte encodings // // 9.1 single-byte decoder /** * @constructor * @implements {Decoder} * @param {!Array.} index The encoding index. * @param {{fatal: boolean}} options */ function SingleByteDecoder(index, options) { var fatal = options.fatal; /** * @param {Stream} stream The stream of bytes being decoded. * @param {number} bite The next byte read from the stream. * @return {?(number|!Array.)} The next code point(s) * decoded, or null if not enough data exists in the input * stream to decode a complete code point. */ this.handler = function(stream, bite) { // 1. If byte is end-of-stream, return finished. if (bite === end_of_stream) return finished; // 2. If byte is in the range 0x00 to 0x7F, return a code point // whose value is byte. if (inRange(bite, 0x00, 0x7F)) return bite; // 3. Let code point be the index code point for byte − 0x80 in // index single-byte. var code_point = index[bite - 0x80]; // 4. If code point is null, return error. if (code_point === null) return decoderError(fatal); // 5. Return a code point whose value is code point. return code_point; }; } // 9.2 single-byte encoder /** * @constructor * @implements {Encoder} * @param {!Array.} index The encoding index. * @param {{fatal: boolean}} options */ function SingleByteEncoder(index, options) { var fatal = options.fatal; /** * @param {Stream} stream Input stream. * @param {number} code_point Next code point read from the stream. * @return {(number|!Array.)} Byte(s) to emit. */ this.handler = function(stream, code_point) { // 1. If code point is end-of-stream, return finished. if (code_point === end_of_stream) return finished; // 2. If code point is in the range U+0000 to U+007F, return a // byte whose value is code point. if (inRange(code_point, 0x0000, 0x007F)) return code_point; // 3. Let pointer be the index pointer for code point in index // single-byte. var pointer = indexPointerFor(code_point, index); // 4. If pointer is null, return error with code point. if (pointer === null) encoderError(code_point); // 5. Return a byte whose value is pointer + 0x80. return pointer + 0x80; }; } (function() { if (!('encoding-indexes' in global)) return; encodings.forEach(function(category) { if (category.heading !== 'Legacy single-byte encodings') return; category.encodings.forEach(function(encoding) { var name = encoding.name; var idx = index(name); /** @param {{fatal: boolean}} options */ decoders[name] = function(options) { return new SingleByteDecoder(idx, options); }; /** @param {{fatal: boolean}} options */ encoders[name] = function(options) { return new SingleByteEncoder(idx, options); }; }); }); }()); // // 10. Legacy multi-byte Chinese (simplified) encodings // // 10.1 gbk // 10.1.1 gbk decoder // gbk's decoder is gb18030's decoder. /** @param {{fatal: boolean}} options */ decoders['gbk'] = function(options) { return new GB18030Decoder(options); }; // 10.1.2 gbk encoder // gbk's encoder is gb18030's encoder with its gbk flag set. /** @param {{fatal: boolean}} options */ encoders['gbk'] = function(options) { return new GB18030Encoder(options, true); }; // 10.2 gb18030 // 10.2.1 gb18030 decoder /** * @constructor * @implements {Decoder} * @param {{fatal: boolean}} options */ function GB18030Decoder(options) { var fatal = options.fatal; // gb18030's decoder has an associated gb18030 first, gb18030 // second, and gb18030 third (all initially 0x00). var /** @type {number} */ gb18030_first = 0x00, /** @type {number} */ gb18030_second = 0x00, /** @type {number} */ gb18030_third = 0x00; /** * @param {Stream} stream The stream of bytes being decoded. * @param {number} bite The next byte read from the stream. * @return {?(number|!Array.)} The next code point(s) * decoded, or null if not enough data exists in the input * stream to decode a complete code point. */ this.handler = function(stream, bite) { // 1. If byte is end-of-stream and gb18030 first, gb18030 // second, and gb18030 third are 0x00, return finished. if (bite === end_of_stream && gb18030_first === 0x00 && gb18030_second === 0x00 && gb18030_third === 0x00) { return finished; } // 2. If byte is end-of-stream, and gb18030 first, gb18030 // second, or gb18030 third is not 0x00, set gb18030 first, // gb18030 second, and gb18030 third to 0x00, and return error. if (bite === end_of_stream && (gb18030_first !== 0x00 || gb18030_second !== 0x00 || gb18030_third !== 0x00)) { gb18030_first = 0x00; gb18030_second = 0x00; gb18030_third = 0x00; decoderError(fatal); } var code_point; // 3. If gb18030 third is not 0x00, run these substeps: if (gb18030_third !== 0x00) { // 1. Let code point be null. code_point = null; // 2. If byte is in the range 0x30 to 0x39, set code point to // the index gb18030 ranges code point for (((gb18030 first − // 0x81) × 10 + gb18030 second − 0x30) × 126 + gb18030 third − // 0x81) × 10 + byte − 0x30. if (inRange(bite, 0x30, 0x39)) { code_point = indexGB18030RangesCodePointFor( (((gb18030_first - 0x81) * 10 + (gb18030_second - 0x30)) * 126 + (gb18030_third - 0x81)) * 10 + bite - 0x30); } // 3. Let buffer be a byte sequence consisting of gb18030 // second, gb18030 third, and byte, in order. var buffer = [gb18030_second, gb18030_third, bite]; // 4. Set gb18030 first, gb18030 second, and gb18030 third to // 0x00. gb18030_first = 0x00; gb18030_second = 0x00; gb18030_third = 0x00; // 5. If code point is null, prepend buffer to stream and // return error. if (code_point === null) { stream.prepend(buffer); return decoderError(fatal); } // 6. Return a code point whose value is code point. return code_point; } // 4. If gb18030 second is not 0x00, run these substeps: if (gb18030_second !== 0x00) { // 1. If byte is in the range 0x81 to 0xFE, set gb18030 third // to byte and return continue. if (inRange(bite, 0x81, 0xFE)) { gb18030_third = bite; return null; } // 2. Prepend gb18030 second followed by byte to stream, set // gb18030 first and gb18030 second to 0x00, and return error. stream.prepend([gb18030_second, bite]); gb18030_first = 0x00; gb18030_second = 0x00; return decoderError(fatal); } // 5. If gb18030 first is not 0x00, run these substeps: if (gb18030_first !== 0x00) { // 1. If byte is in the range 0x30 to 0x39, set gb18030 second // to byte and return continue. if (inRange(bite, 0x30, 0x39)) { gb18030_second = bite; return null; } // 2. Let lead be gb18030 first, let pointer be null, and set // gb18030 first to 0x00. var lead = gb18030_first; var pointer = null; gb18030_first = 0x00; // 3. Let offset be 0x40 if byte is less than 0x7F and 0x41 // otherwise. var offset = bite < 0x7F ? 0x40 : 0x41; // 4. If byte is in the range 0x40 to 0x7E or 0x80 to 0xFE, // set pointer to (lead − 0x81) × 190 + (byte − offset). if (inRange(bite, 0x40, 0x7E) || inRange(bite, 0x80, 0xFE)) pointer = (lead - 0x81) * 190 + (bite - offset); // 5. Let code point be null if pointer is null and the index // code point for pointer in index gb18030 otherwise. code_point = pointer === null ? null : indexCodePointFor(pointer, index('gb18030')); // 6. If pointer is null, prepend byte to stream. if (pointer === null) stream.prepend(bite); // 7. If code point is null, return error. if (code_point === null) return decoderError(fatal); // 8. Return a code point whose value is code point. return code_point; } // 6. If byte is in the range 0x00 to 0x7F, return a code point // whose value is byte. if (inRange(bite, 0x00, 0x7F)) return bite; // 7. If byte is 0x80, return code point U+20AC. if (bite === 0x80) return 0x20AC; // 8. If byte is in the range 0x81 to 0xFE, set gb18030 first to // byte and return continue. if (inRange(bite, 0x81, 0xFE)) { gb18030_first = bite; return null; } // 9. Return error. return decoderError(fatal); }; } // 10.2.2 gb18030 encoder /** * @constructor * @implements {Encoder} * @param {{fatal: boolean}} options * @param {boolean=} gbk_flag */ function GB18030Encoder(options, gbk_flag) { var fatal = options.fatal; // gb18030's decoder has an associated gbk flag (initially unset). /** * @param {Stream} stream Input stream. * @param {number} code_point Next code point read from the stream. * @return {(number|!Array.)} Byte(s) to emit. */ this.handler = function(stream, code_point) { // 1. If code point is end-of-stream, return finished. if (code_point === end_of_stream) return finished; // 2. If code point is in the range U+0000 to U+007F, return a // byte whose value is code point. if (inRange(code_point, 0x0000, 0x007F)) { return code_point; } // 3. If the gbk flag is set and code point is U+20AC, return // byte 0x80. if (gbk_flag && code_point === 0x20AC) return 0x80; // 4. Let pointer be the index pointer for code point in index // gb18030. var pointer = indexPointerFor(code_point, index('gb18030')); // 5. If pointer is not null, run these substeps: if (pointer !== null) { // 1. Let lead be pointer / 190 + 0x81. var lead = div(pointer, 190) + 0x81; // 2. Let trail be pointer % 190. var trail = pointer % 190; // 3. Let offset be 0x40 if trail is less than 0x3F and 0x41 otherwise. var offset = trail < 0x3F ? 0x40 : 0x41; // 4. Return two bytes whose values are lead and trail + offset. return [lead, trail + offset]; } // 6. If gbk flag is set, return error with code point. if (gbk_flag) return encoderError(code_point); // 7. Set pointer to the index gb18030 ranges pointer for code // point. pointer = indexGB18030RangesPointerFor(code_point); // 8. Let byte1 be pointer / 10 / 126 / 10. var byte1 = div(div(div(pointer, 10), 126), 10); // 9. Set pointer to pointer − byte1 × 10 × 126 × 10. pointer = pointer - byte1 * 10 * 126 * 10; // 10. Let byte2 be pointer / 10 / 126. var byte2 = div(div(pointer, 10), 126); // 11. Set pointer to pointer − byte2 × 10 × 126. pointer = pointer - byte2 * 10 * 126; // 12. Let byte3 be pointer / 10. var byte3 = div(pointer, 10); // 13. Let byte4 be pointer − byte3 × 10. var byte4 = pointer - byte3 * 10; // 14. Return four bytes whose values are byte1 + 0x81, byte2 + // 0x30, byte3 + 0x81, byte4 + 0x30. return [byte1 + 0x81, byte2 + 0x30, byte3 + 0x81, byte4 + 0x30]; }; } /** @param {{fatal: boolean}} options */ encoders['gb18030'] = function(options) { return new GB18030Encoder(options); }; /** @param {{fatal: boolean}} options */ decoders['gb18030'] = function(options) { return new GB18030Decoder(options); }; // // 11. Legacy multi-byte Chinese (traditional) encodings // // 11.1 big5 /** * @constructor * @implements {Decoder} * @param {{fatal: boolean}} options */ function Big5Decoder(options) { var fatal = options.fatal; // big5's decoder has an associated big5 lead (initially 0x00). var /** @type {number} */ big5_lead = 0x00; /** * @param {Stream} stream The stream of bytes being decoded. * @param {number} bite The next byte read from the stream. * @return {?(number|!Array.)} The next code point(s) * decoded, or null if not enough data exists in the input * stream to decode a complete code point. */ this.handler = function(stream, bite) { // 1. If byte is end-of-stream and big5 lead is not 0x00, set // big5 lead to 0x00 and return error. if (bite === end_of_stream && big5_lead !== 0x00) { big5_lead = 0x00; return decoderError(fatal); } // 2. If byte is end-of-stream and big5 lead is 0x00, return // finished. if (bite === end_of_stream && big5_lead === 0x00) return finished; // 3. If big5 lead is not 0x00, let lead be big5 lead, let // pointer be null, set big5 lead to 0x00, and then run these // substeps: if (big5_lead !== 0x00) { var lead = big5_lead; var pointer = null; big5_lead = 0x00; // 1. Let offset be 0x40 if byte is less than 0x7F and 0x62 // otherwise. var offset = bite < 0x7F ? 0x40 : 0x62; // 2. If byte is in the range 0x40 to 0x7E or 0xA1 to 0xFE, // set pointer to (lead − 0x81) × 157 + (byte − offset). if (inRange(bite, 0x40, 0x7E) || inRange(bite, 0xA1, 0xFE)) pointer = (lead - 0x81) * 157 + (bite - offset); // 3. If there is a row in the table below whose first column // is pointer, return the two code points listed in its second // column // Pointer | Code points // --------+-------------- // 1133 | U+00CA U+0304 // 1135 | U+00CA U+030C // 1164 | U+00EA U+0304 // 1166 | U+00EA U+030C switch (pointer) { case 1133: return [0x00CA, 0x0304]; case 1135: return [0x00CA, 0x030C]; case 1164: return [0x00EA, 0x0304]; case 1166: return [0x00EA, 0x030C]; } // 4. Let code point be null if pointer is null and the index // code point for pointer in index big5 otherwise. var code_point = (pointer === null) ? null : indexCodePointFor(pointer, index('big5')); // 5. If pointer is null and byte is in the range 0x00 to // 0x7F, prepend byte to stream. if (pointer === null) stream.prepend(bite); // 6. If code point is null, return error. if (code_point === null) return decoderError(fatal); // 7. Return a code point whose value is code point. return code_point; } // 4. If byte is in the range 0x00 to 0x7F, return a code point // whose value is byte. if (inRange(bite, 0x00, 0x7F)) return bite; // 5. If byte is in the range 0x81 to 0xFE, set big5 lead to // byte and return continue. if (inRange(bite, 0x81, 0xFE)) { big5_lead = bite; return null; } // 6. Return error. return decoderError(fatal); }; } /** * @constructor * @implements {Encoder} * @param {{fatal: boolean}} options */ function Big5Encoder(options) { var fatal = options.fatal; /** * @param {Stream} stream Input stream. * @param {number} code_point Next code point read from the stream. * @return {(number|!Array.)} Byte(s) to emit. */ this.handler = function(stream, code_point) { // 1. If code point is end-of-stream, return finished. if (code_point === end_of_stream) return finished; // 2. If code point is in the range U+0000 to U+007F, return a // byte whose value is code point. if (inRange(code_point, 0x0000, 0x007F)) return code_point; // 3. Let pointer be the index pointer for code point in index // big5. var pointer = indexPointerFor(code_point, index('big5')); // 4. If pointer is null, return error with code point. if (pointer === null) return encoderError(code_point); // 5. Let lead be pointer / 157 + 0x81. var lead = div(pointer, 157) + 0x81; // 6. If lead is less than 0xA1, return error with code point. if (lead < 0xA1) return encoderError(code_point); // 7. Let trail be pointer % 157. var trail = pointer % 157; // 8. Let offset be 0x40 if trail is less than 0x3F and 0x62 // otherwise. var offset = trail < 0x3F ? 0x40 : 0x62; // Return two bytes whose values are lead and trail + offset. return [lead, trail + offset]; }; } /** @param {{fatal: boolean}} options */ encoders['big5'] = function(options) { return new Big5Encoder(options); }; /** @param {{fatal: boolean}} options */ decoders['big5'] = function(options) { return new Big5Decoder(options); }; // // 12. Legacy multi-byte Japanese encodings // // 12.1 euc-jp /** * @constructor * @implements {Decoder} * @param {{fatal: boolean}} options */ function EUCJPDecoder(options) { var fatal = options.fatal; // euc-jp's decoder has an associated euc-jp jis0212 flag // (initially unset) and euc-jp lead (initially 0x00). var /** @type {boolean} */ eucjp_jis0212_flag = false, /** @type {number} */ eucjp_lead = 0x00; /** * @param {Stream} stream The stream of bytes being decoded. * @param {number} bite The next byte read from the stream. * @return {?(number|!Array.)} The next code point(s) * decoded, or null if not enough data exists in the input * stream to decode a complete code point. */ this.handler = function(stream, bite) { // 1. If byte is end-of-stream and euc-jp lead is not 0x00, set // euc-jp lead to 0x00, and return error. if (bite === end_of_stream && eucjp_lead !== 0x00) { eucjp_lead = 0x00; return decoderError(fatal); } // 2. If byte is end-of-stream and euc-jp lead is 0x00, return // finished. if (bite === end_of_stream && eucjp_lead === 0x00) return finished; // 3. If euc-jp lead is 0x8E and byte is in the range 0xA1 to // 0xDF, set euc-jp lead to 0x00 and return a code point whose // value is 0xFF61 + byte − 0xA1. if (eucjp_lead === 0x8E && inRange(bite, 0xA1, 0xDF)) { eucjp_lead = 0x00; return 0xFF61 + bite - 0xA1; } // 4. If euc-jp lead is 0x8F and byte is in the range 0xA1 to // 0xFE, set the euc-jp jis0212 flag, set euc-jp lead to byte, // and return continue. if (eucjp_lead === 0x8F && inRange(bite, 0xA1, 0xFE)) { eucjp_jis0212_flag = true; eucjp_lead = bite; return null; } // 5. If euc-jp lead is not 0x00, let lead be euc-jp lead, set // euc-jp lead to 0x00, and run these substeps: if (eucjp_lead !== 0x00) { var lead = eucjp_lead; eucjp_lead = 0x00; // 1. Let code point be null. var code_point = null; // 2. If lead and byte are both in the range 0xA1 to 0xFE, set // code point to the index code point for (lead − 0xA1) × 94 + // byte − 0xA1 in index jis0208 if the euc-jp jis0212 flag is // unset and in index jis0212 otherwise. if (inRange(lead, 0xA1, 0xFE) && inRange(bite, 0xA1, 0xFE)) { code_point = indexCodePointFor( (lead - 0xA1) * 94 + (bite - 0xA1), index(!eucjp_jis0212_flag ? 'jis0208' : 'jis0212')); } // 3. Unset the euc-jp jis0212 flag. eucjp_jis0212_flag = false; // 4. If byte is not in the range 0xA1 to 0xFE, prepend byte // to stream. if (!inRange(bite, 0xA1, 0xFE)) stream.prepend(bite); // 5. If code point is null, return error. if (code_point === null) return decoderError(fatal); // 6. Return a code point whose value is code point. return code_point; } // 6. If byte is in the range 0x00 to 0x7F, return a code point // whose value is byte. if (inRange(bite, 0x00, 0x7F)) return bite; // 7. If byte is 0x8E, 0x8F, or in the range 0xA1 to 0xFE, set // euc-jp lead to byte and return continue. if (bite === 0x8E || bite === 0x8F || inRange(bite, 0xA1, 0xFE)) { eucjp_lead = bite; return null; } // 8. Return error. return decoderError(fatal); }; } /** * @constructor * @implements {Encoder} * @param {{fatal: boolean}} options */ function EUCJPEncoder(options) { var fatal = options.fatal; /** * @param {Stream} stream Input stream. * @param {number} code_point Next code point read from the stream. * @return {(number|!Array.)} Byte(s) to emit. */ this.handler = function(stream, code_point) { // 1. If code point is end-of-stream, return finished. if (code_point === end_of_stream) return finished; // 2. If code point is in the range U+0000 to U+007F, return a // byte whose value is code point. if (inRange(code_point, 0x0000, 0x007F)) return code_point; // 3. If code point is U+00A5, return byte 0x5C. if (code_point === 0x00A5) return 0x5C; // 4. If code point is U+203E, return byte 0x7E. if (code_point === 0x203E) return 0x7E; // 5. If code point is in the range U+FF61 to U+FF9F, return two // bytes whose values are 0x8E and code point − 0xFF61 + 0xA1. if (inRange(code_point, 0xFF61, 0xFF9F)) return [0x8E, code_point - 0xFF61 + 0xA1]; // 6. Let pointer be the index pointer for code point in index // jis0208. var pointer = indexPointerFor(code_point, index('jis0208')); // 7. If pointer is null, return error with code point. if (pointer === null) return encoderError(code_point); // 8. Let lead be pointer / 94 + 0xA1. var lead = div(pointer, 94) + 0xA1; // 9. Let trail be pointer % 94 + 0xA1. var trail = pointer % 94 + 0xA1; // 10. Return two bytes whose values are lead and trail. return [lead, trail]; }; } /** @param {{fatal: boolean}} options */ encoders['euc-jp'] = function(options) { return new EUCJPEncoder(options); }; /** @param {{fatal: boolean}} options */ decoders['euc-jp'] = function(options) { return new EUCJPDecoder(options); }; // 12.2 iso-2022-jp /** * @constructor * @implements {Decoder} * @param {{fatal: boolean}} options */ function ISO2022JPDecoder(options) { var fatal = options.fatal; /** @enum */ var states = { ASCII: 0, Roman: 1, Katakana: 2, LeadByte: 3, TrailByte: 4, EscapeStart: 5, Escape: 6 }; // iso-2022-jp's decoder has an associated iso-2022-jp decoder // state (initially ASCII), iso-2022-jp decoder output state // (initially ASCII), iso-2022-jp lead (initially 0x00), and // iso-2022-jp output flag (initially unset). var /** @type {number} */ iso2022jp_decoder_state = states.ASCII, /** @type {number} */ iso2022jp_decoder_output_state = states.ASCII, /** @type {number} */ iso2022jp_lead = 0x00, /** @type {boolean} */ iso2022jp_output_flag = false; /** * @param {Stream} stream The stream of bytes being decoded. * @param {number} bite The next byte read from the stream. * @return {?(number|!Array.)} The next code point(s) * decoded, or null if not enough data exists in the input * stream to decode a complete code point. */ this.handler = function(stream, bite) { // switching on iso-2022-jp decoder state: switch (iso2022jp_decoder_state) { default: case states.ASCII: // ASCII // Based on byte: // 0x1B if (bite === 0x1B) { // Set iso-2022-jp decoder state to escape start and return // continue. iso2022jp_decoder_state = states.EscapeStart; return null; } // 0x00 to 0x7F, excluding 0x0E, 0x0F, and 0x1B if (inRange(bite, 0x00, 0x7F) && bite !== 0x0E && bite !== 0x0F && bite !== 0x1B) { // Unset the iso-2022-jp output flag and return a code point // whose value is byte. iso2022jp_output_flag = false; return bite; } // end-of-stream if (bite === end_of_stream) { // Return finished. return finished; } // Otherwise // Unset the iso-2022-jp output flag and return error. iso2022jp_output_flag = false; return decoderError(fatal); case states.Roman: // Roman // Based on byte: // 0x1B if (bite === 0x1B) { // Set iso-2022-jp decoder state to escape start and return // continue. iso2022jp_decoder_state = states.EscapeStart; return null; } // 0x5C if (bite === 0x5C) { // Unset the iso-2022-jp output flag and return code point // U+00A5. iso2022jp_output_flag = false; return 0x00A5; } // 0x7E if (bite === 0x7E) { // Unset the iso-2022-jp output flag and return code point // U+203E. iso2022jp_output_flag = false; return 0x203E; } // 0x00 to 0x7F, excluding 0x0E, 0x0F, 0x1B, 0x5C, and 0x7E if (inRange(bite, 0x00, 0x7F) && bite !== 0x0E && bite !== 0x0F && bite !== 0x1B && bite !== 0x5C && bite !== 0x7E) { // Unset the iso-2022-jp output flag and return a code point // whose value is byte. iso2022jp_output_flag = false; return bite; } // end-of-stream if (bite === end_of_stream) { // Return finished. return finished; } // Otherwise // Unset the iso-2022-jp output flag and return error. iso2022jp_output_flag = false; return decoderError(fatal); case states.Katakana: // Katakana // Based on byte: // 0x1B if (bite === 0x1B) { // Set iso-2022-jp decoder state to escape start and return // continue. iso2022jp_decoder_state = states.EscapeStart; return null; } // 0x21 to 0x5F if (inRange(bite, 0x21, 0x5F)) { // Unset the iso-2022-jp output flag and return a code point // whose value is 0xFF61 + byte − 0x21. iso2022jp_output_flag = false; return 0xFF61 + bite - 0x21; } // end-of-stream if (bite === end_of_stream) { // Return finished. return finished; } // Otherwise // Unset the iso-2022-jp output flag and return error. iso2022jp_output_flag = false; return decoderError(fatal); case states.LeadByte: // Lead byte // Based on byte: // 0x1B if (bite === 0x1B) { // Set iso-2022-jp decoder state to escape start and return // continue. iso2022jp_decoder_state = states.EscapeStart; return null; } // 0x21 to 0x7E if (inRange(bite, 0x21, 0x7E)) { // Unset the iso-2022-jp output flag, set iso-2022-jp lead // to byte, iso-2022-jp decoder state to trail byte, and // return continue. iso2022jp_output_flag = false; iso2022jp_lead = bite; iso2022jp_decoder_state = states.TrailByte; return null; } // end-of-stream if (bite === end_of_stream) { // Return finished. return finished; } // Otherwise // Unset the iso-2022-jp output flag and return error. iso2022jp_output_flag = false; return decoderError(fatal); case states.TrailByte: // Trail byte // Based on byte: // 0x1B if (bite === 0x1B) { // Set iso-2022-jp decoder state to escape start and return // continue. iso2022jp_decoder_state = states.EscapeStart; return decoderError(fatal); } // 0x21 to 0x7E if (inRange(bite, 0x21, 0x7E)) { // 1. Set the iso-2022-jp decoder state to lead byte. iso2022jp_decoder_state = states.LeadByte; // 2. Let pointer be (iso-2022-jp lead − 0x21) × 94 + byte − 0x21. var pointer = (iso2022jp_lead - 0x21) * 94 + bite - 0x21; // 3. Let code point be the index code point for pointer in index jis0208. var code_point = indexCodePointFor(pointer, index('jis0208')); // 4. If code point is null, return error. if (code_point === null) return decoderError(fatal); // 5. Return a code point whose value is code point. return code_point; } // end-of-stream if (bite === end_of_stream) { // Set the iso-2022-jp decoder state to lead byte, prepend // byte to stream, and return error. iso2022jp_decoder_state = states.LeadByte; stream.prepend(bite); return decoderError(fatal); } // Otherwise // Set iso-2022-jp decoder state to lead byte and return // error. iso2022jp_decoder_state = states.LeadByte; return decoderError(fatal); case states.EscapeStart: // Escape start // 1. If byte is either 0x24 or 0x28, set iso-2022-jp lead to // byte, iso-2022-jp decoder state to escape, and return // continue. if (bite === 0x24 || bite === 0x28) { iso2022jp_lead = bite; iso2022jp_decoder_state = states.Escape; return null; } // 2. Prepend byte to stream. stream.prepend(bite); // 3. Unset the iso-2022-jp output flag, set iso-2022-jp // decoder state to iso-2022-jp decoder output state, and // return error. iso2022jp_output_flag = false; iso2022jp_decoder_state = iso2022jp_decoder_output_state; return decoderError(fatal); case states.Escape: // Escape // 1. Let lead be iso-2022-jp lead and set iso-2022-jp lead to // 0x00. var lead = iso2022jp_lead; iso2022jp_lead = 0x00; // 2. Let state be null. var state = null; // 3. If lead is 0x28 and byte is 0x42, set state to ASCII. if (lead === 0x28 && bite === 0x42) state = states.ASCII; // 4. If lead is 0x28 and byte is 0x4A, set state to Roman. if (lead === 0x28 && bite === 0x4A) state = states.Roman; // 5. If lead is 0x28 and byte is 0x49, set state to Katakana. if (lead === 0x28 && bite === 0x49) state = states.Katakana; // 6. If lead is 0x24 and byte is either 0x40 or 0x42, set // state to lead byte. if (lead === 0x24 && (bite === 0x40 || bite === 0x42)) state = states.LeadByte; // 7. If state is non-null, run these substeps: if (state !== null) { // 1. Set iso-2022-jp decoder state and iso-2022-jp decoder // output state to states. iso2022jp_decoder_state = iso2022jp_decoder_state = state; // 2. Let output flag be the iso-2022-jp output flag. var output_flag = iso2022jp_output_flag; // 3. Set the iso-2022-jp output flag. iso2022jp_output_flag = true; // 4. Return continue, if output flag is unset, and error // otherwise. return !output_flag ? null : decoderError(fatal); } // 8. Prepend lead and byte to stream. stream.prepend([lead, bite]); // 9. Unset the iso-2022-jp output flag, set iso-2022-jp // decoder state to iso-2022-jp decoder output state and // return error. iso2022jp_output_flag = false; iso2022jp_decoder_state = iso2022jp_decoder_output_state; return decoderError(fatal); } }; } /** * @constructor * @implements {Encoder} * @param {{fatal: boolean}} options */ function ISO2022JPEncoder(options) { var fatal = options.fatal; // iso-2022-jp's encoder has an associated iso-2022-jp encoder // state which is one of ASCII, Roman, and jis0208 (initially // ASCII). /** @enum */ var states = { ASCII: 0, Roman: 1, jis0208: 2 }; var /** @type {number} */ iso2022jp_state = states.ASCII; /** * @param {Stream} stream Input stream. * @param {number} code_point Next code point read from the stream. * @return {(number|!Array.)} Byte(s) to emit. */ this.handler = function(stream, code_point) { // 1. If code point is end-of-stream and iso-2022-jp encoder // state is not ASCII, prepend code point to stream, set // iso-2022-jp encoder state to ASCII, and return three bytes // 0x1B 0x28 0x42. if (code_point === end_of_stream && iso2022jp_state !== states.ASCII) { stream.prepend(code_point); return [0x1B, 0x28, 0x42]; } // 2. If code point is end-of-stream and iso-2022-jp encoder // state is ASCII, return finished. if (code_point === end_of_stream && iso2022jp_state === states.ASCII) return finished; // 3. If iso-2022-jp encoder state is ASCII and code point is in // the range U+0000 to U+007F, return a byte whose value is code // point. if (iso2022jp_state === states.ASCII && inRange(code_point, 0x0000, 0x007F)) return code_point; // 4. If iso-2022-jp encoder state is Roman and code point is in // the range U+0000 to U+007F, excluding U+005C and U+007E, or // is U+00A5 or U+203E, run these substeps: if (iso2022jp_state === states.Roman && inRange(code_point, 0x0000, 0x007F) && code_point !== 0x005C && code_point !== 0x007E) { // 1. If code point is in the range U+0000 to U+007F, return a // byte whose value is code point. if (inRange(code_point, 0x0000, 0x007F)) return code_point; // 2. If code point is U+00A5, return byte 0x5C. if (code_point === 0x00A5) return 0x5C; // 3. If code point is U+203E, return byte 0x7E. if (code_point === 0x203E) return 0x7E; } // 5. If code point is in the range U+0000 to U+007F, and // iso-2022-jp encoder state is not ASCII, prepend code point to // stream, set iso-2022-jp encoder state to ASCII, and return // three bytes 0x1B 0x28 0x42. if (inRange(code_point, 0x0000, 0x007F) && iso2022jp_state !== states.ASCII) { stream.prepend(code_point); iso2022jp_state = states.ASCII; return [0x1B, 0x28, 0x42]; } // 6. If code point is either U+00A5 or U+203E, and iso-2022-jp // encoder state is not Roman, prepend code point to stream, set // iso-2022-jp encoder state to Roman, and return three bytes // 0x1B 0x28 0x4A. if ((code_point === 0x00A5 || code_point === 0x203E) && iso2022jp_state !== states.Roman) { stream.prepend(code_point); iso2022jp_state = states.Roman; return [0x1B, 0x28, 0x4A]; } // 7. Let pointer be the index pointer for code point in index // jis0208. var pointer = indexPointerFor(code_point, index('jis0208')); // 8. If pointer is null, return error with code point. if (pointer === null) return encoderError(code_point); // 9. If iso-2022-jp encoder state is not jis0208, prepend code // point to stream, set iso-2022-jp encoder state to jis0208, // and return three bytes 0x1B 0x24 0x42. if (iso2022jp_state !== states.jis0208) { stream.prepend(code_point); iso2022jp_state = states.jis0208; return [0x1B, 0x24, 0x42]; } // 10. Let lead be pointer / 94 + 0x21. var lead = div(pointer, 94) + 0x21; // 11. Let trail be pointer % 94 + 0x21. var trail = pointer % 94 + 0x21; // 12. Return two bytes whose values are lead and trail. return [lead, trail]; }; } /** @param {{fatal: boolean}} options */ encoders['iso-2022-jp'] = function(options) { return new ISO2022JPEncoder(options); }; /** @param {{fatal: boolean}} options */ decoders['iso-2022-jp'] = function(options) { return new ISO2022JPDecoder(options); }; // 12.3 shift_jis /** * @constructor * @implements {Decoder} * @param {{fatal: boolean}} options */ function ShiftJISDecoder(options) { var fatal = options.fatal; // shift_jis's decoder has an associated shift_jis lead (initially // 0x00). var /** @type {number} */ shiftjis_lead = 0x00; /** * @param {Stream} stream The stream of bytes being decoded. * @param {number} bite The next byte read from the stream. * @return {?(number|!Array.)} The next code point(s) * decoded, or null if not enough data exists in the input * stream to decode a complete code point. */ this.handler = function(stream, bite) { // 1. If byte is end-of-stream and shift_jis lead is not 0x00, // set shift_jis lead to 0x00 and return error. if (bite === end_of_stream && shiftjis_lead !== 0x00) { shiftjis_lead = 0x00; return decoderError(fatal); } // 2. If byte is end-of-stream and shift_jis lead is 0x00, // return finished. if (bite === end_of_stream && shiftjis_lead === 0x00) return finished; // 3. If shift_jis lead is not 0x00, let lead be shift_jis lead, // let pointer be null, set shift_jis lead to 0x00, and then run // these substeps: if (shiftjis_lead !== 0x00) { var lead = shiftjis_lead; var pointer = null; shiftjis_lead = 0x00; // 1. Let offset be 0x40, if byte is less than 0x7F, and 0x41 // otherwise. var offset = (bite < 0x7F) ? 0x40 : 0x41; // 2. Let lead offset be 0x81, if lead is less than 0xA0, and // 0xC1 otherwise. var lead_offset = (lead < 0xA0) ? 0x81 : 0xC1; // 3. If byte is in the range 0x40 to 0x7E or 0x80 to 0xFC, // set pointer to (lead − lead offset) × 188 + byte − offset. if (inRange(bite, 0x40, 0x7E) || inRange(bite, 0x80, 0xFC)) pointer = (lead - lead_offset) * 188 + bite - offset; // 4. Let code point be null, if pointer is null, and the // index code point for pointer in index jis0208 otherwise. var code_point = (pointer === null) ? null : indexCodePointFor(pointer, index('jis0208')); // 5. If code point is null and pointer is in the range 8836 // to 10528, return a code point whose value is 0xE000 + // pointer − 8836. if (code_point === null && pointer !== null && inRange(pointer, 8836, 10528)) return 0xE000 + pointer - 8836; // 6. If pointer is null, prepend byte to stream. if (pointer === null) stream.prepend(bite); // 7. If code point is null, return error. if (code_point === null) return decoderError(fatal); // 8. Return a code point whose value is code point. return code_point; } // 4. If byte is in the range 0x00 to 0x80, return a code point // whose value is byte. if (inRange(bite, 0x00, 0x80)) return bite; // 5. If byte is in the range 0xA1 to 0xDF, return a code point // whose value is 0xFF61 + byte − 0xA1. if (inRange(bite, 0xA1, 0xDF)) return 0xFF61 + bite - 0xA1; // 6. If byte is in the range 0x81 to 0x9F or 0xE0 to 0xFC, set // shift_jis lead to byte and return continue. if (inRange(bite, 0x81, 0x9F) || inRange(bite, 0xE0, 0xFC)) { shiftjis_lead = bite; return null; } // 7. Return error. return decoderError(fatal); }; } /** * @constructor * @implements {Encoder} * @param {{fatal: boolean}} options */ function ShiftJISEncoder(options) { var fatal = options.fatal; /** * @param {Stream} stream Input stream. * @param {number} code_point Next code point read from the stream. * @return {(number|!Array.)} Byte(s) to emit. */ this.handler = function(stream, code_point) { // 1. If code point is end-of-stream, return finished. if (code_point === end_of_stream) return finished; // 2. If code point is in the range U+0000 to U+0080, return a // byte whose value is code point. if (inRange(code_point, 0x0000, 0x0080)) return code_point; // 3. If code point is U+00A5, return byte 0x5C. if (code_point === 0x00A5) return 0x5C; // 4. If code point is U+203E, return byte 0x7E. if (code_point === 0x203E) return 0x7E; // 5. If code point is in the range U+FF61 to U+FF9F, return a // byte whose value is code point − 0xFF61 + 0xA1. if (inRange(code_point, 0xFF61, 0xFF9F)) return code_point - 0xFF61 + 0xA1; // 6. Let pointer be the index shift_jis pointer for code point. var pointer = indexShiftJISPointerFor(code_point); // 7. If pointer is null, return error with code point. if (pointer === null) return encoderError(code_point); // 8. Let lead be pointer / 188. var lead = div(pointer, 188); // 9. Let lead offset be 0x81, if lead is less than 0x1F, and // 0xC1 otherwise. var lead_offset = (lead < 0x1F) ? 0x81 : 0xC1; // 10. Let trail be pointer % 188. var trail = pointer % 188; // 11. Let offset be 0x40, if trail is less than 0x3F, and 0x41 // otherwise. var offset = (trail < 0x3F) ? 0x40 : 0x41; // 12. Return two bytes whose values are lead + lead offset and // trail + offset. return [lead + lead_offset, trail + offset]; }; } /** @param {{fatal: boolean}} options */ encoders['shift_jis'] = function(options) { return new ShiftJISEncoder(options); }; /** @param {{fatal: boolean}} options */ decoders['shift_jis'] = function(options) { return new ShiftJISDecoder(options); }; // // 13. Legacy multi-byte Korean encodings // // 13.1 euc-kr /** * @constructor * @implements {Decoder} * @param {{fatal: boolean}} options */ function EUCKRDecoder(options) { var fatal = options.fatal; // euc-kr's decoder has an associated euc-kr lead (initially 0x00). var /** @type {number} */ euckr_lead = 0x00; /** * @param {Stream} stream The stream of bytes being decoded. * @param {number} bite The next byte read from the stream. * @return {?(number|!Array.)} The next code point(s) * decoded, or null if not enough data exists in the input * stream to decode a complete code point. */ this.handler = function(stream, bite) { // 1. If byte is end-of-stream and euc-kr lead is not 0x00, set // euc-kr lead to 0x00 and return error. if (bite === end_of_stream && euckr_lead !== 0) { euckr_lead = 0x00; return decoderError(fatal); } // 2. If byte is end-of-stream and euc-kr lead is 0x00, return // finished. if (bite === end_of_stream && euckr_lead === 0) return finished; // 3. If euc-kr lead is not 0x00, let lead be euc-kr lead, let // pointer be null, set euc-kr lead to 0x00, and then run these // substeps: if (euckr_lead !== 0x00) { var lead = euckr_lead; var pointer = null; euckr_lead = 0x00; // 1. If byte is in the range 0x41 to 0xFE, set pointer to // (lead − 0x81) × 190 + (byte − 0x41). if (inRange(bite, 0x41, 0xFE)) pointer = (lead - 0x81) * 190 + (bite - 0x41); // 2. Let code point be null, if pointer is null, and the // index code point for pointer in index euc-kr otherwise. var code_point = (pointer === null) ? null : indexCodePointFor(pointer, index('euc-kr')); // 3. If pointer is null and byte is in the range 0x00 to // 0x7F, prepend byte to stream. if (pointer === null && inRange(bite, 0x00, 0x7F)) stream.prepend(bite); // 4. If code point is null, return error. if (code_point === null) return decoderError(fatal); // 5. Return a code point whose value is code point. return code_point; } // 4. If byte is in the range 0x00 to 0x7F, return a code point // whose value is byte. if (inRange(bite, 0x00, 0x7F)) return bite; // 5. If byte is in the range 0x81 to 0xFE, set euc-kr lead to // byte and return continue. if (inRange(bite, 0x81, 0xFE)) { euckr_lead = bite; return null; } // 6. Return error. return decoderError(fatal); }; } /** * @constructor * @implements {Encoder} * @param {{fatal: boolean}} options */ function EUCKREncoder(options) { var fatal = options.fatal; /** * @param {Stream} stream Input stream. * @param {number} code_point Next code point read from the stream. * @return {(number|!Array.)} Byte(s) to emit. */ this.handler = function(stream, code_point) { // 1. If code point is end-of-stream, return finished. if (code_point === end_of_stream) return finished; // 2. If code point is in the range U+0000 to U+007F, return a // byte whose value is code point. if (inRange(code_point, 0x0000, 0x007F)) return code_point; // 3. Let pointer be the index pointer for code point in index // euc-kr. var pointer = indexPointerFor(code_point, index('euc-kr')); // 4. If pointer is null, return error with code point. if (pointer === null) return encoderError(code_point); // 5. Let lead be pointer / 190 + 0x81. var lead = div(pointer, 190) + 0x81; // 6. Let trail be pointer % 190 + 0x41. var trail = (pointer % 190) + 0x41; // 7. Return two bytes whose values are lead and trail. return [lead, trail]; }; } /** @param {{fatal: boolean}} options */ encoders['euc-kr'] = function(options) { return new EUCKREncoder(options); }; /** @param {{fatal: boolean}} options */ decoders['euc-kr'] = function(options) { return new EUCKRDecoder(options); }; // // 14. Legacy miscellaneous encodings // // 14.1 replacement // Not needed - API throws RangeError // 14.2 utf-16 /** * @param {number} code_unit * @param {boolean} utf16be * @return {!Array.} bytes */ function convertCodeUnitToBytes(code_unit, utf16be) { // 1. Let byte1 be code unit >> 8. var byte1 = code_unit >> 8; // 2. Let byte2 be code unit & 0x00FF. var byte2 = code_unit & 0x00FF; // 3. Then return the bytes in order: // utf-16be flag is set: byte1, then byte2. if (utf16be) return [byte1, byte2]; // utf-16be flag is unset: byte2, then byte1. return [byte2, byte1]; } /** * @constructor * @implements {Decoder} * @param {boolean} utf16_be True if big-endian, false if little-endian. * @param {{fatal: boolean}} options */ function UTF16Decoder(utf16_be, options) { var fatal = options.fatal; var /** @type {?number} */ utf16_lead_byte = null, /** @type {?number} */ utf16_lead_surrogate = null; /** * @param {Stream} stream The stream of bytes being decoded. * @param {number} bite The next byte read from the stream. * @return {?(number|!Array.)} The next code point(s) * decoded, or null if not enough data exists in the input * stream to decode a complete code point. */ this.handler = function(stream, bite) { // 1. If byte is end-of-stream and either utf-16 lead byte or // utf-16 lead surrogate is not null, set utf-16 lead byte and // utf-16 lead surrogate to null, and return error. if (bite === end_of_stream && (utf16_lead_byte !== null || utf16_lead_surrogate !== null)) { return decoderError(fatal); } // 2. If byte is end-of-stream and utf-16 lead byte and utf-16 // lead surrogate are null, return finished. if (bite === end_of_stream && utf16_lead_byte === null && utf16_lead_surrogate === null) { return finished; } // 3. If utf-16 lead byte is null, set utf-16 lead byte to byte // and return continue. if (utf16_lead_byte === null) { utf16_lead_byte = bite; return null; } // 4. Let code unit be the result of: var code_unit; if (utf16_be) { // utf-16be decoder flag is set // (utf-16 lead byte << 8) + byte. code_unit = (utf16_lead_byte << 8) + bite; } else { // utf-16be decoder flag is unset // (byte << 8) + utf-16 lead byte. code_unit = (bite << 8) + utf16_lead_byte; } // Then set utf-16 lead byte to null. utf16_lead_byte = null; // 5. If utf-16 lead surrogate is not null, let lead surrogate // be utf-16 lead surrogate, set utf-16 lead surrogate to null, // and then run these substeps: if (utf16_lead_surrogate !== null) { var lead_surrogate = utf16_lead_surrogate; utf16_lead_surrogate = null; // 1. If code unit is in the range U+DC00 to U+DFFF, return a // code point whose value is 0x10000 + ((lead surrogate − // 0xD800) << 10) + (code unit − 0xDC00). if (inRange(code_unit, 0xDC00, 0xDFFF)) { return 0x10000 + (lead_surrogate - 0xD800) * 0x400 + (code_unit - 0xDC00); } // 2. Prepend the sequence resulting of converting code unit // to bytes using utf-16be decoder flag to stream and return // error. stream.prepend(convertCodeUnitToBytes(code_unit, utf16_be)); return decoderError(fatal); } // 6. If code unit is in the range U+D800 to U+DBFF, set utf-16 // lead surrogate to code unit and return continue. if (inRange(code_unit, 0xD800, 0xDBFF)) { utf16_lead_surrogate = code_unit; return null; } // 7. If code unit is in the range U+DC00 to U+DFFF, return // error. if (inRange(code_unit, 0xDC00, 0xDFFF)) return decoderError(fatal); // 8. Return code point code unit. return code_unit; }; } /** * @constructor * @implements {Encoder} * @param {boolean} utf16_be True if big-endian, false if little-endian. * @param {{fatal: boolean}} options */ function UTF16Encoder(utf16_be, options) { var fatal = options.fatal; /** * @param {Stream} stream Input stream. * @param {number} code_point Next code point read from the stream. * @return {(number|!Array.)} Byte(s) to emit. */ this.handler = function(stream, code_point) { // 1. If code point is end-of-stream, return finished. if (code_point === end_of_stream) return finished; // 2. If code point is in the range U+0000 to U+FFFF, return the // sequence resulting of converting code point to bytes using // utf-16be encoder flag. if (inRange(code_point, 0x0000, 0xFFFF)) return convertCodeUnitToBytes(code_point, utf16_be); // 3. Let lead be ((code point − 0x10000) >> 10) + 0xD800, // converted to bytes using utf-16be encoder flag. var lead = convertCodeUnitToBytes( ((code_point - 0x10000) >> 10) + 0xD800, utf16_be); // 4. Let trail be ((code point − 0x10000) & 0x3FF) + 0xDC00, // converted to bytes using utf-16be encoder flag. var trail = convertCodeUnitToBytes( ((code_point - 0x10000) & 0x3FF) + 0xDC00, utf16_be); // 5. Return a byte sequence of lead followed by trail. return lead.concat(trail); }; } // 14.3 utf-16be /** @param {{fatal: boolean}} options */ encoders['utf-16be'] = function(options) { return new UTF16Encoder(true, options); }; /** @param {{fatal: boolean}} options */ decoders['utf-16be'] = function(options) { return new UTF16Decoder(true, options); }; // 14.4 utf-16le /** @param {{fatal: boolean}} options */ encoders['utf-16le'] = function(options) { return new UTF16Encoder(false, options); }; /** @param {{fatal: boolean}} options */ decoders['utf-16le'] = function(options) { return new UTF16Decoder(false, options); }; // 14.5 x-user-defined /** * @constructor * @implements {Decoder} * @param {{fatal: boolean}} options */ function XUserDefinedDecoder(options) { var fatal = options.fatal; /** * @param {Stream} stream The stream of bytes being decoded. * @param {number} bite The next byte read from the stream. * @return {?(number|!Array.)} The next code point(s) * decoded, or null if not enough data exists in the input * stream to decode a complete code point. */ this.handler = function(stream, bite) { // 1. If byte is end-of-stream, return finished. if (bite === end_of_stream) return finished; // 2. If byte is in the range 0x00 to 0x7F, return a code point // whose value is byte. if (inRange(bite, 0x00, 0x7F)) return bite; // 3. Return a code point whose value is 0xF780 + byte − 0x80. return 0xF780 + bite - 0x80; }; } /** * @constructor * @implements {Encoder} * @param {{fatal: boolean}} options */ function XUserDefinedEncoder(options) { var fatal = options.fatal; /** * @param {Stream} stream Input stream. * @param {number} code_point Next code point read from the stream. * @return {(number|!Array.)} Byte(s) to emit. */ this.handler = function(stream, code_point) { // 1.If code point is end-of-stream, return finished. if (code_point === end_of_stream) return finished; // 2. If code point is in the range U+0000 to U+007F, return a // byte whose value is code point. if (inRange(code_point, 0x0000, 0x007F)) return code_point; // 3. If code point is in the range U+F780 to U+F7FF, return a // byte whose value is code point − 0xF780 + 0x80. if (inRange(code_point, 0xF780, 0xF7FF)) return code_point - 0xF780 + 0x80; // 4. Return error with code point. return encoderError(code_point); }; } /** @param {{fatal: boolean}} options */ encoders['x-user-defined'] = function(options) { return new XUserDefinedEncoder(options); }; /** @param {{fatal: boolean}} options */ decoders['x-user-defined'] = function(options) { return new XUserDefinedDecoder(options); }; if (!('TextEncoder' in global)) global['TextEncoder'] = TextEncoder; if (!('TextDecoder' in global)) global['TextDecoder'] = TextDecoder; global['CustomTextEncoder'] = TextEncoder; global['CustomTextDecoder'] = TextDecoder; }(this));